X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Ffile.c;h=34bb7f71d99404e8b91a4bfb0654505d1e537318;hb=815409d22df870ea0b0d86f2a3bf33c35bcef55c;hp=f6fbcb49faf716e033a362b28598b87fe16843ce;hpb=9f3acc3140444a900ab280de942291959f0f615d;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/file.c b/fs/file.c index f6fbcb4..34bb7f7 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,9 +6,11 @@ * Manage the dynamic fd arrays in the process files_struct. */ +#include #include #include #include +#include #include #include #include @@ -26,6 +28,8 @@ struct fdtable_defer { }; int sysctl_nr_open __read_mostly = 1024*1024; +int sysctl_nr_open_min = BITS_PER_LONG; +int sysctl_nr_open_max = 1024 * 1024; /* raised later */ /* * We use this list to defer free fdtables that have vmalloced @@ -119,8 +123,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) unsigned int cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); - if (ofdt->max_fds == 0) - return; cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); @@ -150,8 +152,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > sysctl_nr_open) - nr = sysctl_nr_open; + /* + * Note that this can drive nr *below* what we had passed if sysctl_nr_open + * had been set lower between the check in expand_files() and here. Deal + * with that in caller, it's cheaper that way. + * + * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise + * bitmaps handling below becomes unpleasant, to put it mildly... + */ + if (unlikely(nr > sysctl_nr_open)) + nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -200,6 +210,16 @@ static int expand_fdtable(struct files_struct *files, int nr) if (!new_fdt) return -ENOMEM; /* + * extremely unlikely race - sysctl_nr_open decreased between the check in + * caller and alloc_fdtable(). Cheaper to catch it here... + */ + if (unlikely(new_fdt->max_fds <= nr)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + return -EMFILE; + } + /* * Check again since another task may have expanded the fd table while * we dropped the lock */ @@ -232,9 +252,18 @@ int expand_files(struct files_struct *files, int nr) struct fdtable *fdt; fdt = files_fdtable(files); + + /* + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ + if (nr >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + /* Do we need to expand? */ if (nr < fdt->max_fds) return 0; + /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; @@ -243,6 +272,139 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static int count_open_files(struct fdtable *fdt) +{ + int size = fdt->max_fds; + int i; + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (fdt->open_fds->fds_bits[--i]) + break; + } + i = (i+1) * 8 * sizeof(long); + return i; +} + +/* + * Allocate a new files structure and copy contents from the + * passed in files structure. + * errorp will be valid only when the returned files_struct is NULL. + */ +struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) +{ + struct files_struct *newf; + struct file **old_fds, **new_fds; + int open_files, size, i; + struct fdtable *old_fdt, *new_fdt; + + *errorp = -ENOMEM; + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); + if (!newf) + goto out; + + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + new_fdt = &newf->fdtab; + new_fdt->max_fds = NR_OPEN_DEFAULT; + new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + new_fdt->open_fds = (fd_set *)&newf->open_fds_init; + new_fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&new_fdt->rcu); + new_fdt->next = NULL; + + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + open_files = count_open_files(old_fdt); + + /* + * Check whether we need to allocate a larger fd array and fd set. + */ + while (unlikely(open_files > new_fdt->max_fds)) { + spin_unlock(&oldf->file_lock); + + if (new_fdt != &newf->fdtab) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + } + + new_fdt = alloc_fdtable(open_files - 1); + if (!new_fdt) { + *errorp = -ENOMEM; + goto out_release; + } + + /* beyond sysctl_nr_open; nothing to do */ + if (unlikely(new_fdt->max_fds < open_files)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + *errorp = -EMFILE; + goto out_release; + } + + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + open_files = count_open_files(old_fdt); + } + + old_fds = old_fdt->fd; + new_fds = new_fdt->fd; + + memcpy(new_fdt->open_fds->fds_bits, + old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, + old_fdt->close_on_exec->fds_bits, open_files/8); + + for (i = open_files; i != 0; i--) { + struct file *f = *old_fds++; + if (f) { + get_file(f); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet + * instantiated in the files array if a sibling thread + * is partway through open(). So make sure that this + * fd is available to the new process. + */ + FD_CLR(open_files - i, new_fdt->open_fds); + } + rcu_assign_pointer(*new_fds++, f); + } + spin_unlock(&oldf->file_lock); + + /* compute the remainder to be cleared */ + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); + + /* This is long word aligned thus could use a optimized version */ + memset(new_fds, 0, size); + + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; + int start = open_files / (8 * sizeof(unsigned long)); + + memset(&new_fdt->open_fds->fds_bits[start], 0, left); + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + } + + rcu_assign_pointer(newf->fdt, new_fdt); + + return newf; + +out_release: + kmem_cache_free(files_cachep, newf); +out: + return NULL; +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); @@ -256,4 +418,79 @@ void __init files_defer_init(void) int i; for_each_possible_cpu(i) fdtable_defer_list_init(i); + sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; +} + +struct files_struct init_files = { + .count = ATOMIC_INIT(1), + .fdt = &init_files.fdtab, + .fdtab = { + .max_fds = NR_OPEN_DEFAULT, + .fd = &init_files.fd_array[0], + .close_on_exec = (fd_set *)&init_files.close_on_exec_init, + .open_fds = (fd_set *)&init_files.open_fds_init, + .rcu = RCU_HEAD_INIT, + }, + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), +}; + +/* + * allocate a file descriptor, mark it busy. + */ +int alloc_fd(unsigned start, unsigned flags) +{ + struct files_struct *files = current->files; + unsigned int fd; + int error; + struct fdtable *fdt; + + spin_lock(&files->file_lock); +repeat: + fdt = files_fdtable(files); + fd = start; + if (fd < files->next_fd) + fd = files->next_fd; + + if (fd < fdt->max_fds) + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fds, fd); + + error = expand_files(files, fd); + if (error < 0) + goto out; + + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + + if (start <= files->next_fd) + files->next_fd = fd + 1; + + FD_SET(fd, fdt->open_fds); + if (flags & O_CLOEXEC) + FD_SET(fd, fdt->close_on_exec); + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; +#if 1 + /* Sanity check */ + if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { + printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); + rcu_assign_pointer(fdt->fd[fd], NULL); + } +#endif + +out: + spin_unlock(&files->file_lock); + return error; +} + +int get_unused_fd(void) +{ + return alloc_fd(0, 0); } +EXPORT_SYMBOL(get_unused_fd);