X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Ffcntl.c;h=ae413086db978093123b2a59947d78e23a02a693;hb=444528b3e614f7f2391488d9bca8e0b872db909b;hp=bfecc623808358013379e400ccb4117bfbc8a57e;hpb=badf16621c1f9d1ac753be056fce11b43d6e0be5;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/fcntl.c b/fs/fcntl.c index bfecc62..ae41308 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -9,19 +9,22 @@ #include #include #include +#include +#include #include -#include #include #include #include #include #include +#include +#include #include #include #include -void fastcall set_close_on_exec(unsigned int fd, int flag) +void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; struct fdtable *fdt; @@ -34,179 +37,128 @@ void fastcall set_close_on_exec(unsigned int fd, int flag) spin_unlock(&files->file_lock); } -static inline int get_close_on_exec(unsigned int fd) +static int get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; struct fdtable *fdt; int res; - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); res = FD_ISSET(fd, fdt->close_on_exec); - spin_unlock(&files->file_lock); + rcu_read_unlock(); return res; } -/* - * locate_fd finds a free file descriptor in the open_fds fdset, - * expanding the fd arrays if necessary. Must be called with the - * file_lock held for write. - */ - -static int locate_fd(struct files_struct *files, - struct file *file, unsigned int orig_start) -{ - unsigned int newfd; - unsigned int start; - int error; - struct fdtable *fdt; - - error = -EINVAL; - if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - - fdt = files_fdtable(files); -repeat: - /* - * Someone might have closed fd's in the range - * orig_start..fdt->next_fd - */ - start = orig_start; - if (start < fdt->next_fd) - start = fdt->next_fd; - - newfd = start; - if (start < fdt->max_fdset) { - newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fdset, start); - } - - error = -EMFILE; - if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - - error = expand_files(files, newfd); - if (error < 0) - goto out; - - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; - - if (start <= fdt->next_fd) - fdt->next_fd = newfd + 1; - - error = newfd; - -out: - return error; -} - -static int dupfd(struct file *file, unsigned int start) -{ - struct files_struct * files = current->files; - struct fdtable *fdt; - int fd; - - spin_lock(&files->file_lock); - fd = locate_fd(files, file, start); - if (fd >= 0) { - /* locate_fd() may have expanded fdtable, load the ptr */ - fdt = files_fdtable(files); - FD_SET(fd, fdt->open_fds); - FD_CLR(fd, fdt->close_on_exec); - spin_unlock(&files->file_lock); - fd_install(fd, file); - } else { - spin_unlock(&files->file_lock); - fput(file); - } - - return fd; -} - -asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { int err = -EBADF; struct file * file, *tofree; struct files_struct * files = current->files; struct fdtable *fdt; - spin_lock(&files->file_lock); - if (!(file = fcheck(oldfd))) - goto out_unlock; - err = newfd; - if (newfd == oldfd) - goto out_unlock; - err = -EBADF; - if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out_unlock; - get_file(file); /* We are now finished with oldfd */ - - err = expand_files(files, newfd); - if (err < 0) - goto out_fput; + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; - /* To avoid races with open() and dup(), we will mark the fd as - * in-use in the open-file bitmap throughout the entire dup2() - * process. This is quite safe: do_close() uses the fd array - * entry, not the bitmap, to decide what work needs to be - * done. --sct */ - /* Doesn't work. open() might be there first. --AV */ + if (unlikely(oldfd == newfd)) + return -EINVAL; - /* Yes. It's a race. In user space. Nothing sane to do */ + spin_lock(&files->file_lock); + err = expand_files(files, newfd); + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; + if (unlikely(err < 0)) { + if (err == -EMFILE) + goto Ebadf; + goto out_unlock; + } + /* + * We need to detect attempts to do dup2() over allocated but still + * not finished descriptor. NB: OpenBSD avoids that at the price of + * extra work in their equivalent of fget() - they insert struct + * file immediately after grabbing descriptor, mark it larval if + * more work (e.g. actual opening) is needed and make sure that + * fget() treats larval files as absent. Potentially interesting, + * but while extra work in fget() is trivial, locking implications + * and amount of surgery on open()-related paths in VFS are not. + * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" + * deadlocks in rather amusing ways, AFAICS. All of that is out of + * scope of POSIX or SUS, since neither considers shared descriptor + * tables and this condition does not arise without those. + */ err = -EBUSY; fdt = files_fdtable(files); tofree = fdt->fd[newfd]; if (!tofree && FD_ISSET(newfd, fdt->open_fds)) - goto out_fput; - - fdt->fd[newfd] = file; + goto out_unlock; + get_file(file); + rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); - FD_CLR(newfd, fdt->close_on_exec); + if (flags & O_CLOEXEC) + FD_SET(newfd, fdt->close_on_exec); + else + FD_CLR(newfd, fdt->close_on_exec); spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); - err = newfd; -out: - return err; + + return newfd; + +Ebadf: + err = -EBADF; out_unlock: spin_unlock(&files->file_lock); - goto out; + return err; +} -out_fput: - spin_unlock(&files->file_lock); - fput(file); - goto out; +SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) +{ + if (unlikely(newfd == oldfd)) { /* corner case */ + struct files_struct *files = current->files; + int retval = oldfd; + + rcu_read_lock(); + if (!fcheck_files(files, oldfd)) + retval = -EBADF; + rcu_read_unlock(); + return retval; + } + return sys_dup3(oldfd, newfd, 0); } -asmlinkage long sys_dup(unsigned int fildes) +SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; - struct file * file = fget(fildes); - - if (file) - ret = dupfd(file, 0); + struct file *file = fget(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } return ret; } -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) { - struct inode * inode = filp->f_dentry->d_inode; + struct inode * inode = filp->f_path.dentry->d_inode; int error = 0; - /* O_APPEND cannot be cleared if the file is marked as append-only */ - if (!(arg & O_APPEND) && IS_APPEND(inode)) + /* + * O_APPEND cannot be cleared if the file is marked as append-only + * and the file is open for write. + */ + if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; /* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) - if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + if (!is_owner_or_cap(inode)) return -EPERM; /* required for strict SunOS emulation */ @@ -225,50 +177,90 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (error) return error; - lock_kernel(); - if ((arg ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); - if (error < 0) - goto out; - } + /* + * ->fasync() is responsible for setting the FASYNC bit. + */ + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && + filp->f_op->fasync) { + error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); + if (error < 0) + goto out; + if (error > 0) + error = 0; } - + spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); + spin_unlock(&filp->f_lock); + out: - unlock_kernel(); return error; } -static void f_modown(struct file *filp, unsigned long pid, - uid_t uid, uid_t euid, int force) +static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, + int force) { write_lock_irq(&filp->f_owner.lock); if (force || !filp->f_owner.pid) { - filp->f_owner.pid = pid; - filp->f_owner.uid = uid; - filp->f_owner.euid = euid; + put_pid(filp->f_owner.pid); + filp->f_owner.pid = get_pid(pid); + filp->f_owner.pid_type = type; + + if (pid) { + const struct cred *cred = current_cred(); + filp->f_owner.uid = cred->uid; + filp->f_owner.euid = cred->euid; + } } write_unlock_irq(&filp->f_owner.lock); } -int f_setown(struct file *filp, unsigned long arg, int force) +int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, + int force) { int err; - + err = security_file_set_fowner(filp); if (err) return err; - f_modown(filp, arg, current->uid, current->euid, force); + f_modown(filp, pid, type, force); return 0; } +EXPORT_SYMBOL(__f_setown); +int f_setown(struct file *filp, unsigned long arg, int force) +{ + enum pid_type type; + struct pid *pid; + int who = arg; + int result; + type = PIDTYPE_PID; + if (who < 0) { + type = PIDTYPE_PGID; + who = -who; + } + rcu_read_lock(); + pid = find_vpid(who); + result = __f_setown(filp, pid, type, force); + rcu_read_unlock(); + return result; +} EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { - f_modown(filp, 0, 0, 0, 1); + f_modown(filp, NULL, PIDTYPE_PID, 1); +} + +pid_t f_getown(struct file *filp) +{ + pid_t pid; + read_lock(&filp->f_owner.lock); + pid = pid_vnr(filp->f_owner.pid); + if (filp->f_owner.pid_type == PIDTYPE_PGID) + pid = -pid; + read_unlock(&filp->f_owner.lock); + return pid; } static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, @@ -278,8 +270,14 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, switch (cmd) { case F_DUPFD: - get_file(filp); - err = dupfd(filp, arg); + case F_DUPFD_CLOEXEC: + if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + break; + err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); + if (err >= 0) { + get_file(filp); + fd_install(err, filp); + } break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; @@ -309,7 +307,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, * current syscall conventions, the only way * to fix this will be in libc. */ - err = filp->f_owner.pid; + err = f_getown(filp); force_successful_syscall_return(); break; case F_SETOWN: @@ -341,7 +339,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, return err; } -asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; long err = -EBADF; @@ -364,7 +362,8 @@ out: } #if BITS_PER_LONG == 32 -asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + unsigned long, arg) { struct file * filp; long err; @@ -402,7 +401,7 @@ out: /* Table to convert sigio signal codes into poll band bitmaps */ -static long band_table[NSIGPOLL] = { +static const long band_table[NSIGPOLL] = { POLLIN | POLLRDNORM, /* POLL_IN */ POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ @@ -414,21 +413,34 @@ static long band_table[NSIGPOLL] = { static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { - return (((fown->euid == 0) || - (fown->euid == p->suid) || (fown->euid == p->uid) || - (fown->uid == p->suid) || (fown->uid == p->uid)) && - !security_file_send_sigiotask(p, fown, sig)); + const struct cred *cred; + int ret; + + rcu_read_lock(); + cred = __task_cred(p); + ret = ((fown->euid == 0 || + fown->euid == cred->suid || fown->euid == cred->uid || + fown->uid == cred->suid || fown->uid == cred->uid) && + !security_file_send_sigiotask(p, fown, sig)); + rcu_read_unlock(); + return ret; } static void send_sigio_to_task(struct task_struct *p, - struct fown_struct *fown, + struct fown_struct *fown, int fd, int reason) { - if (!sigio_perm(p, fown, fown->signum)) + /* + * F_SETSIG can change ->signum lockless in parallel, make + * sure we read it once and use the same value throughout. + */ + int signum = ACCESS_ONCE(fown->signum); + + if (!sigio_perm(p, fown, signum)) return; - switch (fown->signum) { + switch (signum) { siginfo_t si; default: /* Queue a rt signal with the appropriate fd as its @@ -437,48 +449,42 @@ static void send_sigio_to_task(struct task_struct *p, delivered even if we can't queue. Failure to queue in this case _should_ be reported; we fall back to SIGIO in that case. --sct */ - si.si_signo = fown->signum; + si.si_signo = signum; si.si_errno = 0; si.si_code = reason; /* Make sure we are called with one of the POLL_* reasons, otherwise we could leak kernel stack into userspace. */ - if ((reason & __SI_MASK) != __SI_POLL) - BUG(); + BUG_ON((reason & __SI_MASK) != __SI_POLL); if (reason - POLL_IN >= NSIGPOLL) si.si_band = ~0L; else si.si_band = band_table[reason - POLL_IN]; si.si_fd = fd; - if (!send_group_sig_info(fown->signum, &si, p)) + if (!group_send_sig_info(signum, &si, p)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: - send_group_sig_info(SIGIO, SEND_SIG_PRIV, p); + group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); } } void send_sigio(struct fown_struct *fown, int fd, int band) { struct task_struct *p; - int pid; + enum pid_type type; + struct pid *pid; read_lock(&fown->lock); + type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; read_lock(&tasklist_lock); - if (pid > 0) { - p = find_task_by_pid(pid); - if (p) { - send_sigio_to_task(p, fown, fd, band); - } - } else { - do_each_task_pid(-pid, PIDTYPE_PGID, p) { - send_sigio_to_task(p, fown, fd, band); - } while_each_task_pid(-pid, PIDTYPE_PGID, p); - } + do_each_pid_task(pid, type, p) { + send_sigio_to_task(p, fown, fd, band); + } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); out_unlock_fown: read_unlock(&fown->lock); @@ -488,15 +494,18 @@ static void send_sigurg_to_task(struct task_struct *p, struct fown_struct *fown) { if (sigio_perm(p, fown, SIGURG)) - send_group_sig_info(SIGURG, SEND_SIG_PRIV, p); + group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); } int send_sigurg(struct fown_struct *fown) { struct task_struct *p; - int pid, ret = 0; + enum pid_type type; + struct pid *pid; + int ret = 0; read_lock(&fown->lock); + type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; @@ -504,16 +513,9 @@ int send_sigurg(struct fown_struct *fown) ret = 1; read_lock(&tasklist_lock); - if (pid > 0) { - p = find_task_by_pid(pid); - if (p) { - send_sigurg_to_task(p, fown); - } - } else { - do_each_task_pid(-pid, PIDTYPE_PGID, p) { - send_sigurg_to_task(p, fown); - } while_each_task_pid(-pid, PIDTYPE_PGID, p); - } + do_each_pid_task(pid, type, p) { + send_sigurg_to_task(p, fown); + } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); out_unlock_fown: read_unlock(&fown->lock); @@ -521,10 +523,10 @@ int send_sigurg(struct fown_struct *fown) } static DEFINE_RWLOCK(fasync_lock); -static kmem_cache_t *fasync_cache; +static struct kmem_cache *fasync_cache __read_mostly; /* - * fasync_helper() is used by some character device drivers (mainly mice) + * fasync_helper() is used by almost all character device drivers * to set up the fasync queue. It returns negative on error, 0 if it did * no changes and positive if it added/deleted the entry. */ @@ -535,10 +537,16 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap int result = 0; if (on) { - new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL); + new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); if (!new) return -ENOMEM; } + + /* + * We need to take f_lock first since it's not an IRQ-safe + * lock. + */ + spin_lock(&filp->f_lock); write_lock_irq(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file == filp) { @@ -563,7 +571,12 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap result = 1; } out: + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; write_unlock_irq(&fasync_lock); + spin_unlock(&filp->f_lock); return result; } @@ -607,7 +620,7 @@ EXPORT_SYMBOL(kill_fasync); static int __init fasync_init(void) { fasync_cache = kmem_cache_create("fasync_cache", - sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL); + sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); return 0; }