diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2a24791..97e01dc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,21 +9,22 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/capability.h>
 #include <linux/dnotify.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
 #include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
 #include <asm/uaccess.h>
 
-void fastcall set_close_on_exec(unsigned int fd, int flag)
+void set_close_on_exec(unsigned int fd, int flag)
 {
        struct files_struct *files = current->files;
        struct fdtable *fdt;
@@ -48,163 +49,104 @@ static int get_close_on_exec(unsigned int fd)
        return res;
 }
 
-/*
- * locate_fd finds a free file descriptor in the open_fds fdset,
- * expanding the fd arrays if necessary.  Must be called with the
- * file_lock held for write.
- */
-
-static int locate_fd(struct files_struct *files,
-                    struct file *file, unsigned int orig_start)
-{
-       unsigned int newfd;
-       unsigned int start;
-       int error;
-       struct fdtable *fdt;
-
-       error = -EINVAL;
-       if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-               goto out;
-
-repeat:
-       fdt = files_fdtable(files);
-       /*
-        * Someone might have closed fd's in the range
-        * orig_start..fdt->next_fd
-        */
-       start = orig_start;
-       if (start < files->next_fd)
-               start = files->next_fd;
-
-       newfd = start;
-       if (start < fdt->max_fdset) {
-               newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
-                       fdt->max_fdset, start);
-       }
-
-       error = -EMFILE;
-       if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-               goto out;
-
-       error = expand_files(files, newfd);
-       if (error < 0)
-               goto out;
-
-       /*
-        * If we needed to expand the fs array we
-        * might have blocked - try again.
-        */
-       if (error)
-               goto repeat;
-
-       /*
-        * We reacquired files_lock, so we are safe as long as
-        * we reacquire the fdtable pointer and use it while holding
-        * the lock, no one can free it during that time.
-        */
-       if (start <= files->next_fd)
-               files->next_fd = newfd + 1;
-
-       error = newfd;
-
-out:
-       return error;
-}
-
-static int dupfd(struct file *file, unsigned int start)
-{
-       struct files_struct * files = current->files;
-       struct fdtable *fdt;
-       int fd;
-
-       spin_lock(&files->file_lock);
-       fd = locate_fd(files, file, start);
-       if (fd >= 0) {
-               /* locate_fd() may have expanded fdtable, load the ptr */
-               fdt = files_fdtable(files);
-               FD_SET(fd, fdt->open_fds);
-               FD_CLR(fd, fdt->close_on_exec);
-               spin_unlock(&files->file_lock);
-               fd_install(fd, file);
-       } else {
-               spin_unlock(&files->file_lock);
-               fput(file);
-       }
-
-       return fd;
-}
-
-asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
 {
        int err = -EBADF;
        struct file * file, *tofree;
        struct files_struct * files = current->files;
        struct fdtable *fdt;
 
-       spin_lock(&files->file_lock);
-       if (!(file = fcheck(oldfd)))
-               goto out_unlock;
-       err = newfd;
-       if (newfd == oldfd)
-               goto out_unlock;
-       err = -EBADF;
-       if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-               goto out_unlock;
-       get_file(file);                 /* We are now finished with oldfd */
-
-       err = expand_files(files, newfd);
-       if (err < 0)
-               goto out_fput;
+       if ((flags & ~O_CLOEXEC) != 0)
+               return -EINVAL;
 
-       /* To avoid races with open() and dup(), we will mark the fd as
-        * in-use in the open-file bitmap throughout the entire dup2()
-        * process.  This is quite safe: do_close() uses the fd array
-        * entry, not the bitmap, to decide what work needs to be
-        * done.  --sct */
-       /* Doesn't work. open() might be there first. --AV */
+       if (unlikely(oldfd == newfd))
+               return -EINVAL;
 
-       /* Yes. It's a race. In user space. Nothing sane to do */
+       spin_lock(&files->file_lock);
+       err = expand_files(files, newfd);
+       file = fcheck(oldfd);
+       if (unlikely(!file))
+               goto Ebadf;
+       if (unlikely(err < 0)) {
+               if (err == -EMFILE)
+                       goto Ebadf;
+               goto out_unlock;
+       }
+       /*
+        * We need to detect attempts to do dup2() over allocated but still
+        * not finished descriptor.  NB: OpenBSD avoids that at the price of
+        * extra work in their equivalent of fget() - they insert struct
+        * file immediately after grabbing descriptor, mark it larval if
+        * more work (e.g. actual opening) is needed and make sure that
+        * fget() treats larval files as absent.  Potentially interesting,
+        * but while extra work in fget() is trivial, locking implications
+        * and amount of surgery on open()-related paths in VFS are not.
+        * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+        * deadlocks in rather amusing ways, AFAICS.  All of that is out of
+        * scope of POSIX or SUS, since neither considers shared descriptor
+        * tables and this condition does not arise without those.
+        */
        err = -EBUSY;
        fdt = files_fdtable(files);
        tofree = fdt->fd[newfd];
        if (!tofree && FD_ISSET(newfd, fdt->open_fds))
-               goto out_fput;
-
+               goto out_unlock;
+       get_file(file);
        rcu_assign_pointer(fdt->fd[newfd], file);
        FD_SET(newfd, fdt->open_fds);
-       FD_CLR(newfd, fdt->close_on_exec);
+       if (flags & O_CLOEXEC)
+               FD_SET(newfd, fdt->close_on_exec);
+       else
+               FD_CLR(newfd, fdt->close_on_exec);
        spin_unlock(&files->file_lock);
 
        if (tofree)
                filp_close(tofree, files);
-       err = newfd;
-out:
-       return err;
+
+       return newfd;
+
+Ebadf:
+       err = -EBADF;
 out_unlock:
        spin_unlock(&files->file_lock);
-       goto out;
+       return err;
+}
 
-out_fput:
-       spin_unlock(&files->file_lock);
-       fput(file);
-       goto out;
+SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
+{
+       if (unlikely(newfd == oldfd)) { /* corner case */
+               struct files_struct *files = current->files;
+               int retval = oldfd;
+
+               rcu_read_lock();
+               if (!fcheck_files(files, oldfd))
+                       retval = -EBADF;
+               rcu_read_unlock();
+               return retval;
+       }
+       return sys_dup3(oldfd, newfd, 0);
 }
 
-asmlinkage long sys_dup(unsigned int fildes)
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
 {
        int ret = -EBADF;
-       struct file * file = fget(fildes);
-
-       if (file)
-               ret = dupfd(file, 0);
+       struct file *file = fget(fildes);
+
+       if (file) {
+               ret = get_unused_fd();
+               if (ret >= 0)
+                       fd_install(ret, file);
+               else
+                       fput(file);
+       }
        return ret;
 }
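For readers following the patch: dup3() is dup2() plus a flags word, so close-on-exec can be set atomically with the duplication, and, unlike dup2(), the oldfd == newfd case fails with EINVAL instead of being a no-op success. A minimal userspace sketch of both behaviours, assuming glibc 2.9+ which exposes dup3() under _GNU_SOURCE:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/dev/null", O_WRONLY);

        /* Duplicate fd onto descriptor 100 with close-on-exec set atomically. */
        if (dup3(fd, 100, O_CLOEXEC) < 0)
            perror("dup3");
        printf("FD_CLOEXEC set: %d\n", fcntl(100, F_GETFD) & FD_CLOEXEC);

        /* dup2(fd, fd) just returns fd; dup3(fd, fd, 0) fails instead. */
        if (dup3(fd, fd, 0) < 0)
            perror("dup3(fd, fd, 0)");  /* EINVAL */
        return 0;
    }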
 
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
 {
-       struct inode * inode = filp->f_dentry->d_inode;
+       struct inode * inode = filp->f_path.dentry->d_inode;
        int error = 0;
 
        /*
@@ -216,7 +158,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 
        /* O_NOATIME can only be set by the owner or superuser */
        if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
-               if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+               if (!is_owner_or_cap(inode))
                        return -EPERM;
 
        /* required for strict SunOS emulation */
@@ -235,50 +177,163 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
        if (error)
                return error;
 
-       lock_kernel();
-       if ((arg ^ filp->f_flags) & FASYNC) {
-               if (filp->f_op && filp->f_op->fasync) {
-                       error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
-                       if (error < 0)
-                               goto out;
-               }
+       /*
+        * ->fasync() is responsible for setting the FASYNC bit.
+        */
+       if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
+                       filp->f_op->fasync) {
+               error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
+               if (error < 0)
+                       goto out;
+               if (error > 0)
+                       error = 0;
        }
-
+       spin_lock(&filp->f_lock);
        filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
+       spin_unlock(&filp->f_lock);
+
 out:
-       unlock_kernel();
        return error;
 }
 
-static void f_modown(struct file *filp, unsigned long pid,
-                    uid_t uid, uid_t euid, int force)
+static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
+                    int force)
 {
        write_lock_irq(&filp->f_owner.lock);
        if (force || !filp->f_owner.pid) {
-               filp->f_owner.pid = pid;
-               filp->f_owner.uid = uid;
-               filp->f_owner.euid = euid;
+               put_pid(filp->f_owner.pid);
+               filp->f_owner.pid = get_pid(pid);
+               filp->f_owner.pid_type = type;
+
+               if (pid) {
+                       const struct cred *cred = current_cred();
+                       filp->f_owner.uid = cred->uid;
+                       filp->f_owner.euid = cred->euid;
+               }
        }
        write_unlock_irq(&filp->f_owner.lock);
 }
 
-int f_setown(struct file *filp, unsigned long arg, int force)
+int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
+               int force)
 {
        int err;
-
+
        err = security_file_set_fowner(filp);
        if (err)
                return err;
 
-       f_modown(filp, arg, current->uid, current->euid, force);
+       f_modown(filp, pid, type, force);
        return 0;
 }
+EXPORT_SYMBOL(__f_setown);
 
+int f_setown(struct file *filp, unsigned long arg, int force)
+{
+       enum pid_type type;
+       struct pid *pid;
+       int who = arg;
+       int result;
+       type = PIDTYPE_PID;
+       if (who < 0) {
+               type = PIDTYPE_PGID;
+               who = -who;
+       }
+       rcu_read_lock();
+       pid = find_vpid(who);
+       result = __f_setown(filp, pid, type, force);
+       rcu_read_unlock();
+       return result;
+}
 EXPORT_SYMBOL(f_setown);
 
 void f_delown(struct file *filp)
 {
-       f_modown(filp, 0, 0, 0, 1);
+       f_modown(filp, NULL, PIDTYPE_PID, 1);
+}
+
+pid_t f_getown(struct file *filp)
+{
+       pid_t pid;
+       read_lock(&filp->f_owner.lock);
+       pid = pid_vnr(filp->f_owner.pid);
+       if (filp->f_owner.pid_type == PIDTYPE_PGID)
+               pid = -pid;
+       read_unlock(&filp->f_owner.lock);
+       return pid;
+}
+
+static int f_setown_ex(struct file *filp, unsigned long arg)
+{
+       struct f_owner_ex * __user owner_p = (void * __user)arg;
+       struct f_owner_ex owner;
+       struct pid *pid;
+       int type;
+       int ret;
+
+       ret = copy_from_user(&owner, owner_p, sizeof(owner));
+       if (ret)
+               return ret;
+
+       switch (owner.type) {
+       case F_OWNER_TID:
+               type = PIDTYPE_MAX;
+               break;
+
+       case F_OWNER_PID:
+               type = PIDTYPE_PID;
+               break;
+
+       case F_OWNER_PGRP:
+               type = PIDTYPE_PGID;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       rcu_read_lock();
+       pid = find_vpid(owner.pid);
+       if (owner.pid && !pid)
+               ret = -ESRCH;
+       else
+               ret = __f_setown(filp, pid, type, 1);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static int f_getown_ex(struct file *filp, unsigned long arg)
+{
+       struct f_owner_ex * __user owner_p = (void * __user)arg;
+       struct f_owner_ex owner;
+       int ret = 0;
+
+       read_lock(&filp->f_owner.lock);
+       owner.pid = pid_vnr(filp->f_owner.pid);
+       switch (filp->f_owner.pid_type) {
+       case PIDTYPE_MAX:
+               owner.type = F_OWNER_TID;
+               break;
+
+       case PIDTYPE_PID:
+               owner.type = F_OWNER_PID;
+               break;
+
+       case PIDTYPE_PGID:
+               owner.type = F_OWNER_PGRP;
+               break;
+
+       default:
+               WARN_ON(1);
+               ret = -EINVAL;
+               break;
+       }
+       read_unlock(&filp->f_owner.lock);
+
+       if (!ret)
+               ret = copy_to_user(owner_p, &owner, sizeof(owner));
+       return ret;
 }
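The rewritten f_setown() keeps the historical F_SETOWN encoding: a positive argument names a single process (PIDTYPE_PID), a negative one a process group (PIDTYPE_PGID), and f_getown() mirrors this by returning process-group owners negated, the ambiguity that the F_GETOWN case in do_fcntl() below papers over with force_successful_syscall_return(). A small userspace sketch of that convention (error checking elided, exact printed values depend on your libc's F_GETOWN handling):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int s = socket(AF_INET, SOCK_DGRAM, 0);

        fcntl(s, F_SETOWN, getpid());    /* single-process owner */
        printf("owner: %d\n", fcntl(s, F_GETOWN));

        fcntl(s, F_SETOWN, -getpgrp());  /* whole process group */
        printf("owner: %d\n", fcntl(s, F_GETOWN));  /* negated pgrp */

        close(s);
        return 0;
    }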
 
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -288,8 +343,14 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 
        switch (cmd) {
        case F_DUPFD:
-               get_file(filp);
-               err = dupfd(filp, arg);
+       case F_DUPFD_CLOEXEC:
+               if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+                       break;
+               err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
+               if (err >= 0) {
+                       get_file(filp);
+                       fd_install(err, filp);
+               }
                break;
        case F_GETFD:
                err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
@@ -319,12 +380,18 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                 * current syscall conventions, the only way
                 * to fix this will be in libc.
                 */
-               err = filp->f_owner.pid;
+               err = f_getown(filp);
                force_successful_syscall_return();
                break;
        case F_SETOWN:
                err = f_setown(filp, arg, 1);
                break;
+       case F_GETOWN_EX:
+               err = f_getown_ex(filp, arg);
+               break;
+       case F_SETOWN_EX:
+               err = f_setown_ex(filp, arg);
+               break;
        case F_GETSIG:
                err = filp->f_owner.signum;
                break;
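The new do_fcntl() commands are reached through plain fcntl(2). A hedged userspace sketch; it assumes glibc headers that define F_DUPFD_CLOEXEC (2.7+) and F_SETOWN_EX/struct f_owner_ex (2.11+, behind _GNU_SOURCE), so older libcs may need the constants supplied by hand:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
        /* Lowest free fd >= 10, with FD_CLOEXEC already set on return. */
        int fd = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 10);

        /* F_OWNER_TID (PIDTYPE_MAX in-kernel) targets one thread only. */
        struct f_owner_ex ex = {
            .type = F_OWNER_TID,
            .pid  = syscall(SYS_gettid),
        };
        if (fcntl(fd, F_SETOWN_EX, &ex) < 0)
            perror("F_SETOWN_EX");

        fcntl(fd, F_GETOWN_EX, &ex);
        printf("type=%d pid=%d\n", ex.type, (int)ex.pid);
        return 0;
    }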
@@ -351,7 +418,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        return err;
 }
 
-asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
+SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 {
        struct file *filp;
        long err = -EBADF;
@@ -374,7 +441,8 @@ out:
 }
 
 #if BITS_PER_LONG == 32
-asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
+SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
+               unsigned long, arg)
 {
        struct file * filp;
        long err;
@@ -424,21 +492,33 @@ static const long band_table[NSIGPOLL] = {
 static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
 {
-       return (((fown->euid == 0) ||
-                (fown->euid == p->suid) || (fown->euid == p->uid) ||
-                (fown->uid == p->suid) || (fown->uid == p->uid)) &&
-               !security_file_send_sigiotask(p, fown, sig));
+       const struct cred *cred;
+       int ret;
+
+       rcu_read_lock();
+       cred = __task_cred(p);
+       ret = ((fown->euid == 0 ||
+               fown->euid == cred->suid || fown->euid == cred->uid ||
+               fown->uid == cred->suid || fown->uid == cred->uid) &&
+              !security_file_send_sigiotask(p, fown, sig));
+       rcu_read_unlock();
+       return ret;
 }
 
 static void send_sigio_to_task(struct task_struct *p,
-                              struct fown_struct *fown,
-                              int fd,
-                              int reason)
+                              struct fown_struct *fown,
+                              int fd, int reason, int group)
 {
-       if (!sigio_perm(p, fown, fown->signum))
+       /*
+        * F_SETSIG can change ->signum lockless in parallel, make
+        * sure we read it once and use the same value throughout.
+        */
+       int signum = ACCESS_ONCE(fown->signum);
+
+       if (!sigio_perm(p, fown, signum))
                return;
 
-       switch (fown->signum) {
+       switch (signum) {
                siginfo_t si;
                default:
@@ -447,66 +527,77 @@ static void send_sigio_to_task(struct task_struct *p,
                           delivered even if we can't queue.  Failure to
                           queue in this case _should_ be reported; we fall
                           back to SIGIO in that case. --sct */
-                       si.si_signo = fown->signum;
+                       si.si_signo = signum;
                        si.si_errno = 0;
                        si.si_code  = reason;
                        /* Make sure we are called with one of the POLL_*
                           reasons, otherwise we could leak kernel stack into
                           userspace.  */
-                       if ((reason & __SI_MASK) != __SI_POLL)
-                               BUG();
+                       BUG_ON((reason & __SI_MASK) != __SI_POLL);
                        if (reason - POLL_IN >= NSIGPOLL)
                                si.si_band  = ~0L;
                        else
                                si.si_band = band_table[reason - POLL_IN];
                        si.si_fd    = fd;
-                       if (!group_send_sig_info(fown->signum, &si, p))
+                       if (!do_send_sig_info(signum, &si, p, group))
                                break;
                /* fall-through: fall back on the old plain SIGIO signal */
                case 0:
-                       group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
+                       do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
        }
 }
 
 void send_sigio(struct fown_struct *fown, int fd, int band)
 {
        struct task_struct *p;
-       int pid;
+       enum pid_type type;
+       struct pid *pid;
+       int group = 1;
 
        read_lock(&fown->lock);
+
+       type = fown->pid_type;
+       if (type == PIDTYPE_MAX) {
+               group = 0;
+               type = PIDTYPE_PID;
+       }
+
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
 
        read_lock(&tasklist_lock);
-       if (pid > 0) {
-               p = find_task_by_pid(pid);
-               if (p) {
-                       send_sigio_to_task(p, fown, fd, band);
-               }
-       } else {
-               do_each_task_pid(-pid, PIDTYPE_PGID, p) {
-                       send_sigio_to_task(p, fown, fd, band);
-               } while_each_task_pid(-pid, PIDTYPE_PGID, p);
-       }
+       do_each_pid_task(pid, type, p) {
+               send_sigio_to_task(p, fown, fd, band, group);
+       } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
 out_unlock_fown:
        read_unlock(&fown->lock);
 }
 
 static void send_sigurg_to_task(struct task_struct *p,
-                               struct fown_struct *fown)
+                               struct fown_struct *fown, int group)
 {
        if (sigio_perm(p, fown, SIGURG))
-               group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
+               do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
 }
 
 int send_sigurg(struct fown_struct *fown)
 {
        struct task_struct *p;
-       int pid, ret = 0;
+       enum pid_type type;
+       struct pid *pid;
+       int group = 1;
+       int ret = 0;
 
        read_lock(&fown->lock);
+
+       type = fown->pid_type;
+       if (type == PIDTYPE_MAX) {
+               group = 0;
+               type = PIDTYPE_PID;
+       }
+
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
@@ -514,16 +605,9 @@ int send_sigurg(struct fown_struct *fown)
        ret = 1;
 
        read_lock(&tasklist_lock);
-       if (pid > 0) {
-               p = find_task_by_pid(pid);
-               if (p) {
-                       send_sigurg_to_task(p, fown);
-               }
-       } else {
-               do_each_task_pid(-pid, PIDTYPE_PGID, p) {
-                       send_sigurg_to_task(p, fown);
-               } while_each_task_pid(-pid, PIDTYPE_PGID, p);
-       }
+       do_each_pid_task(pid, type, p) {
+               send_sigurg_to_task(p, fown, group);
+       } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
 out_unlock_fown:
        read_unlock(&fown->lock);
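On delivery, send_sigio_to_task() above fills si_fd and si_band for the realtime signal chosen with F_SETSIG, falling back to plain SIGIO when ->signum is 0. A hedged receiving-side sketch (F_SETSIG is Linux-specific, behind _GNU_SOURCE; printf in a handler is for demonstration only, it is not async-signal-safe):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static void on_io(int sig, siginfo_t *si, void *ctx)
    {
        /* si_fd/si_band are the fields send_sigio_to_task() fills in. */
        printf("sig=%d fd=%d band=%ld\n", sig, si->si_fd, (long)si->si_band);
    }

    int main(void)
    {
        struct sigaction sa = { .sa_sigaction = on_io, .sa_flags = SA_SIGINFO };
        int fds[2];

        sigaction(SIGRTMIN, &sa, NULL);
        pipe(fds);

        fcntl(fds[0], F_SETOWN, getpid());         /* who gets the signal */
        fcntl(fds[0], F_SETSIG, SIGRTMIN);         /* 0 would mean plain SIGIO */
        fcntl(fds[0], F_SETFL, fcntl(fds[0], F_GETFL) | O_ASYNC);

        write(fds[1], "x", 1);                     /* raises a POLL_IN event */
        sleep(1);                                  /* handler runs here */
        return 0;
    }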
@@ -531,52 +615,93 @@ int send_sigurg(struct fown_struct *fown)
 }
 
 static DEFINE_RWLOCK(fasync_lock);
-static kmem_cache_t *fasync_cache __read_mostly;
+static struct kmem_cache *fasync_cache __read_mostly;
 
 /*
- * fasync_helper() is used by some character device drivers (mainly mice)
- * to set up the fasync queue. It returns negative on error, 0 if it did
- * no changes and positive if it added/deleted the entry.
+ * Remove a fasync entry. If successfully removed, return
+ * positive and clear the FASYNC flag. If no entry exists,
+ * do nothing and return 0.
+ *
+ * NOTE! It is very important that the FASYNC flag always
+ * match the state "is the filp on a fasync list".
+ *
+ * We always take 'filp->f_lock' first, since fasync_lock
+ * needs to be irq-safe.
  */
-int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
+static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 {
        struct fasync_struct *fa, **fp;
-       struct fasync_struct *new = NULL;
        int result = 0;
 
-       if (on) {
-               new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
-               if (!new)
-                       return -ENOMEM;
-       }
+       spin_lock(&filp->f_lock);
        write_lock_irq(&fasync_lock);
        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
-               if (fa->fa_file == filp) {
-                       if(on) {
-                               fa->fa_fd = fd;
-                               kmem_cache_free(fasync_cache, new);
-                       } else {
-                               *fp = fa->fa_next;
-                               kmem_cache_free(fasync_cache, fa);
-                               result = 1;
-                       }
-                       goto out;
-               }
+               if (fa->fa_file != filp)
+                       continue;
+               *fp = fa->fa_next;
+               kmem_cache_free(fasync_cache, fa);
+               filp->f_flags &= ~FASYNC;
+               result = 1;
+               break;
        }
+       write_unlock_irq(&fasync_lock);
+       spin_unlock(&filp->f_lock);
+       return result;
+}
 
-       if (on) {
-               new->magic = FASYNC_MAGIC;
-               new->fa_file = filp;
-               new->fa_fd = fd;
-               new->fa_next = *fapp;
-               *fapp = new;
-               result = 1;
+/*
+ * Add a fasync entry. Return negative on error, positive if
+ * added, and zero if it did nothing but change an existing one.
+ *
+ * NOTE! It is very important that the FASYNC flag always
+ * match the state "is the filp on a fasync list".
+ */
+static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+{
+       struct fasync_struct *new, *fa, **fp;
+       int result = 0;
+
+       new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
+       if (!new)
+               return -ENOMEM;
+
+       spin_lock(&filp->f_lock);
+       write_lock_irq(&fasync_lock);
+       for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
+               if (fa->fa_file != filp)
+                       continue;
+               fa->fa_fd = fd;
+               kmem_cache_free(fasync_cache, new);
+               goto out;
        }
+
+       new->magic = FASYNC_MAGIC;
+       new->fa_file = filp;
+       new->fa_fd = fd;
+       new->fa_next = *fapp;
+       *fapp = new;
+       result = 1;
+       filp->f_flags |= FASYNC;
+
 out:
        write_unlock_irq(&fasync_lock);
+       spin_unlock(&filp->f_lock);
        return result;
 }
 
+/*
+ * fasync_helper() is used by almost all character device drivers
+ * to set up the fasync queue, and for regular files by the file
+ * lease code. It returns negative on error, 0 if it did no changes
+ * and positive if it added/deleted the entry.
+ */
+int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
+{
+       if (!on)
+               return fasync_remove_entry(filp, fapp);
+       return fasync_add_entry(fd, filp, fapp);
+}
+
 EXPORT_SYMBOL(fasync_helper);
 
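For context, the driver-side pattern fasync_helper() serves is unchanged by this split: the file_operations ->fasync method forwards to it, and the data-ready path kicks registered owners with kill_fasync(). A sketch of that pattern; the mydev_* names are hypothetical, not code from this patch:

    #include <linux/fs.h>

    static struct fasync_struct *mydev_async_queue;

    /* Wired into file_operations; invoked via setfl() when userspace
     * toggles O_ASYNC (FASYNC) with fcntl(F_SETFL). */
    static int mydev_fasync(int fd, struct file *filp, int on)
    {
        return fasync_helper(fd, filp, on, &mydev_async_queue);
    }

    static int mydev_release(struct inode *inode, struct file *filp)
    {
        /* Drop this filp from the queue on last close. */
        fasync_helper(-1, filp, 0, &mydev_async_queue);
        return 0;
    }

    /* Called from the driver's "data ready" path: raises SIGIO (or the
     * F_SETSIG replacement) on every registered filp. */
    static void mydev_data_ready(void)
    {
        kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
    }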
 void __kill_fasync(struct fasync_struct *fa, int sig, int band)
@@ -617,7 +742,7 @@ EXPORT_SYMBOL(kill_fasync);
 
 static int __init fasync_init(void)
 {
        fasync_cache = kmem_cache_create("fasync_cache",
-               sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
+               sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
        return 0;
 }