UIO: cdev lock_kernel() pushdown
[safe/jmp/linux-2.6] / fs / exec.c
index 007d0d8..1f8a24a 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
 
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/mman.h>
 #include <linux/a.out.h>
 #include <linux/stat.h>
@@ -112,14 +113,14 @@ asmlinkage long sys_uselib(const char __user * library)
                goto out;
 
        error = -EINVAL;
-       if (!S_ISREG(nd.dentry->d_inode->i_mode))
+       if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
                goto exit;
 
        error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
        if (error)
                goto exit;
 
-       file = nameidata_to_filp(&nd, O_RDONLY);
+       file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
@@ -148,7 +149,7 @@ out:
        return error;
 exit:
        release_open_intent(&nd);
-       path_release(&nd);
+       path_put(&nd.path);
        goto out;
 }
 
@@ -173,8 +174,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                return NULL;
 
        if (write) {
-               struct rlimit *rlim = current->signal->rlim;
                unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+               struct rlimit *rlim;
+
+               /*
+                * We've historically supported up to 32 pages (ARG_MAX)
+                * of argument strings even with small stacks
+                */
+               if (size <= ARG_MAX)
+                       return page;
 
                /*
                 * Limit to 1/4-th the stack size for the argv+env strings.
@@ -183,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                 *  - the program will have a reasonable amount of stack left
                 *    to work from.
                 */
+               rlim = current->signal->rlim;
                if (size > rlim[RLIMIT_STACK].rlim_cur / 4) {
                        put_page(page);
                        return NULL;
@@ -652,13 +661,14 @@ struct file *open_exec(const char *name)
        file = ERR_PTR(err);
 
        if (!err) {
-               struct inode *inode = nd.dentry->d_inode;
+               struct inode *inode = nd.path.dentry->d_inode;
                file = ERR_PTR(-EACCES);
                if (S_ISREG(inode->i_mode)) {
                        int err = vfs_permission(&nd, MAY_EXEC);
                        file = ERR_PTR(err);
                        if (!err) {
-                               file = nameidata_to_filp(&nd, O_RDONLY);
+                               file = nameidata_to_filp(&nd,
+                                                       O_RDONLY|O_LARGEFILE);
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
@@ -671,7 +681,7 @@ out:
                        }
                }
                release_open_intent(&nd);
-               path_release(&nd);
+               path_put(&nd.path);
        }
        goto out;
 }
@@ -726,6 +736,7 @@ static int exec_mmap(struct mm_struct *mm)
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
        task_unlock(tsk);
+       mm_update_next_owner(old_mm);
        arch_pick_mmap_layout(mm);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
@@ -756,53 +767,22 @@ static int de_thread(struct task_struct *tsk)
 
        /*
         * Kill all other threads in the thread group.
-        * We must hold tasklist_lock to call zap_other_threads.
         */
-       read_lock(&tasklist_lock);
        spin_lock_irq(lock);
-       if (sig->flags & SIGNAL_GROUP_EXIT) {
+       if (signal_group_exit(sig)) {
                /*
                 * Another group action in progress, just
                 * return so that the signal is processed.
                 */
                spin_unlock_irq(lock);
-               read_unlock(&tasklist_lock);
                return -EAGAIN;
        }
-
-       /*
-        * child_reaper ignores SIGKILL, change it now.
-        * Reparenting needs write_lock on tasklist_lock,
-        * so it is safe to do it under read_lock.
-        */
-       if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
-               task_active_pid_ns(tsk)->child_reaper = tsk;
-
+       sig->group_exit_task = tsk;
        zap_other_threads(tsk);
-       read_unlock(&tasklist_lock);
-
-       /*
-        * Account for the thread group leader hanging around:
-        */
-       count = 1;
-       if (!thread_group_leader(tsk)) {
-               count = 2;
-               /*
-                * The SIGALRM timer survives the exec, but needs to point
-                * at us as the new group leader now.  We have a race with
-                * a timer firing now getting the old leader, so we need to
-                * synchronize with any firing (by calling del_timer_sync)
-                * before we can safely let the old group leader die.
-                */
-               sig->tsk = tsk;
-               spin_unlock_irq(lock);
-               if (hrtimer_cancel(&sig->real_timer))
-                       hrtimer_restart(&sig->real_timer);
-               spin_lock_irq(lock);
-       }
 
+       /* Account for the thread group leader hanging around: */
+       count = thread_group_leader(tsk) ? 1 : 2;
        sig->notify_count = count;
-       sig->group_exit_task = tsk;
        while (atomic_read(&sig->count) > count) {
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(lock);
@@ -819,7 +799,7 @@ static int de_thread(struct task_struct *tsk)
        if (!thread_group_leader(tsk)) {
                leader = tsk->group_leader;
 
-               sig->notify_count = -1;
+               sig->notify_count = -1; /* for exit_notify() */
                for (;;) {
                        write_lock_irq(&tasklist_lock);
                        if (likely(leader->exit_state))
@@ -829,6 +809,8 @@ static int de_thread(struct task_struct *tsk)
                        schedule();
                }
 
+               if (unlikely(task_child_reaper(tsk) == leader))
+                       task_active_pid_ns(tsk)->child_reaper = tsk;
                /*
                 * The only record we have of the real-time age of a
                 * process, regardless of execs it's done, is start_time.
@@ -841,8 +823,8 @@ static int de_thread(struct task_struct *tsk)
                 */
                tsk->start_time = leader->start_time;
 
-               BUG_ON(leader->tgid != tsk->tgid);
-               BUG_ON(tsk->pid == tsk->tgid);
+               BUG_ON(!same_thread_group(leader, tsk));
+               BUG_ON(has_group_leader_pid(tsk));
                /*
                 * An exec() starts a new thread group with the
                 * TGID of the previous thread group. Rehash the
@@ -871,15 +853,10 @@ static int de_thread(struct task_struct *tsk)
                leader->exit_state = EXIT_DEAD;
 
                write_unlock_irq(&tasklist_lock);
-        }
+       }
 
        sig->group_exit_task = NULL;
        sig->notify_count = 0;
-       /*
-        * There may be one thread left which is just exiting,
-        * but it's safe to stop telling the group to kill themselves.
-        */
-       sig->flags = 0;
 
 no_thread_group:
        exit_itimers(sig);
@@ -947,12 +924,13 @@ static void flush_old_files(struct files_struct * files)
        spin_unlock(&files->file_lock);
 }
 
-void get_task_comm(char *buf, struct task_struct *tsk)
+char *get_task_comm(char *buf, struct task_struct *tsk)
 {
        /* buf must be at least sizeof(tsk->comm) in size */
        task_lock(tsk);
        strncpy(buf, tsk->comm, sizeof(tsk->comm));
        task_unlock(tsk);
+       return buf;
 }
 
 void set_task_comm(struct task_struct *tsk, char *buf)
@@ -966,7 +944,6 @@ int flush_old_exec(struct linux_binprm * bprm)
 {
        char * name;
        int i, ch, retval;
-       struct files_struct *files;
        char tcomm[sizeof(current->comm)];
 
        /*
@@ -977,27 +954,18 @@ int flush_old_exec(struct linux_binprm * bprm)
        if (retval)
                goto out;
 
-       /*
-        * Make sure we have private file handles. Ask the
-        * fork helper to do the work for us and the exit
-        * helper to do the cleanup of the old one.
-        */
-       files = current->files;         /* refcounted so safe to hold */
-       retval = unshare_files();
-       if (retval)
-               goto out;
+       set_mm_exe_file(bprm->mm, bprm->file);
+
        /*
         * Release all of the old mmap stuff
         */
        retval = exec_mmap(bprm->mm);
        if (retval)
-               goto mmap_failed;
+               goto out;
 
        bprm->mm = NULL;                /* We're using it now */
 
        /* This is the point of no return */
-       put_files_struct(files);
-
        current->sas_ss_sp = current->sas_ss_size = 0;
 
        if (current->euid == current->uid && current->egid == current->gid)
@@ -1047,8 +1015,6 @@ int flush_old_exec(struct linux_binprm * bprm)
 
        return 0;
 
-mmap_failed:
-       reset_files_struct(current, files);
 out:
        return retval;
 }
@@ -1188,7 +1154,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 {
        int try,retval;
        struct linux_binfmt *fmt;
-#ifdef __alpha__
+#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT)
        /* handle /sbin/loader.. */
        {
            struct exec * eh = (struct exec *) bprm->buf;
@@ -1295,13 +1261,17 @@ int do_execve(char * filename,
 {
        struct linux_binprm *bprm;
        struct file *file;
-       unsigned long env_p;
+       struct files_struct *displaced;
        int retval;
 
+       retval = unshare_files(&displaced);
+       if (retval)
+               goto out_ret;
+
        retval = -ENOMEM;
        bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
        if (!bprm)
-               goto out_ret;
+               goto out_files;
 
        file = open_exec(filename);
        retval = PTR_ERR(file);
@@ -1343,11 +1313,9 @@ int do_execve(char * filename,
        if (retval < 0)
                goto out;
 
-       env_p = bprm->p;
        retval = copy_strings(bprm->argc, argv, bprm);
        if (retval < 0)
                goto out;
-       bprm->argv_len = env_p - bprm->p;
 
        retval = search_binary_handler(bprm,regs);
        if (retval >= 0) {
@@ -1356,6 +1324,8 @@ int do_execve(char * filename,
                security_bprm_free(bprm);
                acct_update_integrals(current);
                kfree(bprm);
+               if (displaced)
+                       put_files_struct(displaced);
                return retval;
        }
 
@@ -1376,6 +1346,9 @@ out_file:
 out_kfree:
        kfree(bprm);
 
+out_files:
+       if (displaced)
+               reset_files_struct(displaced);
 out_ret:
        return retval;
 }
@@ -1548,7 +1521,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
        int err = -EAGAIN;
 
        spin_lock_irq(&tsk->sighand->siglock);
-       if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+       if (!signal_group_exit(tsk->signal)) {
                tsk->signal->group_exit_code = exit_code;
                zap_process(tsk);
                err = 0;
@@ -1692,7 +1665,10 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!binfmt || !binfmt->core_dump)
                goto fail;
        down_write(&mm->mmap_sem);
-       if (!get_dumpable(mm)) {
+       /*
+        * If another thread got here first, or we are not dumpable, bail out.
+        */
+       if (mm->core_waiters || !get_dumpable(mm)) {
                up_write(&mm->mmap_sem);
                goto fail;
        }
@@ -1706,7 +1682,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
                flag = O_EXCL;          /* Stop rewrite attacks */
                current->fsuid = 0;     /* Dump root private */
        }
-       set_dumpable(mm, 0);
 
        retval = coredump_wait(exit_code);
        if (retval < 0)
@@ -1778,6 +1753,12 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
           but keep the previous behaviour for now. */
        if (!ispipe && !S_ISREG(inode->i_mode))
                goto close_fail;
+       /*
+        * Dont allow local users get cute and trick others to coredump
+        * into their pre-created files:
+        */
+       if (inode->i_uid != current->fsuid)
+               goto close_fail;
        if (!file->f_op)
                goto close_fail;
        if (!file->f_op->write)