exec: RT sub-thread can livelock and monopolize CPU on exec
[safe/jmp/linux-2.6] / fs / exec.c
index 550ae9b..aa470a9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -29,6 +29,7 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
+#include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
@@ -111,9 +112,6 @@ asmlinkage long sys_uselib(const char __user * library)
        if (error)
                goto out;
 
-       error = -EACCES;
-       if (nd.mnt->mnt_flags & MNT_NOEXEC)
-               goto exit;
        error = -EINVAL;
        if (!S_ISREG(nd.dentry->d_inode->i_mode))
                goto exit;
@@ -657,8 +655,7 @@ struct file *open_exec(const char *name)
        if (!err) {
                struct inode *inode = nd.dentry->d_inode;
                file = ERR_PTR(-EACCES);
-               if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
-                   S_ISREG(inode->i_mode)) {
+               if (S_ISREG(inode->i_mode)) {
                        int err = vfs_permission(&nd, MAY_EXEC);
                        file = ERR_PTR(err);
                        if (!err) {
@@ -750,24 +747,11 @@ static int exec_mmap(struct mm_struct *mm)
 static int de_thread(struct task_struct *tsk)
 {
        struct signal_struct *sig = tsk->signal;
-       struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
+       struct sighand_struct *oldsighand = tsk->sighand;
        spinlock_t *lock = &oldsighand->siglock;
        struct task_struct *leader = NULL;
        int count;
 
-       /*
-        * If we don't share sighandlers, then we aren't sharing anything
-        * and we can just re-use it all.
-        */
-       if (atomic_read(&oldsighand->count) <= 1) {
-               exit_itimers(sig);
-               return 0;
-       }
-
-       newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-       if (!newsighand)
-               return -ENOMEM;
-
        if (thread_group_empty(tsk))
                goto no_thread_group;
 
@@ -784,7 +768,6 @@ static int de_thread(struct task_struct *tsk)
                 */
                spin_unlock_irq(lock);
                read_unlock(&tasklist_lock);
-               kmem_cache_free(sighand_cachep, newsighand);
                return -EAGAIN;
        }
 
@@ -818,16 +801,15 @@ static int de_thread(struct task_struct *tsk)
                        hrtimer_restart(&sig->real_timer);
                spin_lock_irq(lock);
        }
+
+       sig->notify_count = count;
+       sig->group_exit_task = tsk;
        while (atomic_read(&sig->count) > count) {
-               sig->group_exit_task = tsk;
-               sig->notify_count = count;
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(lock);
                schedule();
                spin_lock_irq(lock);
        }
-       sig->group_exit_task = NULL;
-       sig->notify_count = 0;
        spin_unlock_irq(lock);
 
        /*
@@ -836,14 +818,17 @@ static int de_thread(struct task_struct *tsk)
         * and to assume its PID:
         */
        if (!thread_group_leader(tsk)) {
-               /*
-                * Wait for the thread group leader to be a zombie.
-                * It should already be zombie at this point, most
-                * of the time.
-                */
                leader = tsk->group_leader;
-               while (leader->exit_state != EXIT_ZOMBIE)
-                       yield();
+
+               sig->notify_count = -1;
+               for (;;) {
+                       write_lock_irq(&tasklist_lock);
+                       if (likely(leader->exit_state))
+                               break;
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tasklist_lock);
+                       schedule();
+               }
 
                /*
                 * The only record we have of the real-time age of a
@@ -857,8 +842,6 @@ static int de_thread(struct task_struct *tsk)
                 */
                tsk->start_time = leader->start_time;
 
-               write_lock_irq(&tasklist_lock);
-
                BUG_ON(leader->tgid != tsk->tgid);
                BUG_ON(tsk->pid == tsk->tgid);
                /*
@@ -891,6 +874,8 @@ static int de_thread(struct task_struct *tsk)
                write_unlock_irq(&tasklist_lock);
         }
 
+       sig->group_exit_task = NULL;
+       sig->notify_count = 0;
        /*
         * There may be one thread left which is just exiting,
         * but it's safe to stop telling the group to kill themselves.
@@ -902,29 +887,23 @@ no_thread_group:
        if (leader)
                release_task(leader);
 
-       if (atomic_read(&oldsighand->count) == 1) {
+       if (atomic_read(&oldsighand->count) != 1) {
+               struct sighand_struct *newsighand;
                /*
-                * Now that we nuked the rest of the thread group,
-                * it turns out we are not sharing sighand any more either.
-                * So we can just keep it.
-                */
-               kmem_cache_free(sighand_cachep, newsighand);
-       } else {
-               /*
-                * Move our state over to newsighand and switch it in.
+                * This ->sighand is shared with the CLONE_SIGHAND
+                * but not CLONE_THREAD task, switch to the new one.
                 */
+               newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+               if (!newsighand)
+                       return -ENOMEM;
+
                atomic_set(&newsighand->count, 1);
                memcpy(newsighand->action, oldsighand->action,
                       sizeof(newsighand->action));
 
                write_lock_irq(&tasklist_lock);
                spin_lock(&oldsighand->siglock);
-               spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
-
                rcu_assign_pointer(tsk->sighand, newsighand);
-               recalc_sigpending();
-
-               spin_unlock(&newsighand->siglock);
                spin_unlock(&oldsighand->siglock);
                write_unlock_irq(&tasklist_lock);
 
@@ -934,12 +913,11 @@ no_thread_group:
        BUG_ON(!thread_group_leader(tsk));
        return 0;
 }
-       
+
 /*
  * These functions flushes out all traces of the currently running executable
  * so that a new one can be started
  */
-
 static void flush_old_files(struct files_struct * files)
 {
        long j = -1;
@@ -1514,6 +1492,14 @@ static int format_corename(char *corename, const char *pattern, long signr)
                                        goto out;
                                out_ptr += rc;
                                break;
+                       /* core limit size */
+                       case 'c':
+                               rc = snprintf(out_ptr, out_end - out_ptr,
+                                             "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur);
+                               if (rc > out_end - out_ptr)
+                                       goto out;
+                               out_ptr += rc;
+                               break;
                        default:
                                break;
                        }
@@ -1697,6 +1683,10 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        int fsuid = current->fsuid;
        int flag = 0;
        int ispipe = 0;
+       unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+       char **helper_argv = NULL;
+       int helper_argc = 0;
+       char *delimit;
 
        audit_core_dumps(signr);
 
@@ -1730,9 +1720,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         */
        clear_thread_flag(TIF_SIGPENDING);
 
-       if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
-               goto fail_unlock;
-
        /*
         * lock_kernel() because format_corename() is controlled by sysctl, which
         * uses lock_kernel()
@@ -1740,9 +1727,39 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        lock_kernel();
        ispipe = format_corename(corename, core_pattern, signr);
        unlock_kernel();
+       /*
+        * Don't bother to check the RLIMIT_CORE value if core_pattern points
+        * to a pipe.  Since we're not writing directly to the filesystem
+        * RLIMIT_CORE doesn't really apply, as no actual core file will be
+        * created unless the pipe reader chooses to write out the core file
+        * at which point file size limits and permissions will be imposed
+        * as it does with any other process
+        */
+       if ((!ispipe) && (core_limit < binfmt->min_coredump))
+               goto fail_unlock;
+
        if (ispipe) {
+               helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
+               /* Terminate the string before the first option */
+               delimit = strchr(corename, ' ');
+               if (delimit)
+                       *delimit = '\0';
+               delimit = strrchr(helper_argv[0], '/');
+               if (delimit)
+                       delimit++;
+               else
+                       delimit = helper_argv[0];
+               if (!strcmp(delimit, current->comm)) {
+                       printk(KERN_NOTICE "Recursive core dump detected, "
+                                       "aborting\n");
+                       goto fail_unlock;
+               }
+
+               core_limit = RLIM_INFINITY;
+
                /* SIGPIPE can happen, but it's just never processed */
-               if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+               if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+                               &file)) {
                        printk(KERN_INFO "Core dump to %s pipe failed\n",
                               corename);
                        goto fail_unlock;
@@ -1770,13 +1787,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                goto close_fail;
 
-       retval = binfmt->core_dump(signr, regs, file);
+       retval = binfmt->core_dump(signr, regs, file, core_limit);
 
        if (retval)
                current->signal->group_exit_code |= 0x80;
 close_fail:
        filp_close(file, NULL);
 fail_unlock:
+       if (helper_argv)
+               argv_free(helper_argv);
+
        current->fsuid = fsuid;
        complete_all(&mm->core_done);
 fail: