exec: RT sub-thread can livelock and monopolize CPU on exec

[safe/jmp/linux-2.6] / fs / exec.c
diff --git a/fs/exec.c b/fs/exec.c

index 550ae9b..aa470a9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -29,6 +29,7 @@
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/smp_lock.h>
+#include <linux/string.h>
  #include <linux/init.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
@@ -111,9 +112,6 @@ asmlinkage long sys_uselib(const char __user * library)
         if (error)
                 goto out;
  
-       error = -EACCES;
-       if (nd.mnt->mnt_flags & MNT_NOEXEC)
-               goto exit;
         error = -EINVAL;
         if (!S_ISREG(nd.dentry->d_inode->i_mode))
                 goto exit;
@@ -657,8 +655,7 @@ struct file *open_exec(const char *name)
         if (!err) {
                 struct inode *inode = nd.dentry->d_inode;
                 file = ERR_PTR(-EACCES);
-               if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
-                   S_ISREG(inode->i_mode)) {
+               if (S_ISREG(inode->i_mode)) {
                         int err = vfs_permission(&nd, MAY_EXEC);
                         file = ERR_PTR(err);
                         if (!err) {
@@ -750,24 +747,11 @@ static int exec_mmap(struct mm_struct *mm)
  static int de_thread(struct task_struct *tsk)
  {
         struct signal_struct *sig = tsk->signal;
-       struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
+       struct sighand_struct *oldsighand = tsk->sighand;
         spinlock_t *lock = &oldsighand->siglock;
         struct task_struct *leader = NULL;
         int count;
  
-       /*
-        * If we don't share sighandlers, then we aren't sharing anything
-        * and we can just re-use it all.
-        */
-       if (atomic_read(&oldsighand->count) <= 1) {
-               exit_itimers(sig);
-               return 0;
-       }
-
-       newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-       if (!newsighand)
-               return -ENOMEM;
-
         if (thread_group_empty(tsk))
                 goto no_thread_group;
  
@@ -784,7 +768,6 @@ static int de_thread(struct task_struct *tsk)
                  */
                 spin_unlock_irq(lock);
                 read_unlock(&tasklist_lock);
-               kmem_cache_free(sighand_cachep, newsighand);
                 return -EAGAIN;
         }
  
@@ -818,16 +801,15 @@ static int de_thread(struct task_struct *tsk)
                         hrtimer_restart(&sig->real_timer);
                 spin_lock_irq(lock);
         }
+
+       sig->notify_count = count;
+       sig->group_exit_task = tsk;
         while (atomic_read(&sig->count) > count) {
-               sig->group_exit_task = tsk;
-               sig->notify_count = count;
                 __set_current_state(TASK_UNINTERRUPTIBLE);
                 spin_unlock_irq(lock);
                 schedule();
                 spin_lock_irq(lock);
         }
-       sig->group_exit_task = NULL;
-       sig->notify_count = 0;
         spin_unlock_irq(lock);
  
         /*
@@ -836,14 +818,17 @@ static int de_thread(struct task_struct *tsk)
          * and to assume its PID:
          */
         if (!thread_group_leader(tsk)) {
-               /*
-                * Wait for the thread group leader to be a zombie.
-                * It should already be zombie at this point, most
-                * of the time.
-                */
                 leader = tsk->group_leader;
-               while (leader->exit_state != EXIT_ZOMBIE)
-                       yield();
+
+               sig->notify_count = -1;
+               for (;;) {
+                       write_lock_irq(&tasklist_lock);
+                       if (likely(leader->exit_state))
+                               break;
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tasklist_lock);
+                       schedule();
+               }
  
                 /*
                  * The only record we have of the real-time age of a
@@ -857,8 +842,6 @@ static int de_thread(struct task_struct *tsk)
                  */
                 tsk->start_time = leader->start_time;
  
-               write_lock_irq(&tasklist_lock);
-
                 BUG_ON(leader->tgid != tsk->tgid);
                 BUG_ON(tsk->pid == tsk->tgid);
                 /*
@@ -891,6 +874,8 @@ static int de_thread(struct task_struct *tsk)
                 write_unlock_irq(&tasklist_lock);
          }
  
+       sig->group_exit_task = NULL;
+       sig->notify_count = 0;
         /*
          * There may be one thread left which is just exiting,
          * but it's safe to stop telling the group to kill themselves.
@@ -902,29 +887,23 @@ no_thread_group:
         if (leader)
                 release_task(leader);
  
-       if (atomic_read(&oldsighand->count) == 1) {
+       if (atomic_read(&oldsighand->count) != 1) {
+               struct sighand_struct *newsighand;
                 /*
-                * Now that we nuked the rest of the thread group,
-                * it turns out we are not sharing sighand any more either.
-                * So we can just keep it.
-                */
-               kmem_cache_free(sighand_cachep, newsighand);
-       } else {
-               /*
-                * Move our state over to newsighand and switch it in.
+                * This ->sighand is shared with the CLONE_SIGHAND
+                * but not CLONE_THREAD task, switch to the new one.
                  */
+               newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+               if (!newsighand)
+                       return -ENOMEM;
+
                 atomic_set(&newsighand->count, 1);
                 memcpy(newsighand->action, oldsighand->action,
                        sizeof(newsighand->action));
  
                 write_lock_irq(&tasklist_lock);
                 spin_lock(&oldsighand->siglock);
-               spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
-
                 rcu_assign_pointer(tsk->sighand, newsighand);
-               recalc_sigpending();
-
-               spin_unlock(&newsighand->siglock);
                 spin_unlock(&oldsighand->siglock);
                 write_unlock_irq(&tasklist_lock);
  
@@ -934,12 +913,11 @@ no_thread_group:
         BUG_ON(!thread_group_leader(tsk));
         return 0;
  }
-       
+
  /*
   * These functions flushes out all traces of the currently running executable
   * so that a new one can be started
   */
-
  static void flush_old_files(struct files_struct * files)
  {
         long j = -1;
@@ -1514,6 +1492,14 @@ static int format_corename(char *corename, const char *pattern, long signr)
                                         goto out;
                                 out_ptr += rc;
                                 break;
+                       /* core limit size */
+                       case 'c':
+                               rc = snprintf(out_ptr, out_end - out_ptr,
+                                             "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur);
+                               if (rc > out_end - out_ptr)
+                                       goto out;
+                               out_ptr += rc;
+                               break;
                         default:
                                 break;
                         }
@@ -1697,6 +1683,10 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         int fsuid = current->fsuid;
         int flag = 0;
         int ispipe = 0;
+       unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+       char **helper_argv = NULL;
+       int helper_argc = 0;
+       char *delimit;
  
         audit_core_dumps(signr);
  
@@ -1730,9 +1720,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
          */
         clear_thread_flag(TIF_SIGPENDING);
  
-       if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
-               goto fail_unlock;
-
         /*
          * lock_kernel() because format_corename() is controlled by sysctl, which
          * uses lock_kernel()
@@ -1740,9 +1727,39 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         lock_kernel();
         ispipe = format_corename(corename, core_pattern, signr);
         unlock_kernel();
+       /*
+        * Don't bother to check the RLIMIT_CORE value if core_pattern points
+        * to a pipe.  Since we're not writing directly to the filesystem
+        * RLIMIT_CORE doesn't really apply, as no actual core file will be
+        * created unless the pipe reader chooses to write out the core file
+        * at which point file size limits and permissions will be imposed
+        * as it does with any other process
+        */
+       if ((!ispipe) && (core_limit < binfmt->min_coredump))
+               goto fail_unlock;
+
         if (ispipe) {
+               helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
+               /* Terminate the string before the first option */
+               delimit = strchr(corename, ' ');
+               if (delimit)
+                       *delimit = '\0';
+               delimit = strrchr(helper_argv[0], '/');
+               if (delimit)
+                       delimit++;
+               else
+                       delimit = helper_argv[0];
+               if (!strcmp(delimit, current->comm)) {
+                       printk(KERN_NOTICE "Recursive core dump detected, "
+                                       "aborting\n");
+                       goto fail_unlock;
+               }
+
+               core_limit = RLIM_INFINITY;
+
                 /* SIGPIPE can happen, but it's just never processed */
-               if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+               if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+                               &file)) {
                         printk(KERN_INFO "Core dump to %s pipe failed\n",
                                corename);
                         goto fail_unlock;
@@ -1770,13 +1787,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                 goto close_fail;
  
-       retval = binfmt->core_dump(signr, regs, file);
+       retval = binfmt->core_dump(signr, regs, file, core_limit);
  
         if (retval)
                 current->signal->group_exit_code |= 0x80;
  close_fail:
         filp_close(file, NULL);
  fail_unlock:
+       if (helper_argv)
+               argv_free(helper_argv);
+
         current->fsuid = fsuid;
         complete_all(&mm->core_done);
  fail: