tracehook: death

[safe/jmp/linux-2.6] / kernel / exit.c
diff --git a/kernel/exit.c b/kernel/exit.c

index 1e90982..6cdf607 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -46,6 +46,7 @@
  #include <linux/resource.h>
  #include <linux/blkdev.h>
  #include <linux/task_io_accounting_ops.h>
+#include <linux/tracehook.h>
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
@@ -85,7 +86,6 @@ static void __exit_signal(struct task_struct *tsk)
         BUG_ON(!sig);
         BUG_ON(!atomic_read(&sig->count));
  
-       rcu_read_lock();
         sighand = rcu_dereference(tsk->sighand);
         spin_lock(&sighand->siglock);
  
@@ -121,6 +121,18 @@ static void __exit_signal(struct task_struct *tsk)
                 sig->nivcsw += tsk->nivcsw;
                 sig->inblock += task_io_get_inblock(tsk);
                 sig->oublock += task_io_get_oublock(tsk);
+#ifdef CONFIG_TASK_XACCT
+               sig->rchar += tsk->rchar;
+               sig->wchar += tsk->wchar;
+               sig->syscr += tsk->syscr;
+               sig->syscw += tsk->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+               sig->ioac.read_bytes += tsk->ioac.read_bytes;
+               sig->ioac.write_bytes += tsk->ioac.write_bytes;
+               sig->ioac.cancelled_write_bytes +=
+                                       tsk->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
                 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                 sig = NULL; /* Marker for below. */
         }
@@ -136,7 +148,6 @@ static void __exit_signal(struct task_struct *tsk)
         tsk->signal = NULL;
         tsk->sighand = NULL;
         spin_unlock(&sighand->siglock);
-       rcu_read_unlock();
  
         __cleanup_sighand(sighand);
         clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
@@ -152,27 +163,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
         put_task_struct(container_of(rhp, struct task_struct, rcu));
  }
  
-/*
- * Do final ptrace-related cleanup of a zombie being reaped.
- *
- * Called with write_lock(&tasklist_lock) held.
- */
-static void ptrace_release_task(struct task_struct *p)
-{
-       BUG_ON(!list_empty(&p->ptraced));
-       ptrace_unlink(p);
-       BUG_ON(!list_empty(&p->ptrace_entry));
-}
  
  void release_task(struct task_struct * p)
  {
         struct task_struct *leader;
         int zap_leader;
  repeat:
+       tracehook_prepare_release_task(p);
         atomic_dec(&p->user->processes);
         proc_flush_task(p);
         write_lock_irq(&tasklist_lock);
-       ptrace_release_task(p);
+       tracehook_finish_release_task(p);
         __exit_signal(p);
  
         /*
@@ -194,6 +195,13 @@ repeat:
                  * that case.
                  */
                 zap_leader = task_detached(leader);
+
+               /*
+                * This maintains the invariant that release_task()
+                * only runs on a task in EXIT_DEAD, just for sanity.
+                */
+               if (zap_leader)
+                       leader->exit_state = EXIT_DEAD;
         }
  
         write_unlock_irq(&tasklist_lock);
@@ -432,7 +440,7 @@ void daemonize(const char *name, ...)
          * We don't want to have TIF_FREEZE set if the system-wide hibernation
          * or suspend transition begins right now.
          */
-       current->flags |= PF_NOFREEZE;
+       current->flags |= (PF_NOFREEZE | PF_KTHREAD);
  
         if (current->nsproxy != &init_nsproxy) {
                 get_nsproxy(&init_nsproxy);
@@ -666,26 +674,40 @@ assign_new_owner:
  static void exit_mm(struct task_struct * tsk)
  {
         struct mm_struct *mm = tsk->mm;
+       struct core_state *core_state;
  
         mm_release(tsk, mm);
         if (!mm)
                 return;
         /*
          * Serialize with any possible pending coredump.
-        * We must hold mmap_sem around checking core_waiters
+        * We must hold mmap_sem around checking core_state
          * and clearing tsk->mm.  The core-inducing thread
-        * will increment core_waiters for each thread in the
+        * will increment ->nr_threads for each thread in the
          * group with ->mm != NULL.
          */
         down_read(&mm->mmap_sem);
-       if (mm->core_waiters) {
+       core_state = mm->core_state;
+       if (core_state) {
+               struct core_thread self;
                 up_read(&mm->mmap_sem);
-               down_write(&mm->mmap_sem);
-               if (!--mm->core_waiters)
-                       complete(mm->core_startup_done);
-               up_write(&mm->mmap_sem);
  
-               wait_for_completion(&mm->core_done);
+               self.task = tsk;
+               self.next = xchg(&core_state->dumper.next, &self);
+               /*
+                * Implies mb(), the result of xchg() must be visible
+                * to core_state->dumper.
+                */
+               if (atomic_dec_and_test(&core_state->nr_threads))
+                       complete(&core_state->startup);
+
+               for (;;) {
+                       set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+                       if (!self.task) /* see coredump_finish() */
+                               break;
+                       schedule();
+               }
+               __set_task_state(tsk, TASK_RUNNING);
                 down_read(&mm->mmap_sem);
         }
         atomic_inc(&mm->mm_count);
@@ -703,6 +725,23 @@ static void exit_mm(struct task_struct * tsk)
  }
  
  /*
+ * Return nonzero if @parent's children should reap themselves, that is if
+ * @parent's SIGCHLD action is SIG_IGN or has SA_NOCLDWAIT set; zero if not.
+ * Called with write_lock_irq(&tasklist_lock) held.
+ */
+static int ignoring_children(struct task_struct *parent)
+{
+       int ret;
+       struct sighand_struct *psig = parent->sighand;
+       unsigned long flags; /* IRQ state saved/restored around siglock */
+       spin_lock_irqsave(&psig->siglock, flags); /* guards action[] read */
+       ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
+              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
+       spin_unlock_irqrestore(&psig->siglock, flags);
+       return ret; /* nonzero: SIGCHLD ignored or SA_NOCLDWAIT set */
+}
+
+/*
   * Detach all tasks we were using ptrace on.
   * Any that need to be release_task'd are put on the @dead list.
   *
@@ -711,6 +750,7 @@ static void exit_mm(struct task_struct * tsk)
  static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
  {
         struct task_struct *p, *n;
+       int ign = -1;
  
         list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
                 __ptrace_unlink(p);
@@ -726,10 +766,18 @@ static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
                  * release_task() here because we already hold tasklist_lock.
                  *
                  * If it's our own child, there is no notification to do.
+                * But if our normal children self-reap, then this child
+                * was prevented by ptrace and we must reap it now.
                  */
                 if (!task_detached(p) && thread_group_empty(p)) {
                         if (!same_thread_group(p->real_parent, parent))
                                 do_notify_parent(p, p->exit_signal);
+                       else {
+                               if (ign < 0)
+                                       ign = ignoring_children(parent);
+                               if (ign)
+                                       p->exit_signal = -1;
+                       }
                 }
  
                 if (task_detached(p)) {
@@ -837,7 +885,8 @@ static void forget_original_parent(struct task_struct *father)
   */
  static void exit_notify(struct task_struct *tsk, int group_dead)
  {
-       int state;
+       int signal;
+       void *cookie;
  
         /*
          * This does two things:
@@ -874,22 +923,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
             !capable(CAP_KILL))
                 tsk->exit_signal = SIGCHLD;
  
-       /* If something other than our normal parent is ptracing us, then
-        * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
-        * only has special meaning to our real parent.
-        */
-       if (!task_detached(tsk) && thread_group_empty(tsk)) {
-               int signal = ptrace_reparented(tsk) ?
-                               SIGCHLD : tsk->exit_signal;
-               do_notify_parent(tsk, signal);
-       } else if (tsk->ptrace) {
-               do_notify_parent(tsk, SIGCHLD);
-       }
+       signal = tracehook_notify_death(tsk, &cookie, group_dead);
+       if (signal > 0)
+               signal = do_notify_parent(tsk, signal);
  
-       state = EXIT_ZOMBIE;
-       if (task_detached(tsk) && likely(!tsk->ptrace))
-               state = EXIT_DEAD;
-       tsk->exit_state = state;
+       tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE;
  
         /* mt-exec, de_thread() is waiting for us */
         if (thread_group_leader(tsk) &&
@@ -899,8 +937,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
  
         write_unlock_irq(&tasklist_lock);
  
+       tracehook_report_death(tsk, signal, cookie, group_dead);
+
         /* If the process is dead, release it - nobody will wait for it */
-       if (state == EXIT_DEAD)
+       if (signal < 0)
                 release_task(tsk);
  }
  
@@ -979,10 +1019,7 @@ NORET_TYPE void do_exit(long code)
         if (unlikely(!tsk->pid))
                 panic("Attempted to kill the idle task!");
  
-       if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
-               current->ptrace_message = code;
-               ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
-       }
+       tracehook_report_exit(&code);
  
         /*
          * We're taking recursive faults here in do_exit. Safest is to just
@@ -1199,14 +1236,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
                 return 0;
  
         err = security_task_wait(p);
-       if (likely(!err))
-               return 1;
+       if (err)
+               return err;
  
-       if (type != PIDTYPE_PID)
-               return 0;
-       /* This child was explicitly requested, abort */
-       read_unlock(&tasklist_lock);
-       return err;
+       return 1;
  }
  
  static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
@@ -1332,6 +1365,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 psig->coublock +=
                         task_io_get_oublock(p) +
                         sig->oublock + sig->coublock;
+#ifdef CONFIG_TASK_XACCT
+               psig->rchar += p->rchar + sig->rchar;
+               psig->wchar += p->wchar + sig->wchar;
+               psig->syscr += p->syscr + sig->syscr;
+               psig->syscw += p->syscw + sig->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+               psig->ioac.read_bytes +=
+                       p->ioac.read_bytes + sig->ioac.read_bytes;
+               psig->ioac.write_bytes +=
+                       p->ioac.write_bytes + sig->ioac.write_bytes;
+               psig->ioac.cancelled_write_bytes +=
+                               p->ioac.cancelled_write_bytes +
+                               sig->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
                 spin_unlock_irq(&p->parent->sighand->siglock);
         }
  
@@ -1536,7 +1584,8 @@ static int wait_task_continued(struct task_struct *p, int options,
   * -ECHILD should be in *@notask_error before the first call.
   * Returns nonzero for a final return, when we have unlocked tasklist_lock.
   * Returns zero if the search for a child should continue;
- * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD.
+ * then *@notask_error is 0 if @p is an eligible child,
+ * or another error from security_task_wait(), or still -ECHILD.
   */
  static int wait_consider_task(struct task_struct *parent, int ptrace,
                               struct task_struct *p, int *notask_error,
@@ -1545,9 +1594,21 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
                               int __user *stat_addr, struct rusage __user *ru)
  {
         int ret = eligible_child(type, pid, options, p);
-       if (ret <= 0)
+       if (!ret)
                 return ret;
  
+       if (unlikely(ret < 0)) {
+               /*
+                * If we have not yet seen any eligible child,
+                * then let this error code replace -ECHILD.
+                * A permission error will give the user a clue
+                * to look for security policy problems, rather
+                * than for mysterious wait bugs.
+                */
+               if (*notask_error)
+                       *notask_error = ret;
+       }
+
         if (likely(!ptrace) && unlikely(p->ptrace)) {
                 /*
                  * This child is hidden by ptrace.
@@ -1585,7 +1646,8 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
   * -ECHILD should be in *@notask_error before the first call.
   * Returns nonzero for a final return, when we have unlocked tasklist_lock.
   * Returns zero if the search for a child should continue; then
- * *@notask_error is 0 if there were any eligible children, or still -ECHILD.
+ * *@notask_error is 0 if there were any eligible children,
+ * or another error from security_task_wait(), or still -ECHILD.
   */
  static int do_wait_thread(struct task_struct *tsk, int *notask_error,
                           enum pid_type type, struct pid *pid, int options,