X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fexit.c;h=6cdf60712bd2efdf923238281390e0030b2b185f;hb=2b2a1ff64afbadac842bbc58c5166962cf4f7664;hp=3f2182ccf1875818c22ddc9395591769413fb54c;hpb=762a24beed3f3ab93224bd447710e6c36fcf1968;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/exit.c b/kernel/exit.c index 3f2182c..6cdf607 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -13,12 +13,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -44,16 +46,20 @@ #include #include #include +#include #include #include #include #include -extern void sem_exit (void); - static void exit_mm(struct task_struct * tsk); +static inline int task_detached(struct task_struct *p) +{ + return p->exit_signal == -1; +} + static void __unhash_process(struct task_struct *p) { nr_threads--; @@ -66,7 +72,7 @@ static void __unhash_process(struct task_struct *p) __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); - remove_parent(p); + list_del_init(&p->sibling); } /* @@ -80,7 +86,6 @@ static void __exit_signal(struct task_struct *tsk) BUG_ON(!sig); BUG_ON(!atomic_read(&sig->count)); - rcu_read_lock(); sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); @@ -116,20 +121,36 @@ static void __exit_signal(struct task_struct *tsk) sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); +#ifdef CONFIG_TASK_XACCT + sig->rchar += tsk->rchar; + sig->wchar += tsk->wchar; + sig->syscr += tsk->syscr; + sig->syscw += tsk->syscw; +#endif /* CONFIG_TASK_XACCT */ +#ifdef CONFIG_TASK_IO_ACCOUNTING + sig->ioac.read_bytes += tsk->ioac.read_bytes; + sig->ioac.write_bytes += tsk->ioac.write_bytes; + sig->ioac.cancelled_write_bytes += + tsk->ioac.cancelled_write_bytes; +#endif /* CONFIG_TASK_IO_ACCOUNTING */ sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } __unhash_process(tsk); + /* + * Do this under ->siglock, we can race with another thread + * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. + */ + flush_sigqueue(&tsk->pending); + tsk->signal = NULL; tsk->sighand = NULL; spin_unlock(&sighand->siglock); - rcu_read_unlock(); __cleanup_sighand(sighand); clear_tsk_thread_flag(tsk,TIF_SIGPENDING); - flush_sigqueue(&tsk->pending); if (sig) { flush_sigqueue(&sig->shared_pending); taskstats_tgid_free(sig); @@ -142,15 +163,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(container_of(rhp, struct task_struct, rcu)); } + void release_task(struct task_struct * p) { struct task_struct *leader; int zap_leader; repeat: + tracehook_prepare_release_task(p); atomic_dec(&p->user->processes); + proc_flush_task(p); write_lock_irq(&tasklist_lock); - ptrace_unlink(p); - BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); + tracehook_finish_release_task(p); __exit_signal(p); /* @@ -161,7 +184,7 @@ repeat: zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { - BUG_ON(leader->exit_signal == -1); + BUG_ON(task_detached(leader)); do_notify_parent(leader, leader->exit_signal); /* * If we were the last child thread and the leader has @@ -171,11 +194,17 @@ repeat: * do_notify_parent() will have marked it self-reaping in * that case. 
*/ - zap_leader = (leader->exit_signal == -1); + zap_leader = task_detached(leader); + + /* + * This maintains the invariant that release_task() + * only runs on a task in EXIT_DEAD, just for sanity. + */ + if (zap_leader) + leader->exit_state = EXIT_DEAD; } write_unlock_irq(&tasklist_lock); - proc_flush_task(p); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); @@ -216,20 +245,19 @@ struct pid *session_of_pgrp(struct pid *pgrp) static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) { struct task_struct *p; - int ret = 1; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { - if (p == ignored_task - || p->exit_state - || is_global_init(p->real_parent)) + if ((p == ignored_task) || + (p->exit_state && thread_group_empty(p)) || + is_global_init(p->real_parent)) continue; + if (task_pgrp(p->real_parent) != pgrp && - task_session(p->real_parent) == task_session(p)) { - ret = 0; - break; - } + task_session(p->real_parent) == task_session(p)) + return 0; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); - return ret; /* (sighing) "Often!" */ + + return 1; } int is_current_pgrp_orphaned(void) @@ -249,7 +277,7 @@ static int has_stopped_jobs(struct pid *pgrp) struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { - if (p->state != TASK_STOPPED) + if (!task_is_stopped(p)) continue; retval = 1; break; @@ -257,6 +285,37 @@ static int has_stopped_jobs(struct pid *pgrp) return retval; } +/* + * Check to see if any process groups have become orphaned as + * a result of our exiting, and if they have any stopped jobs, + * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + */ +static void +kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) +{ + struct pid *pgrp = task_pgrp(tsk); + struct task_struct *ignored_task = tsk; + + if (!parent) + /* exit: our father is in a different pgrp than + * we are and we were the only connection outside. + */ + parent = tsk->real_parent; + else + /* reparent: our child is in a different pgrp than + * we are, and it was the only connection outside. 
+ */ + ignored_task = NULL; + + if (task_pgrp(parent) != pgrp && + task_session(parent) == task_session(tsk) && + will_become_orphaned_pgrp(pgrp, ignored_task) && + has_stopped_jobs(pgrp)) { + __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); + __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); + } +} + /** * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd * @@ -275,9 +334,8 @@ static void reparent_to_kthreadd(void) ptrace_unlink(current); /* Reparent to init */ - remove_parent(current); current->real_parent = current->parent = kthreadd_task; - add_parent(current); + list_move_tail(¤t->sibling, ¤t->real_parent->children); /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; @@ -295,26 +353,25 @@ static void reparent_to_kthreadd(void) switch_uid(INIT_USER); } -void __set_special_pids(pid_t session, pid_t pgrp) +void __set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; + pid_t nr = pid_nr(pid); - if (task_session_nr(curr) != session) { - detach_pid(curr, PIDTYPE_SID); - set_task_session(curr, session); - attach_pid(curr, PIDTYPE_SID, find_pid(session)); + if (task_session(curr) != pid) { + change_pid(curr, PIDTYPE_SID, pid); + set_task_session(curr, nr); } - if (task_pgrp_nr(curr) != pgrp) { - detach_pid(curr, PIDTYPE_PGID); - curr->signal->pgrp = pgrp; - attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp)); + if (task_pgrp(curr) != pid) { + change_pid(curr, PIDTYPE_PGID, pid); + set_task_pgrp(curr, nr); } } -static void set_special_pids(pid_t session, pid_t pgrp) +static void set_special_pids(struct pid *pid) { write_lock_irq(&tasklist_lock); - __set_special_pids(session, pgrp); + __set_special_pids(pid); write_unlock_irq(&tasklist_lock); } @@ -383,9 +440,13 @@ void daemonize(const char *name, ...) * We don't want to have TIF_FREEZE set if the system-wide hibernation * or suspend transition begins right now. */ - current->flags |= PF_NOFREEZE; + current->flags |= (PF_NOFREEZE | PF_KTHREAD); - set_special_pids(1, 1); + if (current->nsproxy != &init_nsproxy) { + get_nsproxy(&init_nsproxy); + switch_task_namespaces(current, &init_nsproxy); + } + set_special_pids(&init_struct_pid); proc_clear_tty(current); /* Block and flush all signals */ @@ -400,11 +461,6 @@ void daemonize(const char *name, ...) 
current->fs = fs; atomic_inc(&fs->count); - if (current->nsproxy != init_task.nsproxy) { - get_nsproxy(init_task.nsproxy); - switch_task_namespaces(current, init_task.nsproxy); - } - exit_files(current); current->files = init_task.files; atomic_inc(¤t->files->count); @@ -460,7 +516,7 @@ struct files_struct *get_files_struct(struct task_struct *task) return files; } -void fastcall put_files_struct(struct files_struct *files) +void put_files_struct(struct files_struct *files) { struct fdtable *fdt; @@ -479,10 +535,9 @@ void fastcall put_files_struct(struct files_struct *files) } } -EXPORT_SYMBOL(put_files_struct); - -void reset_files_struct(struct task_struct *tsk, struct files_struct *files) +void reset_files_struct(struct files_struct *files) { + struct task_struct *tsk = current; struct files_struct *old; old = tsk->files; @@ -491,9 +546,8 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files) task_unlock(tsk); put_files_struct(old); } -EXPORT_SYMBOL(reset_files_struct); -static inline void __exit_files(struct task_struct *tsk) +void exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; @@ -505,33 +559,19 @@ static inline void __exit_files(struct task_struct *tsk) } } -void exit_files(struct task_struct *tsk) -{ - __exit_files(tsk); -} - -static inline void __put_fs_struct(struct fs_struct *fs) +void put_fs_struct(struct fs_struct *fs) { /* No need to hold fs->lock if we are killing it */ if (atomic_dec_and_test(&fs->count)) { - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { - dput(fs->altroot); - mntput(fs->altrootmnt); - } + path_put(&fs->root); + path_put(&fs->pwd); + if (fs->altroot.dentry) + path_put(&fs->altroot); kmem_cache_free(fs_cachep, fs); } } -void put_fs_struct(struct fs_struct *fs) -{ - __put_fs_struct(fs); -} - -static inline void __exit_fs(struct task_struct *tsk) +void exit_fs(struct task_struct *tsk) { struct fs_struct * fs = tsk->fs; @@ -539,16 +579,93 @@ static inline void __exit_fs(struct task_struct *tsk) task_lock(tsk); tsk->fs = NULL; task_unlock(tsk); - __put_fs_struct(fs); + put_fs_struct(fs); } } -void exit_fs(struct task_struct *tsk) +EXPORT_SYMBOL_GPL(exit_fs); + +#ifdef CONFIG_MM_OWNER +/* + * Task p is exiting and it owned mm, lets find a new owner for it + */ +static inline int +mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) { - __exit_fs(tsk); + /* + * If there are other users of the mm and the owner (us) is exiting + * we need to find a new owner to take on the responsibility. + */ + if (!mm) + return 0; + if (atomic_read(&mm->mm_users) <= 1) + return 0; + if (mm->owner != p) + return 0; + return 1; } -EXPORT_SYMBOL_GPL(exit_fs); +void mm_update_next_owner(struct mm_struct *mm) +{ + struct task_struct *c, *g, *p = current; + +retry: + if (!mm_need_new_owner(mm, p)) + return; + + read_lock(&tasklist_lock); + /* + * Search in the children + */ + list_for_each_entry(c, &p->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search in the siblings + */ + list_for_each_entry(c, &p->parent->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search through everything else. We should not get + * here often + */ + do_each_thread(g, c) { + if (c->mm == mm) + goto assign_new_owner; + } while_each_thread(g, c); + + read_unlock(&tasklist_lock); + return; + +assign_new_owner: + BUG_ON(c == p); + get_task_struct(c); + /* + * The task_lock protects c->mm from changing. 
+ * We always want mm->owner->mm == mm + */ + task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); + if (c->mm != mm) { + task_unlock(c); + put_task_struct(c); + goto retry; + } + cgroup_mm_owner_callbacks(mm->owner, c); + mm->owner = c; + task_unlock(c); + put_task_struct(c); +} +#endif /* CONFIG_MM_OWNER */ /* * Turn us into a lazy TLB process if we @@ -557,26 +674,40 @@ EXPORT_SYMBOL_GPL(exit_fs); static void exit_mm(struct task_struct * tsk) { struct mm_struct *mm = tsk->mm; + struct core_state *core_state; mm_release(tsk, mm); if (!mm) return; /* * Serialize with any possible pending coredump. - * We must hold mmap_sem around checking core_waiters + * We must hold mmap_sem around checking core_state * and clearing tsk->mm. The core-inducing thread - * will increment core_waiters for each thread in the + * will increment ->nr_threads for each thread in the * group with ->mm != NULL. */ down_read(&mm->mmap_sem); - if (mm->core_waiters) { + core_state = mm->core_state; + if (core_state) { + struct core_thread self; up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - if (!--mm->core_waiters) - complete(mm->core_startup_done); - up_write(&mm->mmap_sem); - wait_for_completion(&mm->core_done); + self.task = tsk; + self.next = xchg(&core_state->dumper.next, &self); + /* + * Implies mb(), the result of xchg() must be visible + * to core_state->dumper. + */ + if (atomic_dec_and_test(&core_state->nr_threads)) + complete(&core_state->startup); + + for (;;) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!self.task) /* see coredump_finish() */ + break; + schedule(); + } + __set_task_state(tsk, TASK_RUNNING); down_read(&mm->mmap_sem); } atomic_inc(&mm->mm_count); @@ -589,74 +720,121 @@ static void exit_mm(struct task_struct * tsk) /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); task_unlock(tsk); + mm_update_next_owner(mm); mmput(mm); } -static void -reparent_thread(struct task_struct *p, struct task_struct *father, int traced) +/* + * Return nonzero if @parent's children should reap themselves. + * + * Called with write_lock_irq(&tasklist_lock) held. + */ +static int ignoring_children(struct task_struct *parent) { - if (p->pdeath_signal) - /* We already hold the tasklist_lock here. */ - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + int ret; + struct sighand_struct *psig = parent->sighand; + unsigned long flags; + spin_lock_irqsave(&psig->siglock, flags); + ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || + (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT)); + spin_unlock_irqrestore(&psig->siglock, flags); + return ret; +} - /* Move the child from its dying parent to the new one. */ - if (unlikely(traced)) { - /* Preserve ptrace links if someone else is tracing this child. */ - list_del_init(&p->ptrace_list); - if (p->parent != p->real_parent) - list_add(&p->ptrace_list, &p->real_parent->ptrace_children); - } else { - /* If this child is being traced, then we're the one tracing it - * anyway, so let go of it. +/* + * Detach all tasks we were using ptrace on. + * Any that need to be release_task'd are put on the @dead list. + * + * Called with write_lock(&tasklist_lock) held. 
+ */ +static void ptrace_exit(struct task_struct *parent, struct list_head *dead) +{ + struct task_struct *p, *n; + int ign = -1; + + list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { + __ptrace_unlink(p); + + if (p->exit_state != EXIT_ZOMBIE) + continue; + + /* + * If it's a zombie, our attachedness prevented normal + * parent notification or self-reaping. Do notification + * now if it would have happened earlier. If it should + * reap itself, add it to the @dead list. We can't call + * release_task() here because we already hold tasklist_lock. + * + * If it's our own child, there is no notification to do. + * But if our normal children self-reap, then this child + * was prevented by ptrace and we must reap it now. */ - p->ptrace = 0; - remove_parent(p); - p->parent = p->real_parent; - add_parent(p); + if (!task_detached(p) && thread_group_empty(p)) { + if (!same_thread_group(p->real_parent, parent)) + do_notify_parent(p, p->exit_signal); + else { + if (ign < 0) + ign = ignoring_children(parent); + if (ign) + p->exit_signal = -1; + } + } - if (p->state == TASK_TRACED) { + if (task_detached(p)) { /* - * If it was at a trace stop, turn it into - * a normal stop since it's no longer being - * traced. + * Mark it as in the process of being reaped. */ - ptrace_untrace(p); + p->exit_state = EXIT_DEAD; + list_add(&p->ptrace_entry, dead); } } +} + +/* + * Finish up exit-time ptrace cleanup. + * + * Called without locks. + */ +static void ptrace_exit_finish(struct task_struct *parent, + struct list_head *dead) +{ + struct task_struct *p, *n; + + BUG_ON(!list_empty(&parent->ptraced)); + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } +} + +static void reparent_thread(struct task_struct *p, struct task_struct *father) +{ + if (p->pdeath_signal) + /* We already hold the tasklist_lock here. */ + group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + + list_move_tail(&p->sibling, &p->real_parent->children); /* If this is a threaded reparent there is no need to * notify anyone anything has happened. */ - if (p->real_parent->group_leader == father->group_leader) + if (same_thread_group(p->real_parent, father)) return; /* We don't want people slaying init. */ - if (p->exit_signal != -1) + if (!task_detached(p)) p->exit_signal = SIGCHLD; /* If we'd notified the old parent about this child's death, * also notify the new parent. */ - if (!traced && p->exit_state == EXIT_ZOMBIE && - p->exit_signal != -1 && thread_group_empty(p)) + if (!ptrace_reparented(p) && + p->exit_state == EXIT_ZOMBIE && + !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); - /* - * process group orphan check - * Case ii: Our child is in a different pgrp - * than we are, and it was the only connection - * outside, so the child pgrp is now orphaned. 
- */ - if ((task_pgrp(p) != task_pgrp(father)) && - (task_session(p) == task_session(father))) { - struct pid *pgrp = task_pgrp(p); - - if (will_become_orphaned_pgrp(pgrp, NULL) && - has_stopped_jobs(pgrp)) { - __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); - __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); - } - } + kill_orphaned_pgrp(p, father); } /* @@ -669,12 +847,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) static void forget_original_parent(struct task_struct *father) { struct task_struct *p, *n, *reaper = father; - struct list_head ptrace_dead; - - INIT_LIST_HEAD(&ptrace_dead); + LIST_HEAD(ptrace_dead); write_lock_irq(&tasklist_lock); + /* + * First clean up ptrace if we were using it. + */ + ptrace_exit(father, &ptrace_dead); + do { reaper = next_thread(reaper); if (reaper == father) { @@ -683,87 +864,29 @@ static void forget_original_parent(struct task_struct *father) } } while (reaper->flags & PF_EXITING); - /* - * There are only two places where our children can be: - * - * - in our child list - * - in our ptraced child list - * - * Search them and reparent children. - */ list_for_each_entry_safe(p, n, &father->children, sibling) { - int ptrace; - - ptrace = p->ptrace; - - /* if father isn't the real parent, then ptrace must be enabled */ - BUG_ON(father != p->real_parent && !ptrace); - - if (father == p->real_parent) { - /* reparent with a reaper, real father it's us */ - p->real_parent = reaper; - reparent_thread(p, father, 0); - } else { - /* reparent ptraced task to its real parent */ - __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && - thread_group_empty(p)) - do_notify_parent(p, p->exit_signal); - } - - /* - * if the ptraced child is a zombie with exit_signal == -1 - * we must collect it before we exit, or it will remain - * zombie forever since we prevented it from self-reap itself - * while it was being traced by us, to be able to see it in wait4. - */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) - list_add(&p->ptrace_list, &ptrace_dead); - } - - list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) { p->real_parent = reaper; - reparent_thread(p, father, 1); + if (p->parent == father) { + BUG_ON(p->ptrace); + p->parent = p->real_parent; + } + reparent_thread(p, father); } write_unlock_irq(&tasklist_lock); BUG_ON(!list_empty(&father->children)); - BUG_ON(!list_empty(&father->ptrace_children)); - - list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) { - list_del_init(&p->ptrace_list); - release_task(p); - } + ptrace_exit_finish(father, &ptrace_dead); } /* * Send signals to all our closest relatives so that they know * to properly mourn us.. */ -static void exit_notify(struct task_struct *tsk) +static void exit_notify(struct task_struct *tsk, int group_dead) { - int state; - struct task_struct *t; - struct pid *pgrp; - - if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT) - && !thread_group_empty(tsk)) { - /* - * This occurs when there was a race between our exit - * syscall and a group signal choosing us as the one to - * wake up. It could be that we are the only thread - * alerted to check for pending signals, but another thread - * should be woken now to take the signal since we will not. - * Now we'll wake all the threads in the group just to make - * sure someone gets all the pending signals. 
- */ - spin_lock_irq(&tsk->sighand->siglock); - for (t = next_thread(tsk); t != tsk; t = next_thread(t)) - if (!signal_pending(t) && !(t->flags & PF_EXITING)) - recalc_sigpending_and_wake(t); - spin_unlock_irq(&tsk->sighand->siglock); - } + int signal; + void *cookie; /* * This does two things: @@ -774,27 +897,11 @@ static void exit_notify(struct task_struct *tsk) * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ forget_original_parent(tsk); + exit_task_namespaces(tsk); write_lock_irq(&tasklist_lock); - /* - * Check to see if any process groups have become orphaned - * as a result of our exiting, and if they have any stopped - * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) - * - * Case i: Our father is in a different pgrp than we are - * and we were the only connection outside, so our pgrp - * is about to become orphaned. - */ - t = tsk->real_parent; - - pgrp = task_pgrp(tsk); - if ((task_pgrp(t) != pgrp) && - (task_session(t) == task_session(tsk)) && - will_become_orphaned_pgrp(pgrp, tsk) && - has_stopped_jobs(pgrp)) { - __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); - __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); - } + if (group_dead) + kill_orphaned_pgrp(tsk->group_leader, NULL); /* Let father know we died * @@ -810,29 +917,19 @@ static void exit_notify(struct task_struct *tsk) * we have changed execution domain as these two values started * the same after a fork. */ - if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && - ( tsk->parent_exec_id != t->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id) - && !capable(CAP_KILL)) + if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && + (tsk->parent_exec_id != tsk->real_parent->self_exec_id || + tsk->self_exec_id != tsk->parent_exec_id) && + !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; + signal = tracehook_notify_death(tsk, &cookie, group_dead); + if (signal > 0) + signal = do_notify_parent(tsk, signal); - /* If something other than our normal parent is ptracing us, then - * send it a SIGCHLD instead of honoring exit_signal. exit_signal - * only has special meaning to our real parent. - */ - if (tsk->exit_signal != -1 && thread_group_empty(tsk)) { - int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; - do_notify_parent(tsk, signal); - } else if (tsk->ptrace) { - do_notify_parent(tsk, SIGCHLD); - } - - state = EXIT_ZOMBIE; - if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) - state = EXIT_DEAD; - tsk->exit_state = state; + tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; + /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && tsk->signal->notify_count < 0 && tsk->signal->group_exit_task) @@ -840,8 +937,10 @@ static void exit_notify(struct task_struct *tsk) write_unlock_irq(&tasklist_lock); + tracehook_report_death(tsk, signal, cookie, group_dead); + /* If the process is dead, release it - nobody will wait for it */ - if (state == EXIT_DEAD) + if (signal < 0) release_task(tsk); } @@ -878,10 +977,35 @@ static inline void exit_child_reaper(struct task_struct *tsk) if (likely(tsk->group_leader != task_child_reaper(tsk))) return; - panic("Attempted to kill init!"); + if (tsk->nsproxy->pid_ns == &init_pid_ns) + panic("Attempted to kill init!"); + + /* + * @tsk is the last thread in the 'cgroup-init' and is exiting. + * Terminate all remaining processes in the namespace and reap them + * before exiting @tsk. 
+ * + * Note that @tsk (last thread of cgroup-init) may not necessarily + * be the child-reaper (i.e main thread of cgroup-init) of the + * namespace i.e the child_reaper may have already exited. + * + * Even after a child_reaper exits, we let it inherit orphaned children, + * because, pid_ns->child_reaper remains valid as long as there is + * at least one living sub-thread in the cgroup init. + + * This living sub-thread of the cgroup-init will be notified when + * a child inherited by the 'child-reaper' exits (do_notify_parent() + * uses __group_send_sig_info()). Further, when reaping child processes, + * do_wait() iterates over children of all living sub threads. + + * i.e even though 'child_reaper' thread is listed as the parent of the + * orphaned children, any living sub-thread in the cgroup-init can + * perform the role of the child_reaper. + */ + zap_pid_ns_processes(tsk->nsproxy->pid_ns); } -fastcall NORET_TYPE void do_exit(long code) +NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; int group_dead; @@ -895,10 +1019,7 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(current->ptrace & PT_TRACE_EXIT)) { - current->ptrace_message = code; - ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); - } + tracehook_report_exit(&code); /* * We're taking recursive faults here in do_exit. Safest is to just @@ -923,7 +1044,7 @@ fastcall NORET_TYPE void do_exit(long code) schedule(); } - tsk->flags |= PF_EXITING; + exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against * an exiting task cleaning up the robust pi futexes. @@ -933,7 +1054,7 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), preempt_count()); acct_update_integrals(tsk); @@ -969,8 +1090,8 @@ fastcall NORET_TYPE void do_exit(long code) if (group_dead) acct_process(); exit_sem(tsk); - __exit_files(tsk); - __exit_fs(tsk); + exit_files(tsk); + exit_fs(tsk); check_stack_usage(); exit_thread(); cgroup_exit(tsk, 1); @@ -984,10 +1105,9 @@ fastcall NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); proc_exit_connector(tsk); - exit_task_namespaces(tsk); - exit_notify(tsk); + exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA - mpol_free(tsk->mempolicy); + mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif #ifdef CONFIG_FUTEX @@ -1052,19 +1172,21 @@ asmlinkage long sys_exit(int error_code) NORET_TYPE void do_group_exit(int exit_code) { + struct signal_struct *sig = current->signal; + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ - if (current->signal->flags & SIGNAL_GROUP_EXIT) - exit_code = current->signal->group_exit_code; + if (signal_group_exit(sig)) + exit_code = sig->group_exit_code; else if (!thread_group_empty(current)) { - struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); - if (sig->flags & SIGNAL_GROUP_EXIT) + if (signal_group_exit(sig)) /* Another thread got here before we took the lock. 
*/ exit_code = sig->group_exit_code; else { sig->group_exit_code = exit_code; + sig->flags = SIGNAL_GROUP_EXIT; zap_other_threads(current); } spin_unlock_irq(&sighand->siglock); @@ -1084,28 +1206,26 @@ asmlinkage void sys_exit_group(int error_code) do_group_exit((error_code & 0xff) << 8); } -static int eligible_child(pid_t pid, int options, struct task_struct *p) +static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) +{ + struct pid *pid = NULL; + if (type == PIDTYPE_PID) + pid = task->pids[type].pid; + else if (type < PIDTYPE_MAX) + pid = task->group_leader->pids[type].pid; + return pid; +} + +static int eligible_child(enum pid_type type, struct pid *pid, int options, + struct task_struct *p) { int err; - if (pid > 0) { - if (p->pid != pid) - return 0; - } else if (!pid) { - if (task_pgrp_nr(p) != task_pgrp_nr(current)) - return 0; - } else if (pid != -1) { - if (task_pgrp_nr(p) != -pid) + if (type < PIDTYPE_MAX) { + if (task_pid_type(p, type) != pid) return 0; } - /* - * Do not consider detached threads that are - * not ptraced: - */ - if (p->exit_signal == -1 && !p->ptrace) - return 0; - /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1114,12 +1234,6 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p) if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) && !(options & __WALL)) return 0; - /* - * Do not consider thread group leaders that are - * in a non-empty thread group: - */ - if (delay_group_leader(p)) - return 2; err = security_task_wait(p); if (err) @@ -1159,23 +1273,22 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. 
*/ -static int wait_task_zombie(struct task_struct *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { unsigned long state; int retval, status, traced; + pid_t pid = task_pid_vnr(p); - if (unlikely(noreap)) { - pid_t pid = p->pid; + if (!likely(options & WEXITED)) + return 0; + + if (unlikely(options & WNOWAIT)) { uid_t uid = p->uid; int exit_code = p->exit_code; int why, status; - if (unlikely(p->exit_state != EXIT_ZOMBIE)) - return 0; - if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) - return 0; get_task_struct(p); read_unlock(&tasklist_lock); if ((exit_code & 0x7f) == 0) { @@ -1199,8 +1312,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap, return 0; } - /* traced means p->ptrace, but not vice versa */ - traced = (p->real_parent != p->parent); + traced = ptrace_reparented(p); if (likely(!traced)) { struct signal_struct *psig; @@ -1253,6 +1365,21 @@ static int wait_task_zombie(struct task_struct *p, int noreap, psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; +#ifdef CONFIG_TASK_XACCT + psig->rchar += p->rchar + sig->rchar; + psig->wchar += p->wchar + sig->wchar; + psig->syscr += p->syscr + sig->syscr; + psig->syscw += p->syscw + sig->syscw; +#endif /* CONFIG_TASK_XACCT */ +#ifdef CONFIG_TASK_IO_ACCOUNTING + psig->ioac.read_bytes += + p->ioac.read_bytes + sig->ioac.read_bytes; + psig->ioac.write_bytes += + p->ioac.write_bytes + sig->ioac.write_bytes; + psig->ioac.cancelled_write_bytes += + p->ioac.cancelled_write_bytes + + sig->ioac.cancelled_write_bytes; +#endif /* CONFIG_TASK_IO_ACCOUNTING */ spin_unlock_irq(&p->parent->sighand->siglock); } @@ -1286,11 +1413,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap, retval = put_user(status, &infop->si_status); } if (!retval && infop) - retval = put_user(p->pid, &infop->si_pid); + retval = put_user(pid, &infop->si_pid); if (!retval && infop) retval = put_user(p->uid, &infop->si_uid); if (!retval) - retval = p->pid; + retval = pid; if (traced) { write_lock_irq(&tasklist_lock); @@ -1301,9 +1428,9 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * If it's still not detached after that, don't release * it now. */ - if (p->exit_signal != -1) { + if (!task_detached(p)) { do_notify_parent(p, p->exit_signal); - if (p->exit_signal != -1) { + if (!task_detached(p)) { p->exit_state = EXIT_ZOMBIE; p = NULL; } @@ -1322,20 +1449,41 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, - int noreap, struct siginfo __user *infop, +static int wait_task_stopped(int ptrace, struct task_struct *p, + int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { - int retval, exit_code; + int retval, exit_code, why; + uid_t uid = 0; /* unneeded, required by compiler */ + pid_t pid; - if (!p->exit_code) + if (!(options & WUNTRACED)) return 0; - if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && - p->signal->group_stop_count > 0) + + exit_code = 0; + spin_lock_irq(&p->sighand->siglock); + + if (unlikely(!task_is_stopped_or_traced(p))) + goto unlock_sig; + + if (!ptrace && p->signal->group_stop_count > 0) /* * A group stop is in progress and this is the group leader. * We won't report until all threads have stopped. 
*/ + goto unlock_sig; + + exit_code = p->exit_code; + if (!exit_code) + goto unlock_sig; + + if (!unlikely(options & WNOWAIT)) + p->exit_code = 0; + + uid = p->uid; +unlock_sig: + spin_unlock_irq(&p->sighand->siglock); + if (!exit_code) return 0; /* @@ -1346,65 +1494,14 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, * possibly take page faults for user memory. */ get_task_struct(p); + pid = task_pid_vnr(p); + why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); - if (unlikely(noreap)) { - pid_t pid = p->pid; - uid_t uid = p->uid; - int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; - - exit_code = p->exit_code; - if (unlikely(!exit_code) || - unlikely(p->state & TASK_TRACED)) - goto bail_ref; + if (unlikely(options & WNOWAIT)) return wait_noreap_copyout(p, pid, uid, - why, (exit_code << 8) | 0x7f, + why, exit_code, infop, ru); - } - - write_lock_irq(&tasklist_lock); - - /* - * This uses xchg to be atomic with the thread resuming and setting - * it. It must also be done with the write lock held to prevent a - * race with the EXIT_ZOMBIE case. - */ - exit_code = xchg(&p->exit_code, 0); - if (unlikely(p->exit_state)) { - /* - * The task resumed and then died. Let the next iteration - * catch it in EXIT_ZOMBIE. Note that exit_code might - * already be zero here if it resumed and did _exit(0). - * The task itself is dead and won't touch exit_code again; - * other processors in this function are locked out. - */ - p->exit_code = exit_code; - exit_code = 0; - } - if (unlikely(exit_code == 0)) { - /* - * Another thread in this function got to it first, or it - * resumed, or it resumed and then died. - */ - write_unlock_irq(&tasklist_lock); -bail_ref: - put_task_struct(p); - /* - * We are returning to the wait loop without having successfully - * removed the process and having released the lock. We cannot - * continue, since the "p" task pointer is potentially stale. - * - * Return -EAGAIN, and do_wait() will restart the loop from the - * beginning. Do _not_ re-acquire the lock. - */ - return -EAGAIN; - } - - /* move to end of parent's list to avoid starvation */ - remove_parent(p); - add_parent(p); - - write_unlock_irq(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) @@ -1414,17 +1511,15 @@ bail_ref: if (!retval && infop) retval = put_user(0, &infop->si_errno); if (!retval && infop) - retval = put_user((short)((p->ptrace & PT_PTRACED) - ? CLD_TRAPPED : CLD_STOPPED), - &infop->si_code); + retval = put_user((short)why, &infop->si_code); if (!retval && infop) retval = put_user(exit_code, &infop->si_status); if (!retval && infop) - retval = put_user(p->pid, &infop->si_pid); + retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->uid, &infop->si_uid); + retval = put_user(uid, &infop->si_uid); if (!retval) - retval = p->pid; + retval = pid; put_task_struct(p); BUG_ON(!retval); @@ -1437,7 +1532,7 @@ bail_ref: * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. 
*/ -static int wait_task_continued(struct task_struct *p, int noreap, +static int wait_task_continued(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1445,6 +1540,9 @@ static int wait_task_continued(struct task_struct *p, int noreap, pid_t pid; uid_t uid; + if (!unlikely(options & WCONTINUED)) + return 0; + if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; @@ -1454,11 +1552,11 @@ static int wait_task_continued(struct task_struct *p, int noreap, spin_unlock_irq(&p->sighand->siglock); return 0; } - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; spin_unlock_irq(&p->sighand->siglock); - pid = p->pid; + pid = task_pid_vnr(p); uid = p->uid; get_task_struct(p); read_unlock(&tasklist_lock); @@ -1469,7 +1567,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, if (!retval && stat_addr) retval = put_user(0xffff, stat_addr); if (!retval) - retval = p->pid; + retval = pid; } else { retval = wait_noreap_copyout(p, pid, uid, CLD_CONTINUED, SIGCONT, @@ -1480,162 +1578,182 @@ static int wait_task_continued(struct task_struct *p, int noreap, return retval; } - -static inline int my_ptrace_child(struct task_struct *p) +/* + * Consider @p for a wait by @parent. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; + * then *@notask_error is 0 if @p is an eligible child, + * or another error from security_task_wait(), or still -ECHILD. + */ +static int wait_consider_task(struct task_struct *parent, int ptrace, + struct task_struct *p, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *ru) { - if (!(p->ptrace & PT_PTRACED)) + int ret = eligible_child(type, pid, options, p); + if (!ret) + return ret; + + if (unlikely(ret < 0)) { + /* + * If we have not yet seen any eligible child, + * then let this error code replace -ECHILD. + * A permission error will give the user a clue + * to look for security policy problems, rather + * than for mysterious wait bugs. + */ + if (*notask_error) + *notask_error = ret; + } + + if (likely(!ptrace) && unlikely(p->ptrace)) { + /* + * This child is hidden by ptrace. + * We aren't allowed to see it now, but eventually we will. + */ + *notask_error = 0; return 0; - if (!(p->ptrace & PT_ATTACHED)) - return 1; + } + + if (p->exit_state == EXIT_DEAD) + return 0; + + /* + * We don't reap group leaders with subthreads. + */ + if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) + return wait_task_zombie(p, options, infop, stat_addr, ru); + + /* + * It's stopped or running now, so it might + * later continue, exit, or stop again. + */ + *notask_error = 0; + + if (task_is_stopped_or_traced(p)) + return wait_task_stopped(ptrace, p, options, + infop, stat_addr, ru); + + return wait_task_continued(p, options, infop, stat_addr, ru); +} + +/* + * Do the work of do_wait() for one thread in the group, @tsk. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; then + * *@notask_error is 0 if there were any eligible children, + * or another error from security_task_wait(), or still -ECHILD. 
+ */ +static int do_wait_thread(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + list_for_each_entry(p, &tsk->children, sibling) { + /* + * Do not consider detached threads. + */ + if (!task_detached(p)) { + int ret = wait_consider_task(tsk, 0, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } + } + + return 0; +} + +static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + /* - * This child was PTRACE_ATTACH'd. We should be seeing it only if - * we are the attacher. If we are the real parent, this is a race - * inside ptrace_attach. It is waiting for the tasklist_lock, - * which we have to switch the parent links, but has already set - * the flags in p->ptrace. + * Traditionally we see ptrace'd stopped tasks regardless of options. */ - return (p->parent != p->real_parent); + options |= WUNTRACED; + + list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { + int ret = wait_consider_task(tsk, 1, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } + + return 0; } -static long do_wait(pid_t pid, int options, struct siginfo __user *infop, - int __user *stat_addr, struct rusage __user *ru) +static long do_wait(enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) { DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; - int flag, retval; - int allowed, denied; + int retval; add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: /* - * We will set this flag if we see any child that might later + * If there is nothing that can match our critiera just get out. + * We will clear @retval to zero if we see any child that might later * match our criteria, even if we are not able to reap it yet. */ - flag = 0; - allowed = denied = 0; + retval = -ECHILD; + if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) + goto end; + current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; do { - struct task_struct *p; - int ret; - - list_for_each_entry(p, &tsk->children, sibling) { - ret = eligible_child(pid, options, p); - if (!ret) - continue; - - if (unlikely(ret < 0)) { - denied = ret; - continue; - } - allowed = 1; - - switch (p->state) { - case TASK_TRACED: - /* - * When we hit the race with PTRACE_ATTACH, - * we will not report this child. But the - * race means it has not yet been moved to - * our ptrace_children list, so we need to - * set the flag here to avoid a spurious ECHILD - * when the race happens with the only child. - */ - flag = 1; - if (!my_ptrace_child(p)) - continue; - /*FALLTHROUGH*/ - case TASK_STOPPED: - /* - * It's stopped now, so it might later - * continue, exit, or stop again. - */ - flag = 1; - if (!(options & WUNTRACED) && - !my_ptrace_child(p)) - continue; - retval = wait_task_stopped(p, ret == 2, - (options & WNOWAIT), - infop, - stat_addr, ru); - if (retval == -EAGAIN) - goto repeat; - if (retval != 0) /* He released the lock. 
*/ - goto end; - break; - default: - // case EXIT_DEAD: - if (p->exit_state == EXIT_DEAD) - continue; - // case EXIT_ZOMBIE: - if (p->exit_state == EXIT_ZOMBIE) { - /* - * Eligible but we cannot release - * it yet: - */ - if (ret == 2) - goto check_continued; - if (!likely(options & WEXITED)) - continue; - retval = wait_task_zombie( - p, (options & WNOWAIT), + int tsk_result = do_wait_thread(tsk, &retval, + type, pid, options, infop, stat_addr, ru); - /* He released the lock. */ - if (retval != 0) - goto end; - break; - } -check_continued: - /* - * It's running now, so it might later - * exit, stop, or stop and then continue. - */ - flag = 1; - if (!unlikely(options & WCONTINUED)) - continue; - retval = wait_task_continued( - p, (options & WNOWAIT), - infop, stat_addr, ru); - if (retval != 0) /* He released the lock. */ - goto end; - break; - } - } - if (!flag) { - list_for_each_entry(p, &tsk->ptrace_children, - ptrace_list) { - if (!eligible_child(pid, options, p)) - continue; - flag = 1; - break; - } + if (!tsk_result) + tsk_result = ptrace_do_wait(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (tsk_result) { + /* + * tasklist_lock is unlocked and we have a final result. + */ + retval = tsk_result; + goto end; } + if (options & __WNOTHREAD) break; tsk = next_thread(tsk); BUG_ON(tsk->signal != current->signal); } while (tsk != current); - read_unlock(&tasklist_lock); - if (flag) { - retval = 0; - if (options & WNOHANG) - goto end; + + if (!retval && !(options & WNOHANG)) { retval = -ERESTARTSYS; - if (signal_pending(current)) - goto end; - schedule(); - goto repeat; + if (!signal_pending(current)) { + schedule(); + goto repeat; + } } - retval = -ECHILD; - if (unlikely(denied) && !allowed) - retval = denied; + end: current->state = TASK_RUNNING; remove_wait_queue(¤t->signal->wait_chldexit,&wait); if (infop) { if (retval > 0) - retval = 0; + retval = 0; else { /* * For a WNOHANG return, clear out all the fields @@ -1659,10 +1777,12 @@ end: return retval; } -asmlinkage long sys_waitid(int which, pid_t pid, +asmlinkage long sys_waitid(int which, pid_t upid, struct siginfo __user *infop, int options, struct rusage __user *ru) { + struct pid *pid = NULL; + enum pid_type type; long ret; if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) @@ -1672,40 +1792,61 @@ asmlinkage long sys_waitid(int which, pid_t pid, switch (which) { case P_ALL: - pid = -1; + type = PIDTYPE_MAX; break; case P_PID: - if (pid <= 0) + type = PIDTYPE_PID; + if (upid <= 0) return -EINVAL; break; case P_PGID: - if (pid <= 0) + type = PIDTYPE_PGID; + if (upid <= 0) return -EINVAL; - pid = -pid; break; default: return -EINVAL; } - ret = do_wait(pid, options, infop, NULL, ru); + if (type < PIDTYPE_MAX) + pid = find_get_pid(upid); + ret = do_wait(type, pid, options, infop, NULL, ru); + put_pid(pid); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(5, ret, which, upid, infop, options, ru); return ret; } -asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, +asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, int options, struct rusage __user *ru) { + struct pid *pid = NULL; + enum pid_type type; long ret; if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; - ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru); + + if (upid == -1) + type = PIDTYPE_MAX; + else if (upid < 0) { + type = PIDTYPE_PGID; + pid = find_get_pid(-upid); + } else if (upid == 0) { + type = PIDTYPE_PGID; + pid = 
get_pid(task_pgrp(current)); + } else /* upid > 0 */ { + type = PIDTYPE_PID; + pid = find_get_pid(upid); + } + + ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); + put_pid(pid); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(4, ret, upid, stat_addr, options, ru); return ret; }
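
The detached-task bookkeeping this patch introduces (task_detached(), ignoring_children(), and the exit_signal = -1 marking in ptrace_exit()) has a directly observable userspace counterpart: when a parent ignores SIGCHLD, its children never linger as zombies and a subsequent wait reports ECHILD. A minimal sketch using only standard POSIX calls; the sleep() is a crude stand-in for real synchronization:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	/* With SIGCHLD ignored (or SA_NOCLDWAIT set), exiting children
	 * are marked detached and release_task() reaps them without any
	 * wait from us -- the case ignoring_children() tests for. */
	signal(SIGCHLD, SIG_IGN);

	pid_t pid = fork();
	if (pid == 0)
		_exit(0);

	sleep(1);	/* give the child time to exit and self-reap */

	if (waitpid(pid, NULL, 0) < 0 && errno == ECHILD)
		printf("child %d self-reaped; nothing to wait for\n", (int)pid);
	return 0;
}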
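
kill_orphaned_pgrp() centralizes the POSIX 3.2.2.2 rule that exit_notify() and reparent_thread() previously open-coded: if an exit orphans a process group that contains a stopped job, the group gets SIGHUP then SIGCONT. The rule can be triggered from userspace. A sketch, assuming it is run from a job-control shell (so parent and child share a session) and using sleep() as crude ordering:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void on_hup(int sig)
{
	(void)sig;
	write(1, "child: got SIGHUP\n", 18);	/* async-signal-safe */
}

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		struct sigaction sa = { .sa_handler = on_hup };
		sigemptyset(&sa.sa_mask);
		sigaction(SIGHUP, &sa, NULL);
		setpgid(0, 0);		/* our own process group, same session */
		raise(SIGSTOP);		/* a stopped job in that group */
		printf("child: resumed by SIGCONT\n");
		return 0;
	}

	sleep(1);	/* let the child change pgrp and stop first */
	return 0;	/* our exit orphans the child's group: it is sent
			 * SIGHUP and then SIGCONT, exactly the
			 * kill_orphaned_pgrp() path above */
}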
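
For orientation, daemonize() is the kernel-internal cousin of the classic userspace daemon ritual: set_special_pids(&init_struct_pid) and proc_clear_tty() detach the kernel thread from any session, process group and controlling tty, much as setsid() does for a process. A loose userspace analogue only; none of these calls are what the kernel path actually executes:

#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	if (fork() > 0)
		exit(0);	/* parent exits; we get reparented */

	setsid();		/* new session, new pgrp, no controlling tty */

	if (fork() > 0)
		exit(0);	/* drop session leadership so we can never
				 * reacquire a controlling tty */

	(void)chdir("/");

	int fd = open("/dev/null", O_RDWR);
	if (fd >= 0) {
		dup2(fd, STDIN_FILENO);
		dup2(fd, STDOUT_FILENO);
		dup2(fd, STDERR_FILENO);
		if (fd > STDERR_FILENO)
			close(fd);
	}

	/* daemon body would run here */
	return 0;
}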
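
The coredump synchronization in the new exit_mm() registers each exiting thread on core_state->dumper with a single xchg() and no lock: swap yourself in as the list head, then link to the old head. That is safe only because the dumper walks the list after nr_threads has dropped to zero. A userspace sketch of the same push using C11 atomics (compile with -pthread; the names are illustrative, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct core_thread {
	struct core_thread *next;
	int id;
};

static _Atomic(struct core_thread *) dumper;

static void *exiting_thread(void *arg)
{
	struct core_thread *self = arg;

	/* Swap ourselves in as the new head, then link to the old one.
	 * Between the two steps the list is transiently broken, which is
	 * fine: nobody traverses it until every pusher has finished, just
	 * as the kernel's dumper waits for core_state->nr_threads to hit
	 * zero.  atomic_exchange() is a full barrier, like xchg(). */
	self->next = atomic_exchange(&dumper, self);
	return NULL;
}

int main(void)
{
	struct core_thread nodes[4];
	pthread_t tids[4];

	for (int i = 0; i < 4; i++) {
		nodes[i].id = i;
		pthread_create(&tids[i], NULL, exiting_thread, &nodes[i]);
	}
	for (int i = 0; i < 4; i++)
		pthread_join(tids[i], NULL);

	for (struct core_thread *p = atomic_load(&dumper); p; p = p->next)
		printf("thread %d registered\n", p->id);
	return 0;
}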
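
reparent_thread() is also the point where a child's pdeath_signal fires when its parent dies. From userspace that is the Linux-specific prctl(PR_SET_PDEATHSIG, ...); a small demo:

#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

static void on_term(int sig)
{
	(void)sig;
	write(1, "child: parent died\n", 19);
	_exit(0);
}

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		struct sigaction sa = { .sa_handler = on_term };
		sigemptyset(&sa.sa_mask);
		sigaction(SIGTERM, &sa, NULL);
		prctl(PR_SET_PDEATHSIG, SIGTERM);
		if (getppid() == 1)	/* parent already exited? */
			_exit(1);
		for (;;)
			pause();
	}

	sleep(1);
	return 0;	/* our exit makes reparent_thread() deliver
			 * SIGTERM to the child */
}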
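
wait_task_zombie() now receives the full options word and implements WNOWAIT itself: report the dead child but leave it a zombie for a later wait. The same semantics seen from userspace:

#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	siginfo_t info;
	pid_t pid = fork();

	if (pid == 0)
		_exit(7);

	/* Peek: the child stays a zombie and can be waited on again. */
	waitid(P_PID, pid, &info, WEXITED | WNOWAIT);
	printf("peeked: pid %d, status %d\n", (int)info.si_pid, info.si_status);

	/* Reap: this time the zombie is released. */
	waitid(P_PID, pid, &info, WEXITED);
	printf("reaped: pid %d, status %d\n", (int)info.si_pid, info.si_status);
	return 0;
}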
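
Likewise wait_task_stopped() and wait_task_continued() now check WUNTRACED and WCONTINUED themselves instead of relying on the caller's pre-filtering. All three reportable states in one userspace run; the child's sleep() is a crude way to keep the continued-report from racing with its exit:

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0) {
		raise(SIGSTOP);	/* stop ourselves */
		sleep(1);	/* stay alive so the continue is observed */
		_exit(3);
	}

	waitpid(pid, &status, WUNTRACED);
	if (WIFSTOPPED(status))
		printf("stopped by signal %d\n", WSTOPSIG(status));

	kill(pid, SIGCONT);
	waitpid(pid, &status, WCONTINUED);
	if (WIFCONTINUED(status))
		printf("continued\n");

	waitpid(pid, &status, 0);
	if (WIFEXITED(status))
		printf("exited with %d\n", WEXITSTATUS(status));
	return 0;
}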
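
The reworked sys_wait4() maps the traditional pid argument onto a (type, struct pid) pair up front. A userspace mirror of just that decoding; decode_upid, struct target and the enum are illustrative names, not kernel symbols:

#include <stdio.h>
#include <unistd.h>

enum which { WAIT_ANY_CHILD, WAIT_PGRP, WAIT_ONE };	/* ~ PIDTYPE_MAX/PGID/PID */
struct target { enum which which; pid_t pid; };

static struct target decode_upid(pid_t upid)
{
	struct target t;

	if (upid == -1) {		/* any child */
		t.which = WAIT_ANY_CHILD;
		t.pid = 0;
	} else if (upid < 0) {		/* the process group -upid */
		t.which = WAIT_PGRP;
		t.pid = -upid;
	} else if (upid == 0) {		/* the caller's own process group */
		t.which = WAIT_PGRP;
		t.pid = getpgrp();
	} else {			/* exactly the process upid */
		t.which = WAIT_ONE;
		t.pid = upid;
	}
	return t;
}

int main(void)
{
	pid_t samples[] = { -1, -42, 0, 1234 };

	for (int i = 0; i < 4; i++) {
		struct target t = decode_upid(samples[i]);
		printf("upid %5d -> which=%d, pid=%d\n",
		       (int)samples[i], (int)t.which, (int)t.pid);
	}
	return 0;
}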