X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fexit.c;h=cce59cb5ee6aececf1b663d39a1de3fb15b4b426;hb=449cedf099b23a250e7d61982e35555ccb871182;hp=e47ee8a061355a0843f950cdb00c5232e76e745a;hpb=cdd6c482c9ff9c55475ee7392ec8f672eddb7be6;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/exit.c b/kernel/exit.c index e47ee8a..cce59cb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -67,10 +68,10 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_SID); list_del_rcu(&p->tasks); + list_del_init(&p->sibling); __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); - list_del_init(&p->sibling); } /* @@ -84,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk) BUG_ON(!sig); BUG_ON(!atomic_read(&sig->count)); - sighand = rcu_dereference(tsk->sighand); + sighand = rcu_dereference_check(tsk->sighand, + rcu_read_lock_held() || + lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); @@ -110,9 +113,9 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, task_utime(tsk)); - sig->stime = cputime_add(sig->stime, task_stime(tsk)); - sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); + sig->utime = cputime_add(sig->utime, tsk->utime); + sig->stime = cputime_add(sig->stime, tsk->stime); + sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; @@ -169,8 +172,10 @@ void release_task(struct task_struct * p) repeat: tracehook_prepare_release_task(p); /* don't need to get the RCU readlock here - the process is dead and - * can't be modifying its own credentials */ + * can't be modifying its own credentials. But shut RCU-lockdep up */ + rcu_read_lock(); atomic_dec(&__task_cred(p)->user->processes); + rcu_read_unlock(); proc_flush_task(p); @@ -472,9 +477,11 @@ static void close_files(struct files_struct * files) /* * It is safe to dereference the fd table without RCU or * ->file_lock because this is the last reference to the - * files structure. + * files structure. But use RCU to shut RCU-lockdep up. */ + rcu_read_lock(); fdt = files_fdtable(files); + rcu_read_unlock(); for (;;) { unsigned long set; i = j * __NFDBITS; @@ -520,10 +527,12 @@ void put_files_struct(struct files_struct *files) * at the end of the RCU grace period. Otherwise, * you can free files immediately. */ + rcu_read_lock(); fdt = files_fdtable(files); if (fdt != &files->fdtab) kmem_cache_free(files_cachep, files); free_fdtable(fdt); + rcu_read_unlock(); } } @@ -735,12 +744,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father) /* * Any that need to be release_task'd are put on the @dead list. */ -static void reparent_thread(struct task_struct *father, struct task_struct *p, +static void reparent_leader(struct task_struct *father, struct task_struct *p, struct list_head *dead) { - if (p->pdeath_signal) - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); - list_move_tail(&p->sibling, &p->real_parent->children); if (task_detached(p)) @@ -779,12 +785,18 @@ static void forget_original_parent(struct task_struct *father) reaper = find_new_reaper(father); list_for_each_entry_safe(p, n, &father->children, sibling) { - p->real_parent = reaper; - if (p->parent == father) { - BUG_ON(task_ptrace(p)); - p->parent = p->real_parent; - } - reparent_thread(father, p, &dead_children); + struct task_struct *t = p; + do { + t->real_parent = reaper; + if (t->parent == father) { + BUG_ON(task_ptrace(t)); + t->parent = t->real_parent; + } + if (t->pdeath_signal) + group_send_sig_info(t->pdeath_signal, + SEND_SIG_NOINFO, t); + } while_each_thread(p, t); + reparent_leader(father, p, &dead_children); } write_unlock_irq(&tasklist_lock); @@ -932,7 +944,7 @@ NORET_TYPE void do_exit(long code) * an exiting task cleaning up the robust pi futexes. */ smp_mb(); - spin_unlock_wait(&tsk->pi_lock); + raw_spin_unlock_wait(&tsk->pi_lock); if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", @@ -940,11 +952,14 @@ NORET_TYPE void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - + /* sync mm's RSS info before statistics gathering */ + sync_mm_rss(tsk, tsk->mm); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); + if (tsk->mm) + setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); } acct_collect(code, group_dead); if (group_dead) @@ -968,16 +983,18 @@ NORET_TYPE void do_exit(long code) exit_thread(); cgroup_exit(tsk, 1); - if (group_dead && tsk->signal->leader) + if (group_dead) disassociate_ctty(1); module_put(task_thread_info(tsk)->exec_domain->module); - if (tsk->binfmt) - module_put(tsk->binfmt->module); proc_exit_connector(tsk); /* + * FIXME: do that only when needed, using sched_exit tracepoint + */ + flush_ptrace_hw_breakpoint(tsk); + /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ @@ -989,8 +1006,6 @@ NORET_TYPE void do_exit(long code) tsk->mempolicy = NULL; #endif #ifdef CONFIG_FUTEX - if (unlikely(!list_empty(&tsk->pi_state_list))) - exit_pi_state_list(tsk); if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); #endif @@ -1006,7 +1021,7 @@ NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITPIDONE; if (tsk->io_context) - exit_io_context(); + exit_io_context(tsk); if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); @@ -1093,28 +1108,28 @@ struct wait_opts { int __user *wo_stat; struct rusage __user *wo_rusage; + wait_queue_t child_wait; int notask_error; }; -static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) +static inline +struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { - struct pid *pid = NULL; - if (type == PIDTYPE_PID) - pid = task->pids[type].pid; - else if (type < PIDTYPE_MAX) - pid = task->group_leader->pids[type].pid; - return pid; + if (type != PIDTYPE_PID) + task = task->group_leader; + return task->pids[type].pid; } -static int eligible_child(struct wait_opts *wo, struct task_struct *p) +static int eligible_pid(struct wait_opts *wo, struct task_struct *p) { - int err; - - if (wo->wo_type < PIDTYPE_MAX) { - if (task_pid_type(p, wo->wo_type) != wo->wo_pid) - return 0; - } + return wo->wo_type == PIDTYPE_MAX || + task_pid_type(p, wo->wo_type) == wo->wo_pid; +} +static int eligible_child(struct wait_opts *wo, struct task_struct *p) +{ + if (!eligible_pid(wo, p)) + return 0; /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1124,10 +1139,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p) && !(wo->wo_flags & __WALL)) return 0; - err = security_task_wait(p); - if (err) - return err; - return 1; } @@ -1140,18 +1151,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, put_task_struct(p); infop = wo->wo_info; - if (!retval) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(pid, &infop->si_pid); - if (!retval) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = put_user(status, &infop->si_status); + if (infop) { + if (!retval) + retval = put_user(SIGCHLD, &infop->si_signo); + if (!retval) + retval = put_user(0, &infop->si_errno); + if (!retval) + retval = put_user((short)why, &infop->si_code); + if (!retval) + retval = put_user(pid, &infop->si_pid); + if (!retval) + retval = put_user(uid, &infop->si_uid); + if (!retval) + retval = put_user(status, &infop->si_status); + } if (!retval) retval = pid; return retval; @@ -1176,7 +1189,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) if (unlikely(wo->wo_flags & WNOWAIT)) { int exit_code = p->exit_code; - int why, status; + int why; get_task_struct(p); read_unlock(&tasklist_lock); @@ -1208,6 +1221,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) if (likely(!traced) && likely(!task_detached(p))) { struct signal_struct *psig; struct signal_struct *sig; + unsigned long maxrss; + cputime_t tgutime, tgstime; /* * The resource counters for the group leader are in its @@ -1223,20 +1238,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * need to protect the access to parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. + * + * We use thread_group_times() to get times for the thread + * group, which consolidates times for all threads in the + * group including the group leader. */ + thread_group_times(p, &tgutime, &tgstime); spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; psig->cutime = cputime_add(psig->cutime, - cputime_add(p->utime, - cputime_add(sig->utime, - sig->cutime))); + cputime_add(tgutime, + sig->cutime)); psig->cstime = cputime_add(psig->cstime, - cputime_add(p->stime, - cputime_add(sig->stime, - sig->cstime))); + cputime_add(tgstime, + sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, @@ -1256,6 +1274,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; + maxrss = max(sig->maxrss, sig->cmaxrss); + if (psig->cmaxrss < maxrss) + psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); spin_unlock_irq(&p->real_parent->sighand->siglock); @@ -1477,13 +1498,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) * then ->notask_error is 0 if @p is an eligible child, * or another error from security_task_wait(), or still -ECHILD. */ -static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, - int ptrace, struct task_struct *p) +static int wait_consider_task(struct wait_opts *wo, int ptrace, + struct task_struct *p) { int ret = eligible_child(wo, p); if (!ret) return ret; + ret = security_task_wait(p); if (unlikely(ret < 0)) { /* * If we have not yet seen any eligible child, @@ -1541,14 +1563,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) struct task_struct *p; list_for_each_entry(p, &tsk->children, sibling) { - /* - * Do not consider detached threads. - */ - if (!task_detached(p)) { - int ret = wait_consider_task(wo, tsk, 0, p); - if (ret) - return ret; - } + int ret = wait_consider_task(wo, 0, p); + if (ret) + return ret; } return 0; @@ -1559,7 +1576,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) struct task_struct *p; list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { - int ret = wait_consider_task(wo, tsk, 1, p); + int ret = wait_consider_task(wo, 1, p); if (ret) return ret; } @@ -1567,15 +1584,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) return 0; } +static int child_wait_callback(wait_queue_t *wait, unsigned mode, + int sync, void *key) +{ + struct wait_opts *wo = container_of(wait, struct wait_opts, + child_wait); + struct task_struct *p = key; + + if (!eligible_pid(wo, p)) + return 0; + + if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) + return 0; + + return default_wake_function(wait, mode, sync, key); +} + +void __wake_up_parent(struct task_struct *p, struct task_struct *parent) +{ + __wake_up_sync_key(&parent->signal->wait_chldexit, + TASK_INTERRUPTIBLE, 1, p); +} + static long do_wait(struct wait_opts *wo) { - DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; int retval; trace_sched_process_wait(wo->wo_pid); - add_wait_queue(¤t->signal->wait_chldexit,&wait); + init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); + wo->child_wait.private = current; + add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); repeat: /* * If there is nothing that can match our critiera just get out. @@ -1616,32 +1656,7 @@ notask: } end: __set_current_state(TASK_RUNNING); - remove_wait_queue(¤t->signal->wait_chldexit,&wait); - if (wo->wo_info) { - struct siginfo __user *infop = wo->wo_info; - - if (retval > 0) - retval = 0; - else { - /* - * For a WNOHANG return, clear out all the fields - * we would set so the user can easily tell the - * difference. - */ - if (!retval) - retval = put_user(0, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user(0, &infop->si_code); - if (!retval) - retval = put_user(0, &infop->si_pid); - if (!retval) - retval = put_user(0, &infop->si_uid); - if (!retval) - retval = put_user(0, &infop->si_status); - } - } + remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); return retval; } @@ -1686,6 +1701,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, wo.wo_stat = NULL; wo.wo_rusage = ru; ret = do_wait(&wo); + + if (ret > 0) { + ret = 0; + } else if (infop) { + /* + * For a WNOHANG return, clear out all the fields + * we would set so the user can easily tell the + * difference. + */ + if (!ret) + ret = put_user(0, &infop->si_signo); + if (!ret) + ret = put_user(0, &infop->si_errno); + if (!ret) + ret = put_user(0, &infop->si_code); + if (!ret) + ret = put_user(0, &infop->si_pid); + if (!ret) + ret = put_user(0, &infop->si_uid); + if (!ret) + ret = put_user(0, &infop->si_status); + } + put_pid(pid); /* avoid REGPARM breakage on x86: */