Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic...
diff --git a/kernel/fork.c b/kernel/fork.c
index 1415dc4..b6cce14 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -87,6 +87,14 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+       return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 int nr_processes(void)
 {
        int cpu;
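
With CONFIG_PROVE_RCU, code that may run under either rcu_read_lock() or
tasklist_lock can feed this new helper into rcu_dereference_check() so that
lockdep accepts both cases. A hypothetical reader might look like this
(parent_of() is illustrative, not part of this patch):

        struct task_struct *parent_of(struct task_struct *p)
        {
                return rcu_dereference_check(p->real_parent,
                                             rcu_read_lock_held() ||
                                             lockdep_tasklist_lock_is_held());
        }
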
@@ -157,6 +165,18 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static inline void free_signal_struct(struct signal_struct *sig)
+{
+       taskstats_tgid_free(sig);
+       kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void put_signal_struct(struct signal_struct *sig)
+{
+       if (atomic_dec_and_test(&sig->sigcnt))
+               free_signal_struct(sig);
+}
+
 void __put_task_struct(struct task_struct *tsk)
 {
        WARN_ON(!tsk->exit_state);
@@ -165,6 +185,7 @@ void __put_task_struct(struct task_struct *tsk)
 
        exit_creds(tsk);
        delayacct_tsk_free(tsk);
+       put_signal_struct(tsk->signal);
 
        if (!profile_handoff_task(tsk))
                free_task(tsk);
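
The sigcnt pair above is plain last-put-frees reference counting. A minimal
userspace analogue using C11 atomics (obj and obj_put are illustrative names,
not kernel API):

        #include <stdatomic.h>
        #include <stdlib.h>

        struct obj { atomic_int refs; };  /* plays the role of sig->sigcnt */

        static void obj_put(struct obj *o)
        {
                /* atomic_fetch_sub() returns the old value, so 1 means we
                 * just dropped the final reference, like
                 * atomic_dec_and_test() returning true. */
                if (atomic_fetch_sub(&o->refs, 1) == 1)
                        free(o);
        }
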
@@ -328,15 +349,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
+               INIT_LIST_HEAD(&tmp->anon_vma_chain);
                pol = mpol_dup(vma_policy(mpnt));
                retval = PTR_ERR(pol);
                if (IS_ERR(pol))
                        goto fail_nomem_policy;
                vma_set_policy(tmp, pol);
+               if (anon_vma_fork(tmp, mpnt))
+                       goto fail_nomem_anon_vma_fork;
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                tmp->vm_next = NULL;
-               anon_vma_link(tmp);
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_path.dentry->d_inode;
@@ -391,6 +414,8 @@ out:
        flush_tlb_mm(oldmm);
        up_write(&oldmm->mmap_sem);
        return retval;
+fail_nomem_anon_vma_fork:
+       mpol_put(pol);
 fail_nomem_policy:
        kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
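
The new fail_nomem_anon_vma_fork label keeps the unwind strictly LIFO:
anon_vma_fork() failed after mpol_dup() succeeded, so only the policy is
dropped before falling through to free the vma itself. The same idiom in a
self-contained userspace sketch (make_pair() is illustrative):

        #include <stdlib.h>

        static int make_pair(char **a, char **b)
        {
                *a = malloc(16);
                if (!*a)
                        goto fail_a;
                *b = malloc(16);
                if (!*b)
                        goto fail_b;
                return 0;
        fail_b:
                free(*a);       /* undo only the steps that succeeded */
        fail_a:
                return -1;
        }
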
@@ -454,8 +479,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
                (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
        mm->core_state = NULL;
        mm->nr_ptes = 0;
-       set_mm_counter(mm, file_rss, 0);
-       set_mm_counter(mm, anon_rss, 0);
+       memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        spin_lock_init(&mm->page_table_lock);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
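
Zeroing the whole embedded struct with one memset() means counters added to
rss_stat later start at zero without mm_init() needing another edit. A sketch
of the idea (field and struct names illustrative, not the real mm_struct
layout):

        #include <string.h>

        struct rss_stat_like { long file_rss, anon_rss; };
        struct mm_like { struct rss_stat_like rss_stat; };

        static void mm_counters_init(struct mm_like *mm)
        {
                /* one store covers every present and future counter field */
                memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        }
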
@@ -824,23 +848,14 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+       unsigned long cpu_limit;
+
        /* Thread group counters. */
        thread_group_cputime_init(sig);
 
-       /* Expiration times and increments. */
-       sig->it[CPUCLOCK_PROF].expires = cputime_zero;
-       sig->it[CPUCLOCK_PROF].incr = cputime_zero;
-       sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
-       sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
-
-       /* Cached expiration times. */
-       sig->cputime_expires.prof_exp = cputime_zero;
-       sig->cputime_expires.virt_exp = cputime_zero;
-       sig->cputime_expires.sched_exp = 0;
-
-       if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-               sig->cputime_expires.prof_exp =
-                       secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+       cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+       if (cpu_limit != RLIM_INFINITY) {
+               sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
                sig->cputimer.running = 1;
        }
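
ACCESS_ONCE() forces a single load, so the RLIM_INFINITY test and the later
secs_to_cputime() conversion both see the same snapshot even if another
thread updates rlim_cur concurrently. In this era the macro was defined,
roughly, as:

        #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

        /* cpu_limit is a stable local copy; the compiler cannot legally
         * re-read the shared field between the test and the use. */
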
 
@@ -857,54 +872,30 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        if (clone_flags & CLONE_THREAD)
                return 0;
 
-       sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+       sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
        tsk->signal = sig;
        if (!sig)
                return -ENOMEM;
 
-       atomic_set(&sig->count, 1);
+       sig->nr_threads = 1;
        atomic_set(&sig->live, 1);
+       atomic_set(&sig->sigcnt, 1);
        init_waitqueue_head(&sig->wait_chldexit);
-       sig->flags = 0;
        if (clone_flags & CLONE_NEWPID)
                sig->flags |= SIGNAL_UNKILLABLE;
-       sig->group_exit_code = 0;
-       sig->group_exit_task = NULL;
-       sig->group_stop_count = 0;
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
        INIT_LIST_HEAD(&sig->posix_timers);
 
        hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       sig->it_real_incr.tv64 = 0;
        sig->real_timer.function = it_real_fn;
 
-       sig->leader = 0;        /* session leadership doesn't inherit */
-       sig->tty_old_pgrp = NULL;
-       sig->tty = NULL;
-
-       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
-       sig->gtime = cputime_zero;
-       sig->cgtime = cputime_zero;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-       sig->prev_utime = sig->prev_stime = cputime_zero;
-#endif
-       sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
-       sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
-       sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
-       sig->maxrss = sig->cmaxrss = 0;
-       task_io_accounting_init(&sig->ioac);
-       sig->sum_sched_runtime = 0;
-       taskstats_tgid_init(sig);
-
        task_lock(current->group_leader);
        memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
        task_unlock(current->group_leader);
 
        posix_cpu_timers_init_group(sig);
 
-       acct_init_pacct(&sig->pacct);
-
        tty_audit_fork(sig);
 
        sig->oom_adj = current->signal->oom_adj;
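
Switching to kmem_cache_zalloc() pre-zeroes the allocation, which is what
lets all the explicit "= 0" / "= NULL" / cputime_zero assignments above
simply disappear: only fields with non-zero initial values still need
stores. Userspace analogue with calloc() (sig_like is an illustrative type):

        #include <stdlib.h>

        struct sig_like { int nr_threads; int flags; /* ... */ };

        struct sig_like *s = calloc(1, sizeof(*s)); /* all-zero start */
        if (s)
                s->nr_threads = 1;  /* only non-zero fields are written */
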
@@ -912,13 +903,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        return 0;
 }
 
-void __cleanup_signal(struct signal_struct *sig)
-{
-       thread_group_cputime_free(sig);
-       tty_kref_put(sig->tty);
-       kmem_cache_free(signal_cachep, sig);
-}
-
 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
@@ -939,9 +923,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-       spin_lock_init(&p->pi_lock);
+       raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-       plist_head_init(&p->pi_waiters, &p->pi_lock);
+       plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
        p->pi_blocked_on = NULL;
 #endif
 }
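
pi_lock becomes a raw_spinlock_t so it keeps spinning even on PREEMPT_RT,
where ordinary spinlocks turn into sleeping locks; the rt_mutex core that
takes pi_lock must never sleep. Usage is unchanged apart from the raw_
prefix (a sketch, not from this patch):

        raw_spinlock_t lock;

        raw_spin_lock_init(&lock);
        raw_spin_lock(&lock);
        /* critical section that must not sleep, even on PREEMPT_RT */
        raw_spin_unlock(&lock);
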
@@ -1033,7 +1017,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
        retval = -EAGAIN;
        if (atomic_read(&p->real_cred->user->processes) >=
-                       p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+                       task_rlimit(p, RLIMIT_NPROC)) {
                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
                    p->real_cred->user != INIT_USER)
                        goto bad_fork_free;
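
task_rlimit() reads the soft limit through a single ACCESS_ONCE() load,
replacing the open-coded rlim_cur dereference. In this era the helper was
essentially (a hedged paraphrase of the real inline in sched.h):

        static inline unsigned long task_rlimit(const struct task_struct *tsk,
                                                unsigned int limit)
        {
                return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
        }
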
@@ -1075,6 +1059,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
 #endif
+#if defined(SPLIT_RSS_COUNTING)
+       memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+#endif
 
        p->default_timer_slack_ns = current->timer_slack_ns;
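
The SPLIT_RSS_COUNTING hunk above matters because each task batches RSS
updates in its own p->rss_stat and only periodically folds them into
mm->rss_stat; a freshly forked child must start with a zero cache rather
than inherit the parent's unflushed deltas. A sketch of the batching idea
(all names illustrative):

        struct counters { long file, anon; };

        struct task_cache { struct counters delta; }; /* per-task, lock-free */

        static void fold_delta(struct counters *mm_stat, struct task_cache *tc)
        {
                /* periodically fold the task-local delta into the shared count */
                mm_stat->file += tc->delta.file;
                mm_stat->anon += tc->delta.anon;
                tc->delta.file = tc->delta.anon = 0;
        }
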
 
@@ -1127,10 +1114,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
-
-       p->bts = NULL;
-
-       p->stack_start = stack_start;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+       p->memcg_batch.do_batch = 0;
+       p->memcg_batch.memcg = NULL;
+#endif
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p, clone_flags);
@@ -1206,9 +1193,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                p->sas_ss_sp = p->sas_ss_size = 0;
 
        /*
-        * Syscall tracing should be turned off in the child regardless
-        * of CLONE_PTRACE.
+        * Syscall tracing and stepping should be turned off in the
+        * child regardless of CLONE_PTRACE.
         */
+       user_disable_single_step(p);
        clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1236,21 +1224,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
-       /*
-        * The task hasn't been attached yet, so its cpus_allowed mask will
-        * not be changed, nor will its assigned CPU.
-        *
-        * The cpus_allowed mask of the parent may have changed after it was
-        * copied first time - so re-copy it here, then check the child's CPU
-        * to ensure it is on a valid CPU (and if not, just force it back to
-        * parent's CPU). This avoids a lot of nasty races.
-        */
-       p->cpus_allowed = current->cpus_allowed;
-       p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-       if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-                       !cpu_online(task_cpu(p))))
-               set_task_cpu(p, smp_processor_id());
-
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                p->real_parent = current->real_parent;
@@ -1279,14 +1252,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        }
 
        if (clone_flags & CLONE_THREAD) {
-               atomic_inc(&current->signal->count);
+               current->signal->nr_threads++;
                atomic_inc(&current->signal->live);
+               atomic_inc(&current->signal->sigcnt);
                p->group_leader = current->group_leader;
                list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
        }
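
Note that nr_threads is deliberately a plain int where the old count was an
atomic_t: every writer reaches this point holding tasklist_lock for writing,
so the lock, not an atomic op, provides the exclusion (live and sigcnt stay
atomic because exit paths touch them without tasklist_lock). The rule in
miniature:

        write_lock_irq(&tasklist_lock);
        current->signal->nr_threads++;  /* plain ++: serialized by the lock */
        write_unlock_irq(&tasklist_lock);
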
 
        if (likely(p->pid)) {
-               list_add_tail(&p->sibling, &p->real_parent->children);
                tracehook_finish_clone(p, clone_flags, trace);
 
                if (thread_group_leader(p)) {
@@ -1294,10 +1267,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                                p->nsproxy->pid_ns->child_reaper = p;
 
                        p->signal->leader_pid = pid;
-                       tty_kref_put(p->signal->tty);
                        p->signal->tty = tty_kref_get(current->signal->tty);
                        attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
                        attach_pid(p, PIDTYPE_SID, task_session(current));
+                       list_add_tail(&p->sibling, &p->real_parent->children);
                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
                        __get_cpu_var(process_counts)++;
                }
@@ -1326,7 +1299,7 @@ bad_fork_cleanup_mm:
                mmput(p->mm);
 bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
-               __cleanup_signal(p->signal);
+               free_signal_struct(p->signal);
 bad_fork_cleanup_sighand:
        __cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
@@ -1361,6 +1334,16 @@ noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_re
        return regs;
 }
 
+static inline void init_idle_pids(struct pid_link *links)
+{
+       enum pid_type type;
+
+       for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+               INIT_HLIST_NODE(&links[type].node); /* not really needed */
+               links[type].pid = &init_struct_pid;
+       }
+}
+
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
        struct task_struct *task;
@@ -1368,8 +1351,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 
        task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
                            &init_struct_pid, 0);
-       if (!IS_ERR(task))
+       if (!IS_ERR(task)) {
+               init_idle_pids(task->pids);
                init_idle(task, cpu);
+       }
 
        return task;
 }
@@ -1541,14 +1526,6 @@ static void check_unshare_flags(unsigned long *flags_ptr)
                *flags_ptr |= CLONE_SIGHAND;
 
        /*
-        * If unsharing signal handlers and the task was created
-        * using CLONE_THREAD, then must unshare the thread
-        */
-       if ((*flags_ptr & CLONE_SIGHAND) &&
-           (atomic_read(&current->signal->count) > 1))
-               *flags_ptr |= CLONE_THREAD;
-
-       /*
         * If unsharing namespace, must also unshare filesystem information.
         */
        if (*flags_ptr & CLONE_NEWNS)