Merge branch 'sched/core' into cpus4096
[safe/jmp/linux-2.6] / kernel / fork.c
index 8214ba7..7b93da7 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
+#include <linux/mmu_notifier.h>
 #include <linux/fs.h>
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
 #include <linux/futex.h>
+#include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -57,6 +60,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -77,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+DEFINE_TRACE(sched_process_fork);
+
 int nr_processes(void)
 {
        int cpu;
@@ -134,6 +140,7 @@ void free_task(struct task_struct *tsk)
        prop_local_destroy_single(&tsk->dirties);
        free_thread_info(tsk->stack);
        rt_mutex_debug_task_free(tsk);
+       ftrace_graph_exit_task(tsk);
        free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -312,17 +319,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_path.dentry->d_inode;
+                       struct address_space *mapping = file->f_mapping;
+
                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);
-
-                       /* insert tmp into the share list, just after mpnt */
-                       spin_lock(&file->f_mapping->i_mmap_lock);
+                       spin_lock(&mapping->i_mmap_lock);
+                       if (tmp->vm_flags & VM_SHARED)
+                               mapping->i_mmap_writable++;
                        tmp->vm_truncate_count = mpnt->vm_truncate_count;
-                       flush_dcache_mmap_lock(file->f_mapping);
+                       flush_dcache_mmap_lock(mapping);
+                       /* insert tmp into the share list, just after mpnt */
                        vma_prio_tree_add(tmp, mpnt);
-                       flush_dcache_mmap_unlock(file->f_mapping);
-                       spin_unlock(&file->f_mapping->i_mmap_lock);
+                       flush_dcache_mmap_unlock(mapping);
+                       spin_unlock(&mapping->i_mmap_lock);
                }
 
                /*
@@ -414,6 +424,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
+               mmu_notifier_mm_init(mm);
                return mm;
        }
 
@@ -446,6 +457,7 @@ void __mmdrop(struct mm_struct *mm)
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
        destroy_context(mm);
+       mmu_notifier_mm_destroy(mm);
        free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -515,6 +527,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
        struct completion *vfork_done = tsk->vfork_done;
 
+       /* Get rid of any futexes when releasing the mm */
+#ifdef CONFIG_FUTEX
+       if (unlikely(tsk->robust_list))
+               exit_robust_list(tsk);
+#ifdef CONFIG_COMPAT
+       if (unlikely(tsk->compat_robust_list))
+               compat_exit_robust_list(tsk);
+#endif
+#endif
+
        /* Get rid of any cached register state */
        deactivate_mm(tsk, mm);
 
@@ -756,15 +778,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
                kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+       /* Thread group counters. */
+       thread_group_cputime_init(sig);
+
+       /* Expiration times and increments. */
+       sig->it_virt_expires = cputime_zero;
+       sig->it_virt_incr = cputime_zero;
+       sig->it_prof_expires = cputime_zero;
+       sig->it_prof_incr = cputime_zero;
+
+       /* Cached expiration times. */
+       sig->cputime_expires.prof_exp = cputime_zero;
+       sig->cputime_expires.virt_exp = cputime_zero;
+       sig->cputime_expires.sched_exp = 0;
+
+       /* The timer lists. */
+       INIT_LIST_HEAD(&sig->cpu_timers[0]);
+       INIT_LIST_HEAD(&sig->cpu_timers[1]);
+       INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct signal_struct *sig;
        int ret;
 
        if (clone_flags & CLONE_THREAD) {
-               atomic_inc(&current->signal->count);
-               atomic_inc(&current->signal->live);
-               return 0;
+               ret = thread_group_cputime_clone_thread(current);
+               if (likely(!ret)) {
+                       atomic_inc(&current->signal->count);
+                       atomic_inc(&current->signal->live);
+               }
+               return ret;
        }
        sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
        tsk->signal = sig;
@@ -792,39 +843,25 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->it_real_incr.tv64 = 0;
        sig->real_timer.function = it_real_fn;
 
-       sig->it_virt_expires = cputime_zero;
-       sig->it_virt_incr = cputime_zero;
-       sig->it_prof_expires = cputime_zero;
-       sig->it_prof_incr = cputime_zero;
-
        sig->leader = 0;        /* session leadership doesn't inherit */
        sig->tty_old_pgrp = NULL;
+       sig->tty = NULL;
 
-       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+       sig->cutime = sig->cstime = cputime_zero;
        sig->gtime = cputime_zero;
        sig->cgtime = cputime_zero;
        sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
        sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
        sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
        task_io_accounting_init(&sig->ioac);
-       sig->sum_sched_runtime = 0;
-       INIT_LIST_HEAD(&sig->cpu_timers[0]);
-       INIT_LIST_HEAD(&sig->cpu_timers[1]);
-       INIT_LIST_HEAD(&sig->cpu_timers[2]);
        taskstats_tgid_init(sig);
 
        task_lock(current->group_leader);
        memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
        task_unlock(current->group_leader);
 
-       if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-               /*
-                * New sole thread in the process gets an expiry time
-                * of the whole CPU time limit.
-                */
-               tsk->it_prof_expires =
-                       secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-       }
+       posix_cpu_timers_init_group(sig);
+
        acct_init_pacct(&sig->pacct);
 
        tty_audit_fork(sig);
@@ -834,7 +871,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+       thread_group_cputime_free(sig);
        exit_thread_group_keys(sig);
+       tty_kref_put(sig->tty);
        kmem_cache_free(signal_cachep, sig);
 }
 
@@ -883,6 +922,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif /* CONFIG_MM_OWNER */
 
 /*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+       tsk->cputime_expires.prof_exp = cputime_zero;
+       tsk->cputime_expires.virt_exp = cputime_zero;
+       tsk->cputime_expires.sched_exp = 0;
+       INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+       INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+       INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
+/*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
  *
@@ -984,6 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
 
+       p->default_timer_slack_ns = current->timer_slack_ns;
+
 #ifdef CONFIG_DETECT_SOFTLOCKUP
        p->last_switch_count = 0;
        p->last_switch_timestamp = 0;
@@ -992,12 +1046,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);
 
-       p->it_virt_expires = cputime_zero;
-       p->it_prof_expires = cputime_zero;
-       p->it_sched_expires = 0;
-       INIT_LIST_HEAD(&p->cpu_timers[0]);
-       INIT_LIST_HEAD(&p->cpu_timers[1]);
-       INIT_LIST_HEAD(&p->cpu_timers[2]);
+       posix_cpu_timers_init(p);
 
        p->lock_depth = -1;             /* -1 = no lock */
        do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1091,6 +1140,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                }
        }
 
+       ftrace_graph_init_task(p);
+
        p->pid = pid_nr(pid);
        p->tgid = p->pid;
        if (clone_flags & CLONE_THREAD)
@@ -1099,7 +1150,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (current->nsproxy != p->nsproxy) {
                retval = ns_cgroup_clone(p, pid);
                if (retval)
-                       goto bad_fork_free_pid;
+                       goto bad_fork_free_graph;
        }
 
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1192,27 +1243,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_free_pid;
+               goto bad_fork_free_graph;
        }
 
        if (clone_flags & CLONE_THREAD) {
                p->group_leader = current->group_leader;
                list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-               if (!cputime_eq(current->signal->it_virt_expires,
-                               cputime_zero) ||
-                   !cputime_eq(current->signal->it_prof_expires,
-                               cputime_zero) ||
-                   current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-                   !list_empty(&current->signal->cpu_timers[0]) ||
-                   !list_empty(&current->signal->cpu_timers[1]) ||
-                   !list_empty(&current->signal->cpu_timers[2])) {
-                       /*
-                        * Have child wake up on its first tick to check
-                        * for process CPU timers.
-                        */
-                       p->it_prof_expires = jiffies_to_cputime(1);
-               }
        }
 
        if (likely(p->pid)) {
@@ -1224,7 +1260,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                                p->nsproxy->pid_ns->child_reaper = p;
 
                        p->signal->leader_pid = pid;
-                       p->signal->tty = current->signal->tty;
+                       tty_kref_put(p->signal->tty);
+                       p->signal->tty = tty_kref_get(current->signal->tty);
                        set_task_pgrp(p, task_pgrp_nr(current));
                        set_task_session(p, task_session_nr(current));
                        attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
@@ -1243,6 +1280,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        cgroup_post_fork(p);
        return p;
 
+bad_fork_free_graph:
+       ftrace_graph_exit_task(p);
 bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
@@ -1358,6 +1397,8 @@ long do_fork(unsigned long clone_flags,
        if (!IS_ERR(p)) {
                struct completion vfork;
 
+               trace_sched_process_fork(current, p);
+
                nr = task_pid_vnr(p);
 
                if (clone_flags & CLONE_PARENT_SETTID)
@@ -1368,6 +1409,7 @@ long do_fork(unsigned long clone_flags,
                        init_completion(&vfork);
                }
 
+               audit_finish_fork(p);
                tracehook_report_clone(trace, regs, clone_flags, nr, p);
 
                /*