nfsd4: remove some dprintk's
[safe/jmp/linux-2.6] / kernel / fork.c
index 81fdc77..4854c2c 100644 (file)
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
 #include <linux/futex.h>
+#include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -79,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+DEFINE_TRACE(sched_process_fork);
+
 int nr_processes(void)
 {
        int cpu;
@@ -136,6 +140,7 @@ void free_task(struct task_struct *tsk)
        prop_local_destroy_single(&tsk->dirties);
        free_thread_info(tsk->stack);
        rt_mutex_debug_task_free(tsk);
+       ftrace_graph_exit_task(tsk);
        free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -146,9 +151,8 @@ void __put_task_struct(struct task_struct *tsk)
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
 
-       security_task_free(tsk);
-       free_uid(tsk->__temp_cred.user);
-       put_group_info(tsk->__temp_cred.group_info);
+       put_cred(tsk->real_cred);
+       put_cred(tsk->cred);
        delayacct_tsk_free(tsk);
 
        if (!profile_handoff_task(tsk))
@@ -314,17 +318,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_path.dentry->d_inode;
+                       struct address_space *mapping = file->f_mapping;
+
                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);
-
-                       /* insert tmp into the share list, just after mpnt */
-                       spin_lock(&file->f_mapping->i_mmap_lock);
+                       spin_lock(&mapping->i_mmap_lock);
+                       if (tmp->vm_flags & VM_SHARED)
+                               mapping->i_mmap_writable++;
                        tmp->vm_truncate_count = mpnt->vm_truncate_count;
-                       flush_dcache_mmap_lock(file->f_mapping);
+                       flush_dcache_mmap_lock(mapping);
+                       /* insert tmp into the share list, just after mpnt */
                        vma_prio_tree_add(tmp, mpnt);
-                       flush_dcache_mmap_unlock(file->f_mapping);
-                       spin_unlock(&file->f_mapping->i_mmap_lock);
+                       flush_dcache_mmap_unlock(mapping);
+                       spin_unlock(&mapping->i_mmap_lock);
                }
 
                /*
@@ -393,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 #define allocate_mm()  (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 #define free_mm(mm)    (kmem_cache_free(mm_cachep, (mm)))
 
+static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
+
+static int __init coredump_filter_setup(char *s)
+{
+       default_dump_filter =
+               (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
+               MMF_DUMP_FILTER_MASK;
+       return 1;
+}
+
+__setup("coredump_filter=", coredump_filter_setup);
+
 #include <linux/init_task.h>
 
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
@@ -401,15 +420,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
-       mm->flags = (current->mm) ? current->mm->flags
-                                 : MMF_DUMP_FILTER_DEFAULT;
+       mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
        mm->core_state = NULL;
        mm->nr_ptes = 0;
        set_mm_counter(mm, file_rss, 0);
        set_mm_counter(mm, anon_rss, 0);
        spin_lock_init(&mm->page_table_lock);
-       rwlock_init(&mm->ioctx_list_lock);
-       mm->ioctx_list = NULL;
+       spin_lock_init(&mm->ioctx_lock);
+       INIT_HLIST_HEAD(&mm->ioctx_list);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        mm_init_owner(mm, p);
@@ -519,6 +537,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
        struct completion *vfork_done = tsk->vfork_done;
 
+       /* Get rid of any futexes when releasing the mm */
+#ifdef CONFIG_FUTEX
+       if (unlikely(tsk->robust_list))
+               exit_robust_list(tsk);
+#ifdef CONFIG_COMPAT
+       if (unlikely(tsk->compat_robust_list))
+               compat_exit_robust_list(tsk);
+#endif
+#endif
+
        /* Get rid of any cached register state */
        deactivate_mm(tsk, mm);
 
@@ -741,7 +769,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct sighand_struct *sig;
 
-       if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+       if (clone_flags & CLONE_SIGHAND) {
                atomic_inc(&current->sighand->count);
                return 0;
        }
@@ -789,27 +817,21 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct signal_struct *sig;
-       int ret;
 
        if (clone_flags & CLONE_THREAD) {
-               ret = thread_group_cputime_clone_thread(current);
-               if (likely(!ret)) {
-                       atomic_inc(&current->signal->count);
-                       atomic_inc(&current->signal->live);
-               }
-               return ret;
+               atomic_inc(&current->signal->count);
+               atomic_inc(&current->signal->live);
+               return 0;
        }
        sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+       if (sig)
+               posix_cpu_timers_init_group(sig);
+
        tsk->signal = sig;
        if (!sig)
                return -ENOMEM;
 
-       ret = copy_thread_group_keys(tsk);
-       if (ret < 0) {
-               kmem_cache_free(signal_cachep, sig);
-               return ret;
-       }
-
        atomic_set(&sig->count, 1);
        atomic_set(&sig->live, 1);
        init_waitqueue_head(&sig->wait_chldexit);
@@ -829,21 +851,20 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->tty_old_pgrp = NULL;
        sig->tty = NULL;
 
-       sig->cutime = sig->cstime = cputime_zero;
+       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
        sig->gtime = cputime_zero;
        sig->cgtime = cputime_zero;
        sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
        sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
        sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
        task_io_accounting_init(&sig->ioac);
+       sig->sum_sched_runtime = 0;
        taskstats_tgid_init(sig);
 
        task_lock(current->group_leader);
        memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
        task_unlock(current->group_leader);
 
-       posix_cpu_timers_init_group(sig);
-
        acct_init_pacct(&sig->pacct);
 
        tty_audit_fork(sig);
@@ -854,7 +875,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 void __cleanup_signal(struct signal_struct *sig)
 {
        thread_group_cputime_free(sig);
-       exit_thread_group_keys(sig);
        tty_kref_put(sig->tty);
        kmem_cache_free(signal_cachep, sig);
 }
@@ -880,7 +900,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
        clear_freeze_flag(p);
 }
 
-asmlinkage long sys_set_tid_address(int __user *tidptr)
+SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
        current->clear_child_tid = tidptr;
 
@@ -969,24 +989,24 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
        DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
-       p->cred = &p->__temp_cred;
        retval = -EAGAIN;
-       if (atomic_read(&p->cred->user->processes) >=
+       if (atomic_read(&p->real_cred->user->processes) >=
                        p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-                   p->cred->user != current->nsproxy->user_ns->root_user)
+                   p->real_cred->user != INIT_USER)
                        goto bad_fork_free;
        }
 
-       atomic_inc(&p->cred->user->__count);
-       atomic_inc(&p->cred->user->processes);
-       get_group_info(p->cred->group_info);
+       retval = copy_creds(p, clone_flags);
+       if (retval < 0)
+               goto bad_fork_free;
 
        /*
         * If multiple threads are within copy_process(), then this check
         * triggers too late. This doesn't hurt, the check is only there
         * to stop root fork bombs.
         */
+       retval = -EAGAIN;
        if (nr_threads >= max_threads)
                goto bad_fork_cleanup_count;
 
@@ -1035,9 +1055,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        do_posix_clock_monotonic_gettime(&p->start_time);
        p->real_start_time = p->start_time;
        monotonic_to_bootbased(&p->real_start_time);
-#ifdef CONFIG_SECURITY
-       p->cred->security = NULL;
-#endif
        p->io_context = NULL;
        p->audit_context = NULL;
        cgroup_fork(p);
@@ -1078,14 +1095,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
+       if (unlikely(current->ptrace))
+               ptrace_fork(p, clone_flags);
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p, clone_flags);
 
-       if ((retval = security_task_alloc(p)))
-               goto bad_fork_cleanup_policy;
        if ((retval = audit_alloc(p)))
-               goto bad_fork_cleanup_security;
+               goto bad_fork_cleanup_policy;
        /* copy all the process information */
        if ((retval = copy_semundo(clone_flags, p)))
                goto bad_fork_cleanup_audit;
@@ -1099,10 +1116,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_cleanup_sighand;
        if ((retval = copy_mm(clone_flags, p)))
                goto bad_fork_cleanup_signal;
-       if ((retval = copy_keys(clone_flags, p)))
-               goto bad_fork_cleanup_mm;
        if ((retval = copy_namespaces(clone_flags, p)))
-               goto bad_fork_cleanup_keys;
+               goto bad_fork_cleanup_mm;
        if ((retval = copy_io(clone_flags, p)))
                goto bad_fork_cleanup_namespaces;
        retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
@@ -1111,17 +1126,19 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        if (pid != &init_struct_pid) {
                retval = -ENOMEM;
-               pid = alloc_pid(task_active_pid_ns(p));
+               pid = alloc_pid(p->nsproxy->pid_ns);
                if (!pid)
                        goto bad_fork_cleanup_io;
 
                if (clone_flags & CLONE_NEWPID) {
-                       retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+                       retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
                        if (retval < 0)
                                goto bad_fork_free_pid;
                }
        }
 
+       ftrace_graph_init_task(p);
+
        p->pid = pid_nr(pid);
        p->tgid = p->pid;
        if (clone_flags & CLONE_THREAD)
@@ -1130,7 +1147,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (current->nsproxy != p->nsproxy) {
                retval = ns_cgroup_clone(p, pid);
                if (retval)
-                       goto bad_fork_free_pid;
+                       goto bad_fork_free_graph;
        }
 
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1162,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
        clear_all_latency_tracing(p);
 
-       /* Our parent execution domain becomes current domain
-          These must match for thread signalling to apply */
-       p->parent_exec_id = p->self_exec_id;
-
        /* ok, now we should be set up.. */
        p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
        p->pdeath_signal = 0;
@@ -1203,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                set_task_cpu(p, smp_processor_id());
 
        /* CLONE_PARENT re-uses the old parent */
-       if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+       if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                p->real_parent = current->real_parent;
-       else
+               p->parent_exec_id = current->parent_exec_id;
+       } else {
                p->real_parent = current;
+               p->parent_exec_id = current->self_exec_id;
+       }
 
        spin_lock(&current->sighand->siglock);
 
@@ -1223,7 +1239,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_free_pid;
+               goto bad_fork_free_graph;
        }
 
        if (clone_flags & CLONE_THREAD) {
@@ -1260,6 +1276,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        cgroup_post_fork(p);
        return p;
 
+bad_fork_free_graph:
+       ftrace_graph_exit_task(p);
 bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
@@ -1267,8 +1285,6 @@ bad_fork_cleanup_io:
        put_io_context(p->io_context);
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
-bad_fork_cleanup_keys:
-       exit_keys(p);
 bad_fork_cleanup_mm:
        if (p->mm)
                mmput(p->mm);
@@ -1284,8 +1300,6 @@ bad_fork_cleanup_semundo:
        exit_sem(p);
 bad_fork_cleanup_audit:
        audit_free(p);
-bad_fork_cleanup_security:
-       security_task_free(p);
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
        mpol_put(p->mempolicy);
@@ -1298,9 +1312,9 @@ bad_fork_cleanup_cgroup:
 bad_fork_cleanup_put_domain:
        module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
-       put_group_info(p->cred->group_info);
        atomic_dec(&p->cred->user->processes);
-       free_uid(p->cred->user);
+       put_cred(p->real_cred);
+       put_cred(p->cred);
 bad_fork_free:
        free_task(p);
 fork_out:
@@ -1344,6 +1358,21 @@ long do_fork(unsigned long clone_flags,
        long nr;
 
        /*
+        * Do some preliminary argument and permissions checking before we
+        * actually start allocating stuff
+        */
+       if (clone_flags & CLONE_NEWUSER) {
+               if (clone_flags & CLONE_THREAD)
+                       return -EINVAL;
+               /* hopefully this check will go away when userns support is
+                * complete
+                */
+               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
+                               !capable(CAP_SETGID))
+                       return -EPERM;
+       }
+
+       /*
         * We hope to recycle these flags after 2.6.26
         */
        if (unlikely(clone_flags & CLONE_STOPPED)) {
@@ -1387,6 +1416,7 @@ long do_fork(unsigned long clone_flags,
                        init_completion(&vfork);
                }
 
+               audit_finish_fork(p);
                tracehook_report_clone(trace, regs, clone_flags, nr, p);
 
                /*
@@ -1450,12 +1480,10 @@ void __init proc_caches_init(void)
        fs_cachep = kmem_cache_create("fs_cache",
                        sizeof(struct fs_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-       vm_area_cachep = kmem_cache_create("vm_area_struct",
-                       sizeof(struct vm_area_struct), 0,
-                       SLAB_PANIC, NULL);
        mm_cachep = kmem_cache_create("mm_struct",
                        sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+       mmap_init();
 }
 
 /*
@@ -1574,7 +1602,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
  * constructed. Here we are modifying the current, active,
  * task_struct.
  */
-asmlinkage long sys_unshare(unsigned long unshare_flags)
+SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
        int err = 0;
        struct fs_struct *fs, *new_fs = NULL;
@@ -1590,8 +1618,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
        err = -EINVAL;
        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
-                               CLONE_NEWNET))
+                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
                goto bad_unshare_out;
 
        /*