Take fs_struct handling to new file (fs/fs_struct.c)
[safe/jmp/linux-2.6] / kernel / fork.c
index 1dd8945..05c02dc 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/mount.h>
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -60,6 +61,7 @@
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <trace/sched.h>
+#include <linux/magic.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -80,6 +82,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+DEFINE_TRACE(sched_process_fork);
+
 int nr_processes(void)
 {
        int cpu;
@@ -137,6 +141,7 @@ void free_task(struct task_struct *tsk)
        prop_local_destroy_single(&tsk->dirties);
        free_thread_info(tsk->stack);
        rt_mutex_debug_task_free(tsk);
+       ftrace_graph_exit_task(tsk);
        free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -208,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
        struct task_struct *tsk;
        struct thread_info *ti;
+       unsigned long *stackend;
+
        int err;
 
        prepare_to_copy(orig);
@@ -233,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                goto out;
 
        setup_thread_stack(tsk, orig);
+       stackend = end_of_stack(tsk);
+       *stackend = STACK_END_MAGIC;    /* for overflow detection */
 
 #ifdef CONFIG_CC_STACKPROTECTOR
        tsk->stack_canary = get_random_int();
@@ -275,7 +284,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
        mm->free_area_cache = oldmm->mmap_base;
        mm->cached_hole_size = ~0UL;
        mm->map_count = 0;
-       cpus_clear(mm->cpu_vm_mask);
+       cpumask_clear(mm_cpumask(mm));
        mm->mm_rb = RB_ROOT;
        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
@@ -314,17 +323,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_path.dentry->d_inode;
+                       struct address_space *mapping = file->f_mapping;
+
                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);
-
-                       /* insert tmp into the share list, just after mpnt */
-                       spin_lock(&file->f_mapping->i_mmap_lock);
+                       spin_lock(&mapping->i_mmap_lock);
+                       if (tmp->vm_flags & VM_SHARED)
+                               mapping->i_mmap_writable++;
                        tmp->vm_truncate_count = mpnt->vm_truncate_count;
-                       flush_dcache_mmap_lock(file->f_mapping);
+                       flush_dcache_mmap_lock(mapping);
+                       /* insert tmp into the share list, just after mpnt */
                        vma_prio_tree_add(tmp, mpnt);
-                       flush_dcache_mmap_unlock(file->f_mapping);
-                       spin_unlock(&file->f_mapping->i_mmap_lock);
+                       flush_dcache_mmap_unlock(mapping);
+                       spin_unlock(&mapping->i_mmap_lock);
                }
 
                /*
@@ -393,6 +405,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 #define allocate_mm()  (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 #define free_mm(mm)    (kmem_cache_free(mm_cachep, (mm)))
 
+static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;  /* mm->flags for a new mm when current has no mm (see mm_init) */
+
+static int __init coredump_filter_setup(char *s)  /* handler registered for "coredump_filter=" via __setup() below; s is the text after '=' */
+{
+       default_dump_filter =
+               (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &  /* parse with base 0, then shift by MMF_DUMP_FILTER_SHIFT */
+               MMF_DUMP_FILTER_MASK;  /* keep only the bits covered by MMF_DUMP_FILTER_MASK */
+       return 1;  /* NOTE(review): presumably 1 tells the __setup machinery the option was consumed -- confirm */
+}
+
+__setup("coredump_filter=", coredump_filter_setup);
+
 #include <linux/init_task.h>
 
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
@@ -401,15 +425,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
-       mm->flags = (current->mm) ? current->mm->flags
-                                 : MMF_DUMP_FILTER_DEFAULT;
+       mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
        mm->core_state = NULL;
        mm->nr_ptes = 0;
        set_mm_counter(mm, file_rss, 0);
        set_mm_counter(mm, anon_rss, 0);
        spin_lock_init(&mm->page_table_lock);
-       rwlock_init(&mm->ioctx_list_lock);
-       mm->ioctx_list = NULL;
+       spin_lock_init(&mm->ioctx_lock);
+       INIT_HLIST_HEAD(&mm->ioctx_list);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        mm_init_owner(mm, p);
@@ -658,38 +681,13 @@ fail_nomem:
        return retval;
 }
 
-static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
-{
-       struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
-       /* We don't need to lock fs - think why ;-) */
-       if (fs) {
-               atomic_set(&fs->count, 1);
-               rwlock_init(&fs->lock);
-               fs->umask = old->umask;
-               read_lock(&old->lock);
-               fs->root = old->root;
-               path_get(&old->root);
-               fs->pwd = old->pwd;
-               path_get(&old->pwd);
-               read_unlock(&old->lock);
-       }
-       return fs;
-}
-
-struct fs_struct *copy_fs_struct(struct fs_struct *old)
-{
-       return __copy_fs_struct(old);
-}
-
-EXPORT_SYMBOL_GPL(copy_fs_struct);
-
 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
        if (clone_flags & CLONE_FS) {
                atomic_inc(&current->fs->count);
                return 0;
        }
-       tsk->fs = __copy_fs_struct(current->fs);
+       tsk->fs = copy_fs_struct(current->fs);
        if (!tsk->fs)
                return -ENOMEM;
        return 0;
@@ -751,7 +749,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct sighand_struct *sig;
 
-       if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+       if (clone_flags & CLONE_SIGHAND) {
                atomic_inc(&current->sighand->count);
                return 0;
        }
@@ -799,17 +797,17 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct signal_struct *sig;
-       int ret;
 
        if (clone_flags & CLONE_THREAD) {
-               ret = thread_group_cputime_clone_thread(current);
-               if (likely(!ret)) {
-                       atomic_inc(&current->signal->count);
-                       atomic_inc(&current->signal->live);
-               }
-               return ret;
+               atomic_inc(&current->signal->count);
+               atomic_inc(&current->signal->live);
+               return 0;
        }
        sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+       if (sig)
+               posix_cpu_timers_init_group(sig);
+
        tsk->signal = sig;
        if (!sig)
                return -ENOMEM;
@@ -833,21 +831,20 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->tty_old_pgrp = NULL;
        sig->tty = NULL;
 
-       sig->cutime = sig->cstime = cputime_zero;
+       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
        sig->gtime = cputime_zero;
        sig->cgtime = cputime_zero;
        sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
        sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
        sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
        task_io_accounting_init(&sig->ioac);
+       sig->sum_sched_runtime = 0;
        taskstats_tgid_init(sig);
 
        task_lock(current->group_leader);
        memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
        task_unlock(current->group_leader);
 
-       posix_cpu_timers_init_group(sig);
-
        acct_init_pacct(&sig->pacct);
 
        tty_audit_fork(sig);
@@ -883,7 +880,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
        clear_freeze_flag(p);
 }
 
-asmlinkage long sys_set_tid_address(int __user *tidptr)
+SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
        current->clear_child_tid = tidptr;
 
@@ -989,6 +986,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         * triggers too late. This doesn't hurt, the check is only there
         * to stop root fork bombs.
         */
+       retval = -EAGAIN;
        if (nr_threads >= max_threads)
                goto bad_fork_cleanup_count;
 
@@ -1077,6 +1075,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
+       if (unlikely(current->ptrace))
+               ptrace_fork(p, clone_flags);
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p, clone_flags);
@@ -1106,17 +1106,19 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        if (pid != &init_struct_pid) {
                retval = -ENOMEM;
-               pid = alloc_pid(task_active_pid_ns(p));
+               pid = alloc_pid(p->nsproxy->pid_ns);
                if (!pid)
                        goto bad_fork_cleanup_io;
 
                if (clone_flags & CLONE_NEWPID) {
-                       retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+                       retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
                        if (retval < 0)
                                goto bad_fork_free_pid;
                }
        }
 
+       ftrace_graph_init_task(p);
+
        p->pid = pid_nr(pid);
        p->tgid = p->pid;
        if (clone_flags & CLONE_THREAD)
@@ -1125,7 +1127,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (current->nsproxy != p->nsproxy) {
                retval = ns_cgroup_clone(p, pid);
                if (retval)
-                       goto bad_fork_free_pid;
+                       goto bad_fork_free_graph;
        }
 
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1157,10 +1159,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
        clear_all_latency_tracing(p);
 
-       /* Our parent execution domain becomes current domain
-          These must match for thread signalling to apply */
-       p->parent_exec_id = p->self_exec_id;
-
        /* ok, now we should be set up.. */
        p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
        p->pdeath_signal = 0;
@@ -1198,10 +1196,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                set_task_cpu(p, smp_processor_id());
 
        /* CLONE_PARENT re-uses the old parent */
-       if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+       if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                p->real_parent = current->real_parent;
-       else
+               p->parent_exec_id = current->parent_exec_id;
+       } else {
                p->real_parent = current;
+               p->parent_exec_id = current->self_exec_id;
+       }
 
        spin_lock(&current->sighand->siglock);
 
@@ -1218,7 +1219,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_free_pid;
+               goto bad_fork_free_graph;
        }
 
        if (clone_flags & CLONE_THREAD) {
@@ -1255,6 +1256,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        cgroup_post_fork(p);
        return p;
 
+bad_fork_free_graph:
+       ftrace_graph_exit_task(p);
 bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
@@ -1344,7 +1347,8 @@ long do_fork(unsigned long clone_flags,
                /* hopefully this check will go away when userns support is
                 * complete
                 */
-               if (!capable(CAP_SYS_ADMIN))
+               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
+                               !capable(CAP_SETGID))
                        return -EPERM;
        }
 
@@ -1392,6 +1396,7 @@ long do_fork(unsigned long clone_flags,
                        init_completion(&vfork);
                }
 
+               audit_finish_fork(p);
                tracehook_report_clone(trace, regs, clone_flags, nr, p);
 
                /*
@@ -1455,12 +1460,10 @@ void __init proc_caches_init(void)
        fs_cachep = kmem_cache_create("fs_cache",
                        sizeof(struct fs_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-       vm_area_cachep = kmem_cache_create("vm_area_struct",
-                       sizeof(struct vm_area_struct), 0,
-                       SLAB_PANIC, NULL);
        mm_cachep = kmem_cache_create("mm_struct",
                        sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+       mmap_init();
 }
 
 /*
@@ -1517,7 +1520,7 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 
        if ((unshare_flags & CLONE_FS) &&
            (fs && atomic_read(&fs->count) > 1)) {
-               *new_fsp = __copy_fs_struct(current->fs);
+               *new_fsp = copy_fs_struct(current->fs);
                if (!*new_fsp)
                        return -ENOMEM;
        }
@@ -1579,7 +1582,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
  * constructed. Here we are modifying the current, active,
  * task_struct.
  */
-asmlinkage long sys_unshare(unsigned long unshare_flags)
+SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
        int err = 0;
        struct fs_struct *fs, *new_fs = NULL;