CGroup API files: add res_counter_read_u64()
[safe/jmp/linux-2.6] / kernel / fork.c
index 05e0b6f..6067e42 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/memcontrol.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -131,6 +132,14 @@ void __put_task_struct(struct task_struct *tsk)
                free_task(tsk);
 }
 
+/*
+ * Macro override instead of weak attribute alias, to work around
+ * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
+ */
+#ifndef arch_task_cache_init
+#define arch_task_cache_init()
+#endif
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -143,6 +152,9 @@ void __init fork_init(unsigned long mempages)
                        ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
 #endif
 
+       /* do the arch specific task caches init */
+       arch_task_cache_init();
+
        /*
         * The default maximum number of threads is set to a safe
         * value: the thread structures can take up at most half
@@ -162,6 +174,13 @@ void __init fork_init(unsigned long mempages)
                init_task.signal->rlim[RLIMIT_NPROC];
 }
 
+int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
+                                              struct task_struct *src)
+{
+       *dst = *src;
+       return 0;
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
        struct task_struct *tsk;
@@ -180,15 +199,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                return NULL;
        }
 
-       *tsk = *orig;
+       err = arch_dup_task_struct(tsk, orig);
+       if (err)
+               goto out;
+
        tsk->stack = ti;
 
        err = prop_local_init_single(&tsk->dirties);
-       if (err) {
-               free_thread_info(ti);
-               free_task_struct(tsk);
-               return NULL;
-       }
+       if (err)
+               goto out;
 
        setup_thread_stack(tsk, orig);
 
@@ -204,6 +223,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 #endif
        tsk->splice_pipe = NULL;
        return tsk;
+
+out:
+       free_thread_info(ti);
+       free_task_struct(tsk);
+       return NULL;
 }
 
 #ifdef CONFIG_MMU
@@ -255,7 +279,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
-               pol = mpol_copy(vma_policy(mpnt));
+               pol = mpol_dup(vma_policy(mpnt));
                retval = PTR_ERR(pol);
                if (IS_ERR(pol))
                        goto fail_nomem_policy;
@@ -325,7 +349,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm)
 
 static inline void mm_free_pgd(struct mm_struct * mm)
 {
-       pgd_free(mm->pgd);
+       pgd_free(mm, mm->pgd);
 }
 #else
 #define dup_mmap(mm, oldmm)    (0)
@@ -340,7 +364,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
@@ -357,11 +381,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
        mm->ioctx_list = NULL;
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
+       mm_init_cgroup(mm, p);
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
                return mm;
        }
+
+       mm_free_cgroup(mm);
        free_mm(mm);
        return NULL;
 }
@@ -376,7 +403,7 @@ struct mm_struct * mm_alloc(void)
        mm = allocate_mm();
        if (mm) {
                memset(mm, 0, sizeof(*mm));
-               mm = mm_init(mm);
+               mm = mm_init(mm, current);
        }
        return mm;
 }
@@ -386,7 +413,7 @@ struct mm_struct * mm_alloc(void)
  * is dropped: either by a lazy thread or by
  * mmput. Free the page directory and the mm.
  */
-void fastcall __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
 {
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
@@ -411,6 +438,7 @@ void mmput(struct mm_struct *mm)
                        spin_unlock(&mmlist_lock);
                }
                put_swap_token(mm);
+               mm_free_cgroup(mm);
                mmdrop(mm);
        }
 }
@@ -493,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
  */
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
 {
        struct mm_struct *mm, *oldmm = current->mm;
        int err;
@@ -511,7 +539,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
        mm->token_priority = 0;
        mm->last_interval = 0;
 
-       if (!mm_init(mm))
+       if (!mm_init(mm, tsk))
                goto fail_nomem;
 
        if (init_new_context(tsk, mm))
@@ -595,16 +623,16 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
                rwlock_init(&fs->lock);
                fs->umask = old->umask;
                read_lock(&old->lock);
-               fs->rootmnt = mntget(old->rootmnt);
-               fs->root = dget(old->root);
-               fs->pwdmnt = mntget(old->pwdmnt);
-               fs->pwd = dget(old->pwd);
-               if (old->altroot) {
-                       fs->altrootmnt = mntget(old->altrootmnt);
-                       fs->altroot = dget(old->altroot);
+               fs->root = old->root;
+               path_get(&old->root);
+               fs->pwd = old->pwd;
+               path_get(&old->pwd);
+               if (old->altroot.dentry) {
+                       fs->altroot = old->altroot;
+                       path_get(&old->altroot);
                } else {
-                       fs->altrootmnt = NULL;
-                       fs->altroot = NULL;
+                       fs->altroot.mnt = NULL;
+                       fs->altroot.dentry = NULL;
                }
                read_unlock(&old->lock);
        }
@@ -777,12 +805,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
                goto out;
        }
 
-       /*
-        * Note: we may be using current for both targets (See exec.c)
-        * This works because we cache current->files (old) as oldf. Don't
-        * break this.
-        */
-       tsk->files = NULL;
        newf = dup_fd(oldf, &error);
        if (!newf)
                goto out;
@@ -818,34 +840,6 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
        return 0;
 }
 
-/*
- *     Helper to unshare the files of the current task.
- *     We don't want to expose copy_files internals to
- *     the exec layer of the kernel.
- */
-
-int unshare_files(void)
-{
-       struct files_struct *files  = current->files;
-       int rc;
-
-       BUG_ON(!files);
-
-       /* This can race but the race causes us to copy when we don't
-          need to and drop the copy */
-       if(atomic_read(&files->count) == 1)
-       {
-               atomic_inc(&files->count);
-               return 0;
-       }
-       rc = copy_files(0, current);
-       if(rc)
-               current->files = files;
-       return rc;
-}
-
-EXPORT_SYMBOL(unshare_files);
-
 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct sighand_struct *sig;
@@ -904,7 +898,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        sig->it_real_incr.tv64 = 0;
        sig->real_timer.function = it_real_fn;
-       sig->tsk = tsk;
 
        sig->it_virt_expires = cputime_zero;
        sig->it_virt_incr = cputime_zero;
@@ -1118,11 +1111,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_SECURITY
        p->security = NULL;
 #endif
+       p->cap_bset = current->cap_bset;
        p->io_context = NULL;
        p->audit_context = NULL;
        cgroup_fork(p);
 #ifdef CONFIG_NUMA
-       p->mempolicy = mpol_copy(p->mempolicy);
+       p->mempolicy = mpol_dup(p->mempolicy);
        if (IS_ERR(p->mempolicy)) {
                retval = PTR_ERR(p->mempolicy);
                p->mempolicy = NULL;
@@ -1332,6 +1326,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        if (clone_flags & CLONE_NEWPID)
                                p->nsproxy->pid_ns->child_reaper = p;
 
+                       p->signal->leader_pid = pid;
                        p->signal->tty = current->signal->tty;
                        set_task_pgrp(p, task_pgrp_nr(current));
                        set_task_session(p, task_session_nr(current));
@@ -1379,7 +1374,7 @@ bad_fork_cleanup_security:
        security_task_free(p);
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
-       mpol_free(p->mempolicy);
+       mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
        cgroup_exit(p, cgroup_callbacks_done);
@@ -1398,7 +1393,7 @@ fork_out:
        return ERR_PTR(retval);
 }
 
-noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 {
        memset(regs, 0, sizeof(struct pt_regs));
        return regs;
@@ -1450,6 +1445,23 @@ long do_fork(unsigned long clone_flags,
        int trace = 0;
        long nr;
 
+       /*
+        * We hope to recycle these flags after 2.6.26
+        */
+       if (unlikely(clone_flags & CLONE_STOPPED)) {
+               static int __read_mostly count = 100;
+
+               if (count > 0 && printk_ratelimit()) {
+                       char comm[TASK_COMM_LEN];
+
+                       count--;
+                       printk(KERN_INFO "fork(): process `%s' used deprecated "
+                                       "clone flags 0x%lx\n",
+                               get_task_comm(comm, current),
+                               clone_flags & CLONE_STOPPED);
+               }
+       }
+
        if (unlikely(current->ptrace)) {
                trace = fork_traceflag (clone_flags);
                if (trace)
@@ -1465,13 +1477,7 @@ long do_fork(unsigned long clone_flags,
        if (!IS_ERR(p)) {
                struct completion vfork;
 
-               /*
-                * this is enough to call pid_nr_ns here, but this if
-                * improves optimisation of regular fork()
-                */
-               nr = (clone_flags & CLONE_NEWPID) ?
-                       task_pid_nr_ns(p, current->nsproxy->pid_ns) :
-                               task_pid_vnr(p);
+               nr = task_pid_vnr(p);
 
                if (clone_flags & CLONE_PARENT_SETTID)
                        put_user(nr, parent_tidptr);
@@ -1492,7 +1498,7 @@ long do_fork(unsigned long clone_flags,
                if (!(clone_flags & CLONE_STOPPED))
                        wake_up_new_task(p, clone_flags);
                else
-                       p->state = TASK_STOPPED;
+                       __set_task_state(p, TASK_STOPPED);
 
                if (unlikely (trace)) {
                        current->ptrace_message = nr;
@@ -1771,3 +1777,27 @@ bad_unshare_cleanup_thread:
 bad_unshare_out:
        return err;
 }
+
+/*
+ *     Helper to unshare the files of the current task via unshare_fd().
+ *     On success *displaced holds the previous files_struct (NULL if no
+ *     copy was made or on error); the exec layer need not know the details.
+ */
+
+int unshare_files(struct files_struct **displaced)
+{
+       struct task_struct *task = current;
+       struct files_struct *copy = NULL;
+       int error;
+
+       error = unshare_fd(CLONE_FILES, &copy);
+       if (error || !copy) {
+               *displaced = NULL;
+               return error;
+       }
+       *displaced = task->files;
+       task_lock(task);
+       task->files = copy;
+       task_unlock(task);
+       return 0;
+}