cgroups: add lock for child->cgroups in cgroup_post_fork()
[safe/jmp/linux-2.6] / kernel / cgroup.c
index e210526..d7ab4ff 100644 (file)
@@ -116,7 +116,6 @@ static int root_count;
  * be called.
  */
 static int need_forkexit_callback __read_mostly;
-static int need_mm_owner_callback __read_mostly;
 
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -573,7 +572,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
                inode->i_mode = mode;
                inode->i_uid = current_fsuid();
                inode->i_gid = current_fsgid();
-               inode->i_blocks = 0;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
        }
@@ -588,7 +586,7 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
        struct cgroup_subsys *ss;
        for_each_subsys(cgrp->root, ss)
-               if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
+               if (ss->pre_destroy)    /* callback is optional */
                        ss->pre_destroy(ss, cgrp);
        return;
 }
@@ -612,10 +610,8 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                /*
                 * Release the subsystem state objects.
                 */
-               for_each_subsys(cgrp->root, ss) {
-                       if (cgrp->subsys[ss->subsys_id])
-                               ss->destroy(ss, cgrp);
-               }
+               for_each_subsys(cgrp->root, ss)
+                       ss->destroy(ss, cgrp);
 
                cgrp->root->number_of_cgroups--;
                mutex_unlock(&cgroup_mutex);
@@ -702,7 +698,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
         * any child cgroups exist. This is theoretically supportable
         * but involves complex error handling, so it's being left until
         * later */
-       if (!list_empty(&cgrp->children))
+       if (root->number_of_cgroups > 1)
                return -EBUSY;
 
        /* Process each subsystem */
@@ -1024,7 +1020,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                if (ret == -EBUSY) {
                        mutex_unlock(&cgroup_mutex);
                        mutex_unlock(&inode->i_mutex);
-                       goto drop_new_super;
+                       goto free_cg_links;
                }
 
                /* EBUSY should be the only error here */
@@ -1073,10 +1069,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 
        return simple_set_mnt(mnt, sb);
 
+ free_cg_links:
+       free_cg_links(&tmp_cg_links);
  drop_new_super:
        up_write(&sb->s_umount);
        deactivate_super(sb);
-       free_cg_links(&tmp_cg_links);
        return ret;
 }
 
@@ -1279,7 +1276,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
 {
        struct task_struct *tsk;
-       uid_t euid;
+       const struct cred *cred = current_cred(), *tcred;
        int ret;
 
        if (pid) {
@@ -1289,16 +1286,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
                        rcu_read_unlock();
                        return -ESRCH;
                }
-               get_task_struct(tsk);
-               rcu_read_unlock();
 
-               euid = current_euid();
-               if (euid &&
-                   euid != tsk->cred->uid &&
-                   euid != tsk->cred->suid) {
-                       put_task_struct(tsk);
+               tcred = __task_cred(tsk);
+               if (cred->euid &&
+                   cred->euid != tcred->uid &&
+                   cred->euid != tcred->suid) {
+                       rcu_read_unlock();
                        return -EACCES;
                }
+               get_task_struct(tsk);
+               rcu_read_unlock();
        } else {
                tsk = current;
                get_task_struct(tsk);
@@ -1446,7 +1443,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
        struct cftype *cft = __d_cft(file->f_dentry);
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
 
-       if (!cft || cgroup_is_removed(cgrp))
+       if (cgroup_is_removed(cgrp))
                return -ENODEV;
        if (cft->write)
                return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -1491,7 +1488,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
        struct cftype *cft = __d_cft(file->f_dentry);
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
 
-       if (!cft || cgroup_is_removed(cgrp))
+       if (cgroup_is_removed(cgrp))
                return -ENODEV;
 
        if (cft->read)
@@ -1555,10 +1552,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
        err = generic_file_open(inode, file);
        if (err)
                return err;
-
        cft = __d_cft(file->f_dentry);
-       if (!cft)
-               return -ENODEV;
+
        if (cft->read_map || cft->read_seq_string) {
                struct cgroup_seqfile_state *state =
                        kzalloc(sizeof(*state), GFP_USER);
@@ -2042,10 +2037,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
        struct cgroup *cgrp;
        struct cgroup_iter it;
        struct task_struct *tsk;
+
        /*
-        * Validate dentry by checking the superblock operations
+        * Validate dentry by checking the superblock operations,
+        * and make sure it's a directory.
         */
-       if (dentry->d_sb->s_op != &cgroup_ops)
+       if (dentry->d_sb->s_op != &cgroup_ops ||
+           !S_ISDIR(dentry->d_inode->i_mode))
                 goto err;
 
        ret = 0;
@@ -2461,8 +2459,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        struct cgroup *cgrp = dentry->d_fsdata;
        struct dentry *d;
        struct cgroup *parent;
-       struct super_block *sb;
-       struct cgroupfs_root *root;
 
        /* the vfs holds both inode->i_mutex already */
 
@@ -2475,10 +2471,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
-
-       parent = cgrp->parent;
-       root = cgrp->root;
-       sb = root->sb;
+       mutex_unlock(&cgroup_mutex);
 
        /*
         * Call pre_destroy handlers of subsys. Notify subsystems
@@ -2486,7 +2479,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
         */
        cgroup_call_pre_destroy(cgrp);
 
-       if (cgroup_has_css_refs(cgrp)) {
+       mutex_lock(&cgroup_mutex);
+       parent = cgrp->parent;
+
+       if (atomic_read(&cgrp->count)
+           || !list_empty(&cgrp->children)
+           || cgroup_has_css_refs(cgrp)) {
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
@@ -2500,7 +2498,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        list_del(&cgrp->sibling);
        spin_lock(&cgrp->dentry->d_lock);
        d = dget(cgrp->dentry);
-       cgrp->dentry = NULL;
        spin_unlock(&d->d_lock);
 
        cgroup_d_remove_dir(d);
@@ -2533,7 +2530,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
        init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
 
        need_forkexit_callback |= ss->fork || ss->exit;
-       need_mm_owner_callback |= !!ss->mm_owner_changed;
 
        /* At system boot, before all subsystems have been
         * registered, no tasks have been forked, so we don't
@@ -2783,37 +2779,6 @@ void cgroup_fork_callbacks(struct task_struct *child)
        }
 }
 
-#ifdef CONFIG_MM_OWNER
-/**
- * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
- * @p: the new owner
- *
- * Called on every change to mm->owner. mm_init_owner() does not
- * invoke this routine, since it assigns the mm->owner the first time
- * and does not change it.
- *
- * The callbacks are invoked with mmap_sem held in read mode.
- */
-void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-       struct cgroup *oldcgrp, *newcgrp = NULL;
-
-       if (need_mm_owner_callback) {
-               int i;
-               for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-                       struct cgroup_subsys *ss = subsys[i];
-                       oldcgrp = task_cgroup(old, ss->subsys_id);
-                       if (new)
-                               newcgrp = task_cgroup(new, ss->subsys_id);
-                       if (oldcgrp == newcgrp)
-                               continue;
-                       if (ss->mm_owner_changed)
-                               ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
-               }
-       }
-}
-#endif /* CONFIG_MM_OWNER */
-
 /**
  * cgroup_post_fork - called on a new task after adding it to the task list
  * @child: the task in question
@@ -2827,8 +2792,10 @@ void cgroup_post_fork(struct task_struct *child)
 {
        if (use_task_css_set_links) {
                write_lock(&css_set_lock);
+               task_lock(child);       /* covers child->cgroups read */
                if (list_empty(&child->cg_list))
                        list_add(&child->cg_list, &child->cgroups->tasks);
+               task_unlock(child);
                write_unlock(&css_set_lock);
        }
 }
@@ -2931,9 +2898,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
  again:
        root = subsys->root;
        if (root == &rootnode) {
-               printk(KERN_INFO
-                      "Not cloning cgroup for unused subsystem %s\n",
-                      subsys->name);
                mutex_unlock(&cgroup_mutex);
                return 0;
        }
@@ -2941,7 +2905,11 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
        parent = task_cgroup(tsk, subsys->subsys_id);
 
        /* Pin the hierarchy */
-       atomic_inc(&parent->root->sb->s_active);
+       if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+               /* We race with the final deactivate_super() */
+               mutex_unlock(&cgroup_mutex);
+               return 0;
+       }
 
        /* Keep the cgroup alive */
        get_css_set(cg);
@@ -2963,7 +2931,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
        }
 
        /* Create the cgroup directory, which also creates the cgroup */
-       ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+       ret = vfs_mkdir(inode, dentry, 0755);
        child = __d_cgrp(dentry);
        dput(dentry);
        if (ret) {
@@ -2973,13 +2941,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
                goto out_release;
        }
 
-       if (!child) {
-               printk(KERN_INFO
-                      "Couldn't find new cgroup %s\n", nodename);
-               ret = -ENOMEM;
-               goto out_release;
-       }
-
        /* The cgroup now exists. Retake cgroup_mutex and check
         * that we're still in the same state that we thought we
         * were. */