cgroups: introduce link_css_set() to remove duplicate code
[safe/jmp/linux-2.6] / kernel / cgroup.c
index dee025f..4c475ce 100644 (file)
@@ -84,7 +84,7 @@ struct cgroupfs_root {
        /* Tracks how many cgroups are currently defined in hierarchy.*/
        int number_of_cgroups;
 
-       /* A list running through the mounted hierarchies */
+       /* A list running through the active hierarchies */
        struct list_head root_list;
 
        /* Hierarchy-specific flags */
@@ -116,7 +116,6 @@ static int root_count;
  * be called.
  */
 static int need_forkexit_callback __read_mostly;
-static int need_mm_owner_callback __read_mostly;
 
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -149,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp)
 #define for_each_subsys(_root, _ss) \
 list_for_each_entry(_ss, &_root->subsys_list, sibling)
 
-/* for_each_root() allows you to iterate across the active hierarchies */
-#define for_each_root(_root) \
+/* for_each_active_root() allows you to iterate across the active hierarchies */
+#define for_each_active_root(_root) \
 list_for_each_entry(_root, &roots, root_list)
 
 /* the list of cgroups eligible for automatic release. Protected by
@@ -385,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp)
        return 0;
 }
 
+/**
+ * link_css_set - link a css_set to a cgroup, consuming one preallocated link
+ * @tmp_cg_links: non-empty list of links built by allocate_cg_links()
+ * @cg: the css_set to be linked; the link is added to its cg_links list
+ * @cgrp: the destination cgroup; the link is moved onto its css_sets list
+ */
+static void link_css_set(struct list_head *tmp_cg_links,
+                        struct css_set *cg, struct cgroup *cgrp)
+{
+       struct cg_cgroup_link *link;
+
+       BUG_ON(list_empty(tmp_cg_links));
+       link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
+                               cgrp_link_list);
+       link->cg = cg;
+       list_move(&link->cgrp_link_list, &cgrp->css_sets);
+       list_add(&link->cg_link_list, &cg->cg_links);
+}
+
 /*
  * find_css_set() takes an existing cgroup group and a
  * cgroup object, and returns a css_set object that's
@@ -400,7 +418,6 @@ static struct css_set *find_css_set(
        int i;
 
        struct list_head tmp_cg_links;
-       struct cg_cgroup_link *link;
 
        struct hlist_head *hhead;
 
@@ -445,26 +462,11 @@ static struct css_set *find_css_set(
                 * only do it for the first subsystem in each
                 * hierarchy
                 */
-               if (ss->root->subsys_list.next == &ss->sibling) {
-                       BUG_ON(list_empty(&tmp_cg_links));
-                       link = list_entry(tmp_cg_links.next,
-                                         struct cg_cgroup_link,
-                                         cgrp_link_list);
-                       list_del(&link->cgrp_link_list);
-                       list_add(&link->cgrp_link_list, &cgrp->css_sets);
-                       link->cg = res;
-                       list_add(&link->cg_link_list, &res->cg_links);
-               }
-       }
-       if (list_empty(&rootnode.subsys_list)) {
-               link = list_entry(tmp_cg_links.next,
-                                 struct cg_cgroup_link,
-                                 cgrp_link_list);
-               list_del(&link->cgrp_link_list);
-               list_add(&link->cgrp_link_list, &dummytop->css_sets);
-               link->cg = res;
-               list_add(&link->cg_link_list, &res->cg_links);
+               if (ss->root->subsys_list.next == &ss->sibling)
+                       link_css_set(&tmp_cg_links, res, cgrp);
        }
+       if (list_empty(&rootnode.subsys_list))
+               link_css_set(&tmp_cg_links, res, dummytop);
 
        BUG_ON(!list_empty(&tmp_cg_links));
 
@@ -573,7 +575,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
                inode->i_mode = mode;
                inode->i_uid = current_fsuid();
                inode->i_gid = current_fsgid();
-               inode->i_blocks = 0;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
        }
@@ -588,7 +589,7 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
        struct cgroup_subsys *ss;
        for_each_subsys(cgrp->root, ss)
-               if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
+               if (ss->pre_destroy)
                        ss->pre_destroy(ss, cgrp);
        return;
 }
@@ -612,10 +613,8 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                /*
                 * Release the subsystem state objects.
                 */
-               for_each_subsys(cgrp->root, ss) {
-                       if (cgrp->subsys[ss->subsys_id])
-                               ss->destroy(ss, cgrp);
-               }
+               for_each_subsys(cgrp->root, ss)
+                       ss->destroy(ss, cgrp);
 
                cgrp->root->number_of_cgroups--;
                mutex_unlock(&cgroup_mutex);
@@ -702,7 +701,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
         * any child cgroups exist. This is theoretically supportable
         * but involves complex error handling, so it's being left until
         * later */
-       if (!list_empty(&cgrp->children))
+       if (root->number_of_cgroups > 1)
                return -EBUSY;
 
        /* Process each subsystem */
@@ -716,8 +715,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                        BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
                        cgrp->subsys[i] = dummytop->subsys[i];
                        cgrp->subsys[i]->cgroup = cgrp;
-                       list_add(&ss->sibling, &root->subsys_list);
-                       rcu_assign_pointer(ss->root, root);
+                       list_move(&ss->sibling, &root->subsys_list);
+                       ss->root = root;
                        if (ss->bind)
                                ss->bind(ss, cgrp);
 
@@ -729,8 +728,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                                ss->bind(ss, dummytop);
                        dummytop->subsys[i]->cgroup = dummytop;
                        cgrp->subsys[i] = NULL;
-                       rcu_assign_pointer(subsys[i]->root, &rootnode);
-                       list_del(&ss->sibling);
+                       subsys[i]->root = &rootnode;
+                       list_move(&ss->sibling, &rootnode.subsys_list);
                } else if (bit & final_bits) {
                        /* Subsystem state should already exist */
                        BUG_ON(!cgrp->subsys[i]);
@@ -992,7 +991,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                root = NULL;
        } else {
                /* New superblock */
-               struct cgroup *cgrp = &root->top_cgroup;
+               struct cgroup *root_cgrp = &root->top_cgroup;
                struct inode *inode;
                int i;
 
@@ -1024,7 +1023,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                if (ret == -EBUSY) {
                        mutex_unlock(&cgroup_mutex);
                        mutex_unlock(&inode->i_mutex);
-                       goto drop_new_super;
+                       goto free_cg_links;
                }
 
                /* EBUSY should be the only error here */
@@ -1033,7 +1032,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                list_add(&root->root_list, &roots);
                root_count++;
 
-               sb->s_root->d_fsdata = &root->top_cgroup;
+               sb->s_root->d_fsdata = root_cgrp;
                root->top_cgroup.dentry = sb->s_root;
 
                /* Link the top cgroup in this hierarchy into all
@@ -1044,39 +1043,29 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                        struct hlist_node *node;
                        struct css_set *cg;
 
-                       hlist_for_each_entry(cg, node, hhead, hlist) {
-                               struct cg_cgroup_link *link;
-
-                               BUG_ON(list_empty(&tmp_cg_links));
-                               link = list_entry(tmp_cg_links.next,
-                                                 struct cg_cgroup_link,
-                                                 cgrp_link_list);
-                               list_del(&link->cgrp_link_list);
-                               link->cg = cg;
-                               list_add(&link->cgrp_link_list,
-                                        &root->top_cgroup.css_sets);
-                               list_add(&link->cg_link_list, &cg->cg_links);
-                       }
+                       hlist_for_each_entry(cg, node, hhead, hlist)
+                               link_css_set(&tmp_cg_links, cg, root_cgrp);
                }
                write_unlock(&css_set_lock);
 
                free_cg_links(&tmp_cg_links);
 
-               BUG_ON(!list_empty(&cgrp->sibling));
-               BUG_ON(!list_empty(&cgrp->children));
+               BUG_ON(!list_empty(&root_cgrp->sibling));
+               BUG_ON(!list_empty(&root_cgrp->children));
                BUG_ON(root->number_of_cgroups != 1);
 
-               cgroup_populate_dir(cgrp);
+               cgroup_populate_dir(root_cgrp);
                mutex_unlock(&inode->i_mutex);
                mutex_unlock(&cgroup_mutex);
        }
 
        return simple_set_mnt(mnt, sb);
 
+ free_cg_links:
+       free_cg_links(&tmp_cg_links);
  drop_new_super:
        up_write(&sb->s_umount);
        deactivate_super(sb);
-       free_cg_links(&tmp_cg_links);
        return ret;
 }
 
@@ -1114,10 +1103,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
        }
        write_unlock(&css_set_lock);
 
-       if (!list_empty(&root->root_list)) {
-               list_del(&root->root_list);
-               root_count--;
-       }
+       list_del(&root->root_list);
+       root_count--;
+
        mutex_unlock(&cgroup_mutex);
 
        kfree(root);
@@ -1217,7 +1205,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        int retval = 0;
        struct cgroup_subsys *ss;
        struct cgroup *oldcgrp;
-       struct css_set *cg = tsk->cgroups;
+       struct css_set *cg;
        struct css_set *newcg;
        struct cgroupfs_root *root = cgrp->root;
        int subsys_id;
@@ -1237,11 +1225,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
                }
        }
 
+       task_lock(tsk);
+       cg = tsk->cgroups;
+       get_css_set(cg);
+       task_unlock(tsk);
        /*
         * Locate or allocate a new css_set for this task,
         * based on its final set of cgroups
         */
        newcg = find_css_set(cg, cgrp);
+       put_css_set(cg);
        if (!newcg)
                return -ENOMEM;
 
@@ -1446,7 +1439,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
        struct cftype *cft = __d_cft(file->f_dentry);
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
 
-       if (!cft || cgroup_is_removed(cgrp))
+       if (cgroup_is_removed(cgrp))
                return -ENODEV;
        if (cft->write)
                return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -1491,7 +1484,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
        struct cftype *cft = __d_cft(file->f_dentry);
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
 
-       if (!cft || cgroup_is_removed(cgrp))
+       if (cgroup_is_removed(cgrp))
                return -ENODEV;
 
        if (cft->read)
@@ -1555,10 +1548,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
        err = generic_file_open(inode, file);
        if (err)
                return err;
-
        cft = __d_cft(file->f_dentry);
-       if (!cft)
-               return -ENODEV;
+
        if (cft->read_map || cft->read_seq_string) {
                struct cgroup_seqfile_state *state =
                        kzalloc(sizeof(*state), GFP_USER);
@@ -1813,6 +1804,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 {
        struct task_struct *res;
        struct list_head *l = it->task;
+       struct cg_cgroup_link *link;
 
        /* If the iterator cg is NULL, we have no tasks */
        if (!it->cg_link)
@@ -1820,7 +1812,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
        res = list_entry(l, struct task_struct, cg_list);
        /* Advance iterator to find next entry */
        l = l->next;
-       if (l == &res->cgroups->tasks) {
+       link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
+       if (l == &link->cg->tasks) {
                /* We reached the end of this task list - move on to
                 * the next cg_cgroup_link */
                cgroup_advance_iter(cgrp, it);
@@ -2053,7 +2046,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 
        ret = 0;
        cgrp = dentry->d_fsdata;
-       rcu_read_lock();
 
        cgroup_iter_start(cgrp, &it);
        while ((tsk = cgroup_iter_next(cgrp, &it))) {
@@ -2078,7 +2070,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
        }
        cgroup_iter_end(cgrp, &it);
 
-       rcu_read_unlock();
 err:
        return ret;
 }
@@ -2464,8 +2455,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        struct cgroup *cgrp = dentry->d_fsdata;
        struct dentry *d;
        struct cgroup *parent;
-       struct super_block *sb;
-       struct cgroupfs_root *root;
 
        /* the vfs holds both inode->i_mutex already */
 
@@ -2488,8 +2477,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
        mutex_lock(&cgroup_mutex);
        parent = cgrp->parent;
-       root = cgrp->root;
-       sb = root->sb;
 
        if (atomic_read(&cgrp->count)
            || !list_empty(&cgrp->children)
@@ -2526,6 +2513,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
        printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
        /* Create the top cgroup state for this subsystem */
+       list_add(&ss->sibling, &rootnode.subsys_list);
        ss->root = &rootnode;
        css = ss->create(ss, dummytop);
        /* We don't handle early failures gracefully */
@@ -2539,7 +2527,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
        init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
 
        need_forkexit_callback |= ss->fork || ss->exit;
-       need_mm_owner_callback |= !!ss->mm_owner_changed;
 
        /* At system boot, before all subsystems have been
         * registered, no tasks have been forked, so we don't
@@ -2564,7 +2551,6 @@ int __init cgroup_init_early(void)
        INIT_HLIST_NODE(&init_css_set.hlist);
        css_set_count = 1;
        init_cgroup_root(&rootnode);
-       list_add(&rootnode.root_list, &roots);
        root_count = 1;
        init_task.cgroups = &init_css_set;
 
@@ -2671,15 +2657,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 
        mutex_lock(&cgroup_mutex);
 
-       for_each_root(root) {
+       for_each_active_root(root) {
                struct cgroup_subsys *ss;
                struct cgroup *cgrp;
                int subsys_id;
                int count = 0;
 
-               /* Skip this hierarchy if it has no active subsystems */
-               if (!root->actual_subsys_bits)
-                       continue;
                seq_printf(m, "%lu:", root->subsys_bits);
                for_each_subsys(root, ss)
                        seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
@@ -2789,37 +2772,6 @@ void cgroup_fork_callbacks(struct task_struct *child)
        }
 }
 
-#ifdef CONFIG_MM_OWNER
-/**
- * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
- * @p: the new owner
- *
- * Called on every change to mm->owner. mm_init_owner() does not
- * invoke this routine, since it assigns the mm->owner the first time
- * and does not change it.
- *
- * The callbacks are invoked with mmap_sem held in read mode.
- */
-void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-       struct cgroup *oldcgrp, *newcgrp = NULL;
-
-       if (need_mm_owner_callback) {
-               int i;
-               for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-                       struct cgroup_subsys *ss = subsys[i];
-                       oldcgrp = task_cgroup(old, ss->subsys_id);
-                       if (new)
-                               newcgrp = task_cgroup(new, ss->subsys_id);
-                       if (oldcgrp == newcgrp)
-                               continue;
-                       if (ss->mm_owner_changed)
-                               ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
-               }
-       }
-}
-#endif /* CONFIG_MM_OWNER */
-
 /**
  * cgroup_post_fork - called on a new task after adding it to the task list
  * @child: the task in question
@@ -2833,8 +2785,10 @@ void cgroup_post_fork(struct task_struct *child)
 {
        if (use_task_css_set_links) {
                write_lock(&css_set_lock);
+               task_lock(child);
                if (list_empty(&child->cg_list))
                        list_add(&child->cg_list, &child->cgroups->tasks);
+               task_unlock(child);
                write_unlock(&css_set_lock);
        }
 }
@@ -2937,20 +2891,23 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
  again:
        root = subsys->root;
        if (root == &rootnode) {
-               printk(KERN_INFO
-                      "Not cloning cgroup for unused subsystem %s\n",
-                      subsys->name);
                mutex_unlock(&cgroup_mutex);
                return 0;
        }
+       task_lock(tsk);
        cg = tsk->cgroups;
        parent = task_cgroup(tsk, subsys->subsys_id);
 
-       /* Pin the hierarchy */
-       atomic_inc(&parent->root->sb->s_active);
+       /* Pin the hierarchy; fails if racing the final deactivate_super() */
+       if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+               task_unlock(tsk);
+               mutex_unlock(&cgroup_mutex);
+               return 0;
+       }
 
        /* Keep the cgroup alive */
        get_css_set(cg);
+       task_unlock(tsk);
        mutex_unlock(&cgroup_mutex);
 
        /* Now do the VFS work to create a cgroup */
@@ -2969,7 +2926,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
        }
 
        /* Create the cgroup directory, which also creates the cgroup */
-       ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+       ret = vfs_mkdir(inode, dentry, 0755);
        child = __d_cgrp(dentry);
        dput(dentry);
        if (ret) {
@@ -2979,13 +2936,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
                goto out_release;
        }
 
-       if (!child) {
-               printk(KERN_INFO
-                      "Couldn't find new cgroup %s\n", nodename);
-               ret = -ENOMEM;
-               goto out_release;
-       }
-
        /* The cgroup now exists. Retake cgroup_mutex and check
         * that we're still in the same state that we thought we
         * were. */