X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fcgroup.c;h=e7da66efc9fc578fbd9c95582b28f71f910ba932;hb=3df91fe30a1547af7e794c6e8cca76f4932c6ad7;hp=7d207414f2c71cfca74e57c07ede5d4b6f609e1c;hpb=622d42cac9ed42098aa50c53994f625abfa3d473;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7d20741..e7da66e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -113,9 +113,9 @@ static int root_count; #define dummytop (&rootnode.top_cgroup) /* This flag indicates whether tasks in the fork and exit paths should - * take callback_mutex and check for fork/exit handlers to call. This - * avoids us having to do extra work in the fork/exit path if none of the - * subsystems need to be called. + * check for fork/exit handlers to call. This avoids us having to do + * extra work in the fork/exit path if none of the subsystems need to + * be called. */ static int need_forkexit_callback; @@ -141,7 +141,7 @@ enum { ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ }; -inline int cgroup_is_releasable(const struct cgroup *cgrp) +static int cgroup_is_releasable(const struct cgroup *cgrp) { const int bits = (1 << CGRP_RELEASABLE) | @@ -149,7 +149,7 @@ inline int cgroup_is_releasable(const struct cgroup *cgrp) return (cgrp->flags & bits) == bits; } -inline int notify_on_release(const struct cgroup *cgrp) +static int notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } @@ -307,7 +307,6 @@ static inline void put_css_set_taskexit(struct css_set *cg) * template: location in which to build the desired set of subsystem * state objects for the new cgroup group */ - static struct css_set *find_existing_css_set( struct css_set *oldcg, struct cgroup *cgrp, @@ -320,7 +319,7 @@ static struct css_set *find_existing_css_set( /* Built the set of subsystem state objects that we want to * see in the new css_set */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - if (root->subsys_bits & (1ull << i)) { + if (root->subsys_bits & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -354,7 +353,6 @@ static struct css_set *find_existing_css_set( * and chains them on tmp through their cgrp_link_list fields. Returns 0 on * success or a negative error */ - static int allocate_cg_links(int count, struct list_head *tmp) { struct cg_cgroup_link *link; @@ -396,7 +394,6 @@ static void free_cg_links(struct list_head *tmp) * substituted into the appropriate hierarchy. Must be called with * cgroup_mutex held */ - static struct css_set *find_css_set( struct css_set *oldcg, struct cgroup *cgrp) { @@ -473,7 +470,6 @@ static struct css_set *find_css_set( /* Link this cgroup group into the list */ list_add(&res->list, &init_css_set.list); css_set_count++; - INIT_LIST_HEAD(&res->tasks); write_unlock(&css_set_lock); return res; @@ -489,7 +485,7 @@ static struct css_set *find_css_set( * Any task can increment and decrement the count field without lock. * So in general, code holding cgroup_mutex can't rely on the count * field not changing. However, if the count goes to zero, then only - * attach_task() can increment it again. Because a count of zero + * cgroup_attach_task() can increment it again. Because a count of zero * means that no tasks are currently attached, therefore there is no * way a task attached to that cgroup can fork (the other way to * increment the count). So code holding cgroup_mutex can safely @@ -507,8 +503,8 @@ static struct css_set *find_css_set( * critical pieces of code here. 
The exception occurs on cgroup_exit(), * when a task in a notify_on_release cgroup exits. Then cgroup_mutex * is taken, and if the cgroup count is zero, a usermode call made - * to /sbin/cgroup_release_agent with the name of the cgroup (path - * relative to the root of cgroup file system) as the argument. + * to the release agent with the name of the cgroup (path relative to + * the root of cgroup file system) as the argument. * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty. Since all @@ -520,24 +516,23 @@ static struct css_set *find_css_set( * The task_lock() exception * * The need for this exception arises from the action of - * attach_task(), which overwrites one tasks cgroup pointer with - * another. It does so using cgroup_mutexe, however there are + * cgroup_attach_task(), which overwrites one tasks cgroup pointer with + * another. It does so using cgroup_mutex, however there are * several performance critical places that need to reference * task->cgroup without the expense of grabbing a system global * mutex. Therefore except as noted below, when dereferencing or, as - * in attach_task(), modifying a task'ss cgroup pointer we use + * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use * task_lock(), which acts on a spinlock (task->alloc_lock) already in * the task_struct routinely used for such matters. * * P.S. One more locking exception. RCU is used to guard the - * update of a tasks cgroup pointer by attach_task() + * update of a tasks cgroup pointer by cgroup_attach_task() */ /** * cgroup_lock - lock out any changes to cgroup structures * */ - void cgroup_lock(void) { mutex_lock(&cgroup_mutex); @@ -548,7 +543,6 @@ void cgroup_lock(void) * * Undo the lock taken in a previous cgroup_lock() call. */ - void cgroup_unlock(void) { mutex_unlock(&cgroup_mutex); @@ -586,11 +580,25 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) return inode; } +/* + * Call subsys's pre_destroy handler. + * This is called before css refcnt check. + */ +static void cgroup_call_pre_destroy(struct cgroup *cgrp) +{ + struct cgroup_subsys *ss; + for_each_subsys(cgrp->root, ss) + if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) + ss->pre_destroy(ss, cgrp); + return; +} + static void cgroup_diput(struct dentry *dentry, struct inode *inode) { /* is dentry a directory ? if so, kfree() associated cgroup */ if (S_ISDIR(inode->i_mode)) { struct cgroup *cgrp = dentry->d_fsdata; + struct cgroup_subsys *ss; BUG_ON(!(cgroup_is_removed(cgrp))); /* It's possible for external users to be holding css * reference counts on a cgroup; css_put() needs to @@ -599,6 +607,23 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) * queue the cgroup to be handled by the release * agent */ synchronize_rcu(); + + mutex_lock(&cgroup_mutex); + /* + * Release the subsystem state objects. 
+ */ + for_each_subsys(cgrp->root, ss) { + if (cgrp->subsys[ss->subsys_id]) + ss->destroy(ss, cgrp); + } + + cgrp->root->number_of_cgroups--; + mutex_unlock(&cgroup_mutex); + + /* Drop the active superblock reference that we took when we + * created the cgroup */ + deactivate_super(cgrp->root->sb); + kfree(cgrp); } iput(inode); @@ -663,7 +688,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, added_bits = final_bits & ~root->actual_subsys_bits; /* Check that any added subsystems are currently free */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - unsigned long long bit = 1ull << i; + unsigned long bit = 1UL << i; struct cgroup_subsys *ss = subsys[i]; if (!(bit & added_bits)) continue; @@ -757,7 +782,14 @@ static int parse_cgroupfs_options(char *data, if (!*token) return -EINVAL; if (!strcmp(token, "all")) { - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; + /* Add all non-disabled subsystems */ + int i; + opts->subsys_bits = 0; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!ss->disabled) + opts->subsys_bits |= 1ul << i; + } } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { @@ -775,7 +807,8 @@ static int parse_cgroupfs_options(char *data, for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { ss = subsys[i]; if (!strcmp(token, ss->name)) { - set_bit(i, &opts->subsys_bits); + if (!ss->disabled) + set_bit(i, &opts->subsys_bits); break; } } @@ -894,7 +927,6 @@ static int cgroup_get_rootdir(struct super_block *sb) if (!inode) return -ENOMEM; - inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_op = &cgroup_dir_inode_operations; /* directories start off with i_nlink == 2 (for "." entry) */ @@ -928,8 +960,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type, } root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) + if (!root) { + if (opts.release_agent) + kfree(opts.release_agent); return -ENOMEM; + } init_cgroup_root(root); root->subsys_bits = opts.subsys_bits; @@ -1096,8 +1131,13 @@ static inline struct cftype *__d_cft(struct dentry *dentry) return dentry->d_fsdata; } -/* - * Called with cgroup_mutex held. Writes path of cgroup into buf. +/** + * cgroup_path - generate the path of a cgroup + * @cgrp: the cgroup in question + * @buf: the buffer to write the path into + * @buflen: the length of the buffer + * + * Called with cgroup_mutex held. Writes path of cgroup into buf. * Returns 0 on success, -errno on error. */ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) @@ -1155,13 +1195,15 @@ static void get_first_subsys(const struct cgroup *cgrp, *subsys_id = test_ss->subsys_id; } -/* - * Attach task 'tsk' to cgroup 'cgrp' +/** + * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' + * @cgrp: the cgroup the task is attaching to + * @tsk: the task to be attached * - * Call holding cgroup_mutex. May take task_lock of - * the task 'pid' during call. + * Call holding cgroup_mutex. May take task_lock of + * the task 'tsk' during call. 
*/ -static int attach_task(struct cgroup *cgrp, struct task_struct *tsk) +int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { int retval = 0; struct cgroup_subsys *ss; @@ -1236,7 +1278,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) if (pid) { rcu_read_lock(); - tsk = find_task_by_pid(pid); + tsk = find_task_by_vpid(pid); if (!tsk || tsk->flags & PF_EXITING) { rcu_read_unlock(); return -ESRCH; @@ -1254,13 +1296,12 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) get_task_struct(tsk); } - ret = attach_task(cgrp, tsk); + ret = cgroup_attach_task(cgrp, tsk); put_task_struct(tsk); return ret; } /* The various types of files and directories in a cgroup file system */ - enum cgroup_filetype { FILE_ROOT, FILE_DIR, @@ -1330,6 +1371,10 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp, mutex_lock(&cgroup_mutex); + /* + * This was already checked for in cgroup_file_write(), but + * check again now we're holding cgroup_mutex. + */ if (cgroup_is_removed(cgrp)) { retval = -ENODEV; goto out2; @@ -1370,7 +1415,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, struct cftype *cft = __d_cft(file->f_dentry); struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - if (!cft) + if (!cft || cgroup_is_removed(cgrp)) return -ENODEV; if (cft->write) return cft->write(cgrp, cft, file, buf, nbytes, ppos); @@ -1440,7 +1485,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, struct cftype *cft = __d_cft(file->f_dentry); struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - if (!cft) + if (!cft || cgroup_is_removed(cgrp)) return -ENODEV; if (cft->read) @@ -1547,12 +1592,11 @@ static int cgroup_create_file(struct dentry *dentry, int mode, } /* - * cgroup_create_dir - create a directory for an object. - * cgrp: the cgroup we create the directory for. - * It must have a valid ->parent field - * And we are going to fill its ->dentry field. - * dentry: dentry of the new cgroup - * mode: mode to set on new directory. + * cgroup_create_dir - create a directory for an object. + * @cgrp: the cgroup we create the directory for. It must have a valid + * ->parent field. And we are going to fill its ->dentry field. + * @dentry: dentry of the new cgroup + * @mode: mode to set on new directory. */ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, int mode) @@ -1614,8 +1658,12 @@ int cgroup_add_files(struct cgroup *cgrp, return 0; } -/* Count the number of tasks in a cgroup. */ - +/** + * cgroup_task_count - count the number of tasks in a cgroup. + * @cgrp: the cgroup in question + * + * Return the number of tasks in the cgroup. + */ int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; @@ -1658,6 +1706,34 @@ static void cgroup_advance_iter(struct cgroup *cgrp, it->task = cg->tasks.next; } +/* + * To reduce the fork() overhead for systems that are not actually + * using their cgroups capability, we don't maintain the lists running + * through each css_set to its tasks until we see the list actually + * used - in other words after the first call to cgroup_iter_start(). + * + * The tasklist_lock is not held here, as do_each_thread() and + * while_each_thread() are protected by RCU. 
+ */ +static void cgroup_enable_task_cg_lists(void) +{ + struct task_struct *p, *g; + write_lock(&css_set_lock); + use_task_css_set_links = 1; + do_each_thread(g, p) { + task_lock(p); + /* + * We should check if the process is exiting, otherwise + * it will race with cgroup_exit() in that the list + * entry won't be deleted though the process has exited. + */ + if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list)) + list_add(&p->cg_list, &p->cgroups->tasks); + task_unlock(p); + } while_each_thread(g, p); + write_unlock(&css_set_lock); +} + void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) { /* @@ -1665,18 +1741,9 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) * we need to enable the list linking each css_set to its * tasks, and fix up all existing tasks. */ - if (!use_task_css_set_links) { - struct task_struct *p, *g; - write_lock(&css_set_lock); - use_task_css_set_links = 1; - do_each_thread(g, p) { - task_lock(p); - if (list_empty(&p->cg_list)) - list_add(&p->cg_list, &p->cgroups->tasks); - task_unlock(p); - } while_each_thread(g, p); - write_unlock(&css_set_lock); - } + if (!use_task_css_set_links) + cgroup_enable_task_cg_lists(); + read_lock(&css_set_lock); it->cg_link = &cgrp->css_sets; cgroup_advance_iter(cgrp, it); @@ -1709,6 +1776,166 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) read_unlock(&css_set_lock); } +static inline int started_after_time(struct task_struct *t1, + struct timespec *time, + struct task_struct *t2) +{ + int start_diff = timespec_compare(&t1->start_time, time); + if (start_diff > 0) { + return 1; + } else if (start_diff < 0) { + return 0; + } else { + /* + * Arbitrarily, if two processes started at the same + * time, we'll say that the lower pointer value + * started first. Note that t2 may have exited by now + * so this may not be a valid pointer any longer, but + * that's fine - it still serves to distinguish + * between two tasks started (effectively) simultaneously. + */ + return t1 > t2; + } +} + +/* + * This function is a callback from heap_insert() and is used to order + * the heap. + * In this case we order the heap in descending task start time. + */ +static inline int started_after(void *p1, void *p2) +{ + struct task_struct *t1 = p1; + struct task_struct *t2 = p2; + return started_after_time(t1, &t2->start_time, t2); +} + +/** + * cgroup_scan_tasks - iterate though all the tasks in a cgroup + * @scan: struct cgroup_scanner containing arguments for the scan + * + * Arguments include pointers to callback functions test_task() and + * process_task(). + * Iterate through all the tasks in a cgroup, calling test_task() for each, + * and if it returns true, call process_task() for it also. + * The test_task pointer may be NULL, meaning always true (select all tasks). + * Effectively duplicates cgroup_iter_{start,next,end}() + * but does not lock css_set_lock for the call to process_task(). + * The struct cgroup_scanner may be embedded in any structure of the caller's + * creation. + * It is guaranteed that process_task() will act on every task that + * is a member of the cgroup for the duration of this call. This + * function may or may not call process_task() for tasks that exit + * or move to a different cgroup during the call, or are forked or + * move into the cgroup during the call. + * + * Note that test_task() may be called with locks held, and may in some + * situations be called multiple times for the same task, so it should + * be cheap. 
+ * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been + * pre-allocated and will be used for heap operations (and its "gt" member will + * be overwritten), else a temporary heap will be used (allocation of which + * may cause this function to fail). + */ +int cgroup_scan_tasks(struct cgroup_scanner *scan) +{ + int retval, i; + struct cgroup_iter it; + struct task_struct *p, *dropped; + /* Never dereference latest_task, since it's not refcounted */ + struct task_struct *latest_task = NULL; + struct ptr_heap tmp_heap; + struct ptr_heap *heap; + struct timespec latest_time = { 0, 0 }; + + if (scan->heap) { + /* The caller supplied our heap and pre-allocated its memory */ + heap = scan->heap; + heap->gt = &started_after; + } else { + /* We need to allocate our own heap memory */ + heap = &tmp_heap; + retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after); + if (retval) + /* cannot allocate the heap */ + return retval; + } + + again: + /* + * Scan tasks in the cgroup, using the scanner's "test_task" callback + * to determine which are of interest, and using the scanner's + * "process_task" callback to process any of them that need an update. + * Since we don't want to hold any locks during the task updates, + * gather tasks to be processed in a heap structure. + * The heap is sorted by descending task start time. + * If the statically-sized heap fills up, we overflow tasks that + * started later, and in future iterations only consider tasks that + * started after the latest task in the previous pass. This + * guarantees forward progress and that we don't miss any tasks. + */ + heap->size = 0; + cgroup_iter_start(scan->cg, &it); + while ((p = cgroup_iter_next(scan->cg, &it))) { + /* + * Only affect tasks that qualify per the caller's callback, + * if he provided one + */ + if (scan->test_task && !scan->test_task(p, scan)) + continue; + /* + * Only process tasks that started after the last task + * we processed + */ + if (!started_after_time(p, &latest_time, latest_task)) + continue; + dropped = heap_insert(heap, p); + if (dropped == NULL) { + /* + * The new task was inserted; the heap wasn't + * previously full + */ + get_task_struct(p); + } else if (dropped != p) { + /* + * The new task was inserted, and pushed out a + * different task + */ + get_task_struct(p); + put_task_struct(dropped); + } + /* + * Else the new task was newer than anything already in + * the heap and wasn't inserted + */ + } + cgroup_iter_end(scan->cg, &it); + + if (heap->size) { + for (i = 0; i < heap->size; i++) { + struct task_struct *p = heap->ptrs[i]; + if (i == 0) { + latest_time = p->start_time; + latest_task = p; + } + /* Process the task per the caller's callback */ + scan->process_task(p, scan); + put_task_struct(p); + } + /* + * If we had to process any tasks at all, scan again + * in case some of them were in the middle of forking + * children that didn't get processed. + * Not the most efficient way to do it, but it avoids + * having to take callback_mutex in the fork path + */ + goto again; + } + if (heap == &tmp_heap) + heap_free(&tmp_heap); + return 0; +} + /* * Stuff for reading the 'tasks' file. * @@ -1744,19 +1971,20 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) while ((tsk = cgroup_iter_next(cgrp, &it))) { if (unlikely(n == npids)) break; - pidarray[n++] = task_pid_nr(tsk); + pidarray[n++] = task_pid_vnr(tsk); } cgroup_iter_end(cgrp, &it); return n; } /** - * Build and fill cgroupstats so that taskstats can export it to user - * space. 
- * + * cgroupstats_build - build and fill cgroupstats * @stats: cgroupstats to fill information into * @dentry: A dentry entry belonging to the cgroup for which stats have * been requested. + * + * Build and fill cgroupstats so that taskstats can export it to user + * space. */ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { @@ -1867,7 +2095,7 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file) kfree(pidarray); } else { - ctr->buf = 0; + ctr->buf = NULL; ctr->bufsz = 0; } file->private_data = ctr; @@ -1988,14 +2216,13 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, } /* - * cgroup_create - create a cgroup - * parent: cgroup that will be parent of the new cgroup. - * name: name of the new cgroup. Will be strcpy'ed. - * mode: mode to set on new inode + * cgroup_create - create a cgroup + * @parent: cgroup that will be parent of the new cgroup + * @dentry: dentry of the new cgroup + * @mode: mode to set on new inode * - * Must be called with the mutex on the parent inode held + * Must be called with the mutex on the parent inode held */ - static long cgroup_create(struct cgroup *parent, struct dentry *dentry, int mode) { @@ -2018,7 +2245,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, mutex_lock(&cgroup_mutex); - cgrp->flags = 0; INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->css_sets); @@ -2028,6 +2254,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, cgrp->root = parent->root; cgrp->top_cgroup = parent->top_cgroup; + if (notify_on_release(parent)) + set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); if (IS_ERR(css)) { @@ -2120,7 +2349,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) struct cgroup *cgrp = dentry->d_fsdata; struct dentry *d; struct cgroup *parent; - struct cgroup_subsys *ss; struct super_block *sb; struct cgroupfs_root *root; @@ -2140,16 +2368,17 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) root = cgrp->root; sb = root->sb; + /* + * Call pre_destroy handlers of subsys. Notify subsystems + * that rmdir() request comes. + */ + cgroup_call_pre_destroy(cgrp); + if (cgroup_has_css_refs(cgrp)) { mutex_unlock(&cgroup_mutex); return -EBUSY; } - for_each_subsys(root, ss) { - if (cgrp->subsys[ss->subsys_id]) - ss->destroy(ss, cgrp); - } - spin_lock(&release_list_lock); set_bit(CGRP_REMOVED, &cgrp->flags); if (!list_empty(&cgrp->release_list)) @@ -2164,15 +2393,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) cgroup_d_remove_dir(d); dput(d); - root->number_of_cgroups--; set_bit(CGRP_RELEASABLE, &parent->flags); check_for_release(parent); mutex_unlock(&cgroup_mutex); - /* Drop the active superblock reference that we took when we - * created the cgroup */ - deactivate_super(sb); return 0; } @@ -2223,8 +2448,10 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) } /** - * cgroup_init_early - initialize cgroups at system boot, and - * initialize any subsystems that request early init. + * cgroup_init_early - cgroup initialization at system boot + * + * Initialize cgroups at system boot, and initialize any + * subsystems that request early init. 
*/ int __init cgroup_init_early(void) { @@ -2266,8 +2493,10 @@ int __init cgroup_init_early(void) } /** - * cgroup_init - register cgroup filesystem and /proc file, and - * initialize any subsystems that didn't request early init. + * cgroup_init - cgroup initialization + * + * Register cgroup filesystem and /proc file, and initialize + * any subsystems that didn't request early init. */ int __init cgroup_init(void) { @@ -2306,7 +2535,7 @@ out: * - Used for /proc//cgroup. * - No need to task_lock(tsk) on this tsk->cgroup reference, as it * doesn't really matter if tsk->cgroup changes after we read it, - * and we take cgroup_mutex, keeping attach_task() from changing it + * and we take cgroup_mutex, keeping cgroup_attach_task() from changing it * anyway. No need to check that tsk->cgroup != NULL, thanks to * the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks * cgroup to top_cgroup. @@ -2345,6 +2574,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v) /* Skip this hierarchy if it has no active subsystems */ if (!root->actual_subsys_bits) continue; + seq_printf(m, "%lu:", root->subsys_bits); for_each_subsys(root, ss) seq_printf(m, "%s%s", count++ ? "," : "", ss->name); seq_putc(m, ':'); @@ -2384,13 +2614,13 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) { int i; - seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n"); + seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - seq_printf(m, "%s\t%lu\t%d\n", + seq_printf(m, "%s\t%lu\t%d\t%d\n", ss->name, ss->root->subsys_bits, - ss->root->number_of_cgroups); + ss->root->number_of_cgroups, !ss->disabled); } mutex_unlock(&cgroup_mutex); return 0; @@ -2398,7 +2628,7 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) static int cgroupstats_open(struct inode *inode, struct file *file) { - return single_open(file, proc_cgroupstats_show, 0); + return single_open(file, proc_cgroupstats_show, NULL); } static struct file_operations proc_cgroupstats_operations = { @@ -2410,14 +2640,14 @@ static struct file_operations proc_cgroupstats_operations = { /** * cgroup_fork - attach newly forked task to its parents cgroup. - * @tsk: pointer to task_struct of forking parent process. + * @child: pointer to task_struct of forking parent process. * * Description: A task inherits its parent's cgroup at fork(). * * A pointer to the shared css_set was automatically copied in * fork.c by dup_task_struct(). However, we ignore that copy, since * it was not made under the protection of RCU or cgroup_mutex, so - * might no longer be a valid cgroup pointer. attach_task() might + * might no longer be a valid cgroup pointer. cgroup_attach_task() might * have already changed current->cgroups, allowing the previously * referenced cgroup group to be removed and freed. * @@ -2434,9 +2664,12 @@ void cgroup_fork(struct task_struct *child) } /** - * cgroup_fork_callbacks - called on a new task very soon before - * adding it to the tasklist. No need to take any locks since no-one - * can be operating on this task + * cgroup_fork_callbacks - run fork callbacks + * @child: the new task + * + * Called on a new task very soon before adding it to the + * tasklist. No need to take any locks since no-one can + * be operating on this task. 
*/ void cgroup_fork_callbacks(struct task_struct *child) { @@ -2451,11 +2684,14 @@ void cgroup_fork_callbacks(struct task_struct *child) } /** - * cgroup_post_fork - called on a new task after adding it to the - * task list. Adds the task to the list running through its css_set - * if necessary. Has to be after the task is visible on the task list - * in case we race with the first call to cgroup_iter_start() - to - * guarantee that the new task ends up on its list. */ + * cgroup_post_fork - called on a new task after adding it to the task list + * @child: the task in question + * + * Adds the task to the list running through its css_set if necessary. + * Has to be after the task is visible on the task list in case we race + * with the first call to cgroup_iter_start() - to guarantee that the + * new task ends up on its list. + */ void cgroup_post_fork(struct task_struct *child) { if (use_task_css_set_links) { @@ -2468,6 +2704,7 @@ void cgroup_post_fork(struct task_struct *child) /** * cgroup_exit - detach cgroup from exiting task * @tsk: pointer to task_struct of exiting process + * @run_callback: run exit callbacks? * * Description: Detach cgroup from @tsk and release it. * @@ -2496,9 +2733,8 @@ void cgroup_post_fork(struct task_struct *child) * attach us to a different cgroup, decrementing the count on * the first cgroup that we never incremented. But in this case, * top_cgroup isn't going away, and either task has PF_EXITING set, - * which wards off any attach_task() attempts, or task is a failed - * fork, never visible to attach_task. - * + * which wards off any cgroup_attach_task() attempts, or task is a failed + * fork, never visible to cgroup_attach_task. */ void cgroup_exit(struct task_struct *tsk, int run_callbacks) { @@ -2535,9 +2771,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) } /** - * cgroup_clone - duplicate the current cgroup in the hierarchy - * that the given subsystem is attached to, and move this task into - * the new child + * cgroup_clone - clone the cgroup the given subsystem is attached to + * @tsk: the task to be moved + * @subsys: the given subsystem + * + * Duplicate the current cgroup in the hierarchy that the given + * subsystem is attached to, and move this task into the new + * child. */ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) { @@ -2637,7 +2877,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) } /* All seems fine. Finish by moving the task into the new cgroup */ - ret = attach_task(child, tsk); + ret = cgroup_attach_task(child, tsk); mutex_unlock(&cgroup_mutex); out_release: @@ -2650,9 +2890,12 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) return ret; } -/* - * See if "cgrp" is a descendant of the current task's cgroup in - * the appropriate hierarchy +/** + * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp + * @cgrp: the cgroup in question + * + * See if @cgrp is a descendant of the current task's cgroup in + * the appropriate hierarchy. * * If we are sending in dummytop, then presumably we are creating * the top cgroup in the subsystem. @@ -2731,9 +2974,7 @@ void __css_put(struct cgroup_subsys_state *css) * release agent task. We don't bother to wait because the caller of * this routine has no use for the exit status of the release agent * task, so no sense holding our caller up for that. 
- * */ - static void cgroup_release_agent(struct work_struct *work) { BUG_ON(work != &release_agent_work); @@ -2783,3 +3024,27 @@ static void cgroup_release_agent(struct work_struct *work) spin_unlock(&release_list_lock); mutex_unlock(&cgroup_mutex); } + +static int __init cgroup_disable(char *str) +{ + int i; + char *token; + + while ((token = strsep(&str, ",")) != NULL) { + if (!*token) + continue; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + + if (!strcmp(token, ss->name)) { + ss->disabled = 1; + printk(KERN_INFO "Disabling %s control group" + " subsystem\n", ss->name); + break; + } + } + } + return 1; +} +__setup("cgroup_disable=", cgroup_disable);
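
The last two hunks above add a cgroup_disable= boot parameter that sets a subsystem's ->disabled flag, extend /proc/cgroups with a fourth tab-separated "enabled" column (printed as !ss->disabled), and make the mount-time "all" option skip disabled subsystems. As a rough sketch only, not part of the patch, and with the file name cgroups_info.c invented for illustration, a userspace reader of the new four-column format could look like this:

/* cgroups_info.c - hypothetical sketch, not part of the patch above.
 * Reads the four-column /proc/cgroups format introduced by the final hunk
 * (#subsys_name, hierarchy, num_cgroups, enabled) and reports which
 * subsystems have been turned off, e.g. via the new cgroup_disable= option.
 */
#include <stdio.h>

int main(void)
{
	char line[256];
	char name[64];
	unsigned long hierarchy;
	int num_cgroups, enabled;
	FILE *f = fopen("/proc/cgroups", "r");

	if (!f) {
		perror("/proc/cgroups");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (line[0] == '#')	/* skip the header line */
			continue;
		if (sscanf(line, "%63s %lu %d %d",
			   name, &hierarchy, &num_cgroups, &enabled) != 4)
			continue;	/* older kernels lack the 4th column */
		printf("%-16s %s (hierarchy %lu, %d cgroups)\n", name,
		       enabled ? "enabled" : "disabled (cgroup_disable=)",
		       hierarchy, num_cgroups);
	}
	fclose(f);
	return 0;
}

Booting with cgroup_disable=<subsys_name> for any compiled-in subsystem should then show 0 in the enabled column for that row, and a mount with the "all" option will leave that subsystem unbound.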