X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=include%2Flinux%2Fcgroup.h;h=8f78073d7caaa278017232112f0d3da05aa0d4c0;hb=3374cd1abd478f767aaedf2c21d109596ff0fe72;hp=665fa70e4094166c76b85176f0fbb3f5f6fae7c5;hpb=811158b147a503fbdf9773224004ffd32002d1fe;p=safe%2Fjmp%2Flinux-2.6 diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 665fa70..8f78073 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -28,6 +28,7 @@ struct css_id; extern int cgroup_init_early(void); extern int cgroup_init(void); extern void cgroup_lock(void); +extern int cgroup_lock_is_held(void); extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); @@ -36,16 +37,24 @@ extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); +extern int cgroup_load_subsys(struct cgroup_subsys *ss); +extern void cgroup_unload_subsys(struct cgroup_subsys *ss); -extern struct file_operations proc_cgroup_operations; +extern const struct file_operations proc_cgroup_operations; -/* Define the enumeration of all cgroup subsystems */ +/* Define the enumeration of all builtin cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, enum cgroup_subsys_id { #include - CGROUP_SUBSYS_COUNT + CGROUP_BUILTIN_SUBSYS_COUNT }; #undef SUBSYS +/* + * This define indicates the maximum number of subsystems that can be loaded + * at once. We limit to this many since cgroupfs_root has subsys_bits to keep + * track of all of them. + */ +#define CGROUP_SUBSYS_COUNT (BITS_PER_BYTE*sizeof(unsigned long)) /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { @@ -75,6 +84,12 @@ enum { CSS_REMOVED, /* This CSS is dead */ }; +/* Caller must verify that the css is not for root cgroup */ +static inline void __css_get(struct cgroup_subsys_state *css, int count) +{ + atomic_add(count, &css->refcnt); +} + /* * Call css_get() to hold a reference on the css; it can be used * for a reference obtained via: @@ -86,7 +101,7 @@ static inline void css_get(struct cgroup_subsys_state *css) { /* We don't need to reference count the root state */ if (!test_bit(CSS_ROOT, &css->flags)) - atomic_inc(&css->refcnt); + __css_get(css, 1); } static inline bool css_is_removed(struct cgroup_subsys_state *css) @@ -117,11 +132,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) * css_get() or css_tryget() */ -extern void __css_put(struct cgroup_subsys_state *css); +extern void __css_put(struct cgroup_subsys_state *css, int count); static inline void css_put(struct cgroup_subsys_state *css) { if (!test_bit(CSS_ROOT, &css->flags)) - __css_put(css); + __css_put(css, 1); } /* bits in struct cgroup flags field */ @@ -141,6 +156,38 @@ enum { CGRP_WAIT_ON_RMDIR, }; +/* which pidlist file are we talking about? */ +enum cgroup_filetype { + CGROUP_FILE_PROCS, + CGROUP_FILE_TASKS, +}; + +/* + * A pidlist is a list of pids that virtually represents the contents of one + * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, + * a pair (one each for procs, tasks) for each pid namespace that's relevant + * to the cgroup. + */ +struct cgroup_pidlist { + /* + * used to find which pidlist is wanted. doesn't change as long as + * this particular list stays in the list. + */ + struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; + /* array of xids */ + pid_t *list; + /* how many elements the above list has */ + int length; + /* how many files are using the current array */ + int use_count; + /* each of these stored in a list by its cgroup */ + struct list_head links; + /* pointer to the cgroup we belong to, for list removal purposes */ + struct cgroup *owner; + /* protects the other fields */ + struct rw_semaphore mutex; +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -179,17 +226,19 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects the fields below */ - struct rw_semaphore pids_mutex; - /* Array of process ids in the cgroup */ - pid_t *tasks_pids; - /* How many files are using the current tasks_pids array */ - int pids_use_count; - /* Length of the current tasks_pids array */ - int pids_length; + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; /* For RCU-protected deletion */ struct rcu_head rcu_head; + + /* List of events which userspace want to recieve */ + struct list_head event_list; + spinlock_t event_list_lock; }; /* @@ -227,9 +276,13 @@ struct css_set { /* * Set of subsystem states, one for each subsystem. This array * is immutable after creation apart from the init_css_set - * during subsystem registration (at boot time). + * during subsystem registration (at boot time) and modular subsystem + * loading/unloading. */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* @@ -329,6 +382,23 @@ struct cftype { int (*trigger)(struct cgroup *cgrp, unsigned int event); int (*release)(struct inode *inode, struct file *file); + + /* + * register_event() callback will be used to add new userspace + * waiter for changes related to the cftype. Implement it if + * you want to provide this functionality. Use eventfd_signal() + * on eventfd to send notification to userspace. + */ + int (*register_event)(struct cgroup *cgrp, struct cftype *cft, + struct eventfd_ctx *eventfd, const char *args); + /* + * unregister_event() callback will be called when userspace + * closes the eventfd or on cgroup removing. + * This callback must be implemented, if you want provide + * notification functionality. + */ + int (*unregister_event)(struct cgroup *cgrp, struct cftype *cft, + struct eventfd_ctx *eventfd); }; struct cgroup_scanner { @@ -366,6 +436,23 @@ int cgroup_task_count(const struct cgroup *cgrp); int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); /* + * When the subsys has to access css and may add permanent refcnt to css, + * it should take care of racy conditions with rmdir(). Following set of + * functions, is for stop/restart rmdir if necessary. + * Because these will call css_get/put, "css" should be alive css. + * + * cgroup_exclude_rmdir(); + * ...do some jobs which may access arbitrary empty cgroup + * cgroup_release_and_wakeup_rmdir(); + * + * When someone removes a cgroup while cgroup_exclude_rmdir() holds it, + * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up. + */ + +void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); +void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); + +/* * Control Group subsystem type. * See Documentation/cgroups/cgroups.txt for details */ @@ -375,10 +462,13 @@ struct cgroup_subsys { struct cgroup *cgrp); int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, - struct cgroup *cgrp, struct task_struct *tsk); + int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); + void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *tsk); + struct cgroup *old_cgrp, struct task_struct *tsk, + bool threadgroup); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); int (*populate)(struct cgroup_subsys *ss, @@ -420,6 +510,9 @@ struct cgroup_subsys { /* used when use_id == true */ struct idr idr; spinlock_t id_lock; + + /* should be defined only by modular subsystems */ + struct module *module; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; @@ -435,7 +528,10 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( static inline struct cgroup_subsys_state *task_subsys_state( struct task_struct *task, int subsys_id) { - return rcu_dereference(task->cgroups->subsys[subsys_id]); + return rcu_dereference_check(task->cgroups->subsys[subsys_id], + rcu_read_lock_held() || + lockdep_is_held(&task->alloc_lock) || + cgroup_lock_is_held()); } static inline struct cgroup* task_cgroup(struct task_struct *task,