X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fblk-cgroup.c;h=9af7257f429c34dfa444f54d5e4907d7f06bd10a;hb=31373d09da5b7fe21fe6f781e92bd534a3495f00;hp=73a5525cc0a2440e2fa4ddffe3972c3543b56189;hpb=f2eecb91522686edf8199947b77f435a4031d92f;p=safe%2Fjmp%2Flinux-2.6

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 73a5525..9af7257 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -13,23 +13,113 @@
 #include <linux/ioprio.h>
 #include <linux/seq_file.h>
 #include <linux/kdev_t.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/blkdev.h>
 #include "blk-cgroup.h"
-#include "cfq-iosched.h"
+
+static DEFINE_SPINLOCK(blkio_list_lock);
+static LIST_HEAD(blkio_list);
 
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
+EXPORT_SYMBOL_GPL(blkio_root_cgroup);
+
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
+						  struct cgroup *);
+static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+			      struct task_struct *, bool);
+static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+			   struct cgroup *, struct task_struct *, bool);
+static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
+static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
+
+struct cgroup_subsys blkio_subsys = {
+	.name = "blkio",
+	.create = blkiocg_create,
+	.can_attach = blkiocg_can_attach,
+	.attach = blkiocg_attach,
+	.destroy = blkiocg_destroy,
+	.populate = blkiocg_populate,
+#ifdef CONFIG_BLK_CGROUP
+	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
+	.subsys_id = blkio_subsys_id,
+#endif
+	.use_id = 1,
+	.module = THIS_MODULE,
+};
+EXPORT_SYMBOL_GPL(blkio_subsys);
 
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 {
 	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
 			    struct blkio_cgroup, css);
 }
+EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
+
+/*
+ * Add to the appropriate stat variable depending on the request type.
+ * This should be called with the blkg->stats_lock held.
+ */
+void io_add_stat(uint64_t *stat, uint64_t add, unsigned int flags)
+{
+	if (flags & REQ_RW)
+		stat[IO_WRITE] += add;
+	else
+		stat[IO_READ] += add;
+	/*
+	 * Everywhere in the block layer, an IO is treated as sync if it is a
+	 * read or a SYNC write. We follow the same norm.
+	 */
+	if (!(flags & REQ_RW) || flags & REQ_RW_SYNC)
+		stat[IO_SYNC] += add;
+	else
+		stat[IO_ASYNC] += add;
+}
+
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkg->stats_lock, flags);
+	blkg->stats.time += time;
+	spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
-void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
-			unsigned long time, unsigned long sectors)
+void blkiocg_update_request_dispatch_stats(struct blkio_group *blkg,
+			struct request *rq)
 {
-	blkg->time += time;
-	blkg->sectors += sectors;
+	struct blkio_group_stats *stats;
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkg->stats_lock, flags);
+	stats = &blkg->stats;
+	stats->sectors += blk_rq_sectors(rq);
+	io_add_stat(stats->io_serviced, 1, rq->cmd_flags);
+	io_add_stat(stats->io_service_bytes, blk_rq_sectors(rq) << 9,
+			rq->cmd_flags);
+	spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+
+void blkiocg_update_request_completion_stats(struct blkio_group *blkg,
+			struct request *rq)
+{
+	struct blkio_group_stats *stats;
+	unsigned long flags;
+	unsigned long long now = sched_clock();
+
+	spin_lock_irqsave(&blkg->stats_lock, flags);
+	stats = &blkg->stats;
+	if (time_after64(now, rq->io_start_time_ns))
+		io_add_stat(stats->io_service_time, now - rq->io_start_time_ns,
+				rq->cmd_flags);
+	if (time_after64(rq->io_start_time_ns, rq->start_time_ns))
+		io_add_stat(stats->io_wait_time,
+				rq->io_start_time_ns - rq->start_time_ns,
+				rq->cmd_flags);
+	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
+EXPORT_SYMBOL_GPL(blkiocg_update_request_completion_stats);
 
 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 			struct blkio_group *blkg, void *key, dev_t dev)
@@ -47,6 +137,7 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 #endif
 	blkg->dev = dev;
 }
+EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
 
 static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
@@ -81,6 +172,7 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
 
 /* called under rcu_read_lock(). */
 struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
@@ -97,6 +189,7 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
 
 #define SHOW_FUNCTION(__VAR)						\
 static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
@@ -117,26 +210,140 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	struct blkio_cgroup *blkcg;
 	struct blkio_group *blkg;
 	struct hlist_node *n;
+	struct blkio_policy_type *blkiop;
 
 	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
 		return -EINVAL;
 
 	blkcg = cgroup_to_blkio_cgroup(cgroup);
+	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
 	blkcg->weight = (unsigned int)val;
-	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
-		cfq_update_blkio_group_weight(blkg, blkcg->weight);
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		list_for_each_entry(blkiop, &blkio_list, list)
+			blkiop->ops.blkio_update_group_weight_fn(blkg,
+					blkcg->weight);
+	}
 	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
 	return 0;
 }
 
-#define SHOW_FUNCTION_PER_GROUP(__VAR)					\
+static int
+blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
+{
+	struct blkio_cgroup *blkcg;
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+	struct blkio_group_stats *stats;
+
+	blkcg = cgroup_to_blkio_cgroup(cgroup);
+	spin_lock_irq(&blkcg->lock);
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		spin_lock(&blkg->stats_lock);
+		stats = &blkg->stats;
+		memset(stats, 0, sizeof(struct blkio_group_stats));
+		spin_unlock(&blkg->stats_lock);
+	}
+	spin_unlock_irq(&blkcg->lock);
+	return 0;
+}
+
+void get_key_name(int type, char *disk_id, char *str, int chars_left)
+{
+	strlcpy(str, disk_id, chars_left);
+	chars_left -= strlen(str);
+	if (chars_left <= 0) {
+		printk(KERN_WARNING
+			"Possibly incorrect cgroup stat display format\n");
+		return;
+	}
+	switch (type) {
+	case IO_READ:
+		strlcat(str, " Read", chars_left);
+		break;
+	case IO_WRITE:
+		strlcat(str, " Write", chars_left);
+		break;
+	case IO_SYNC:
+		strlcat(str, " Sync", chars_left);
+		break;
+	case IO_ASYNC:
+		strlcat(str, " Async", chars_left);
+		break;
+	case IO_TYPE_MAX:
+		strlcat(str, " Total", chars_left);
+		break;
+	default:
+		strlcat(str, " Invalid", chars_left);
+	}
+}
+
+typedef uint64_t (get_var) (struct blkio_group *, int);
+
+#define MAX_KEY_LEN 100
+uint64_t get_typed_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb,
+		get_var *getvar, char *disk_id)
+{
+	uint64_t disk_total;
+	char key_str[MAX_KEY_LEN];
+	int type;
+
+	for (type = 0; type < IO_TYPE_MAX; type++) {
+		get_key_name(type, disk_id, key_str, MAX_KEY_LEN);
+		cb->fill(cb, key_str, getvar(blkg, type));
+	}
+	disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
+	get_key_name(IO_TYPE_MAX, disk_id, key_str, MAX_KEY_LEN);
+	cb->fill(cb, key_str, disk_total);
+	return disk_total;
+}
+
+uint64_t get_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb,
+		get_var *getvar, char *disk_id)
+{
+	uint64_t var = getvar(blkg, 0);
+	cb->fill(cb, disk_id, var);
+	return var;
+}
+
+#define GET_STAT_INDEXED(__VAR)						\
+uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int type)	\
+{									\
+	return blkg->stats.__VAR[type];					\
+}									\
+
+GET_STAT_INDEXED(io_service_bytes);
+GET_STAT_INDEXED(io_serviced);
+GET_STAT_INDEXED(io_service_time);
+GET_STAT_INDEXED(io_wait_time);
+#undef GET_STAT_INDEXED
+
+#define GET_STAT(__VAR, __CONV)						\
+uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int dummy)	\
+{									\
+	uint64_t data = blkg->stats.__VAR;				\
+	if (__CONV)							\
+		data = (uint64_t)jiffies_to_msecs(data) * NSEC_PER_MSEC;\
+	return data;							\
+}
+
+GET_STAT(time, 1);
+GET_STAT(sectors, 0);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+GET_STAT(dequeue, 0);
+#endif
+#undef GET_STAT
+
+#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total)	\
 static int blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
-		struct cftype *cftype, struct seq_file *m)		\
+		struct cftype *cftype, struct cgroup_map_cb *cb)	\
 {									\
 	struct blkio_cgroup *blkcg;					\
 	struct blkio_group *blkg;					\
 	struct hlist_node *n;						\
+	uint64_t cgroup_total = 0;					\
+	char disk_id[10];						\
 									\
 	if (!cgroup_lock_live_group(cgroup))				\
 		return -ENODEV;						\
@@ -144,28 +351,42 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup,	\
 	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
 	rcu_read_lock();						\
 	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
-		if (blkg->dev)						\
-			seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev),	\
-				MINOR(blkg->dev), blkg->__VAR);		\
+		if (blkg->dev) {					\
+			spin_lock_irq(&blkg->stats_lock);		\
+			snprintf(disk_id, 10, "%u:%u", MAJOR(blkg->dev),\
+					MINOR(blkg->dev));		\
+			cgroup_total += get_stats(blkg, cb, getvar,	\
+						disk_id);		\
+			spin_unlock_irq(&blkg->stats_lock);		\
+		}							\
 	}								\
+	if (show_total)							\
+		cb->fill(cb, "Total", cgroup_total);			\
 	rcu_read_unlock();						\
 	cgroup_unlock();						\
 	return 0;							\
 }
 
-SHOW_FUNCTION_PER_GROUP(time);
-SHOW_FUNCTION_PER_GROUP(sectors);
+SHOW_FUNCTION_PER_GROUP(time, get_stat, get_time_stat, 0);
+SHOW_FUNCTION_PER_GROUP(sectors, get_stat, get_sectors_stat, 0);
+SHOW_FUNCTION_PER_GROUP(io_service_bytes, get_typed_stat,
+			get_io_service_bytes_stat, 1);
+SHOW_FUNCTION_PER_GROUP(io_serviced, get_typed_stat, get_io_serviced_stat, 1);
+SHOW_FUNCTION_PER_GROUP(io_service_time, get_typed_stat,
+			get_io_service_time_stat, 1);
+SHOW_FUNCTION_PER_GROUP(io_wait_time, get_typed_stat, get_io_wait_time_stat, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-SHOW_FUNCTION_PER_GROUP(dequeue);
+SHOW_FUNCTION_PER_GROUP(dequeue, get_stat, get_dequeue_stat, 0);
 #endif
 #undef SHOW_FUNCTION_PER_GROUP
 
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
+void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 			unsigned long dequeue)
 {
-	blkg->dequeue += dequeue;
+	blkg->stats.dequeue += dequeue;
 }
+EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
 #endif
 
 struct cftype blkio_files[] = {
@@ -176,16 +397,38 @@ struct cftype blkio_files[] = {
 	},
 	{
 		.name = "time",
-		.read_seq_string = blkiocg_time_read,
+		.read_map = blkiocg_time_read,
+		.write_u64 = blkiocg_reset_write,
 	},
 	{
 		.name = "sectors",
-		.read_seq_string = blkiocg_sectors_read,
+		.read_map = blkiocg_sectors_read,
+		.write_u64 = blkiocg_reset_write,
+	},
+	{
+		.name = "io_service_bytes",
+		.read_map = blkiocg_io_service_bytes_read,
+		.write_u64 = blkiocg_reset_write,
+	},
+	{
+		.name = "io_serviced",
+		.read_map = blkiocg_io_serviced_read,
+		.write_u64 = blkiocg_reset_write,
+	},
+	{
+		.name = "io_service_time",
+		.read_map = blkiocg_io_service_time_read,
+		.write_u64 = blkiocg_reset_write,
+	},
+	{
+		.name = "io_wait_time",
+		.read_map = blkiocg_io_wait_time_read,
+		.write_u64 = blkiocg_reset_write,
 	},
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	{
 		.name = "dequeue",
-		.read_seq_string = blkiocg_dequeue_read,
+		.read_map = blkiocg_dequeue_read,
 	},
 #endif
 };
@@ -202,6 +445,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 	unsigned long flags;
 	struct blkio_group *blkg;
 	void *key;
+	struct blkio_policy_type *blkiop;
 
 	rcu_read_lock();
 remove_entry:
@@ -227,12 +471,16 @@ remove_entry:
 	 * we have more policies in place, we need some dynamic registration
 	 * of callback function.
 	 */
-	cfq_unlink_blkio_group(key, blkg);
+	spin_lock(&blkio_list_lock);
+	list_for_each_entry(blkiop, &blkio_list, list)
+		blkiop->ops.blkio_unlink_group_fn(key, blkg);
+	spin_unlock(&blkio_list_lock);
 	goto remove_entry;
 done:
 	free_css_id(&blkio_subsys, &blkcg->css);
 	rcu_read_unlock();
-	kfree(blkcg);
+	if (blkcg != &blkio_root_cgroup)
+		kfree(blkcg);
 }
 
 static struct cgroup_subsys_state *
@@ -298,13 +546,32 @@ static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
 	task_unlock(tsk);
 }
 
-struct cgroup_subsys blkio_subsys = {
-	.name = "blkio",
-	.create = blkiocg_create,
-	.can_attach = blkiocg_can_attach,
-	.attach = blkiocg_attach,
-	.destroy = blkiocg_destroy,
-	.populate = blkiocg_populate,
-	.subsys_id = blkio_subsys_id,
-	.use_id = 1,
-};
+void blkio_policy_register(struct blkio_policy_type *blkiop)
+{
+	spin_lock(&blkio_list_lock);
+	list_add_tail(&blkiop->list, &blkio_list);
+	spin_unlock(&blkio_list_lock);
+}
+EXPORT_SYMBOL_GPL(blkio_policy_register);
+
+void blkio_policy_unregister(struct blkio_policy_type *blkiop)
+{
+	spin_lock(&blkio_list_lock);
+	list_del_init(&blkiop->list);
+	spin_unlock(&blkio_list_lock);
+}
+EXPORT_SYMBOL_GPL(blkio_policy_unregister);
+
+static int __init init_cgroup_blkio(void)
+{
+	return cgroup_load_subsys(&blkio_subsys);
+}
+
+static void __exit exit_cgroup_blkio(void)
+{
+	cgroup_unload_subsys(&blkio_subsys);
+}
+
+module_init(init_cgroup_blkio);
+module_exit(exit_cgroup_blkio);
+MODULE_LICENSE("GPL");
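
Usage note, not part of the diff above: the core refactoring is visible in blkiocg_weight_write() and blkiocg_destroy(), where the hard-coded cfq_update_blkio_group_weight()/cfq_unlink_blkio_group() calls are replaced by a walk of blkio_list, so any I/O scheduler can participate by calling blkio_policy_register(). Below is a minimal sketch of such a registration. The struct blkio_policy_type field names (list, ops) and the callback signatures are inferred from the call sites in this diff (blkiop->ops.blkio_unlink_group_fn(key, blkg) and blkiop->ops.blkio_update_group_weight_fn(blkg, weight)); the foo_* names are hypothetical stand-ins for a real scheduler's group bookkeeping, not code from the patch.

#include <linux/module.h>
#include "blk-cgroup.h"

/*
 * Invoked (under blkio_list_lock) from blkiocg_destroy() for each group
 * of a dying cgroup; key identifies the request queue the scheduler
 * associated with the group in blkiocg_add_blkio_group().
 */
static void foo_unlink_blkio_group(void *key, struct blkio_group *blkg)
{
	/* drop the scheduler's reference to blkg's private data */
}

/* Invoked for every group of a cgroup when blkio.weight is rewritten. */
static void foo_update_blkio_group_weight(struct blkio_group *blkg,
					  unsigned int weight)
{
	/* fold the new weight into the scheduler's service calculations */
}

static struct blkio_policy_type blkio_policy_foo = {
	.ops = {
		.blkio_unlink_group_fn = foo_unlink_blkio_group,
		.blkio_update_group_weight_fn = foo_update_blkio_group_weight,
	},
};

static int __init foo_init(void)
{
	blkio_policy_register(&blkio_policy_foo);
	return 0;
}

static void __exit foo_exit(void)
{
	blkio_policy_unregister(&blkio_policy_foo);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

One behavioral detail of the new stat files worth noting: each gets .write_u64 = blkiocg_reset_write, and that handler memsets the whole blkio_group_stats of every group in the cgroup. Writing an integer to any single stat file (for example "echo 0 > blkio.io_service_time") therefore clears all of the cgroup's stats, not just the file written to.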