X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=block%2Fcfq-iosched.c;h=5ff4f4850e717ddb319423e9678e0e44cd7f265c;hp=9e0df2bdcf2124f1f16afe645e43788e10ec9d9c;hb=HEAD;hpb=4facdaec1ce186e731e6baa04f074804849e9a49 diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9e0df2b..5ff4f48 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4; #define RQ_CIC(rq) \ ((struct cfq_io_context *) (rq)->elevator_private) #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) +#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) static struct kmem_cache *cfq_pool; static struct kmem_cache *cfq_ioc_pool; @@ -63,6 +64,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); static struct completion *ioc_gone; static DEFINE_SPINLOCK(ioc_gone_lock); +static DEFINE_SPINLOCK(cic_index_lock); +static DEFINE_IDA(cic_index_ida); + #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) @@ -270,6 +274,7 @@ struct cfq_data { unsigned int cfq_latency; unsigned int cfq_group_isolation; + unsigned int cic_index; struct list_head cic_list; /* @@ -344,7 +349,7 @@ CFQ_CFQQ_FNS(deep); CFQ_CFQQ_FNS(wait_busy); #undef CFQ_CFQQ_FNS -#ifdef CONFIG_DEBUG_CFQ_IOSCHED +#ifdef CONFIG_CFQ_GROUP_IOSCHED #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ @@ -429,6 +434,24 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic, cic->cfqq[is_sync] = cfqq; } +#define CIC_DEAD_KEY 1ul +#define CIC_DEAD_INDEX_SHIFT 1 + +static inline void *cfqd_dead_key(struct cfq_data *cfqd) +{ + return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); +} + +static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) +{ + struct cfq_data *cfqd = cic->key; + + if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY)) + return NULL; + + return cfqd; +} + /* * We regard a request as SYNC, if it's either a read or has the SYNC bit * set (in which case it could also be direct WRITE). @@ -887,7 +910,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) } static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, - struct cfq_queue *cfqq, bool forced) + struct cfq_queue *cfqq) { struct cfq_rb_root *st = &cfqd->grp_service_tree; unsigned int used_sl, charge_sl; @@ -917,7 +940,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); - blkiocg_set_start_empty_time(&cfqg->blkg, forced); + blkiocg_set_start_empty_time(&cfqg->blkg); } #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -961,7 +984,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) for_each_cfqg_st(cfqg, i, j, st) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); - blkio_group_init(&cfqg->blkg); /* * Take the initial reference that will be released on destroy @@ -1002,6 +1024,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) return cfqg; } +static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) +{ + atomic_inc(&cfqg->ref); + return cfqg; +} + static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { /* Currently, all async queues are mapped to root group */ @@ -1085,6 +1113,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) { return &cfqd->root_group; } + +static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) +{ + return cfqg; +} + static inline void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { cfqq->cfqg = cfqg; @@ -1387,12 +1421,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); cfq_add_rq_rb(rq); - blkiocg_update_io_add_stats( - &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg, - rq_data_dir(rq), rq_is_sync(rq)); + blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, + &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), + rq_is_sync(rq)); } static struct request * @@ -1448,7 +1482,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); if (rq_is_meta(rq)) { WARN_ON(!cfqq->meta_pending); @@ -1484,8 +1518,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - struct cfq_queue *cfqq = RQ_CFQQ(req); - blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio), + blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio), cfq_bio_sync(bio)); } @@ -1506,7 +1539,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next), + blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next), rq_is_sync(next)); } @@ -1571,7 +1604,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, */ static void __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, - bool timed_out, bool forced) + bool timed_out) { cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); @@ -1598,7 +1631,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); } - cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced); + cfq_group_served(cfqd, cfqq->cfqg, cfqq); if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); @@ -1617,13 +1650,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, } } -static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out, - bool forced) +static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) { struct cfq_queue *cfqq = cfqd->active_queue; if (cfqq) - __cfq_slice_expired(cfqd, cfqq, timed_out, forced); + __cfq_slice_expired(cfqd, cfqq, timed_out); } /* @@ -2191,7 +2223,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) } expire: - cfq_slice_expired(cfqd, 0, false); + cfq_slice_expired(cfqd, 0); new_queue: /* * Current queue expired. Check if we have to switch to a new @@ -2217,7 +2249,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) BUG_ON(!list_empty(&cfqq->fifo)); /* By default cfqq is not expired if it is empty. Do it explicitly */ - __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true); + __cfq_slice_expired(cfqq->cfqd, cfqq, 0); return dispatched; } @@ -2400,7 +2432,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || cfq_class_idle(cfqq))) { cfqq->slice_end = jiffies + 1; - cfq_slice_expired(cfqd, 0, false); + cfq_slice_expired(cfqd, 0); } cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); @@ -2431,7 +2463,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) orig_cfqg = cfqq->orig_cfqg; if (unlikely(cfqd->active_queue == cfqq)) { - __cfq_slice_expired(cfqd, cfqq, 0, false); + __cfq_slice_expired(cfqd, cfqq, 0); cfq_schedule_dispatch(cfqd); } @@ -2500,11 +2532,12 @@ static void cfq_cic_free(struct cfq_io_context *cic) static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) { unsigned long flags; + unsigned long dead_key = (unsigned long) cic->key; - BUG_ON(!cic->dead_key); + BUG_ON(!(dead_key & CIC_DEAD_KEY)); spin_lock_irqsave(&ioc->lock, flags); - radix_tree_delete(&ioc->radix_root, cic->dead_key); + radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT); hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); @@ -2527,15 +2560,10 @@ static void cfq_free_io_context(struct io_context *ioc) __call_for_each_cic(ioc, cic_free_func); } -static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) +static void cfq_put_cooperator(struct cfq_queue *cfqq) { struct cfq_queue *__cfqq, *next; - if (unlikely(cfqq == cfqd->active_queue)) { - __cfq_slice_expired(cfqd, cfqq, 0, false); - cfq_schedule_dispatch(cfqd); - } - /* * If this queue was scheduled to merge with another queue, be * sure to drop the reference taken on that queue (and others in @@ -2551,6 +2579,16 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) cfq_put_queue(__cfqq); __cfqq = next; } +} + +static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + if (unlikely(cfqq == cfqd->active_queue)) { + __cfq_slice_expired(cfqd, cfqq, 0); + cfq_schedule_dispatch(cfqd); + } + + cfq_put_cooperator(cfqq); cfq_put_queue(cfqq); } @@ -2563,11 +2601,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, list_del_init(&cic->queue_list); /* - * Make sure key == NULL is seen for dead queues + * Make sure dead mark is seen for dead queues */ smp_wmb(); - cic->dead_key = (unsigned long) cic->key; - cic->key = NULL; + cic->key = cfqd_dead_key(cfqd); if (ioc->ioc_data == cic) rcu_assign_pointer(ioc->ioc_data, NULL); @@ -2586,7 +2623,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, static void cfq_exit_single_io_context(struct io_context *ioc, struct cfq_io_context *cic) { - struct cfq_data *cfqd = cic->key; + struct cfq_data *cfqd = cic_to_cfqd(cic); if (cfqd) { struct request_queue *q = cfqd->queue; @@ -2599,7 +2636,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc, * race between exiting task and queue */ smp_read_barrier_depends(); - if (cic->key) + if (cic->key == cfqd) __cfq_exit_single_io_context(cfqd, cic); spin_unlock_irqrestore(q->queue_lock, flags); @@ -2679,7 +2716,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) { - struct cfq_data *cfqd = cic->key; + struct cfq_data *cfqd = cic_to_cfqd(cic); struct cfq_queue *cfqq; unsigned long flags; @@ -2736,7 +2773,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) { struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); - struct cfq_data *cfqd = cic->key; + struct cfq_data *cfqd = cic_to_cfqd(cic); unsigned long flags; struct request_queue *q; @@ -2873,12 +2910,13 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, unsigned long flags; WARN_ON(!list_empty(&cic->queue_list)); + BUG_ON(cic->key != cfqd_dead_key(cfqd)); spin_lock_irqsave(&ioc->lock, flags); BUG_ON(ioc->ioc_data == cic); - radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); + radix_tree_delete(&ioc->radix_root, cfqd->cic_index); hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); @@ -2890,7 +2928,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) { struct cfq_io_context *cic; unsigned long flags; - void *k; if (unlikely(!ioc)) return NULL; @@ -2907,13 +2944,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) } do { - cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); + cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index); rcu_read_unlock(); if (!cic) break; - /* ->key must be copied to avoid race with cfq_exit_queue() */ - k = cic->key; - if (unlikely(!k)) { + if (unlikely(cic->key != cfqd)) { cfq_drop_dead_cic(cfqd, ioc, cic); rcu_read_lock(); continue; @@ -2946,7 +2981,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); ret = radix_tree_insert(&ioc->radix_root, - (unsigned long) cfqd, cic); + cfqd->cic_index, cic); if (!ret) hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); @@ -3161,7 +3196,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { cfq_log_cfqq(cfqd, cfqq, "preempt"); - cfq_slice_expired(cfqd, 1, false); + cfq_slice_expired(cfqd, 1); /* * Put the new queue at the front of the of the current list, @@ -3241,8 +3276,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - - blkiocg_update_io_add_stats(&cfqq->cfqg->blkg, + blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, &cfqd->serving_group->blkg, rq_data_dir(rq), rq_is_sync(rq)); cfq_rq_enqueued(cfqd, cfqq, rq); @@ -3373,7 +3407,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * - when there is a close cooperator */ if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) - cfq_slice_expired(cfqd, 1, false); + cfq_slice_expired(cfqd, 1); else if (sync && cfqq_empty && !cfq_close_cooperator(cfqd, cfqq)) { cfqd->noidle_tree_requires_idle |= !rq_noidle(rq); @@ -3473,6 +3507,10 @@ static void cfq_put_request(struct request *rq) rq->elevator_private = NULL; rq->elevator_private2 = NULL; + /* Put down rq reference on cfqg */ + cfq_put_cfqg(RQ_CFQG(rq)); + rq->elevator_private3 = NULL; + cfq_put_queue(cfqq); } } @@ -3503,6 +3541,9 @@ split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) } cic_set_cfqq(cic, NULL, 1); + + cfq_put_cooperator(cfqq); + cfq_put_queue(cfqq); return NULL; } @@ -3561,6 +3602,7 @@ new_queue: rq->elevator_private = cic; rq->elevator_private2 = cfqq; + rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); return 0; queue_fail: @@ -3633,7 +3675,7 @@ static void cfq_idle_slice_timer(unsigned long data) cfq_clear_cfqq_deep(cfqq); } expire: - cfq_slice_expired(cfqd, timed_out, false); + cfq_slice_expired(cfqd, timed_out); out_kick: cfq_schedule_dispatch(cfqd); out_cont: @@ -3676,7 +3718,7 @@ static void cfq_exit_queue(struct elevator_queue *e) spin_lock_irq(q->queue_lock); if (cfqd->active_queue) - __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false); + __cfq_slice_expired(cfqd, cfqd->active_queue, 0); while (!list_empty(&cfqd->cic_list)) { struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, @@ -3694,10 +3736,32 @@ static void cfq_exit_queue(struct elevator_queue *e) cfq_shutdown_timer_wq(cfqd); + spin_lock(&cic_index_lock); + ida_remove(&cic_index_ida, cfqd->cic_index); + spin_unlock(&cic_index_lock); + /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ call_rcu(&cfqd->rcu, cfq_cfqd_free); } +static int cfq_alloc_cic_index(void) +{ + int index, error; + + do { + if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&cic_index_lock); + error = ida_get_new(&cic_index_ida, &index); + spin_unlock(&cic_index_lock); + if (error && error != -EAGAIN) + return error; + } while (error); + + return index; +} + static void *cfq_init_queue(struct request_queue *q) { struct cfq_data *cfqd; @@ -3705,10 +3769,16 @@ static void *cfq_init_queue(struct request_queue *q) struct cfq_group *cfqg; struct cfq_rb_root *st; + i = cfq_alloc_cic_index(); + if (i < 0) + return NULL; + cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); if (!cfqd) return NULL; + cfqd->cic_index = i; + /* Init root service tree */ cfqd->grp_service_tree = CFQ_RB_ROOT; @@ -3727,8 +3797,10 @@ static void *cfq_init_queue(struct request_queue *q) * to make sure that cfq_put_cfqg() does not try to kfree root group */ atomic_set(&cfqg->ref, 1); + rcu_read_lock(); blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd, 0); + rcu_read_unlock(); #endif /* * Not strictly needed (since RB_ROOT just clears the node and we @@ -3774,7 +3846,6 @@ static void *cfq_init_queue(struct request_queue *q) * second, in order to have larger depth for async operations. */ cfqd->last_delayed_sync = jiffies - HZ; - INIT_RCU_HEAD(&cfqd->rcu); return cfqd; } @@ -3969,6 +4040,7 @@ static void __exit cfq_exit(void) */ if (elv_ioc_count_read(cfq_ioc_count)) wait_for_completion(&all_gone); + ida_destroy(&cic_index_ida); cfq_slab_kill(); }