X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fcfq-iosched.c;h=6a062eebbd15301320e7491b5dd45d17f2204a3c;hb=99cd3386f290eaf61f2b7596d5a4cc2007771174;hp=ca198e61fa65c555bf936d2d628876313b62dbb1;hpb=fe094d98e79351344c9e0e2c1446794240d247a4;p=safe%2Fjmp%2Flinux-2.6 diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ca198e6..6a062ee 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -11,6 +11,7 @@ #include #include #include +#include /* * tunables @@ -38,16 +39,18 @@ static int cfq_slice_idle = HZ / 125; #define CFQ_MIN_TT (2) #define CFQ_SLICE_SCALE (5) +#define CFQ_HW_QUEUE_MIN (5) #define RQ_CIC(rq) \ ((struct cfq_io_context *) (rq)->elevator_private) -#define RQ_CFQQ(rq) ((rq)->elevator_private2) +#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) static struct kmem_cache *cfq_pool; static struct kmem_cache *cfq_ioc_pool; static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; +static DEFINE_SPINLOCK(ioc_gone_lock); #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) @@ -84,7 +87,14 @@ struct cfq_data { int rq_in_driver; int sync_flight; + + /* + * queue-depth detection + */ + int rq_queued; int hw_tag; + int hw_tag_samples; + int rq_in_driver_peak; /* * idle window management @@ -124,6 +134,8 @@ struct cfq_data { struct cfq_queue { /* reference count */ atomic_t ref; + /* various state flags, see below */ + unsigned int flags; /* parent cfq_data */ struct cfq_data *cfqd; /* service_tree member */ @@ -138,14 +150,14 @@ struct cfq_queue { int queued[2]; /* currently allocated requests */ int allocated[2]; - /* pending metadata requests */ - int meta_pending; /* fifo list of requests in sort_list */ struct list_head fifo; unsigned long slice_end; long slice_resid; + /* pending metadata requests */ + int meta_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -153,8 +165,7 @@ struct cfq_queue { unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; - /* various state flags, see below */ - unsigned int flags; + pid_t pid; }; enum cfqq_state_flags { @@ -198,6 +209,11 @@ CFQ_CFQQ_FNS(slice_new); CFQ_CFQQ_FNS(sync); #undef CFQ_CFQQ_FNS +#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ + blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) +#define cfq_log(cfqd, fmt, args...) \ + blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) + static void cfq_dispatch_insert(struct request_queue *, struct request *); static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, struct io_context *, gfp_t); @@ -234,8 +250,10 @@ static inline int cfq_bio_sync(struct bio *bio) */ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) { - if (cfqd->busy_queues) - kblockd_schedule_work(&cfqd->unplug_work); + if (cfqd->busy_queues) { + cfq_log(cfqd, "schedule dispatch"); + kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); + } } static int cfq_queue_empty(struct request_queue *q) @@ -270,6 +288,7 @@ static inline void cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; + cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); } /* @@ -539,6 +558,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { + cfq_log_cfqq(cfqd, cfqq, "add_to_rr"); BUG_ON(cfq_cfqq_on_rr(cfqq)); cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; @@ -552,6 +572,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { + cfq_log_cfqq(cfqd, cfqq, "del_from_rr"); BUG_ON(!cfq_cfqq_on_rr(cfqq)); cfq_clear_cfqq_on_rr(cfqq); @@ -638,15 +659,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) struct cfq_data *cfqd = q->elevator->elevator_data; cfqd->rq_in_driver++; - - /* - * If the depth is larger 1, it really could be queueing. But lets - * make the mark a little higher - idling could still be good for - * low queueing, and a low queueing number could also just indicate - * a SCSI mid layer like behaviour where limit+1 is often seen. - */ - if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) - cfqd->hw_tag = 1; + cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", + cfqd->rq_in_driver); cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; } @@ -657,6 +671,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq) WARN_ON(!cfqd->rq_in_driver); cfqd->rq_in_driver--; + cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", + cfqd->rq_in_driver); } static void cfq_remove_request(struct request *rq) @@ -669,6 +685,7 @@ static void cfq_remove_request(struct request *rq) list_del_init(&rq->queuelist); cfq_del_rq_rb(rq); + cfqq->cfqd->rq_queued--; if (rq_is_meta(rq)) { WARN_ON(!cfqq->meta_pending); cfqq->meta_pending--; @@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (cfqq) { + cfq_log_cfqq(cfqd, cfqq, "set_active"); cfqq->slice_end = 0; cfq_clear_cfqq_must_alloc_slice(cfqq); cfq_clear_cfqq_fifo_expire(cfqq); @@ -763,6 +781,8 @@ static void __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, int timed_out) { + cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); + if (cfq_cfqq_wait_request(cfqq)) del_timer(&cfqd->idle_slice_timer); @@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * store what was left of this slice, if the queue idled/timed out */ - if (timed_out && !cfq_cfqq_slice_new(cfqq)) + if (timed_out && !cfq_cfqq_slice_new(cfqq)) { cfqq->slice_resid = cfqq->slice_end - jiffies; + cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); + } cfq_resort_rr_list(cfqd, cfqq); @@ -856,6 +878,14 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) struct cfq_io_context *cic; unsigned long sl; + /* + * SSD device without seek penalty, disable idling. But only do so + * for devices that support queuing, otherwise we still have a problem + * with sync vs async workloads. + */ + if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) + return; + WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); WARN_ON(cfq_cfqq_slice_new(cfqq)); @@ -866,6 +896,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) return; /* + * still requests with the driver, don't idle + */ + if (cfqd->rq_in_driver) + return; + + /* * task has exited, don't wait */ cic = cfqd->active_cic; @@ -892,6 +928,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); mod_timer(&cfqd->idle_slice_timer, jiffies + sl); + cfq_log(cfqd, "arm_idle: %lu", sl); } /* @@ -902,6 +939,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq = RQ_CFQQ(rq); + cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); + cfq_remove_request(rq); cfqq->dispatched++; elv_dispatch_sort(q, rq); @@ -931,8 +970,9 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) rq = rq_entry_fifo(cfqq->fifo.next); if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) - return NULL; + rq = NULL; + cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq); return rq; } @@ -1072,6 +1112,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) BUG_ON(cfqd->busy_queues); + cfq_log(cfqd, "forced_dispatch=%d\n", dispatched); return dispatched; } @@ -1112,6 +1153,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); } + cfq_log(cfqd, "dispatched=%d", dispatched); return dispatched; } @@ -1130,6 +1172,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) if (!atomic_dec_and_test(&cfqq->ref)) return; + cfq_log_cfqq(cfqd, cfqq, "put_queue"); BUG_ON(rb_first(&cfqq->sort_list)); BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); BUG_ON(cfq_cfqq_on_rr(cfqq)); @@ -1143,43 +1186,58 @@ static void cfq_put_queue(struct cfq_queue *cfqq) } /* - * Call func for each cic attached to this ioc. Returns number of cic's seen. + * Must always be called with the rcu_read_lock() held + */ +static void +__call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, struct cfq_io_context *)) +{ + struct cfq_io_context *cic; + struct hlist_node *n; + + hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) + func(ioc, cic); +} + +/* + * Call func for each cic attached to this ioc. */ -#define CIC_GANG_NR 16 -static unsigned int +static void call_for_each_cic(struct io_context *ioc, void (*func)(struct io_context *, struct cfq_io_context *)) { - struct cfq_io_context *cics[CIC_GANG_NR]; - unsigned long index = 0; - unsigned int called = 0; - int nr; - rcu_read_lock(); + __call_for_each_cic(ioc, func); + rcu_read_unlock(); +} - do { - int i; - - /* - * Perhaps there's a better way - this just gang lookups from - * 0 to the end, restarting after each CIC_GANG_NR from the - * last key + 1. - */ - nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics, - index, CIC_GANG_NR); - if (!nr) - break; +static void cfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; - called += nr; - index = 1 + (unsigned long) cics[nr - 1]->key; + cic = container_of(head, struct cfq_io_context, rcu_head); - for (i = 0; i < nr; i++) - func(ioc, cics[i]); - } while (nr == CIC_GANG_NR); + kmem_cache_free(cfq_ioc_pool, cic); + elv_ioc_count_dec(ioc_count); - rcu_read_unlock(); + if (ioc_gone) { + /* + * CFQ scheduler is exiting, grab exit lock and check + * the pending io context count. If it hits zero, + * complete ioc_gone and set it back to NULL + */ + spin_lock(&ioc_gone_lock); + if (ioc_gone && !elv_ioc_count_read(ioc_count)) { + complete(ioc_gone); + ioc_gone = NULL; + } + spin_unlock(&ioc_gone_lock); + } +} - return called; +static void cfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, cfq_cic_free_rcu); } static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) @@ -1190,26 +1248,26 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) spin_lock_irqsave(&ioc->lock, flags); radix_tree_delete(&ioc->radix_root, cic->dead_key); + hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); - kmem_cache_free(cfq_ioc_pool, cic); + cfq_cic_free(cic); } +/* + * Must be called with rcu_read_lock() held or preemption otherwise disabled. + * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), + * and ->trim() which is called with the task lock held + */ static void cfq_free_io_context(struct io_context *ioc) { - int freed; - /* - * ioc->refcount is zero here, so no more cic's are allowed to be - * linked into this ioc. So it should be ok to iterate over the known - * list, we will see all cic's since no new ones are added. + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. */ - freed = call_for_each_cic(ioc, cic_free_func); - - elv_ioc_count_mod(ioc_count, -freed); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); + __call_for_each_cic(ioc, cic_free_func); } static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) @@ -1225,6 +1283,8 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) static void __cfq_exit_single_io_context(struct cfq_data *cfqd, struct cfq_io_context *cic) { + struct io_context *ioc = cic->ioc; + list_del_init(&cic->queue_list); /* @@ -1234,6 +1294,9 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, cic->dead_key = (unsigned long) cic->key; cic->key = NULL; + if (ioc->ioc_data == cic) + rcu_assign_pointer(ioc->ioc_data, NULL); + if (cic->cfqq[ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); cic->cfqq[ASYNC] = NULL; @@ -1266,7 +1329,6 @@ static void cfq_exit_single_io_context(struct io_context *ioc, */ static void cfq_exit_io_context(struct io_context *ioc) { - rcu_assign_pointer(ioc->ioc_data, NULL); call_for_each_cic(ioc, cfq_exit_single_io_context); } @@ -1280,6 +1342,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) if (cic) { cic->last_end_request = jiffies; INIT_LIST_HEAD(&cic->queue_list); + INIT_HLIST_NODE(&cic->cic_list); cic->dtor = cfq_free_io_context; cic->exit = cfq_exit_io_context; elv_ioc_count_inc(ioc_count); @@ -1302,10 +1365,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); case IOPRIO_CLASS_NONE: /* - * no prio set, place us in the middle of the BE classes + * no prio set, inherit CPU scheduling settings */ cfqq->ioprio = task_nice_ioprio(tsk); - cfqq->ioprio_class = IOPRIO_CLASS_BE; + cfqq->ioprio_class = task_nice_ioclass(tsk); break; case IOPRIO_CLASS_RT: cfqq->ioprio = task_ioprio(ioc); @@ -1418,6 +1481,8 @@ retry: cfq_mark_cfqq_idle_window(cfqq); cfq_mark_cfqq_sync(cfqq); } + cfqq->pid = current->pid; + cfq_log_cfqq(cfqd, cfqq, "alloced"); } if (new_cfqq) @@ -1475,15 +1540,6 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, return cfqq; } -static void cfq_cic_free(struct cfq_io_context *cic) -{ - kmem_cache_free(cfq_ioc_pool, cic); - elv_ioc_count_dec(ioc_count); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); -} - /* * We drop cfq io contexts lazily, so we may find a dead one. */ @@ -1497,10 +1553,10 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); - if (ioc->ioc_data == cic) - rcu_assign_pointer(ioc->ioc_data, NULL); + BUG_ON(ioc->ioc_data == cic); radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); + hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); cfq_cic_free(cic); @@ -1510,20 +1566,24 @@ static struct cfq_io_context * cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) { struct cfq_io_context *cic; + unsigned long flags; void *k; if (unlikely(!ioc)) return NULL; + rcu_read_lock(); + /* * we maintain a last-hit cache, to avoid browsing over the tree */ cic = rcu_dereference(ioc->ioc_data); - if (cic && cic->key == cfqd) + if (cic && cic->key == cfqd) { + rcu_read_unlock(); return cic; + } do { - rcu_read_lock(); cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); rcu_read_unlock(); if (!cic) @@ -1532,10 +1592,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) k = cic->key; if (unlikely(!k)) { cfq_drop_dead_cic(cfqd, ioc, cic); + rcu_read_lock(); continue; } + spin_lock_irqsave(&ioc->lock, flags); rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); break; } while (1); @@ -1561,6 +1624,8 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); ret = radix_tree_insert(&ioc->radix_root, (unsigned long) cfqd, cic); + if (!ret) + hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); radix_tree_preload_end(); @@ -1666,7 +1731,7 @@ static void cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic) { - int enable_idle; + int old_idle, enable_idle; /* * Don't idle for async or idle io prio class @@ -1674,7 +1739,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) return; - enable_idle = cfq_cfqq_idle_window(cfqq); + enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || (cfqd->hw_tag && CIC_SEEKY(cic))) @@ -1686,10 +1751,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = 1; } - if (enable_idle) - cfq_mark_cfqq_idle_window(cfqq); - else - cfq_clear_cfqq_idle_window(cfqq); + if (old_idle != enable_idle) { + cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle); + if (enable_idle) + cfq_mark_cfqq_idle_window(cfqq); + else + cfq_clear_cfqq_idle_window(cfqq); + } } /* @@ -1748,6 +1816,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, */ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { + cfq_log_cfqq(cfqd, cfqq, "preempt"); cfq_slice_expired(cfqd, 1); /* @@ -1772,6 +1841,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, { struct cfq_io_context *cic = RQ_CIC(rq); + cfqd->rq_queued++; if (rq_is_meta(rq)) cfqq->meta_pending++; @@ -1809,6 +1879,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq = RQ_CFQQ(rq); + cfq_log_cfqq(cfqd, cfqq, "insert_request"); cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); cfq_add_rq_rb(rq); @@ -1818,6 +1889,31 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_rq_enqueued(cfqd, cfqq, rq); } +/* + * Update hw_tag based on peak queue depth over 50 samples under + * sufficient load. + */ +static void cfq_update_hw_tag(struct cfq_data *cfqd) +{ + if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) + cfqd->rq_in_driver_peak = cfqd->rq_in_driver; + + if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && + cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) + return; + + if (cfqd->hw_tag_samples++ < 50) + return; + + if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN) + cfqd->hw_tag = 1; + else + cfqd->hw_tag = 0; + + cfqd->hw_tag_samples = 0; + cfqd->rq_in_driver_peak = 0; +} + static void cfq_completed_request(struct request_queue *q, struct request *rq) { struct cfq_queue *cfqq = RQ_CFQQ(rq); @@ -1826,6 +1922,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) unsigned long now; now = jiffies; + cfq_log_cfqq(cfqd, cfqq, "complete"); + + cfq_update_hw_tag(cfqd); WARN_ON(!cfqd->rq_in_driver); WARN_ON(!cfqq->dispatched); @@ -1995,6 +2094,7 @@ queue_fail: cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); + cfq_log(cfqd, "set_request fail"); return 1; } @@ -2020,6 +2120,8 @@ static void cfq_idle_slice_timer(unsigned long data) unsigned long flags; int timed_out = 1; + cfq_log(cfqd, "idle timer fired"); + spin_lock_irqsave(cfqd->queue->queue_lock, flags); cfqq = cfqd->active_queue; @@ -2134,12 +2236,17 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; + cfqd->hw_tag = 1; return cfqd; } static void cfq_slab_kill(void) { + /* + * Caller already ensured that pending RCU callbacks are completed, + * so we should have no busy allocations at this point. + */ if (cfq_pool) kmem_cache_destroy(cfq_pool); if (cfq_ioc_pool) @@ -2152,7 +2259,7 @@ static int __init cfq_slab_setup(void) if (!cfq_pool) goto fail; - cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); + cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); if (!cfq_ioc_pool) goto fail; @@ -2298,9 +2405,13 @@ static void __exit cfq_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); + + /* + * this also protects us from entering cfq_slab_kill() with + * pending RCU callbacks + */ if (elv_ioc_count_read(ioc_count)) - wait_for_completion(ioc_gone); - synchronize_rcu(); + wait_for_completion(&all_gone); cfq_slab_kill(); }