X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fcfq-iosched.c;h=1ca813b16e7840cf10086d79b7e2bb7b0c07005e;hb=ff2c3de305e2d06ae556e1a382ed75c5dd8f9dda;hp=99ac4304d711e093867040d49ace530e832876db;hpb=2e46e8b27aa57c6bd34b3102b40ee4d0144b4fab;p=safe%2Fjmp%2Flinux-2.6 diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 99ac430..1ca813b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -48,7 +48,7 @@ static int cfq_slice_idle = HZ / 125; static struct kmem_cache *cfq_pool; static struct kmem_cache *cfq_ioc_pool; -static DEFINE_PER_CPU(unsigned long, ioc_count); +static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); static struct completion *ioc_gone; static DEFINE_SPINLOCK(ioc_gone_lock); @@ -71,6 +71,51 @@ struct cfq_rb_root { #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } /* + * Per process-grouping structure + */ +struct cfq_queue { + /* reference count */ + atomic_t ref; + /* various state flags, see below */ + unsigned int flags; + /* parent cfq_data */ + struct cfq_data *cfqd; + /* service_tree member */ + struct rb_node rb_node; + /* service_tree key */ + unsigned long rb_key; + /* prio tree member */ + struct rb_node p_node; + /* prio tree root we belong to, if any */ + struct rb_root *p_root; + /* sorted list of pending requests */ + struct rb_root sort_list; + /* if fifo isn't expired, next request to serve */ + struct request *next_rq; + /* requests queued in sort_list */ + int queued[2]; + /* currently allocated requests */ + int allocated[2]; + /* fifo list of requests in sort_list */ + struct list_head fifo; + + unsigned long slice_end; + long slice_resid; + unsigned int slice_dispatch; + + /* pending metadata requests */ + int meta_pending; + /* number of requests that are on the dispatch list or inside driver */ + int dispatched; + + /* io prio of this group */ + unsigned short ioprio, org_ioprio; + unsigned short ioprio_class, org_ioprio_class; + + pid_t pid; +}; + +/* * Per block device queue structure */ struct cfq_data { @@ -89,13 +134,8 @@ struct cfq_data { struct rb_root prio_trees[CFQ_PRIO_LISTS]; unsigned int busy_queues; - /* - * Used to track any pending rt requests so we can pre-empt current - * non-RT cfqq in service when this value is non-zero. 
- */ - unsigned int busy_rt_queues; - int rq_in_driver; + int rq_in_driver[2]; int sync_flight; /* @@ -122,7 +162,6 @@ struct cfq_data { struct cfq_queue *async_idle_cfqq; sector_t last_position; - unsigned long last_end_request; /* * tunables, see top of file @@ -136,58 +175,17 @@ struct cfq_data { unsigned int cfq_slice_idle; struct list_head cic_list; -}; -/* - * Per process-grouping structure - */ -struct cfq_queue { - /* reference count */ - atomic_t ref; - /* various state flags, see below */ - unsigned int flags; - /* parent cfq_data */ - struct cfq_data *cfqd; - /* service_tree member */ - struct rb_node rb_node; - /* service_tree key */ - unsigned long rb_key; - /* prio tree member */ - struct rb_node p_node; - /* prio tree root we belong to, if any */ - struct rb_root *p_root; - /* sorted list of pending requests */ - struct rb_root sort_list; - /* if fifo isn't expired, next request to serve */ - struct request *next_rq; - /* requests queued in sort_list */ - int queued[2]; - /* currently allocated requests */ - int allocated[2]; - /* fifo list of requests in sort_list */ - struct list_head fifo; - - unsigned long slice_end; - long slice_resid; - unsigned int slice_dispatch; - - /* pending metadata requests */ - int meta_pending; - /* number of requests that are on the dispatch list or inside driver */ - int dispatched; - - /* io prio of this group */ - unsigned short ioprio, org_ioprio; - unsigned short ioprio_class, org_ioprio_class; - - pid_t pid; + /* + * Fallback dummy cfqq for extreme OOM conditions + */ + struct cfq_queue oom_cfqq; }; enum cfqq_state_flags { CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ - CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ @@ -214,7 +212,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ CFQ_CFQQ_FNS(on_rr); CFQ_CFQQ_FNS(wait_request); CFQ_CFQQ_FNS(must_dispatch); -CFQ_CFQQ_FNS(must_alloc); CFQ_CFQQ_FNS(must_alloc_slice); CFQ_CFQQ_FNS(fifo_expire); CFQ_CFQQ_FNS(idle_window); @@ -235,6 +232,11 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, struct io_context *); +static inline int rq_in_driver(struct cfq_data *cfqd) +{ + return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1]; +} + static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, int is_sync) { @@ -253,7 +255,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic, */ static inline int cfq_bio_sync(struct bio *bio) { - if (bio_data_dir(bio) == READ || bio_sync(bio)) + if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO)) return 1; return 0; @@ -644,8 +646,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(cfq_cfqq_on_rr(cfqq)); cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; - if (cfq_class_rt(cfqq)) - cfqd->busy_rt_queues++; cfq_resort_rr_list(cfqd, cfqq); } @@ -669,8 +669,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; - if (cfq_class_rt(cfqq)) - cfqd->busy_rt_queues--; } /* @@ -756,9 +754,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - 
cfqd->rq_in_driver++; + cfqd->rq_in_driver[rq_is_sync(rq)]++; cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", - cfqd->rq_in_driver); + rq_in_driver(cfqd)); cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); } @@ -766,11 +764,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) static void cfq_deactivate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); - WARN_ON(!cfqd->rq_in_driver); - cfqd->rq_in_driver--; + WARN_ON(!cfqd->rq_in_driver[sync]); + cfqd->rq_in_driver[sync]--; cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", - cfqd->rq_in_driver); + rq_in_driver(cfqd)); } static void cfq_remove_request(struct request *rq) @@ -1076,7 +1075,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) /* * still requests with the driver, don't idle */ - if (cfqd->rq_in_driver) + if (rq_in_driver(cfqd)) return; /* @@ -1111,6 +1110,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); + cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq); cfq_remove_request(rq); cfqq->dispatched++; elv_dispatch_sort(q, rq); @@ -1175,20 +1175,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) goto expire; /* - * If we have a RT cfqq waiting, then we pre-empt the current non-rt - * cfqq. - */ - if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) { - /* - * We simulate this as cfqq timed out so that it gets to bank - * the remaining of its time slice. - */ - cfq_log_cfqq(cfqd, cfqq, "preempt"); - cfq_slice_expired(cfqd, 1); - goto new_queue; - } - - /* * The active queue has requests and isn't expired, allow it to * dispatch. */ @@ -1253,7 +1239,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) BUG_ON(cfqd->busy_queues); - cfq_log(cfqd, "forced_dispatch=%d\n", dispatched); + cfq_log(cfqd, "forced_dispatch=%d", dispatched); return dispatched; } @@ -1282,7 +1268,7 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (!cfqd->active_cic) { struct cfq_io_context *cic = RQ_CIC(rq); - atomic_inc(&cic->ioc->refcount); + atomic_long_inc(&cic->ioc->refcount); cfqd->active_cic = cic; } } @@ -1308,6 +1294,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) return 0; /* + * Drain async requests before we start sync IO + */ + if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) + return 0; + + /* * If this is an async queue and we have sync IO in flight, let it wait */ if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) @@ -1358,7 +1350,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) cfq_slice_expired(cfqd, 0); } - cfq_log(cfqd, "dispatched a request"); + cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); return 1; } @@ -1423,7 +1415,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head) cic = container_of(head, struct cfq_io_context, rcu_head); kmem_cache_free(cfq_ioc_pool, cic); - elv_ioc_count_dec(ioc_count); + elv_ioc_count_dec(cfq_ioc_count); if (ioc_gone) { /* @@ -1432,7 +1424,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head) * complete ioc_gone and set it back to NULL */ spin_lock(&ioc_gone_lock); - if (ioc_gone && !elv_ioc_count_read(ioc_count)) { + if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { complete(ioc_gone); ioc_gone = NULL; } @@ -1558,7 +1550,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) INIT_HLIST_NODE(&cic->cic_list); cic->dtor = cfq_free_io_context; 
cic->exit = cfq_exit_io_context; - elv_ioc_count_inc(ioc_count); + elv_ioc_count_inc(cfq_ioc_count); } return cic; @@ -1642,6 +1634,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc) ioc->ioprio_changed = 0; } +static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, + pid_t pid, int is_sync) +{ + RB_CLEAR_NODE(&cfqq->rb_node); + RB_CLEAR_NODE(&cfqq->p_node); + INIT_LIST_HEAD(&cfqq->fifo); + + atomic_set(&cfqq->ref, 0); + cfqq->cfqd = cfqd; + + cfq_mark_cfqq_prio_changed(cfqq); + + if (is_sync) { + if (!cfq_class_idle(cfqq)) + cfq_mark_cfqq_idle_window(cfqq); + cfq_mark_cfqq_sync(cfqq); + } + cfqq->pid = pid; +} + static struct cfq_queue * cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, gfp_t gfp_mask) @@ -1654,56 +1666,40 @@ retry: /* cic always exists here */ cfqq = cic_to_cfqq(cic, is_sync); - if (!cfqq) { + /* + * Always try a new alloc if we fell back to the OOM cfqq + * originally, since it should just be a temporary situation. + */ + if (!cfqq || cfqq == &cfqd->oom_cfqq) { + cfqq = NULL; if (new_cfqq) { cfqq = new_cfqq; new_cfqq = NULL; } else if (gfp_mask & __GFP_WAIT) { - /* - * Inform the allocator of the fact that we will - * just repeat this allocation if it fails, to allow - * the allocator to do whatever it needs to attempt to - * free memory. - */ spin_unlock_irq(cfqd->queue->queue_lock); new_cfqq = kmem_cache_alloc_node(cfq_pool, - gfp_mask | __GFP_NOFAIL | __GFP_ZERO, + gfp_mask | __GFP_ZERO, cfqd->queue->node); spin_lock_irq(cfqd->queue->queue_lock); - goto retry; + if (new_cfqq) + goto retry; } else { cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask | __GFP_ZERO, cfqd->queue->node); - if (!cfqq) - goto out; } - RB_CLEAR_NODE(&cfqq->rb_node); - RB_CLEAR_NODE(&cfqq->p_node); - INIT_LIST_HEAD(&cfqq->fifo); - - atomic_set(&cfqq->ref, 0); - cfqq->cfqd = cfqd; - - cfq_mark_cfqq_prio_changed(cfqq); - - cfq_init_prio_data(cfqq, ioc); - - if (is_sync) { - if (!cfq_class_idle(cfqq)) - cfq_mark_cfqq_idle_window(cfqq); - cfq_mark_cfqq_sync(cfqq); - } - cfqq->pid = current->pid; - cfq_log_cfqq(cfqd, cfqq, "alloced"); + if (cfqq) { + cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); + cfq_init_prio_data(cfqq, ioc); + cfq_log_cfqq(cfqd, cfqq, "alloced"); + } else + cfqq = &cfqd->oom_cfqq; } if (new_cfqq) kmem_cache_free(cfq_pool, new_cfqq); -out: - WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); return cfqq; } @@ -1736,11 +1732,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, cfqq = *async_cfqq; } - if (!cfqq) { + if (!cfqq) cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); - if (!cfqq) - return NULL; - } /* * pin the queue now that it's allocated, scheduler exit will prune it @@ -2125,11 +2118,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) */ static void cfq_update_hw_tag(struct cfq_data *cfqd) { - if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) - cfqd->rq_in_driver_peak = cfqd->rq_in_driver; + if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak) + cfqd->rq_in_driver_peak = rq_in_driver(cfqd); if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && - cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) + rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN) return; if (cfqd->hw_tag_samples++ < 50) @@ -2156,17 +2149,14 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_update_hw_tag(cfqd); - WARN_ON(!cfqd->rq_in_driver); + WARN_ON(!cfqd->rq_in_driver[sync]); WARN_ON(!cfqq->dispatched); - cfqd->rq_in_driver--; + cfqd->rq_in_driver[sync]--; cfqq->dispatched--; if 
(cfq_cfqq_sync(cfqq)) cfqd->sync_flight--; - if (!cfq_class_idle(cfqq)) - cfqd->last_end_request = now; - if (sync) RQ_CIC(rq)->last_end_request = now; @@ -2195,7 +2185,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_arm_slice_timer(cfqd); } - if (!cfqd->rq_in_driver) + if (!rq_in_driver(cfqd)) cfq_schedule_dispatch(cfqd); } @@ -2227,8 +2217,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) static inline int __cfq_may_queue(struct cfq_queue *cfqq) { - if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && - !cfq_cfqq_must_alloc_slice(cfqq)) { + if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { cfq_mark_cfqq_must_alloc_slice(cfqq); return ELV_MQUEUE_MUST; } @@ -2309,17 +2298,12 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) goto queue_fail; cfqq = cic_to_cfqq(cic, is_sync); - if (!cfqq) { + if (!cfqq || cfqq == &cfqd->oom_cfqq) { cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); - - if (!cfqq) - goto queue_fail; - cic_set_cfqq(cic, cfqq, is_sync); } cfqq->allocated[rw]++; - cfq_clear_cfqq_must_alloc(cfqq); atomic_inc(&cfqq->ref); spin_unlock_irqrestore(q->queue_lock, flags); @@ -2469,6 +2453,14 @@ static void *cfq_init_queue(struct request_queue *q) for (i = 0; i < CFQ_PRIO_LISTS; i++) cfqd->prio_trees[i] = RB_ROOT; + /* + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. + * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. + */ + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); + atomic_inc(&cfqd->oom_cfqq.ref); + INIT_LIST_HEAD(&cfqd->cic_list); cfqd->queue = q; @@ -2479,7 +2471,6 @@ static void *cfq_init_queue(struct request_queue *q) INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); - cfqd->last_end_request = jiffies; cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; @@ -2663,7 +2654,7 @@ static void __exit cfq_exit(void) * this also protects us from entering cfq_slab_kill() with * pending RCU callbacks */ - if (elv_ioc_count_read(ioc_count)) + if (elv_ioc_count_read(cfq_ioc_count)) wait_for_completion(&all_gone); cfq_slab_kill(); }
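
The oom_cfqq hunks above replace the old "return NULL on allocation failure" paths with a fallback queue embedded directly in struct cfq_data: it is initialised once in cfq_init_queue(), pinned with a permanent reference so the normal release path never frees it, and handed out by cfq_find_alloc_queue() whenever kmem_cache_alloc_node() fails. What follows is a minimal standalone sketch of that pattern, assuming illustrative names (struct pool, get_queue, put_queue) that are not part of the patch; it is plain userspace C, not kernel code, and only shows the idea.

/*
 * Sketch of the oom_cfqq idea: keep one statically embedded fallback
 * object whose reference count is pinned at init, and hand it out when
 * the real allocation fails so callers never have to deal with NULL.
 * All names here are illustrative, not taken from cfq-iosched.c.
 */
#include <stdio.h>
#include <stdlib.h>

struct queue {
	int ref;		/* reference count */
	int is_fallback;	/* set only on the embedded OOM object */
};

struct pool {
	struct queue oom_queue;	/* never freed; ref pinned by pool_init() */
};

static void pool_init(struct pool *p)
{
	/* Pin the fallback so put_queue() can never drop it to zero. */
	p->oom_queue.ref = 1;
	p->oom_queue.is_fallback = 1;
}

static struct queue *get_queue(struct pool *p)
{
	struct queue *q = calloc(1, sizeof(*q));

	if (!q)			/* allocation failed: use the fallback */
		q = &p->oom_queue;
	q->ref++;
	return q;		/* caller always gets a usable queue */
}

static void put_queue(struct queue *q)
{
	if (--q->ref == 0)	/* fallback keeps its init ref, never hits 0 */
		free(q);
}

int main(void)
{
	struct pool p;
	struct queue *q;

	pool_init(&p);
	q = get_queue(&p);
	printf("fallback in use: %d\n", q->is_fallback);
	put_queue(q);
	return 0;
}

As in the patch, callers that end up holding the fallback simply retry a real allocation on the next request (cfq_find_alloc_queue() and cfq_set_request() both check for cfqq == &cfqd->oom_cfqq), since the OOM condition is expected to be temporary.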