X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fcfq-iosched.c;h=c95c69e199f4cded3e9200c68756b8718bfda8d1;hb=aa6f6a3de18131348f70951efb2c56d806033e09;hp=fce8a749f4be8af3ffa7a584c3f97adf7fbf1bbd;hpb=8e2967555571659d2c8a70dd120710110ed7bba4;p=safe%2Fjmp%2Flinux-2.6 diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index fce8a74..c95c69e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -27,6 +27,8 @@ static const int cfq_slice_sync = HZ / 10; static int cfq_slice_async = HZ / 25; static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = HZ / 125; +static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ +static const int cfq_hist_divisor = 4; /* * offset from end of service tree @@ -38,6 +40,12 @@ static int cfq_slice_idle = HZ / 125; */ #define CFQ_MIN_TT (2) +/* + * Allow merged cfqqs to perform this amount of seeky I/O before + * deciding to break the queues up again. + */ +#define CFQQ_COOP_TOUT (HZ) + #define CFQ_SLICE_SCALE (5) #define CFQ_HW_QUEUE_MIN (5) @@ -67,8 +75,9 @@ static DEFINE_SPINLOCK(ioc_gone_lock); struct cfq_rb_root { struct rb_root rb; struct rb_node *left; + unsigned count; }; -#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } +#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, } /* * Per process-grouping structure @@ -112,7 +121,16 @@ struct cfq_queue { unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + sector_t last_request_pos; + unsigned long seeky_start; + pid_t pid; + + struct cfq_rb_root *service_tree; + struct cfq_queue *new_cfqq; }; /* @@ -134,6 +152,8 @@ struct cfq_data { struct rb_root prio_trees[CFQ_PRIO_LISTS]; unsigned int busy_queues; + unsigned int busy_rt_queues; + unsigned int busy_queues_avg[2]; int rq_in_driver[2]; int sync_flight; @@ -150,7 +170,7 @@ struct cfq_data { * idle window management */ struct timer_list idle_slice_timer; - struct delayed_work unplug_work; + struct work_struct unplug_work; struct cfq_queue *active_queue; struct cfq_io_context *active_cic; @@ -173,7 +193,7 @@ struct cfq_data { unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; unsigned int cfq_slice_idle; - unsigned int cfq_desktop; + unsigned int cfq_latency; struct list_head cic_list; @@ -195,7 +215,7 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ CFQ_CFQQ_FLAG_sync, /* synchronous queue */ - CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */ + CFQ_CFQQ_FLAG_coop, /* cfqq is shared */ }; #define CFQ_CFQQ_FNS(name) \ @@ -230,7 +250,7 @@ CFQ_CFQQ_FNS(coop); blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) static void cfq_dispatch_insert(struct request_queue *, struct request *); -static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, +static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, struct io_context *, gfp_t); static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, struct io_context *); @@ -241,40 +261,35 @@ static inline int rq_in_driver(struct cfq_data *cfqd) } static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, - int is_sync) + bool is_sync) { - return cic->cfqq[!!is_sync]; + return cic->cfqq[is_sync]; } static inline void cic_set_cfqq(struct cfq_io_context *cic, - struct cfq_queue *cfqq, int is_sync) + struct cfq_queue *cfqq, bool is_sync) { - cic->cfqq[!!is_sync] = cfqq; + cic->cfqq[is_sync] = cfqq; } /* * We regard a request as SYNC, 
if it's either a read or has the SYNC bit * set (in which case it could also be direct WRITE). */ -static inline int cfq_bio_sync(struct bio *bio) +static inline bool cfq_bio_sync(struct bio *bio) { - if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO)) - return 1; - - return 0; + return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO); } /* * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing */ -static inline void cfq_schedule_dispatch(struct cfq_data *cfqd, - unsigned long delay) +static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) { if (cfqd->busy_queues) { cfq_log(cfqd, "schedule dispatch"); - kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work, - delay); + kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); } } @@ -290,7 +305,7 @@ static int cfq_queue_empty(struct request_queue *q) * if a queue is marked sync and has sync io queued. A sync queue with async * io only, should not get full sync slice length. */ -static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync, +static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, unsigned short prio) { const int base_slice = cfqd->cfq_slice[sync]; @@ -306,10 +321,52 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); } +/* + * get averaged number of queues of RT/BE priority. + * average is updated, with a formula that gives more weight to higher numbers, + * to quickly follows sudden increases and decrease slowly + */ + +static inline unsigned +cfq_get_avg_queues(struct cfq_data *cfqd, bool rt) { + unsigned min_q, max_q; + unsigned mult = cfq_hist_divisor - 1; + unsigned round = cfq_hist_divisor / 2; + unsigned busy = cfqd->busy_rt_queues; + + if (!rt) + busy = cfqd->busy_queues - cfqd->busy_rt_queues; + + min_q = min(cfqd->busy_queues_avg[rt], busy); + max_q = max(cfqd->busy_queues_avg[rt], busy); + cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) / + cfq_hist_divisor; + return cfqd->busy_queues_avg[rt]; +} + static inline void cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; + unsigned slice = cfq_prio_to_slice(cfqd, cfqq); + if (cfqd->cfq_latency) { + /* interested queues (we consider only the ones with the same + * priority class) */ + unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq)); + unsigned sync_slice = cfqd->cfq_slice[1]; + unsigned expect_latency = sync_slice * iq; + if (expect_latency > cfq_target_latency) { + unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; + /* scale low_slice according to IO priority + * and sync vs async */ + unsigned low_slice = + min(slice, base_low_slice * slice / sync_slice); + /* the adapted slice value is scaled to fit all iqs + * into the target latency */ + slice = max(slice * cfq_target_latency / expect_latency, + low_slice); + } + } + cfqq->slice_end = jiffies + slice; cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); } @@ -318,7 +375,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) * isn't valid until the first request from the dispatch is activated * and the slice time set. 
*/ -static inline int cfq_slice_used(struct cfq_queue *cfqq) +static inline bool cfq_slice_used(struct cfq_queue *cfqq) { if (cfq_cfqq_slice_new(cfqq)) return 0; @@ -448,6 +505,7 @@ static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) if (root->left == n) root->left = NULL; rb_erase_init(n, &root->rb); + --root->count; } /* @@ -493,27 +551,37 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, * we will service the queues. */ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, - int add_front) + bool add_front) { struct rb_node **p, *parent; struct cfq_queue *__cfqq; unsigned long rb_key; + struct cfq_rb_root *service_tree = &cfqd->service_tree; int left; if (cfq_class_idle(cfqq)) { rb_key = CFQ_IDLE_DELAY; - parent = rb_last(&cfqd->service_tree.rb); + parent = rb_last(&service_tree->rb); if (parent && parent != &cfqq->rb_node) { __cfqq = rb_entry(parent, struct cfq_queue, rb_node); rb_key += __cfqq->rb_key; } else rb_key += jiffies; } else if (!add_front) { + /* + * Get our rb key offset. Subtract any residual slice + * value carried from last service. A negative resid + * count indicates slice overrun, and this should position + * the next service time further away in the tree. + */ rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; - rb_key += cfqq->slice_resid; + rb_key -= cfqq->slice_resid; cfqq->slice_resid = 0; - } else - rb_key = 0; + } else { + rb_key = -HZ; + __cfqq = cfq_rb_first(service_tree); + rb_key += __cfqq ? __cfqq->rb_key : jiffies; + } if (!RB_EMPTY_NODE(&cfqq->rb_node)) { /* @@ -522,12 +590,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (rb_key == cfqq->rb_key) return; - cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); + cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); + cfqq->service_tree = NULL; } left = 1; parent = NULL; - p = &cfqd->service_tree.rb.rb_node; + cfqq->service_tree = service_tree; + p = &service_tree->rb.rb_node; while (*p) { struct rb_node **n; @@ -547,7 +617,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, n = &(*p)->rb_left; else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) n = &(*p)->rb_right; - else if (rb_key < __cfqq->rb_key) + else if (time_before(rb_key, __cfqq->rb_key)) n = &(*p)->rb_left; else n = &(*p)->rb_right; @@ -559,11 +629,12 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, } if (left) - cfqd->service_tree.left = &cfqq->rb_node; + service_tree->left = &cfqq->rb_node; cfqq->rb_key = rb_key; rb_link_node(&cfqq->rb_node, parent, p); - rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); + rb_insert_color(&cfqq->rb_node, &service_tree->rb); + service_tree->count++; } static struct cfq_queue * @@ -651,7 +722,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(cfq_cfqq_on_rr(cfqq)); cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; - + if (cfq_class_rt(cfqq)) + cfqd->busy_rt_queues++; cfq_resort_rr_list(cfqd, cfqq); } @@ -665,8 +737,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(!cfq_cfqq_on_rr(cfqq)); cfq_clear_cfqq_on_rr(cfqq); - if (!RB_EMPTY_NODE(&cfqq->rb_node)) - cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); + if (!RB_EMPTY_NODE(&cfqq->rb_node)) { + cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); + cfqq->service_tree = NULL; + } if (cfqq->p_root) { rb_erase(&cfqq->p_node, cfqq->p_root); cfqq->p_root = NULL; @@ -674,6 +748,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, 
struct cfq_queue *cfqq) BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; + if (cfq_class_rt(cfqq)) + cfqd->busy_rt_queues--; } /* @@ -827,8 +903,10 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, * reposition in fifo if next is older than rq */ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(next->start_time, rq->start_time)) + time_before(rq_fifo_time(next), rq_fifo_time(rq))) { list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } cfq_remove_request(next); } @@ -844,7 +922,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, * Disallow merge of a sync bio into an async request. */ if (cfq_bio_sync(bio) && !rq_is_sync(rq)) - return 0; + return false; /* * Lookup the cfqq that this bio will be queued with. Allow @@ -852,13 +930,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, */ cic = cfq_cic_lookup(cfqd, current->io_context); if (!cic) - return 0; + return false; cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); - if (cfqq == RQ_CFQQ(rq)) - return 1; - - return 0; + return cfqq == RQ_CFQQ(rq); } static void __cfq_set_active_queue(struct cfq_data *cfqd, @@ -886,7 +961,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, */ static void __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, - int timed_out) + bool timed_out) { cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); @@ -914,7 +989,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, } } -static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) +static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) { struct cfq_queue *cfqq = cfqd->active_queue; @@ -940,11 +1015,8 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - if (!cfqq) { + if (!cfqq) cfqq = cfq_get_next_queue(cfqd); - if (cfqq) - cfq_clear_cfqq_coop(cfqq); - } __cfq_set_active_queue(cfqd, cfqq); return cfqq; @@ -959,16 +1031,16 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, return cfqd->last_position - blk_rq_pos(rq); } -#define CIC_SEEK_THR 8 * 1024 -#define CIC_SEEKY(cic) ((cic)->seek_mean > CIC_SEEK_THR) +#define CFQQ_SEEK_THR 8 * 1024 +#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR) -static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) +static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct request *rq) { - struct cfq_io_context *cic = cfqd->active_cic; - sector_t sdist = cic->seek_mean; + sector_t sdist = cfqq->seek_mean; - if (!sample_valid(cic->seek_samples)) - sdist = CIC_SEEK_THR; + if (!sample_valid(cfqq->seek_samples)) + sdist = CFQQ_SEEK_THR; return cfq_dist_from_last(cfqd, rq) <= sdist; } @@ -997,7 +1069,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, * will contain the closest sector. 
*/ __cfqq = rb_entry(parent, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, __cfqq->next_rq)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) return __cfqq; if (blk_rq_pos(__cfqq->next_rq) < sector) @@ -1008,7 +1080,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, return NULL; __cfqq = rb_entry(node, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, __cfqq->next_rq)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) return __cfqq; return NULL; @@ -1025,16 +1097,13 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, * assumption. */ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, - struct cfq_queue *cur_cfqq, - int probe) + struct cfq_queue *cur_cfqq) { struct cfq_queue *cfqq; - /* - * A valid cfq_io_context is necessary to compare requests against - * the seek_mean of the current cfqq. - */ - if (!cfqd->active_cic) + if (!cfq_cfqq_sync(cur_cfqq)) + return NULL; + if (CFQQ_SEEKY(cur_cfqq)) return NULL; /* @@ -1046,11 +1115,14 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, if (!cfqq) return NULL; - if (cfq_cfqq_coop(cfqq)) + /* + * It only makes sense to merge sync queues. + */ + if (!cfq_cfqq_sync(cfqq)) + return NULL; + if (CFQQ_SEEKY(cfqq)) return NULL; - if (!probe) - cfq_mark_cfqq_coop(cfqq); return cfqq; } @@ -1090,6 +1162,15 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) if (!cic || !atomic_read(&cic->ioc->nr_tasks)) return; + /* + * If our average think time is larger than the remaining time + * slice, then don't idle. This avoids overrunning the allotted + * time slice. + */ + if (sample_valid(cic->ttime_samples) && + (cfqq->slice_end - jiffies < cic->ttime_mean)) + return; + cfq_mark_cfqq_wait_request(cfqq); /* @@ -1098,7 +1179,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * seeks. so allow a little bit of time for him to submit a new rq */ sl = cfqd->cfq_slice_idle; - if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) + if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)) sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); mod_timer(&cfqd->idle_slice_timer, jiffies + sl); @@ -1129,9 +1210,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) */ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) { - struct cfq_data *cfqd = cfqq->cfqd; - struct request *rq; - int fifo; + struct request *rq = NULL; if (cfq_cfqq_fifo_expire(cfqq)) return NULL; @@ -1141,13 +1220,11 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) if (list_empty(&cfqq->fifo)) return NULL; - fifo = cfq_cfqq_sync(cfqq); rq = rq_entry_fifo(cfqq->fifo.next); - - if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) + if (time_before(jiffies, rq_fifo_time(rq))) rq = NULL; - cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq); + cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); return rq; } @@ -1162,6 +1239,52 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) } /* + * Must be called with the queue_lock held. 
+ */ +static int cfqq_process_refs(struct cfq_queue *cfqq) +{ + int process_refs, io_refs; + + io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; + process_refs = atomic_read(&cfqq->ref) - io_refs; + BUG_ON(process_refs < 0); + return process_refs; +} + +static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) +{ + int process_refs, new_process_refs; + struct cfq_queue *__cfqq; + + /* Avoid a circular list and skip interim queue merges */ + while ((__cfqq = new_cfqq->new_cfqq)) { + if (__cfqq == cfqq) + return; + new_cfqq = __cfqq; + } + + process_refs = cfqq_process_refs(cfqq); + /* + * If the process for the cfqq has gone away, there is no + * sense in merging the queues. + */ + if (process_refs == 0) + return; + + /* + * Merge in the direction of the lesser amount of work. + */ + new_process_refs = cfqq_process_refs(new_cfqq); + if (new_process_refs >= process_refs) { + cfqq->new_cfqq = new_cfqq; + atomic_add(process_refs, &new_cfqq->ref); + } else { + new_cfqq->new_cfqq = cfqq; + atomic_add(new_process_refs, &cfqq->ref); + } +} + +/* * Select a queue for service. If we have a current active queue, * check whether to continue servicing it, or retrieve and set a new one. */ @@ -1190,11 +1313,14 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) * If another queue has a request waiting within our mean seek * distance, let it run. The expire code will check for close * cooperators and put the close queue at the front of the service - * tree. + * tree. If possible, merge the expiring queue with the new cfqq. */ - new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0); - if (new_cfqq) + new_cfqq = cfq_close_cooperator(cfqd, cfqq); + if (new_cfqq) { + if (!cfqq->new_cfqq) + cfq_setup_merge(cfqq, new_cfqq); goto expire; + } /* * No requests pending. If the active queue still has requests in @@ -1248,67 +1374,21 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) return dispatched; } -/* - * Dispatch a request from cfqq, moving them to the request queue - * dispatch list. 
- */ -static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) -{ - struct request *rq; - - BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); - - /* - * follow expired path, else get first next available - */ - rq = cfq_check_fifo(cfqq); - if (!rq) - rq = cfqq->next_rq; - - /* - * insert request into driver dispatch list - */ - cfq_dispatch_insert(cfqd->queue, rq); - - if (!cfqd->active_cic) { - struct cfq_io_context *cic = RQ_CIC(rq); - - atomic_long_inc(&cic->ioc->refcount); - cfqd->active_cic = cic; - } -} - -/* - * Find the cfqq that we need to service and move a request from that to the - * dispatch list - */ -static int cfq_dispatch_requests(struct request_queue *q, int force) +static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_queue *cfqq; unsigned int max_dispatch; - if (!cfqd->busy_queues) - return 0; - - if (unlikely(force)) - return cfq_forced_dispatch(cfqd); - - cfqq = cfq_select_queue(cfqd); - if (!cfqq) - return 0; - /* * Drain async requests before we start sync IO */ if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) - return 0; + return false; /* * If this is an async queue and we have sync IO in flight, let it wait */ if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) - return 0; + return false; max_dispatch = cfqd->cfq_quantum; if (cfq_class_idle(cfqq)) @@ -1322,13 +1402,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) * idle queue must always only have a single IO in flight */ if (cfq_class_idle(cfqq)) - return 0; + return false; /* * We have other queues, don't allow more IO from this one */ if (cfqd->busy_queues > 1) - return 0; + return false; /* * Sole queue user, allow bigger slice @@ -1341,30 +1421,83 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) * We also ramp up the dispatch depth gradually for async IO, * based on the last sync IO we serviced */ - if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_desktop) { + if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { unsigned long last_sync = jiffies - cfqd->last_end_sync_rq; unsigned int depth; - /* - * must wait a bit longer - */ - if (last_sync < cfq_slice_sync) { - cfq_schedule_dispatch(cfqd, cfq_slice_sync - last_sync); - return 0; - } - - depth = last_sync / cfq_slice_sync; + depth = last_sync / cfqd->cfq_slice[1]; + if (!depth && !cfqq->dispatched) + depth = 1; if (depth < max_dispatch) max_dispatch = depth; } - if (cfqq->dispatched >= max_dispatch) + /* + * If we're below the current max, allow a dispatch + */ + return cfqq->dispatched < max_dispatch; +} + +/* + * Dispatch a request from cfqq, moving them to the request queue + * dispatch list. 
+ */ +static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + struct request *rq; + + BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); + + if (!cfq_may_dispatch(cfqd, cfqq)) + return false; + + /* + * follow expired path, else get first next available + */ + rq = cfq_check_fifo(cfqq); + if (!rq) + rq = cfqq->next_rq; + + /* + * insert request into driver dispatch list + */ + cfq_dispatch_insert(cfqd->queue, rq); + + if (!cfqd->active_cic) { + struct cfq_io_context *cic = RQ_CIC(rq); + + atomic_long_inc(&cic->ioc->refcount); + cfqd->active_cic = cic; + } + + return true; +} + +/* + * Find the cfqq that we need to service and move a request from that to the + * dispatch list + */ +static int cfq_dispatch_requests(struct request_queue *q, int force) +{ + struct cfq_data *cfqd = q->elevator->elevator_data; + struct cfq_queue *cfqq; + + if (!cfqd->busy_queues) + return 0; + + if (unlikely(force)) + return cfq_forced_dispatch(cfqd); + + cfqq = cfq_select_queue(cfqd); + if (!cfqq) return 0; /* - * Dispatch a request from this cfqq + * Dispatch a request from this cfqq, if it is allowed */ - cfq_dispatch_request(cfqd, cfqq); + if (!cfq_dispatch_request(cfqd, cfqq)) + return 0; + cfqq->slice_dispatch++; cfq_clear_cfqq_must_dispatch(cfqq); @@ -1405,7 +1538,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) if (unlikely(cfqd->active_queue == cfqq)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); } kmem_cache_free(cfq_pool, cfqq); @@ -1498,9 +1631,27 @@ static void cfq_free_io_context(struct io_context *ioc) static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { + struct cfq_queue *__cfqq, *next; + if (unlikely(cfqq == cfqd->active_queue)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); + } + + /* + * If this queue was scheduled to merge with another queue, be + * sure to drop the reference taken on that queue (and others in + * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs. 
+ */ + __cfqq = cfqq->new_cfqq; + while (__cfqq) { + if (__cfqq == cfqq) { + WARN(1, "cfqq->new_cfqq loop detected\n"); + break; + } + next = __cfqq->new_cfqq; + cfq_put_queue(__cfqq); + __cfqq = next; } cfq_put_queue(cfqq); @@ -1664,7 +1815,7 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc) } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, - pid_t pid, int is_sync) + pid_t pid, bool is_sync) { RB_CLEAR_NODE(&cfqq->rb_node); RB_CLEAR_NODE(&cfqq->p_node); @@ -1684,7 +1835,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, } static struct cfq_queue * -cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, +cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, gfp_t gfp_mask) { struct cfq_queue *cfqq, *new_cfqq = NULL; @@ -1748,7 +1899,7 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) } static struct cfq_queue * -cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, +cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, gfp_t gfp_mask) { const int ioprio = task_ioprio(ioc); @@ -1932,33 +2083,46 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) } static void -cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, +cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct request *rq) { sector_t sdist; u64 total; - if (!cic->last_request_pos) + if (!cfqq->last_request_pos) sdist = 0; - else if (cic->last_request_pos < blk_rq_pos(rq)) - sdist = blk_rq_pos(rq) - cic->last_request_pos; + else if (cfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - cfqq->last_request_pos; else - sdist = cic->last_request_pos - blk_rq_pos(rq); + sdist = cfqq->last_request_pos - blk_rq_pos(rq); /* * Don't allow the seek distance to get too large from the * odd fragment, pagein, etc */ - if (cic->seek_samples <= 60) /* second&third seek */ - sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); + if (cfqq->seek_samples <= 60) /* second&third seek */ + sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024); else - sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); + sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64); + + cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8; + cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8; + total = cfqq->seek_total + (cfqq->seek_samples/2); + do_div(total, cfqq->seek_samples); + cfqq->seek_mean = (sector_t)total; - cic->seek_samples = (7*cic->seek_samples + 256) / 8; - cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; - total = cic->seek_total + (cic->seek_samples/2); - do_div(total, cic->seek_samples); - cic->seek_mean = (sector_t)total; + /* + * If this cfqq is shared between multiple processes, check to + * make sure that those processes are still issuing I/Os within + * the mean seek distance. If not, it may be time to break the + * queues apart again. 
+ */ + if (cfq_cfqq_coop(cfqq)) { + if (CFQQ_SEEKY(cfqq) && !cfqq->seeky_start) + cfqq->seeky_start = jiffies; + else if (!CFQQ_SEEKY(cfqq)) + cfqq->seeky_start = 0; + } } /* @@ -1980,10 +2144,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || - (!cfqd->cfq_desktop && cfqd->hw_tag && CIC_SEEKY(cic))) + (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { - if (cic->ttime_mean > cfqd->cfq_slice_idle) + unsigned int slice_idle = cfqd->cfq_slice_idle; + if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)) + slice_idle = msecs_to_jiffies(CFQ_MIN_TT); + if (cic->ttime_mean > slice_idle) enable_idle = 0; else enable_idle = 1; @@ -2002,7 +2169,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, * Check if new_cfqq should preempt the currently active queue. Return 0 for * no or if we aren't sure, a 1 will cause a preempt. */ -static int +static bool cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, struct request *rq) { @@ -2010,48 +2177,48 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, cfqq = cfqd->active_queue; if (!cfqq) - return 0; + return false; if (cfq_slice_used(cfqq)) - return 1; + return true; if (cfq_class_idle(new_cfqq)) - return 0; + return false; if (cfq_class_idle(cfqq)) - return 1; + return true; /* * if the new request is sync, but the currently running queue is * not, let the sync request have priority. */ if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) - return 1; + return true; /* * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. */ if (rq_is_meta(rq) && !cfqq->meta_pending) - return 1; + return false; /* * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. 
*/ if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) - return 1; + return true; if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) - return 0; + return false; /* * if this request is as-good as one we would expect from the * current cfqq, let it preempt */ - if (cfq_rq_close(cfqd, rq)) - return 1; + if (cfq_rq_close(cfqd, cfqq, rq)) + return true; - return 0; + return false; } /* @@ -2090,10 +2257,10 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cic); - cfq_update_io_seektime(cfqd, cic, rq); + cfq_update_io_seektime(cfqd, cfqq, rq); cfq_update_idle_window(cfqd, cfqq, cic); - cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); if (cfqq == cfqd->active_queue) { /* @@ -2134,9 +2301,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, cfqq, "insert_request"); cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); - cfq_add_rq_rb(rq); - + rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &cfqq->fifo); + cfq_add_rq_rb(rq); cfq_rq_enqueued(cfqd, cfqq, rq); } @@ -2147,6 +2314,8 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) */ static void cfq_update_hw_tag(struct cfq_data *cfqd) { + struct cfq_queue *cfqq = cfqd->active_queue; + if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak) cfqd->rq_in_driver_peak = rq_in_driver(cfqd); @@ -2154,6 +2323,16 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN) return; + /* + * If active queue hasn't enough requests and can idle, cfq might not + * dispatch sufficient requests to hardware. Don't zero hw_tag in this + * case + */ + if (cfqq && cfq_cfqq_idle_window(cfqq) && + cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] < + CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN) + return; + if (cfqd->hw_tag_samples++ < 50) return; @@ -2211,13 +2390,13 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) */ if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) cfq_slice_expired(cfqd, 1); - else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) && + else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq) && sync && !rq_noidle(rq)) cfq_arm_slice_timer(cfqd); } if (!rq_in_driver(cfqd)) - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); } /* @@ -2306,6 +2485,43 @@ static void cfq_put_request(struct request *rq) } } +static struct cfq_queue * +cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, + struct cfq_queue *cfqq) +{ + cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); + cic_set_cfqq(cic, cfqq->new_cfqq, 1); + cfq_mark_cfqq_coop(cfqq->new_cfqq); + cfq_put_queue(cfqq); + return cic_to_cfqq(cic, 1); +} + +static int should_split_cfqq(struct cfq_queue *cfqq) +{ + if (cfqq->seeky_start && + time_after(jiffies, cfqq->seeky_start + CFQQ_COOP_TOUT)) + return 1; + return 0; +} + +/* + * Returns NULL if a new cfqq should be allocated, or the old cfqq if this + * was the last process referring to said cfqq. + */ +static struct cfq_queue * +split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) +{ + if (cfqq_process_refs(cfqq) == 1) { + cfqq->seeky_start = 0; + cfqq->pid = current->pid; + cfq_clear_cfqq_coop(cfqq); + return cfqq; + } + + cic_set_cfqq(cic, NULL, 1); + cfq_put_queue(cfqq); + return NULL; +} /* * Allocate cfq data structures associated with this request. 
*/ @@ -2315,7 +2531,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_io_context *cic; const int rw = rq_data_dir(rq); - const int is_sync = rq_is_sync(rq); + const bool is_sync = rq_is_sync(rq); struct cfq_queue *cfqq; unsigned long flags; @@ -2328,10 +2544,30 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) if (!cic) goto queue_fail; +new_queue: cfqq = cic_to_cfqq(cic, is_sync); if (!cfqq || cfqq == &cfqd->oom_cfqq) { cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); cic_set_cfqq(cic, cfqq, is_sync); + } else { + /* + * If the queue was seeky for too long, break it apart. + */ + if (cfq_cfqq_coop(cfqq) && should_split_cfqq(cfqq)) { + cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq"); + cfqq = split_cfqq(cic, cfqq); + if (!cfqq) + goto new_queue; + } + + /* + * Check to see if this queue is scheduled to merge with + * another, closely cooperating queue. The merging of + * queues happens here as it must be done in process context. + * The reference on new_cfqq was taken in merge_cfqqs. + */ + if (cfqq->new_cfqq) + cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq); } cfqq->allocated[rw]++; @@ -2347,7 +2583,7 @@ queue_fail: if (cic) put_io_context(cic->ioc); - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); cfq_log(cfqd, "set_request fail"); return 1; @@ -2356,7 +2592,7 @@ queue_fail: static void cfq_kick_queue(struct work_struct *work) { struct cfq_data *cfqd = - container_of(work, struct cfq_data, unplug_work.work); + container_of(work, struct cfq_data, unplug_work); struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); @@ -2410,7 +2646,7 @@ static void cfq_idle_slice_timer(unsigned long data) expire: cfq_slice_expired(cfqd, timed_out); out_kick: - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } @@ -2418,7 +2654,7 @@ out_cont: static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { del_timer_sync(&cfqd->idle_slice_timer); - cancel_delayed_work_sync(&cfqd->unplug_work); + cancel_work_sync(&cfqd->unplug_work); } static void cfq_put_async_queues(struct cfq_data *cfqd) @@ -2500,7 +2736,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->idle_slice_timer.function = cfq_idle_slice_timer; cfqd->idle_slice_timer.data = (unsigned long) cfqd; - INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue); + INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; @@ -2511,7 +2747,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; - cfqd->cfq_desktop = 1; + cfqd->cfq_latency = 1; cfqd->hw_tag = 1; cfqd->last_end_sync_rq = jiffies; return cfqd; @@ -2581,7 +2817,7 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); -SHOW_FUNCTION(cfq_desktop_show, cfqd->cfq_desktop, 0); +SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ @@ -2613,7 +2849,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 
STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); -STORE_FUNCTION(cfq_desktop_store, &cfqd->cfq_desktop, 0, 1, 0); +STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); #undef STORE_FUNCTION #define CFQ_ATTR(name) \ @@ -2629,7 +2865,7 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(slice_async), CFQ_ATTR(slice_async_rq), CFQ_ATTR(slice_idle), - CFQ_ATTR(desktop), + CFQ_ATTR(low_latency), __ATTR_NULL };
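
The behavioural heart of this diff is the new low_latency mode: cfq_get_avg_queues() keeps a history-weighted count of busy queues per priority class, and cfq_set_prio_slice() shrinks each queue's slice so the expected latency across those queues stays within cfq_target_latency (300 ms), never dropping below a floor derived from twice the idle slice. The standalone sketch below restates that arithmetic outside the kernel; it is plain user-space C using milliseconds rather than jiffies, and the helper names (avg_queues, scale_slice) and the sample values in main() are illustrative assumptions, not part of the patch.

/*
 * Standalone sketch of the slice scaling added in cfq_set_prio_slice() and
 * cfq_get_avg_queues() above.  User-space C, milliseconds instead of
 * jiffies; names and the example inputs are illustrative only.
 */
#include <stdio.h>

#define TARGET_LATENCY_MS	300	/* cfq_target_latency = HZ * 3/10 */
#define HIST_DIVISOR		4	/* cfq_hist_divisor */

/*
 * History-weighted estimate of how many queues of one priority class are
 * busy: biased toward the larger of (old average, current count), so the
 * estimate rises quickly on a burst and decays slowly afterwards.
 */
static unsigned avg_queues(unsigned *avg, unsigned busy)
{
	unsigned min_q = busy < *avg ? busy : *avg;
	unsigned max_q = busy > *avg ? busy : *avg;

	*avg = ((HIST_DIVISOR - 1) * max_q + min_q + HIST_DIVISOR / 2) /
		HIST_DIVISOR;
	return *avg;
}

/*
 * Shrink a queue's time slice so the "interested queues" (iq) of its class
 * can all be served within the target latency, but never below a low bound
 * of twice the idle slice, scaled by the queue's own priority slice.
 */
static unsigned scale_slice(unsigned slice, unsigned sync_slice,
			    unsigned slice_idle, unsigned iq)
{
	unsigned expect_latency = sync_slice * iq;

	if (expect_latency > TARGET_LATENCY_MS) {
		unsigned low_slice = 2 * slice_idle * slice / sync_slice;
		unsigned scaled;

		if (low_slice > slice)
			low_slice = slice;
		scaled = slice * TARGET_LATENCY_MS / expect_latency;
		slice = scaled > low_slice ? scaled : low_slice;
	}
	return slice;
}

int main(void)
{
	unsigned avg = 0;
	unsigned busy[] = { 1, 4, 8, 8, 2 };	/* example busy-queue samples */
	unsigned i;

	for (i = 0; i < sizeof(busy) / sizeof(busy[0]); i++) {
		unsigned iq = avg_queues(&avg, busy[i]);

		/* 100 ms sync slice, 8 ms idle slice, as in the defaults */
		printf("busy=%u avg=%u slice=%u ms\n", busy[i], iq,
		       scale_slice(100, 100, 8, iq));
	}
	return 0;
}

Note how the averaging step weights the larger of the old estimate and the current busy count by (cfq_hist_divisor - 1) / cfq_hist_divisor: a sudden increase in busy queues tightens slices almost immediately, while a quiet spell only relaxes them gradually, which is exactly the behaviour the comment above cfq_get_avg_queues() describes.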