X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fas-iosched.c;h=631f6f44460a2bd7ad7218f89d5e15bb90742ee6;hb=be987fdb55a4726e2fcbab7501f89276bdb57288;hp=02eb9333898f38fa829b345c68365e01f9f440a7;hpb=8a8e674cb1dafc818ffea93d97e4c1c1f01fdbb6;p=safe%2Fjmp%2Flinux-2.6 diff --git a/block/as-iosched.c b/block/as-iosched.c index 02eb933..631f6f4 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1,7 +1,7 @@ /* * Anticipatory & deadline i/o scheduler. * - * Copyright (C) 2002 Jens Axboe + * Copyright (C) 2002 Jens Axboe * Nick Piggin * */ @@ -149,8 +149,9 @@ enum arq_state { #define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) #define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) -static atomic_t ioc_count = ATOMIC_INIT(0); +static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; +static DEFINE_SPINLOCK(ioc_gone_lock); static void as_move_to_dispatch(struct as_data *ad, struct request *rq); static void as_antic_stop(struct as_data *ad); @@ -163,15 +164,29 @@ static void as_antic_stop(struct as_data *ad); static void free_as_io_context(struct as_io_context *aic) { kfree(aic); - if (atomic_dec_and_test(&ioc_count) && ioc_gone) - complete(ioc_gone); + elv_ioc_count_dec(ioc_count); + if (ioc_gone) { + /* + * AS scheduler is exiting, grab exit lock and check + * the pending io context count. If it hits zero, + * complete ioc_gone and set it back to NULL. + */ + spin_lock(&ioc_gone_lock); + if (ioc_gone && !elv_ioc_count_read(ioc_count)) { + complete(ioc_gone); + ioc_gone = NULL; + } + spin_unlock(&ioc_gone_lock); + } } static void as_trim(struct io_context *ioc) { + spin_lock_irq(&ioc->lock); if (ioc->aic) free_as_io_context(ioc->aic); ioc->aic = NULL; + spin_unlock_irq(&ioc->lock); } /* Called when the task exits */ @@ -199,7 +214,7 @@ static struct as_io_context *alloc_as_io_context(void) ret->seek_total = 0; ret->seek_samples = 0; ret->seek_mean = 0; - atomic_inc(&ioc_count); + elv_ioc_count_inc(ioc_count); } return ret; @@ -209,9 +224,9 @@ static struct as_io_context *alloc_as_io_context(void) * If the current task has no AS IO context then create one and initialise it. * Then take a ref on the task's io context and return it. */ -static struct io_context *as_get_io_context(void) +static struct io_context *as_get_io_context(int node) { - struct io_context *ioc = get_io_context(GFP_ATOMIC); + struct io_context *ioc = get_io_context(GFP_ATOMIC, node); if (ioc && !ioc->aic) { ioc->aic = alloc_as_io_context(); if (!ioc->aic) { @@ -232,10 +247,12 @@ static void as_put_io_context(struct request *rq) aic = RQ_IOC(rq)->aic; if (rq_is_sync(rq) && aic) { - spin_lock(&aic->lock); + unsigned long flags; + + spin_lock_irqsave(&aic->lock, flags); set_bit(AS_TASK_IORUNNING, &aic->state); aic->last_end_request = jiffies; - spin_unlock(&aic->lock); + spin_unlock_irqrestore(&aic->lock, flags); } put_io_context(RQ_IOC(rq)); @@ -445,7 +462,7 @@ static void as_antic_stop(struct as_data *ad) del_timer(&ad->antic_timer); ad->antic_status = ANTIC_FINISHED; /* see as_work_handler */ - kblockd_schedule_work(&ad->antic_work); + kblockd_schedule_work(ad->q, &ad->antic_work); } } @@ -461,10 +478,12 @@ static void as_antic_timeout(unsigned long data) spin_lock_irqsave(q->queue_lock, flags); if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - struct as_io_context *aic = ad->io_context->aic; + struct as_io_context *aic; + spin_lock(&ad->io_context->lock); + aic = ad->io_context->aic; ad->antic_status = ANTIC_FINISHED; - kblockd_schedule_work(&ad->antic_work); + kblockd_schedule_work(q, &ad->antic_work); if (aic->ttime_samples == 0) { /* process anticipated on has exited or timed out*/ @@ -474,6 +493,7 @@ static void as_antic_timeout(unsigned long data) /* process not "saved" by a cooperating request */ ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; } + spin_unlock(&ad->io_context->lock); } spin_unlock_irqrestore(q->queue_lock, flags); } @@ -568,7 +588,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, static int as_close_req(struct as_data *ad, struct as_io_context *aic, struct request *rq) { - unsigned long delay; /* milliseconds */ + unsigned long delay; /* jiffies */ sector_t last = ad->last_sector[ad->batch_data_dir]; sector_t next = rq->sector; sector_t delta; /* acceptable close offset (in sectors) */ @@ -577,11 +597,11 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic, if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished) delay = 0; else - delay = ((jiffies - ad->antic_start) * 1000) / HZ; + delay = jiffies - ad->antic_start; if (delay == 0) delta = 8192; - else if (delay <= 20 && delay <= ad->antic_expire) + else if (delay <= (20 * HZ / 1000) && delay <= ad->antic_expire) delta = 8192 << delay; else return 1; @@ -634,9 +654,11 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) ioc = ad->io_context; BUG_ON(!ioc); + spin_lock(&ioc->lock); if (rq && ioc == RQ_IOC(rq)) { /* request from same process */ + spin_unlock(&ioc->lock); return 1; } @@ -645,20 +667,25 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) * In this situation status should really be FINISHED, * however the timer hasn't had the chance to run yet. */ + spin_unlock(&ioc->lock); return 1; } aic = ioc->aic; - if (!aic) + if (!aic) { + spin_unlock(&ioc->lock); return 0; + } if (atomic_read(&aic->nr_queued) > 0) { /* process has more requests queued */ + spin_unlock(&ioc->lock); return 1; } if (atomic_read(&aic->nr_dispatched) > 0) { /* process has more requests dispatched */ + spin_unlock(&ioc->lock); return 1; } @@ -679,6 +706,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) } as_update_iohist(ad, aic, rq); + spin_unlock(&ioc->lock); return 1; } @@ -687,20 +715,27 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) if (aic->ttime_samples == 0) ad->exit_prob = (7*ad->exit_prob + 256)/8; - if (ad->exit_no_coop > 128) + if (ad->exit_no_coop > 128) { + spin_unlock(&ioc->lock); return 1; + } } if (aic->ttime_samples == 0) { - if (ad->new_ttime_mean > ad->antic_expire) + if (ad->new_ttime_mean > ad->antic_expire) { + spin_unlock(&ioc->lock); return 1; - if (ad->exit_prob * ad->exit_no_coop > 128*256) + } + if (ad->exit_prob * ad->exit_no_coop > 128*256) { + spin_unlock(&ioc->lock); return 1; + } } else if (aic->ttime_mean > ad->antic_expire) { /* the process thinks too much between requests */ + spin_unlock(&ioc->lock); return 1; } - + spin_unlock(&ioc->lock); return 0; } @@ -710,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) */ static int as_can_anticipate(struct as_data *ad, struct request *rq) { +#if 0 /* disable for now, we need to check tag level as well */ + /* + * SSD device without seek penalty, disable idling + */ + if (blk_queue_nonrot(ad->q)) axman + return 0; +#endif + if (!ad->io_context) /* * Last request submitted was a write @@ -795,20 +838,21 @@ static void update_write_batch(struct as_data *ad) * as_completed_request is to be called when a request has completed and * returned something to the requesting process, be it an error or data. */ -static void as_completed_request(request_queue_t *q, struct request *rq) +static void as_completed_request(struct request_queue *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; WARN_ON(!list_empty(&rq->queuelist)); if (RQ_STATE(rq) != AS_RQ_REMOVED) { - printk("rq->state %d\n", RQ_STATE(rq)); - WARN_ON(1); + WARN(1, "rq->state %d\n", RQ_STATE(rq)); goto out; } if (ad->changed_batch && ad->nr_dispatched == 1) { - kblockd_schedule_work(&ad->antic_work); + ad->current_batch_expires = jiffies + + ad->batch_expire[ad->batch_data_dir]; + kblockd_schedule_work(q, &ad->antic_work); ad->changed_batch = 0; if (ad->batch_data_dir == REQ_SYNC) @@ -852,7 +896,8 @@ out: * reference unless it replaces the request at somepart of the elevator * (ie. the dispatch queue) */ -static void as_remove_queued_request(request_queue_t *q, struct request *rq) +static void as_remove_queued_request(struct request_queue *q, + struct request *rq) { const int data_dir = rq_is_sync(rq); struct as_data *ad = q->elevator->elevator_data; @@ -878,7 +923,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) } /* - * as_fifo_expired returns 0 if there are no expired reads on the fifo, + * as_fifo_expired returns 0 if there are no expired requests on the fifo, * 1 otherwise. It is ratelimited so that we only perform the check once per * `fifo_expire' interval. Otherwise a large number of expired requests * would create a hopeless seekstorm. @@ -977,7 +1022,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) * read/write expire, batch expire, etc, and moves it to the dispatch * queue. Returns 1 if a request was found, 0 otherwise. */ -static int as_dispatch_request(request_queue_t *q, int force) +static int as_dispatch_request(struct request_queue *q, int force) { struct as_data *ad = q->elevator->elevator_data; const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); @@ -1095,7 +1140,8 @@ dispatch_writes: ad->batch_data_dir = REQ_ASYNC; ad->current_write_count = ad->write_batch_count; ad->write_batch_idled = 0; - rq = ad->next_rq[ad->batch_data_dir]; + rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next); + ad->last_check_fifo[REQ_ASYNC] = jiffies; goto dispatch_request; } @@ -1138,7 +1184,7 @@ fifo_expired: /* * add rq to rbtree and fifo */ -static void as_add_request(request_queue_t *q, struct request *rq) +static void as_add_request(struct request_queue *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; int data_dir; @@ -1147,7 +1193,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) data_dir = rq_is_sync(rq); - rq->elevator_private = as_get_io_context(); + rq->elevator_private = as_get_io_context(q->node); if (RQ_IOC(rq)) { as_update_iohist(ad, RQ_IOC(rq)->aic, rq); @@ -1157,7 +1203,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) as_add_rq_rb(ad, rq); /* - * set expire time (only used for reads) and add to fifo list + * set expire time and add to fifo list */ rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]); list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]); @@ -1166,7 +1212,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) RQ_SET_STATE(rq, AS_RQ_QUEUED); } -static void as_activate_request(request_queue_t *q, struct request *rq) +static void as_activate_request(struct request_queue *q, struct request *rq) { WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED); RQ_SET_STATE(rq, AS_RQ_REMOVED); @@ -1174,7 +1220,7 @@ static void as_activate_request(request_queue_t *q, struct request *rq) atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched); } -static void as_deactivate_request(request_queue_t *q, struct request *rq) +static void as_deactivate_request(struct request_queue *q, struct request *rq) { WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED); RQ_SET_STATE(rq, AS_RQ_DISPATCHED); @@ -1188,7 +1234,7 @@ static void as_deactivate_request(request_queue_t *q, struct request *rq) * is not empty - it is used in the block layer to check for plugging and * merging opportunities */ -static int as_queue_empty(request_queue_t *q) +static int as_queue_empty(struct request_queue *q) { struct as_data *ad = q->elevator->elevator_data; @@ -1197,7 +1243,7 @@ static int as_queue_empty(request_queue_t *q) } static int -as_merge(request_queue_t *q, struct request **req, struct bio *bio) +as_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); @@ -1215,7 +1261,8 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; } -static void as_merged_request(request_queue_t *q, struct request *req, int type) +static void as_merged_request(struct request_queue *q, struct request *req, + int type) { struct as_data *ad = q->elevator->elevator_data; @@ -1233,7 +1280,7 @@ static void as_merged_request(request_queue_t *q, struct request *req, int type) } } -static void as_merged_requests(request_queue_t *q, struct request *req, +static void as_merged_requests(struct request_queue *q, struct request *req, struct request *next) { /* @@ -1242,16 +1289,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req, */ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { - struct io_context *rioc = RQ_IOC(req); - struct io_context *nioc = RQ_IOC(next); - list_move(&req->queuelist, &next->queuelist); rq_set_fifo_time(req, rq_fifo_time(next)); - /* - * Don't copy here but swap, because when anext is - * removed below, it must contain the unused context - */ - swap_io_context(&rioc, &nioc); } } @@ -1273,25 +1312,25 @@ static void as_merged_requests(request_queue_t *q, struct request *req, * * FIXME! dispatch queue is not a queue at all! */ -static void as_work_handler(void *data) +static void as_work_handler(struct work_struct *work) { - struct request_queue *q = data; + struct as_data *ad = container_of(work, struct as_data, antic_work); + struct request_queue *q = ad->q; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (!as_queue_empty(q)) - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } -static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int as_may_queue(struct request_queue *q, int rw) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - ioc = as_get_io_context(); + ioc = as_get_io_context(q->node); if (ad->io_context == ioc) ret = ELV_MQUEUE_MUST; put_io_context(ioc); @@ -1300,12 +1339,12 @@ static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) return ret; } -static void as_exit_queue(elevator_t *e) +static void as_exit_queue(struct elevator_queue *e) { struct as_data *ad = e->elevator_data; del_timer_sync(&ad->antic_timer); - kblockd_flush(); + cancel_work_sync(&ad->antic_work); BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); @@ -1317,14 +1356,13 @@ static void as_exit_queue(elevator_t *e) /* * initialize elevator private data (as_data). */ -static void *as_init_queue(request_queue_t *q, elevator_t *e) +static void *as_init_queue(struct request_queue *q) { struct as_data *ad; - ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); + ad = kmalloc_node(sizeof(*ad), GFP_KERNEL | __GFP_ZERO, q->node); if (!ad) return NULL; - memset(ad, 0, sizeof(*ad)); ad->q = q; /* Identify what queue the data belongs to */ @@ -1332,7 +1370,7 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); - INIT_WORK(&ad->antic_work, as_work_handler, q); + INIT_WORK(&ad->antic_work, as_work_handler); INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); @@ -1371,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count) return count; } -static ssize_t est_time_show(elevator_t *e, char *page) +static ssize_t est_time_show(struct elevator_queue *e, char *page) { struct as_data *ad = e->elevator_data; int pos = 0; @@ -1389,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page) } #define SHOW_FUNCTION(__FUNC, __VAR) \ -static ssize_t __FUNC(elevator_t *e, char *page) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ struct as_data *ad = e->elevator_data; \ return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ @@ -1402,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ -static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ struct as_data *ad = e->elevator_data; \ int ret = as_var_store(__PTR, (page), count); \ @@ -1461,31 +1499,20 @@ static struct elevator_type iosched_as = { static int __init as_init(void) { - int ret; + elv_register(&iosched_as); - ret = elv_register(&iosched_as); - if (!ret) { - /* - * don't allow AS to get unregistered, since we would have - * to browse all tasks in the system and release their - * as_io_context first - */ - __module_get(THIS_MODULE); - return 0; - } - - return ret; + return 0; } static void __exit as_exit(void) { - DECLARE_COMPLETION(all_gone); + DECLARE_COMPLETION_ONSTACK(all_gone); elv_unregister(&iosched_as); ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (atomic_read(&ioc_count)) - wait_for_completion(ioc_gone); + if (elv_ioc_count_read(ioc_count)) + wait_for_completion(&all_gone); synchronize_rcu(); }