X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fas-iosched.c;h=631f6f44460a2bd7ad7218f89d5e15bb90742ee6;hb=be987fdb55a4726e2fcbab7501f89276bdb57288;hp=02eb9333898f38fa829b345c68365e01f9f440a7;hpb=8a8e674cb1dafc818ffea93d97e4c1c1f01fdbb6;p=safe%2Fjmp%2Flinux-2.6

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 02eb933..631f6f4 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1,7 +1,7 @@
 /*
  *  Anticipatory & deadline i/o scheduler.
  *
- *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
  *                     Nick Piggin <nickpiggin@yahoo.com.au>
  *
  */
@@ -149,8 +149,9 @@ enum arq_state {
 #define RQ_STATE(rq)	((enum arq_state)(rq)->elevator_private2)
 #define RQ_SET_STATE(rq, state)	((rq)->elevator_private2 = (void *) state)
 
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
 
 static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
 static void as_antic_stop(struct as_data *ad);
@@ -163,15 +164,29 @@ static void as_antic_stop(struct as_data *ad);
 static void free_as_io_context(struct as_io_context *aic)
 {
 	kfree(aic);
-	if (atomic_dec_and_test(&ioc_count) && ioc_gone)
-		complete(ioc_gone);
+	elv_ioc_count_dec(ioc_count);
+	if (ioc_gone) {
+		/*
+		 * AS scheduler is exiting, grab exit lock and check
+		 * the pending io context count. If it hits zero,
+		 * complete ioc_gone and set it back to NULL.
+		 */
+		spin_lock(&ioc_gone_lock);
+		if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+			complete(ioc_gone);
+			ioc_gone = NULL;
+		}
+		spin_unlock(&ioc_gone_lock);
+	}
 }
 
 static void as_trim(struct io_context *ioc)
 {
+	spin_lock_irq(&ioc->lock);
 	if (ioc->aic)
 		free_as_io_context(ioc->aic);
 	ioc->aic = NULL;
+	spin_unlock_irq(&ioc->lock);
 }
 
 /* Called when the task exits */
@@ -199,7 +214,7 @@ static struct as_io_context *alloc_as_io_context(void)
 		ret->seek_total = 0;
 		ret->seek_samples = 0;
 		ret->seek_mean = 0;
-		atomic_inc(&ioc_count);
+		elv_ioc_count_inc(ioc_count);
 	}
 
 	return ret;
@@ -209,9 +224,9 @@ static struct as_io_context *alloc_as_io_context(void)
  * If the current task has no AS IO context then create one and initialise it.
  * Then take a ref on the task's io context and return it.
  */
-static struct io_context *as_get_io_context(void)
+static struct io_context *as_get_io_context(int node)
 {
-	struct io_context *ioc = get_io_context(GFP_ATOMIC);
+	struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
 	if (ioc && !ioc->aic) {
 		ioc->aic = alloc_as_io_context();
 		if (!ioc->aic) {
@@ -232,10 +247,12 @@ static void as_put_io_context(struct request *rq)
 	aic = RQ_IOC(rq)->aic;
 
 	if (rq_is_sync(rq) && aic) {
-		spin_lock(&aic->lock);
+		unsigned long flags;
+
+		spin_lock_irqsave(&aic->lock, flags);
 		set_bit(AS_TASK_IORUNNING, &aic->state);
 		aic->last_end_request = jiffies;
-		spin_unlock(&aic->lock);
+		spin_unlock_irqrestore(&aic->lock, flags);
 	}
 
 	put_io_context(RQ_IOC(rq));
@@ -445,7 +462,7 @@ static void as_antic_stop(struct as_data *ad)
 			del_timer(&ad->antic_timer);
 		ad->antic_status = ANTIC_FINISHED;
 		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
 	}
 }
 
@@ -461,10 +478,12 @@ static void as_antic_timeout(unsigned long data)
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (ad->antic_status == ANTIC_WAIT_REQ
 			|| ad->antic_status == ANTIC_WAIT_NEXT) {
-		struct as_io_context *aic = ad->io_context->aic;
+		struct as_io_context *aic;
+		spin_lock(&ad->io_context->lock);
+		aic = ad->io_context->aic;
 
 		ad->antic_status = ANTIC_FINISHED;
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 
 		if (aic->ttime_samples == 0) {
 			/* process anticipated on has exited or timed out*/
@@ -474,6 +493,7 @@ static void as_antic_timeout(unsigned long data)
 			/* process not "saved" by a cooperating request */
 			ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8;
 		}
+		spin_unlock(&ad->io_context->lock);
 	}
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -568,7 +588,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 static int as_close_req(struct as_data *ad, struct as_io_context *aic,
 			struct request *rq)
 {
-	unsigned long delay;	/* milliseconds */
+	unsigned long delay;	/* jiffies */
 	sector_t last = ad->last_sector[ad->batch_data_dir];
 	sector_t next = rq->sector;
 	sector_t delta; /* acceptable close offset (in sectors) */
@@ -577,11 +597,11 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
 	if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished)
 		delay = 0;
 	else
-		delay = ((jiffies - ad->antic_start) * 1000) / HZ;
+		delay = jiffies - ad->antic_start;
 
 	if (delay == 0)
 		delta = 8192;
-	else if (delay <= 20 && delay <= ad->antic_expire)
+	else if (delay <= (20 * HZ / 1000) && delay <= ad->antic_expire)
 		delta = 8192 << delay;
 	else
 		return 1;
@@ -634,9 +654,11 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 
 	ioc = ad->io_context;
 	BUG_ON(!ioc);
+	spin_lock(&ioc->lock);
 
 	if (rq && ioc == RQ_IOC(rq)) {
 		/* request from same process */
+		spin_unlock(&ioc->lock);
 		return 1;
 	}
 
@@ -645,20 +667,25 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 		 * In this situation status should really be FINISHED,
 		 * however the timer hasn't had the chance to run yet.
 		 */
+		spin_unlock(&ioc->lock);
 		return 1;
 	}
 
 	aic = ioc->aic;
-	if (!aic)
+	if (!aic) {
+		spin_unlock(&ioc->lock);
 		return 0;
+	}
 
 	if (atomic_read(&aic->nr_queued) > 0) {
 		/* process has more requests queued */
+		spin_unlock(&ioc->lock);
 		return 1;
 	}
 
 	if (atomic_read(&aic->nr_dispatched) > 0) {
 		/* process has more requests dispatched */
+		spin_unlock(&ioc->lock);
 		return 1;
 	}
 
@@ -679,6 +706,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 		}
 
 		as_update_iohist(ad, aic, rq);
+		spin_unlock(&ioc->lock);
 		return 1;
 	}
 
@@ -687,20 +715,27 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 		if (aic->ttime_samples == 0)
 			ad->exit_prob = (7*ad->exit_prob + 256)/8;
 
-		if (ad->exit_no_coop > 128)
+		if (ad->exit_no_coop > 128) {
+			spin_unlock(&ioc->lock);
 			return 1;
+		}
 	}
 
 	if (aic->ttime_samples == 0) {
-		if (ad->new_ttime_mean > ad->antic_expire)
+		if (ad->new_ttime_mean > ad->antic_expire) {
+			spin_unlock(&ioc->lock);
 			return 1;
-		if (ad->exit_prob * ad->exit_no_coop > 128*256)
+		}
+		if (ad->exit_prob * ad->exit_no_coop > 128*256) {
+			spin_unlock(&ioc->lock);
 			return 1;
+		}
 	} else if (aic->ttime_mean > ad->antic_expire) {
 		/* the process thinks too much between requests */
+		spin_unlock(&ioc->lock);
 		return 1;
 	}
-
+	spin_unlock(&ioc->lock);
 	return 0;
 }
 
@@ -710,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
  */
 static int as_can_anticipate(struct as_data *ad, struct request *rq)
 {
+#if 0 /* disable for now, we need to check tag level as well */
+	/*
+	 * SSD device without seek penalty, disable idling
+	 */
+	if (blk_queue_nonrot(ad->q))
+		return 0;
+#endif
+
 	if (!ad->io_context)
 		/*
 		 * Last request submitted was a write
@@ -795,20 +838,21 @@ static void update_write_batch(struct as_data *ad)
  * as_completed_request is to be called when a request has completed and
  * returned something to the requesting process, be it an error or data.
  */
-static void as_completed_request(request_queue_t *q, struct request *rq)
+static void as_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
 	WARN_ON(!list_empty(&rq->queuelist));
 
 	if (RQ_STATE(rq) != AS_RQ_REMOVED) {
-		printk("rq->state %d\n", RQ_STATE(rq));
-		WARN_ON(1);
+		WARN(1, "rq->state %d\n", RQ_STATE(rq));
 		goto out;
 	}
 
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
-		kblockd_schedule_work(&ad->antic_work);
+		ad->current_batch_expires = jiffies +
+					ad->batch_expire[ad->batch_data_dir];
+		kblockd_schedule_work(q, &ad->antic_work);
 		ad->changed_batch = 0;
 
 		if (ad->batch_data_dir == REQ_SYNC)
@@ -852,7 +896,8 @@ out:
  * reference unless it replaces the request at somepart of the elevator
  * (ie. the dispatch queue)
  */
-static void as_remove_queued_request(request_queue_t *q, struct request *rq)
+static void as_remove_queued_request(struct request_queue *q,
+				     struct request *rq)
 {
 	const int data_dir = rq_is_sync(rq);
 	struct as_data *ad = q->elevator->elevator_data;
@@ -878,7 +923,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 }
 
 /*
- * as_fifo_expired returns 0 if there are no expired reads on the fifo,
+ * as_fifo_expired returns 0 if there are no expired requests on the fifo,
  * 1 otherwise.  It is ratelimited so that we only perform the check once per
  * `fifo_expire' interval.  Otherwise a large number of expired requests
  * would create a hopeless seekstorm.
@@ -977,7 +1022,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
  * read/write expire, batch expire, etc, and moves it to the dispatch
  * queue. Returns 1 if a request was found, 0 otherwise.
  */
-static int as_dispatch_request(request_queue_t *q, int force)
+static int as_dispatch_request(struct request_queue *q, int force)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
@@ -1095,7 +1140,8 @@ dispatch_writes:
 		ad->batch_data_dir = REQ_ASYNC;
 		ad->current_write_count = ad->write_batch_count;
 		ad->write_batch_idled = 0;
-		rq = ad->next_rq[ad->batch_data_dir];
+		rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next);
+		ad->last_check_fifo[REQ_ASYNC] = jiffies;
 		goto dispatch_request;
 	}
 
@@ -1138,7 +1184,7 @@ fifo_expired:
 /*
  * add rq to rbtree and fifo
  */
-static void as_add_request(request_queue_t *q, struct request *rq)
+static void as_add_request(struct request_queue *q, struct request *rq)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	int data_dir;
@@ -1147,7 +1193,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 
 	data_dir = rq_is_sync(rq);
 
-	rq->elevator_private = as_get_io_context();
+	rq->elevator_private = as_get_io_context(q->node);
 
 	if (RQ_IOC(rq)) {
 		as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
@@ -1157,7 +1203,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 	as_add_rq_rb(ad, rq);
 
 	/*
-	 * set expire time (only used for reads) and add to fifo list
+	 * set expire time and add to fifo list
 	 */
 	rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
 	list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
@@ -1166,7 +1212,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 	RQ_SET_STATE(rq, AS_RQ_QUEUED);
 }
 
-static void as_activate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(struct request_queue *q, struct request *rq)
 {
 	WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
 	RQ_SET_STATE(rq, AS_RQ_REMOVED);
@@ -1174,7 +1220,7 @@ static void as_activate_request(request_queue_t *q, struct request *rq)
 		atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched);
 }
 
-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_deactivate_request(struct request_queue *q, struct request *rq)
 {
 	WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
 	RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
@@ -1188,7 +1234,7 @@ static void as_deactivate_request(request_queue_t *q, struct request *rq)
  * is not empty - it is used in the block layer to check for plugging and
  * merging opportunities
  */
-static int as_queue_empty(request_queue_t *q)
+static int as_queue_empty(struct request_queue *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
@@ -1197,7 +1243,7 @@ static int as_queue_empty(request_queue_t *q)
 }
 
 static int
-as_merge(request_queue_t *q, struct request **req, struct bio *bio)
+as_merge(struct request_queue *q, struct request **req, struct bio *bio)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
@@ -1215,7 +1261,8 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	return ELEVATOR_NO_MERGE;
 }
 
-static void as_merged_request(request_queue_t *q, struct request *req, int type)
+static void as_merged_request(struct request_queue *q, struct request *req,
+			      int type)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
@@ -1233,7 +1280,7 @@ static void as_merged_request(request_queue_t *q, struct request *req, int type)
 	}
 }
 
-static void as_merged_requests(request_queue_t *q, struct request *req,
+static void as_merged_requests(struct request_queue *q, struct request *req,
 			 	struct request *next)
 {
 	/*
@@ -1242,16 +1289,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
 	 */
 	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
 		if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
-			struct io_context *rioc = RQ_IOC(req);
-			struct io_context *nioc = RQ_IOC(next);
-
 			list_move(&req->queuelist, &next->queuelist);
 			rq_set_fifo_time(req, rq_fifo_time(next));
-			/*
-			 * Don't copy here but swap, because when anext is
-			 * removed below, it must contain the unused context
-			 */
-			swap_io_context(&rioc, &nioc);
 		}
 	}
 
@@ -1273,25 +1312,25 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
  *
  * FIXME! dispatch queue is not a queue at all!
  */
-static void as_work_handler(void *data)
+static void as_work_handler(struct work_struct *work)
 {
-	struct request_queue *q = data;
+	struct as_data *ad = container_of(work, struct as_data, antic_work);
+	struct request_queue *q = ad->q;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (!as_queue_empty(q))
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int as_may_queue(struct request_queue *q, int rw)
 {
 	int ret = ELV_MQUEUE_MAY;
 	struct as_data *ad = q->elevator->elevator_data;
 	struct io_context *ioc;
 	if (ad->antic_status == ANTIC_WAIT_REQ ||
 			ad->antic_status == ANTIC_WAIT_NEXT) {
-		ioc = as_get_io_context();
+		ioc = as_get_io_context(q->node);
 		if (ad->io_context == ioc)
 			ret = ELV_MQUEUE_MUST;
 		put_io_context(ioc);
@@ -1300,12 +1339,12 @@ static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
 	return ret;
 }
 
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
 {
 	struct as_data *ad = e->elevator_data;
 
 	del_timer_sync(&ad->antic_timer);
-	kblockd_flush();
+	cancel_work_sync(&ad->antic_work);
 
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
@@ -1317,14 +1356,13 @@ static void as_exit_queue(elevator_t *e)
 /*
  * initialize elevator private data (as_data).
  */
-static void *as_init_queue(request_queue_t *q, elevator_t *e)
+static void *as_init_queue(struct request_queue *q)
 {
 	struct as_data *ad;
 
-	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
+	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!ad)
 		return NULL;
-	memset(ad, 0, sizeof(*ad));
 
 	ad->q = q; /* Identify what queue the data belongs to */
 
@@ -1332,7 +1370,7 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 	ad->antic_timer.function = as_antic_timeout;
 	ad->antic_timer.data = (unsigned long)q;
 	init_timer(&ad->antic_timer);
-	INIT_WORK(&ad->antic_work, as_work_handler, q);
+	INIT_WORK(&ad->antic_work, as_work_handler);
 
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
@@ -1371,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
 	return count;
 }
 
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
 {
 	struct as_data *ad = e->elevator_data;
 	int pos = 0;
@@ -1389,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR)				\
-static ssize_t __FUNC(elevator_t *e, char *page)		\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)	\
 {								\
 	struct as_data *ad = e->elevator_data;			\
 	return as_var_show(jiffies_to_msecs((__VAR)), (page));	\
@@ -1402,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)				\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct as_data *ad = e->elevator_data;				\
 	int ret = as_var_store(__PTR, (page), count);			\
@@ -1461,31 +1499,20 @@ static struct elevator_type iosched_as = {
 
 static int __init as_init(void)
 {
-	int ret;
+	elv_register(&iosched_as);
 
-	ret = elv_register(&iosched_as);
-	if (!ret) {
-		/*
-		 * don't allow AS to get unregistered, since we would have
-		 * to browse all tasks in the system and release their
-		 * as_io_context first
-		 */
-		__module_get(THIS_MODULE);
-		return 0;
-	}
-
-	return ret;
+	return 0;
 }
 
 static void __exit as_exit(void)
 {
-	DECLARE_COMPLETION(all_gone);
+	DECLARE_COMPLETION_ONSTACK(all_gone);
 	elv_unregister(&iosched_as);
 	ioc_gone = &all_gone;
 	/* ioc_gone's update must be visible before reading ioc_count */
 	smp_wmb();
-	if (atomic_read(&ioc_count))
-		wait_for_completion(ioc_gone);
+	if (elv_ioc_count_read(ioc_count))
+		wait_for_completion(&all_gone);
 	synchronize_rcu();
 }