#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/interrupt.h>
struct as_rq *next_arq[2]; /* next in sort order */
sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
- struct hlist_head *hash; /* request hash */
unsigned long exit_prob; /* probability a task will exit while
being waited on */
unsigned long antic_expire;
};
-#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
-
/*
* per-request data.
*/
};
struct as_rq {
- /*
- * rbtree index, key is the starting offset
- */
- struct rb_node rb_node;
- sector_t rb_key;
-
+ /*
+ * The sort-tree node, fifo linkage, expiry time and sync flag are no
+ * longer stored here; the scheduler reads them from the request
+ * itself (rq->rb_node, rq->queuelist, rq_fifo_time(), rq_is_sync()).
+ */
+ /* request this data is attached to (rq->elevator_private points back) */
struct request *request;
struct io_context *io_context; /* The submitting task */
- /*
- * request hash, key is the ending offset (for back merge lookup)
- */
- struct hlist_node hash;
-
- /*
- * expire fifo
- */
- struct list_head fifo;
- unsigned long expires;
-
- unsigned int is_sync;
+ /* scheduler-side state, e.g. AS_RQ_PRESCHED, AS_RQ_NEW, AS_RQ_QUEUED */
enum arq_state state;
};
aic = arq->io_context->aic;
- if (arq->is_sync == REQ_SYNC && aic) {
+ if (rq_is_sync(arq->request) && aic) {
spin_lock(&aic->lock);
set_bit(AS_TASK_IORUNNING, &aic->state);
aic->last_end_request = jiffies;
}
/*
- * the back merge hash support functions
- */
-static const int as_hash_shift = 6;
-#define AS_HASH_BLOCK(sec) ((sec) >> 3)
-#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
-#define AS_HASH_ENTRIES (1 << as_hash_shift)
-#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-
-static inline void __as_del_arq_hash(struct as_rq *arq)
-{
- hlist_del_init(&arq->hash);
-}
-
-static inline void as_del_arq_hash(struct as_rq *arq)
-{
- if (!hlist_unhashed(&arq->hash))
- __as_del_arq_hash(arq);
-}
-
-static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
- struct request *rq = arq->request;
-
- BUG_ON(!hlist_unhashed(&arq->hash));
-
- hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
- struct request *rq = arq->request;
- struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
-
- if (hlist_unhashed(&arq->hash)) {
- WARN_ON(1);
- return;
- }
-
- if (&arq->hash != head->first) {
- hlist_del(&arq->hash);
- hlist_add_head(&arq->hash, head);
- }
-}
-
-static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
-{
- struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
- struct hlist_node *entry, *next;
- struct as_rq *arq;
-
- hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
- struct request *__rq = arq->request;
-
- BUG_ON(hlist_unhashed(&arq->hash));
-
- if (!rq_mergeable(__rq)) {
- as_del_arq_hash(arq);
- continue;
- }
-
- if (rq_hash_key(__rq) == offset)
- return __rq;
- }
-
- return NULL;
-}
-
-/*
* rb tree support functions
*/
-#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node)
-#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
-#define rq_rb_key(rq) (rq)->sector
-
-/*
- * as_find_first_arq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
-{
- struct rb_node *n = ad->sort_list[data_dir].rb_node;
-
- if (n == NULL)
- return NULL;
+#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))])
- for (;;) {
- if (n->rb_left == NULL)
- return rb_entry_arq(n);
-
- n = n->rb_left;
- }
-}
-
-/*
- * Add the request to the rb tree if it is unique. If there is an alias (an
- * existing request against the same sector), which can happen when using
- * direct IO, then return the alias.
- */
-static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
+/*
+ * Insert rq into the sort tree selected by rq_is_sync(rq) (RQ_RB_ROOT).
+ * Whenever elv_rb_add() hands back a non-NULL alias, that alias is passed
+ * to as_move_to_dispatch(), as_antic_stop() is called, and the insert is
+ * retried until elv_rb_add() returns NULL.
+ * NOTE(review): "alias" presumably means an already-queued request with
+ * the same sort key -- confirm against elv_rb_add().
+ */
+static void as_add_arq_rb(struct as_data *ad, struct request *rq)
{
- struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
- struct rb_node *parent = NULL;
- struct as_rq *__arq;
- struct request *rq = arq->request;
-
- arq->rb_key = rq_rb_key(rq);
-
- while (*p) {
- parent = *p;
- __arq = rb_entry_arq(parent);
-
- if (arq->rb_key < __arq->rb_key)
- p = &(*p)->rb_left;
- else if (arq->rb_key > __arq->rb_key)
- p = &(*p)->rb_right;
- else
- return __arq;
- }
+ struct request *alias;
- rb_link_node(&arq->rb_node, parent, p);
- rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
-
- return NULL;
-}
-
-static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
- struct as_rq *alias;
-
- while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
- as_move_to_dispatch(ad, alias);
+ while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
+ as_move_to_dispatch(ad, RQ_DATA(alias));
as_antic_stop(ad);
}
}
-static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
+/*
+ * Remove rq from the sort tree selected by rq_is_sync(rq) (RQ_RB_ROOT).
+ * The local empty-node check and WARN_ON of the removed code are gone.
+ * NOTE(review): presumably elv_rb_del() expects rq to be linked in the
+ * tree -- confirm.
+ */
+static inline void as_del_arq_rb(struct as_data *ad, struct request *rq)
{
- if (!RB_EMPTY_NODE(&arq->rb_node)) {
- WARN_ON(1);
- return;
- }
-
- rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
- RB_CLEAR_NODE(&arq->rb_node);
-}
-
-static struct request *
-as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
-{
- struct rb_node *n = ad->sort_list[data_dir].rb_node;
- struct as_rq *arq;
-
- while (n) {
- arq = rb_entry_arq(n);
-
- if (sector < arq->rb_key)
- n = n->rb_left;
- else if (sector > arq->rb_key)
- n = n->rb_right;
- else
- return arq->request;
- }
-
- return NULL;
+ elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
}
/*
if (arq2 == NULL)
return arq1;
- data_dir = arq1->is_sync;
+ data_dir = rq_is_sync(arq1->request);
last = ad->last_sector[data_dir];
s1 = arq1->request->sector;
s2 = arq2->request->sector;
- BUG_ON(data_dir != arq2->is_sync);
+ BUG_ON(data_dir != rq_is_sync(arq2->request));
/*
* Strict one way elevator _except_ in the case where we allow
* this with as_choose_req form the basis for how the scheduler chooses
* what request to process next. Anticipation works on top of this.
*/
-static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
+static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *arq)
{
- const int data_dir = last->is_sync;
- struct as_rq *ret;
+ struct request *last = arq->request;
struct rb_node *rbnext = rb_next(&last->rb_node);
struct rb_node *rbprev = rb_prev(&last->rb_node);
- struct as_rq *arq_next, *arq_prev;
+ struct as_rq *next = NULL, *prev = NULL;
- BUG_ON(!RB_EMPTY_NODE(&last->rb_node));
+ /* arq's request must still be linked in a sort tree */
+ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
if (rbprev)
- arq_prev = rb_entry_arq(rbprev);
- else
- arq_prev = NULL;
+ prev = RQ_DATA(rb_entry_rq(rbprev));
if (rbnext)
- arq_next = rb_entry_arq(rbnext);
+ next = RQ_DATA(rb_entry_rq(rbnext));
else {
- arq_next = as_find_first_arq(ad, data_dir);
- if (arq_next == last)
- arq_next = NULL;
- }
+ /*
+ * No successor: wrap around to the first node (rb_first) of the
+ * tree for this request's direction, unless that node is the
+ * request itself, in which case next stays NULL.
+ */
+ const int data_dir = rq_is_sync(last);
- ret = as_choose_req(ad, arq_next, arq_prev);
+ rbnext = rb_first(&ad->sort_list[data_dir]);
+ if (rbnext && rbnext != &last->rb_node)
+ next = RQ_DATA(rb_entry_rq(rbnext));
+ }
- return ret;
+ /* either neighbour may be NULL here; as_choose_req() makes the pick */
+ return as_choose_req(ad, next, prev);
}
/*
static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
struct request *rq)
{
- struct as_rq *arq = RQ_DATA(rq);
- int data_dir = arq->is_sync;
+ int data_dir = rq_is_sync(rq);
unsigned long thinktime = 0;
sector_t seek_dist;
return 1;
}
- if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
+ if (arq && rq_is_sync(arq->request) && as_close_req(ad, aic, arq)) {
/*
* Found a close request that is not one of ours.
*
}
/*
- * as_can_anticipate indicates weather we should either run arq
+ * as_can_anticipate indicates whether we should either run arq
* or keep anticipating a better request.
*/
static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
*/
static void as_update_arq(struct as_data *ad, struct as_rq *arq)
{
- const int data_dir = arq->is_sync;
+ const int data_dir = rq_is_sync(arq->request);
/* keep the next_arq cache up to date */
ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
* actually serviced. This should help devices with big TCQ windows
* and writeback caches
*/
- if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
+ if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
update_write_batch(ad);
ad->current_batch_expires = jiffies +
ad->batch_expire[REQ_SYNC];
static void as_remove_queued_request(request_queue_t *q, struct request *rq)
{
struct as_rq *arq = RQ_DATA(rq);
- const int data_dir = arq->is_sync;
+ const int data_dir = rq_is_sync(rq);
struct as_data *ad = q->elevator->elevator_data;
WARN_ON(arq->state != AS_RQ_QUEUED);
if (ad->next_arq[data_dir] == arq)
ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
- list_del_init(&arq->fifo);
- as_del_arq_hash(arq);
- as_del_arq_rb(ad, arq);
+ rq_fifo_clear(rq);
+ as_del_arq_rb(ad, rq);
}
/*
*/
static int as_fifo_expired(struct as_data *ad, int adir)
{
- struct as_rq *arq;
+ struct request *rq;
long delta_jif;
delta_jif = jiffies - ad->last_check_fifo[adir];
if (list_empty(&ad->fifo_list[adir]))
return 0;
- arq = list_entry_fifo(ad->fifo_list[adir].next);
+ rq = rq_entry_fifo(ad->fifo_list[adir].next);
- return time_after(jiffies, arq->expires);
+ return time_after(jiffies, rq_fifo_time(rq));
}
/*
static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
{
struct request *rq = arq->request;
- const int data_dir = arq->is_sync;
+ const int data_dir = rq_is_sync(rq);
- BUG_ON(!RB_EMPTY_NODE(&arq->rb_node));
+ BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
as_antic_stop(ad);
ad->antic_status = ANTIC_OFF;
ad->changed_batch = 1;
}
ad->batch_data_dir = REQ_SYNC;
- arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
+ arq = RQ_DATA(rq_entry_fifo(ad->fifo_list[REQ_SYNC].next));
ad->last_check_fifo[ad->batch_data_dir] = jiffies;
goto dispatch_request;
}
if (as_fifo_expired(ad, ad->batch_data_dir)) {
fifo_expired:
- arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
- BUG_ON(arq == NULL);
+ arq = RQ_DATA(rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next));
}
if (ad->changed_batch) {
arq->state = AS_RQ_NEW;
- if (rq_data_dir(arq->request) == READ
- || (arq->request->flags & REQ_RW_SYNC))
- arq->is_sync = 1;
- else
- arq->is_sync = 0;
- data_dir = arq->is_sync;
+ data_dir = rq_is_sync(rq);
arq->io_context = as_get_io_context();
atomic_inc(&arq->io_context->aic->nr_queued);
}
- as_add_arq_rb(ad, arq);
- if (rq_mergeable(arq->request))
- as_add_arq_hash(ad, arq);
+ as_add_arq_rb(ad, rq);
/*
* set expire time (only used for reads) and add to fifo list
*/
- arq->expires = jiffies + ad->fifo_expire[data_dir];
- list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
+ rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
+ list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
as_update_arq(ad, arq); /* keep state machine up to date */
arq->state = AS_RQ_QUEUED;
&& list_empty(&ad->fifo_list[REQ_SYNC]);
}
-static struct request *as_former_request(request_queue_t *q,
- struct request *rq)
-{
- struct as_rq *arq = RQ_DATA(rq);
- struct rb_node *rbprev = rb_prev(&arq->rb_node);
- struct request *ret = NULL;
-
- if (rbprev)
- ret = rb_entry_arq(rbprev)->request;
-
- return ret;
-}
-
-static struct request *as_latter_request(request_queue_t *q,
- struct request *rq)
-{
- struct as_rq *arq = RQ_DATA(rq);
- struct rb_node *rbnext = rb_next(&arq->rb_node);
- struct request *ret = NULL;
-
- if (rbnext)
- ret = rb_entry_arq(rbnext)->request;
-
- return ret;
-}
-
+/*
+ * Look for a queued request that bio can be front-merged into.
+ *
+ * Only the front-merge lookup remains; the back-merge hash lookup is
+ * removed. NOTE(review): back merges are presumably found outside this
+ * scheduler now -- confirm.
+ *
+ * Returns ELEVATOR_FRONT_MERGE with *req set to the candidate request, or
+ * ELEVATOR_NO_MERGE with *req left untouched.
+ */
static int
as_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
struct as_data *ad = q->elevator->elevator_data;
+ /* first sector past the end of bio, used as the lookup key below */
sector_t rb_key = bio->bi_sector + bio_sectors(bio);
struct request *__rq;
- int ret;
-
- /*
- * see if the merge hash can satisfy a back merge
- */
- __rq = as_find_arq_hash(ad, bio->bi_sector);
- if (__rq) {
- BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
- }
/*
* check for front merge
*/
- __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
- if (__rq) {
- BUG_ON(rb_key != rq_rb_key(__rq));
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_FRONT_MERGE;
- goto out;
- }
+ /*
+ * NOTE(review): the tree is indexed by bio_data_dir(bio) here, but
+ * requests are inserted using rq_is_sync() (RQ_RB_ROOT) -- confirm the
+ * two index spaces agree.
+ */
+ __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ *req = __rq;
+ return ELEVATOR_FRONT_MERGE;
}
return ELEVATOR_NO_MERGE;
-out:
- if (ret) {
- if (rq_mergeable(__rq))
- as_hot_arq_hash(ad, RQ_DATA(__rq));
- }
- *req = __rq;
- return ret;
}
-static void as_merged_request(request_queue_t *q, struct request *req)
+static void as_merged_request(request_queue_t *q, struct request *req, int type)
{
struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = RQ_DATA(req);
-
- /*
- * hash always needs to be repositioned, key is end sector
- */
- as_del_arq_hash(arq);
- as_add_arq_hash(ad, arq);
/*
* if the merge was a front merge, we need to reposition request
*/
- if (rq_rb_key(req) != arq->rb_key) {
- as_del_arq_rb(ad, arq);
- as_add_arq_rb(ad, arq);
+ if (type == ELEVATOR_FRONT_MERGE) {
+ as_del_arq_rb(ad, req);
+ as_add_arq_rb(ad, req);
/*
* Note! At this stage of this and the next function, our next
* request may not be optimal - eg the request may have "grown"
static void as_merged_requests(request_queue_t *q, struct request *req,
struct request *next)
{
- struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(req);
struct as_rq *anext = RQ_DATA(next);
BUG_ON(!anext);
/*
- * reposition arq (this is the merged request) in hash, and in rbtree
- * in case of a front merge
- */
- as_del_arq_hash(arq);
- as_add_arq_hash(ad, arq);
-
- if (rq_rb_key(req) != arq->rb_key) {
- as_del_arq_rb(ad, arq);
- as_add_arq_rb(ad, arq);
- }
-
- /*
* if anext expires before arq, assign its expire time to arq
* and move into anext position (anext will be deleted) in fifo
*/
- if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
- if (time_before(anext->expires, arq->expires)) {
- list_move(&arq->fifo, &anext->fifo);
- arq->expires = anext->expires;
+ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+ if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+ list_move(&req->queuelist, &next->queuelist);
+ rq_set_fifo_time(req, rq_fifo_time(next));
/*
* Don't copy here but swap, because when anext is
* removed below, it must contain the unused context
if (arq) {
memset(arq, 0, sizeof(*arq));
- RB_CLEAR_NODE(&arq->rb_node);
arq->request = rq;
arq->state = AS_RQ_PRESCHED;
arq->io_context = NULL;
- INIT_HLIST_NODE(&arq->hash);
- INIT_LIST_HEAD(&arq->fifo);
rq->elevator_private = arq;
return 0;
}
mempool_destroy(ad->arq_pool);
put_io_context(ad->io_context);
- kfree(ad->hash);
kfree(ad);
}
static void *as_init_queue(request_queue_t *q, elevator_t *e)
{
struct as_data *ad;
- int i;
if (!arq_pool)
return NULL;
ad->q = q; /* Identify what queue the data belongs to */
- ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
- GFP_KERNEL, q->node);
- if (!ad->hash) {
- kfree(ad);
- return NULL;
- }
-
ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
mempool_free_slab, arq_pool, q->node);
if (!ad->arq_pool) {
- kfree(ad->hash);
kfree(ad);
return NULL;
}
init_timer(&ad->antic_timer);
INIT_WORK(&ad->antic_work, as_work_handler, q);
- for (i = 0; i < AS_HASH_ENTRIES; i++)
- INIT_HLIST_HEAD(&ad->hash[i]);
-
INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
ad->sort_list[REQ_SYNC] = RB_ROOT;
.elevator_deactivate_req_fn = as_deactivate_request,
.elevator_queue_empty_fn = as_queue_empty,
.elevator_completed_req_fn = as_completed_request,
- .elevator_former_req_fn = as_former_request,
- .elevator_latter_req_fn = as_latter_request,
+ .elevator_former_req_fn = elv_rb_former_request,
+ .elevator_latter_req_fn = elv_rb_latter_request,
.elevator_set_req_fn = as_set_request,
.elevator_put_req_fn = as_put_request,
.elevator_may_queue_fn = as_may_queue,