[PATCH] elevator: move the backmerging logic into the elevator core
block/deadline-iosched.c
/*
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
                                           by the above parameters. For throughput. */
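
/*
 * These defaults are exposed as per-queue sysfs tunables once the scheduler
 * is active; read_expire and write_expire are shown and set in milliseconds
 * and converted to/from jiffies internally (see the sysfs show/store macros
 * at the end of this file). A minimal example, assuming the disk is sda:
 *
 *      echo deadline > /sys/block/sda/queue/scheduler
 *      echo 250 > /sys/block/sda/queue/iosched/read_expire
 *      echo 32 > /sys/block/sda/queue/iosched/fifo_batch
 */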

struct deadline_data {
        /*
         * run time data
         */

        /*
         * requests (deadline_rq s) are present on both sort_list and fifo_list
         */
        struct rb_root sort_list[2];
        struct list_head fifo_list[2];

        /*
         * next in sort order. read, write or both are NULL
         */
        struct deadline_rq *next_drq[2];
        unsigned int batching;          /* number of sequential requests made */
        sector_t last_sector;           /* head position */
        unsigned int starved;           /* times reads have starved writes */

        /*
         * settings that change how the i/o scheduler behaves
         */
        int fifo_expire[2];
        int fifo_batch;
        int writes_starved;
        int front_merges;

        mempool_t *drq_pool;
};

/*
 * per-request data.
 */
struct deadline_rq {
        /*
         * rbtree index, key is the starting offset
         */
        struct rb_node rb_node;
        sector_t rb_key;

        struct request *request;

        /*
         * expire fifo
         */
        struct list_head fifo;
        unsigned long expires;
};

static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);

static kmem_cache_t *drq_pool;

#define RQ_DATA(rq)     ((struct deadline_rq *) (rq)->elevator_private)

/*
 * rb tree support functions
 */
#define rb_entry_drq(node)      rb_entry((node), struct deadline_rq, rb_node)
#define DRQ_RB_ROOT(dd, drq)    (&(dd)->sort_list[rq_data_dir((drq)->request)])
#define rq_rb_key(rq)           (rq)->sector

static struct deadline_rq *
__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
{
        struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
        struct rb_node *parent = NULL;
        struct deadline_rq *__drq;

        while (*p) {
                parent = *p;
                __drq = rb_entry_drq(parent);

                if (drq->rb_key < __drq->rb_key)
                        p = &(*p)->rb_left;
                else if (drq->rb_key > __drq->rb_key)
                        p = &(*p)->rb_right;
                else
                        return __drq;
        }

        rb_link_node(&drq->rb_node, parent, p);
        return NULL;
}

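/*
 * Insert drq into the sort tree for its data direction. Requests are keyed
 * by starting sector, so an alias (a request with the same start sector) may
 * already be in the tree; in that case __deadline_add_drq_rb() returns the
 * existing node, which is moved straight to the dispatch queue, and the
 * insertion is retried.
 */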
static void
deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
{
        struct deadline_rq *__alias;

        drq->rb_key = rq_rb_key(drq->request);

retry:
        __alias = __deadline_add_drq_rb(dd, drq);
        if (!__alias) {
                rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
                return;
        }

        deadline_move_request(dd, __alias);
        goto retry;
}

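/*
 * Remove drq from the sort tree. If it was cached as the next request in
 * sort order, advance the cache to its rbtree successor so an ongoing batch
 * can continue from the following request.
 */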
static inline void
deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
{
        const int data_dir = rq_data_dir(drq->request);

        if (dd->next_drq[data_dir] == drq) {
                struct rb_node *rbnext = rb_next(&drq->rb_node);

                dd->next_drq[data_dir] = NULL;
                if (rbnext)
                        dd->next_drq[data_dir] = rb_entry_drq(rbnext);
        }

        BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
        rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
        RB_CLEAR_NODE(&drq->rb_node);
}

static struct request *
deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
{
        struct rb_node *n = dd->sort_list[data_dir].rb_node;
        struct deadline_rq *drq;

        while (n) {
                drq = rb_entry_drq(n);

                if (sector < drq->rb_key)
                        n = n->rb_left;
                else if (sector > drq->rb_key)
                        n = n->rb_right;
                else
                        return drq->request;
        }

        return NULL;
}

/*
 * deadline_find_first_drq finds the first (lowest sector numbered) request
 * for the specified data_dir. Used to sweep back to the start of the disk
 * (1-way elevator) after we process the last (highest sector) request.
 */
static struct deadline_rq *
deadline_find_first_drq(struct deadline_data *dd, int data_dir)
{
        struct rb_node *n = dd->sort_list[data_dir].rb_node;

        for (;;) {
                if (n->rb_left == NULL)
                        return rb_entry_drq(n);

                n = n->rb_left;
        }
}

/*
 * add drq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct deadline_rq *drq = RQ_DATA(rq);
        const int data_dir = rq_data_dir(drq->request);

        deadline_add_drq_rb(dd, drq);

        /*
         * set request expire time and add it to the fifo list
         */
        drq->expires = jiffies + dd->fifo_expire[data_dir];
        list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
        struct deadline_rq *drq = RQ_DATA(rq);
        struct deadline_data *dd = q->elevator->elevator_data;

        list_del_init(&drq->fifo);
        deadline_del_drq_rb(dd, drq);
}

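/*
 * Back merge candidates are now found by the elevator core (this is the
 * "backmerging logic" the patch title refers to), so the only scheduler
 * specific merge handled here is the front merge: look up a request whose
 * rb_key (start sector) equals the end sector of the incoming bio.
 */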
static int
deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct request *__rq;
        int ret;

        /*
         * check for front merge
         */
        if (dd->front_merges) {
                sector_t rb_key = bio->bi_sector + bio_sectors(bio);

                __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
                if (__rq) {
                        BUG_ON(rb_key != rq_rb_key(__rq));

                        if (elv_rq_merge_ok(__rq, bio)) {
                                ret = ELEVATOR_FRONT_MERGE;
                                goto out;
                        }
                }
        }

        return ELEVATOR_NO_MERGE;
out:
        *req = __rq;
        return ret;
}

static void deadline_merged_request(request_queue_t *q, struct request *req)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct deadline_rq *drq = RQ_DATA(req);

        /*
         * if the merge was a front merge, we need to reposition request
         */
        if (rq_rb_key(req) != drq->rb_key) {
                deadline_del_drq_rb(dd, drq);
                deadline_add_drq_rb(dd, drq);
        }
}

static void
deadline_merged_requests(request_queue_t *q, struct request *req,
                         struct request *next)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct deadline_rq *drq = RQ_DATA(req);
        struct deadline_rq *dnext = RQ_DATA(next);

        BUG_ON(!drq);
        BUG_ON(!dnext);

        if (rq_rb_key(req) != drq->rb_key) {
                deadline_del_drq_rb(dd, drq);
                deadline_add_drq_rb(dd, drq);
        }

        /*
         * if dnext expires before drq, assign its expire time to drq
         * and move into dnext position (dnext will be deleted) in fifo
         */
        if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
                if (time_before(dnext->expires, drq->expires)) {
                        list_move(&drq->fifo, &dnext->fifo);
                        drq->expires = dnext->expires;
                }
        }

        /*
         * kill knowledge of next, this one is a goner
         */
        deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
{
        request_queue_t *q = drq->request->q;

        deadline_remove_request(q, drq->request);
        elv_dispatch_add_tail(q, drq->request);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
{
        const int data_dir = rq_data_dir(drq->request);
        struct rb_node *rbnext = rb_next(&drq->rb_node);

        dd->next_drq[READ] = NULL;
        dd->next_drq[WRITE] = NULL;

        if (rbnext)
                dd->next_drq[data_dir] = rb_entry_drq(rbnext);

        dd->last_sector = drq->request->sector + drq->request->nr_sectors;

        /*
         * take it off the sort and fifo list, move
         * to dispatch queue
         */
        deadline_move_to_dispatch(dd, drq);
}

#define list_entry_fifo(ptr)    list_entry((ptr), struct deadline_rq, fifo)

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
        struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);

        /*
         * drq is expired!
         */
        if (time_after(jiffies, drq->expires))
                return 1;

        return 0;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
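/*
 * Selection order: keep an ongoing batch going while it stays sequential and
 * has issued fewer than fifo_batch requests. Otherwise pick a data direction
 * (reads are preferred unless writes have been starved writes_starved
 * times), then dispatch the FIFO head if it has expired, else the next
 * request in sort order, else wrap back to the lowest sector and start a
 * new batch.
 */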
static int deadline_dispatch_requests(request_queue_t *q, int force)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        const int reads = !list_empty(&dd->fifo_list[READ]);
        const int writes = !list_empty(&dd->fifo_list[WRITE]);
        struct deadline_rq *drq;
        int data_dir;

        /*
         * batches are currently reads XOR writes
         */
        if (dd->next_drq[WRITE])
                drq = dd->next_drq[WRITE];
        else
                drq = dd->next_drq[READ];

        if (drq) {
                /* we have a "next request" */

                if (dd->last_sector != drq->request->sector)
                        /* end the batch on a non sequential request */
                        dd->batching += dd->fifo_batch;

                if (dd->batching < dd->fifo_batch)
                        /* we are still entitled to batch */
                        goto dispatch_request;
        }

        /*
         * at this point we are not running a batch. select the appropriate
         * data direction (read / write)
         */

        if (reads) {
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

                if (writes && (dd->starved++ >= dd->writes_starved))
                        goto dispatch_writes;

                data_dir = READ;

                goto dispatch_find_request;
        }

        /*
         * there are either no reads or writes have been starved
         */

        if (writes) {
dispatch_writes:
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

                dd->starved = 0;

                data_dir = WRITE;

                goto dispatch_find_request;
        }

        return 0;

dispatch_find_request:
        /*
         * we are not running a batch, find best request for selected data_dir
         */
        if (deadline_check_fifo(dd, data_dir)) {
                /* An expired request exists - satisfy it */
                dd->batching = 0;
                drq = list_entry_fifo(dd->fifo_list[data_dir].next);

        } else if (dd->next_drq[data_dir]) {
                /*
                 * The last req was the same dir and we have a next request in
                 * sort order. No expired requests so continue on from here.
                 */
                drq = dd->next_drq[data_dir];
        } else {
                /*
                 * The last req was the other direction or we have run out of
                 * higher-sectored requests. Go back to the lowest sectored
                 * request (1 way elevator) and start a new batch.
                 */
                dd->batching = 0;
                drq = deadline_find_first_drq(dd, data_dir);
        }

dispatch_request:
        /*
         * drq is the selected appropriate request.
         */
        dd->batching++;
        deadline_move_request(dd, drq);

        return 1;
}

static int deadline_queue_empty(request_queue_t *q)
{
        struct deadline_data *dd = q->elevator->elevator_data;

        return list_empty(&dd->fifo_list[WRITE])
                && list_empty(&dd->fifo_list[READ]);
}

static struct request *
deadline_former_request(request_queue_t *q, struct request *rq)
{
        struct deadline_rq *drq = RQ_DATA(rq);
        struct rb_node *rbprev = rb_prev(&drq->rb_node);

        if (rbprev)
                return rb_entry_drq(rbprev)->request;

        return NULL;
}

static struct request *
deadline_latter_request(request_queue_t *q, struct request *rq)
{
        struct deadline_rq *drq = RQ_DATA(rq);
        struct rb_node *rbnext = rb_next(&drq->rb_node);

        if (rbnext)
                return rb_entry_drq(rbnext)->request;

        return NULL;
}

static void deadline_exit_queue(elevator_t *e)
{
        struct deadline_data *dd = e->elevator_data;

        BUG_ON(!list_empty(&dd->fifo_list[READ]));
        BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

        mempool_destroy(dd->drq_pool);
        kfree(dd);
}

/*
 * initialize elevator private data (deadline_data) and create the mempool
 * used to allocate a drq for each request
 */
static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
{
        struct deadline_data *dd;

        if (!drq_pool)
                return NULL;

        dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
        if (!dd)
                return NULL;
        memset(dd, 0, sizeof(*dd));

        dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
                                        mempool_free_slab, drq_pool, q->node);
        if (!dd->drq_pool) {
                kfree(dd);
                return NULL;
        }

        INIT_LIST_HEAD(&dd->fifo_list[READ]);
        INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
        dd->sort_list[READ] = RB_ROOT;
        dd->sort_list[WRITE] = RB_ROOT;
        dd->fifo_expire[READ] = read_expire;
        dd->fifo_expire[WRITE] = write_expire;
        dd->writes_starved = writes_starved;
        dd->front_merges = 1;
        dd->fifo_batch = fifo_batch;
        return dd;
}

static void deadline_put_request(request_queue_t *q, struct request *rq)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct deadline_rq *drq = RQ_DATA(rq);

        mempool_free(drq, dd->drq_pool);
        rq->elevator_private = NULL;
}

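/*
 * Per-request setup/teardown: deadline_set_request() allocates a struct
 * deadline_rq from the queue's mempool and stores it in rq->elevator_private
 * when a request is created; deadline_put_request() above returns it to the
 * mempool when the request is freed.
 */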
static int
deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
                     gfp_t gfp_mask)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct deadline_rq *drq;

        drq = mempool_alloc(dd->drq_pool, gfp_mask);
        if (drq) {
                memset(drq, 0, sizeof(*drq));
                RB_CLEAR_NODE(&drq->rb_node);
                drq->request = rq;

                INIT_LIST_HEAD(&drq->fifo);

                rq->elevator_private = drq;
                return 0;
        }

        return 1;
}

/*
 * sysfs parts below
 */

static ssize_t
deadline_var_show(int var, char *page)
{
        return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
        char *p = (char *) page;

        *var = simple_strtol(p, &p, 10);
        return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)                            \
static ssize_t __FUNC(elevator_t *e, char *page)                        \
{                                                                       \
        struct deadline_data *dd = e->elevator_data;                    \
        int __data = __VAR;                                             \
        if (__CONV)                                                     \
                __data = jiffies_to_msecs(__data);                      \
        return deadline_var_show(__data, (page));                       \
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                 \
static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)    \
{                                                                       \
        struct deadline_data *dd = e->elevator_data;                    \
        int __data;                                                     \
        int ret = deadline_var_store(&__data, (page), count);           \
        if (__data < (MIN))                                             \
                __data = (MIN);                                         \
        else if (__data > (MAX))                                        \
                __data = (MAX);                                         \
        if (__CONV)                                                     \
                *(__PTR) = msecs_to_jiffies(__data);                    \
        else                                                            \
                *(__PTR) = __data;                                      \
        return ret;                                                     \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
                                      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
        DD_ATTR(read_expire),
        DD_ATTR(write_expire),
        DD_ATTR(writes_starved),
        DD_ATTR(front_merges),
        DD_ATTR(fifo_batch),
        __ATTR_NULL
};

static struct elevator_type iosched_deadline = {
        .ops = {
                .elevator_merge_fn =            deadline_merge,
                .elevator_merged_fn =           deadline_merged_request,
                .elevator_merge_req_fn =        deadline_merged_requests,
                .elevator_dispatch_fn =         deadline_dispatch_requests,
                .elevator_add_req_fn =          deadline_add_request,
                .elevator_queue_empty_fn =      deadline_queue_empty,
                .elevator_former_req_fn =       deadline_former_request,
                .elevator_latter_req_fn =       deadline_latter_request,
                .elevator_set_req_fn =          deadline_set_request,
                .elevator_put_req_fn =          deadline_put_request,
                .elevator_init_fn =             deadline_init_queue,
                .elevator_exit_fn =             deadline_exit_queue,
        },

        .elevator_attrs = deadline_attrs,
        .elevator_name = "deadline",
        .elevator_owner = THIS_MODULE,
};

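/*
 * Module init/exit: create the slab cache backing the per-queue drq mempools
 * and register the scheduler with the elevator core. Once registered it can
 * be selected per queue through sysfs, or system wide with the
 * "elevator=deadline" boot parameter.
 */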
static int __init deadline_init(void)
{
        int ret;

        drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
                                     0, 0, NULL, NULL);

        if (!drq_pool)
                return -ENOMEM;

        ret = elv_register(&iosched_deadline);
        if (ret)
                kmem_cache_destroy(drq_pool);

        return ret;
}

static void __exit deadline_exit(void)
{
        kmem_cache_destroy(drq_pool);
        elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");