[PATCH] elevator: move the backmerging logic into the elevator core
block/elevator.c
1 /*
2  *  Block device elevator/IO-scheduler.
3  *
4  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5  *
6  * 30042000 Jens Axboe <axboe@suse.de> :
7  *
8  * Split the elevator a bit so that it is possible to choose a different
9  * one or even write a new "plug in". There are three pieces:
10  * - elevator_fn, inserts a new request in the queue list
11  * - elevator_merge_fn, decides whether a new buffer can be merged with
12  *   an existing request
13  * - elevator_dequeue_fn, called when a request is taken off the active list
14  *
15  * 20082000 Dave Jones <davej@suse.de> :
16  * Removed tests for max-bomb-segments, which was breaking elvtune
17  *  when run without -bN
18  *
19  * Jens:
20  * - Rework again to work with bio instead of buffer_heads
21  * - lose the bi_dev comparisons, partition handling is correct now
22  * - completely modularize elevator setup and teardown
23  *
24  */
25 #include <linux/kernel.h>
26 #include <linux/fs.h>
27 #include <linux/blkdev.h>
28 #include <linux/elevator.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/delay.h>
35 #include <linux/blktrace_api.h>
36 #include <linux/hash.h>
37
38 #include <asm/uaccess.h>
39
40 static DEFINE_SPINLOCK(elv_list_lock);
41 static LIST_HEAD(elv_list);
42
43 /*
44  * Merge hash: requests are hashed on the sector they end at, for fast backmerge lookups.
45  */
46 static const int elv_hash_shift = 6;
47 #define ELV_HASH_BLOCK(sec)     ((sec) >> 3)
48 #define ELV_HASH_FN(sec)        (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49 #define ELV_HASH_ENTRIES        (1 << elv_hash_shift)
50 #define rq_hash_key(rq)         ((rq)->sector + (rq)->nr_sectors)
51 #define ELV_ON_HASH(rq)         (!hlist_unhashed(&(rq)->hash))
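/*
 * Editorial illustration (not part of elevator.c): the merge hash is
 * keyed on the sector a request *ends* at, so a bio that begins exactly
 * where a pending request ends lands in the same bucket.  With the
 * constants above there are 64 buckets (1 << 6), and the key is first
 * coarsened to 8-sector blocks before hashing.  For a request covering
 * sectors [100, 108) and a bio starting at sector 108:
 *
 *	rq->sector = 100;  rq->nr_sectors = 8;
 *	rq_hash_key(rq)              == 108                  (end sector)
 *	ELV_HASH_FN(rq_hash_key(rq)) == hash_long(108 >> 3, 6)
 *	ELV_HASH_FN(bio->bi_sector)  == hash_long(108 >> 3, 6)
 *
 * so a lookup on bio->bi_sector finds rq as a backmerge candidate.
 */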
52
53 /*
54  * can we safely merge with this request?
55  */
56 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
57 {
58         if (!rq_mergeable(rq))
59                 return 0;
60
61         /*
62          * different data direction or already started, don't merge
63          */
64         if (bio_data_dir(bio) != rq_data_dir(rq))
65                 return 0;
66
67         /*
68          * same device and no special stuff set, merge is ok
69          */
70         if (rq->rq_disk == bio->bi_bdev->bd_disk &&
71             !rq->waiting && !rq->special)
72                 return 1;
73
74         return 0;
75 }
76 EXPORT_SYMBOL(elv_rq_merge_ok);
77
78 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
79 {
80         int ret = ELEVATOR_NO_MERGE;
81
82         /*
83          * merging is allowed; check whether the bio is adjacent at either end
84          */
85         if (elv_rq_merge_ok(__rq, bio)) {
86                 if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
87                         ret = ELEVATOR_BACK_MERGE;
88                 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
89                         ret = ELEVATOR_FRONT_MERGE;
90         }
91
92         return ret;
93 }
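/*
 * Editorial illustration (not part of elevator.c): the two adjacency
 * tests above, with concrete numbers.  For a request covering sectors
 * [100, 108):
 *
 *	bio at sector 108:   100 + 8 == 108  ->  ELEVATOR_BACK_MERGE
 *	                     (bio is appended behind the request)
 *	4-sector bio at 96:  100 - 4 == 96   ->  ELEVATOR_FRONT_MERGE
 *	                     (bio is prepended in front of the request)
 *
 * Anything else is ELEVATOR_NO_MERGE.  Note that the hash added by this
 * patch can only produce back merges, since it is keyed on end sectors;
 * front merges come from the one-hit q->last_merge cache or from the
 * io scheduler itself.
 */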
94
95 static struct elevator_type *elevator_find(const char *name)
96 {
97         struct elevator_type *e = NULL;
98         struct list_head *entry;
99
100         list_for_each(entry, &elv_list) {
101                 struct elevator_type *__e;
102
103                 __e = list_entry(entry, struct elevator_type, list);
104
105                 if (!strcmp(__e->elevator_name, name)) {
106                         e = __e;
107                         break;
108                 }
109         }
110
111         return e;
112 }
113
114 static void elevator_put(struct elevator_type *e)
115 {
116         module_put(e->elevator_owner);
117 }
118
119 static struct elevator_type *elevator_get(const char *name)
120 {
121         struct elevator_type *e;
122
123         spin_lock_irq(&elv_list_lock);
124
125         e = elevator_find(name);
126         if (e && !try_module_get(e->elevator_owner))
127                 e = NULL;
128
129         spin_unlock_irq(&elv_list_lock);
130
131         return e;
132 }
133
134 static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
135 {
136         return eq->ops->elevator_init_fn(q, eq);
137 }
138
139 static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
140                            void *data)
141 {
142         q->elevator = eq;
143         eq->elevator_data = data;
144 }
145
146 static char chosen_elevator[16];
147
148 static int __init elevator_setup(char *str)
149 {
150         /*
151          * Be backwards-compatible with previous kernels, so users
152          * won't get the wrong elevator.
153          */
154         if (!strcmp(str, "as"))
155                 strcpy(chosen_elevator, "anticipatory");
156         else
157                 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
158         return 1;
159 }
160
161 __setup("elevator=", elevator_setup);
162
163 static struct kobj_type elv_ktype;
164
165 static elevator_t *elevator_alloc(struct elevator_type *e)
166 {
167         elevator_t *eq;
168         int i;
169
170         eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
171         if (unlikely(!eq))
172                 goto err;
173
174         memset(eq, 0, sizeof(*eq));
175         eq->ops = &e->ops;
176         eq->elevator_type = e;
177         kobject_init(&eq->kobj);
178         snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
179         eq->kobj.ktype = &elv_ktype;
180         mutex_init(&eq->sysfs_lock);
181
182         eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL);
183         if (!eq->hash)
184                 goto err;
185
186         for (i = 0; i < ELV_HASH_ENTRIES; i++)
187                 INIT_HLIST_HEAD(&eq->hash[i]);
188
189         return eq;
190 err:
191         kfree(eq);
192         elevator_put(e);
193         return NULL;
194 }
195
196 static void elevator_release(struct kobject *kobj)
197 {
198         elevator_t *e = container_of(kobj, elevator_t, kobj);
199
200         elevator_put(e->elevator_type);
201         kfree(e->hash);
202         kfree(e);
203 }
204
205 int elevator_init(request_queue_t *q, char *name)
206 {
207         struct elevator_type *e = NULL;
208         struct elevator_queue *eq;
209         int ret = 0;
210         void *data;
211
212         INIT_LIST_HEAD(&q->queue_head);
213         q->last_merge = NULL;
214         q->end_sector = 0;
215         q->boundary_rq = NULL;
216
217         if (name && !(e = elevator_get(name)))
218                 return -EINVAL;
219
220         if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
221                 printk("I/O scheduler %s not found\n", chosen_elevator);
222
223         if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
224                 printk("Default I/O scheduler not found, using no-op\n");
225                 e = elevator_get("noop");
226         }
227
228         eq = elevator_alloc(e);
229         if (!eq)
230                 return -ENOMEM;
231
232         data = elevator_init_queue(q, eq);
233         if (!data) {
234                 kobject_put(&eq->kobj);
235                 return -ENOMEM;
236         }
237
238         elevator_attach(q, eq, data);
239         return ret;
240 }
241
242 void elevator_exit(elevator_t *e)
243 {
244         mutex_lock(&e->sysfs_lock);
245         if (e->ops->elevator_exit_fn)
246                 e->ops->elevator_exit_fn(e);
247         e->ops = NULL;
248         mutex_unlock(&e->sysfs_lock);
249
250         kobject_put(&e->kobj);
251 }
252
253 static inline void __elv_rqhash_del(struct request *rq)
254 {
255         hlist_del_init(&rq->hash);
256 }
257
258 static void elv_rqhash_del(request_queue_t *q, struct request *rq)
259 {
260         if (ELV_ON_HASH(rq))
261                 __elv_rqhash_del(rq);
262 }
263
264 static void elv_rqhash_add(request_queue_t *q, struct request *rq)
265 {
266         elevator_t *e = q->elevator;
267
268         BUG_ON(ELV_ON_HASH(rq));
269         hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
270 }
271
272 static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
273 {
274         __elv_rqhash_del(rq);
275         elv_rqhash_add(q, rq);
276 }
277
278 static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
279 {
280         elevator_t *e = q->elevator;
281         struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
282         struct hlist_node *entry, *next;
283         struct request *rq;
284
285         hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
286                 BUG_ON(!ELV_ON_HASH(rq));
287
288                 if (unlikely(!rq_mergeable(rq))) {
289                         __elv_rqhash_del(rq);
290                         continue;
291                 }
292
293                 if (rq_hash_key(rq) == offset)
294                         return rq;
295         }
296
297         return NULL;
298 }
299
300 /*
301  * Insert rq into the dispatch queue of q, sort-inserted by sector but
302  * not crossing the current dispatch boundary.  Queue lock must be held
303  * on entry.  To be used by specific elevators.
304  */
305 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
306 {
307         sector_t boundary;
308         struct list_head *entry;
309
310         if (q->last_merge == rq)
311                 q->last_merge = NULL;
312
313         elv_rqhash_del(q, rq);
314
315         q->nr_sorted--;
316
317         boundary = q->end_sector;
318
319         list_for_each_prev(entry, &q->queue_head) {
320                 struct request *pos = list_entry_rq(entry);
321
322                 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
323                         break;
324                 if (rq->sector >= boundary) {
325                         if (pos->sector < boundary)
326                                 continue;
327                 } else {
328                         if (pos->sector >= boundary)
329                                 break;
330                 }
331                 if (rq->sector >= pos->sector)
332                         break;
333         }
334
335         list_add(&rq->queuelist, entry);
336 }
337
338 /*
339  * This should be in elevator.h, but that requires pulling in rq and q
340  */
341 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
342 {
343         if (q->last_merge == rq)
344                 q->last_merge = NULL;
345
346         elv_rqhash_del(q, rq);
347
348         q->nr_sorted--;
349
350         q->end_sector = rq_end_sector(rq);
351         q->boundary_rq = rq;
352         list_add_tail(&rq->queuelist, &q->queue_head);
353 }
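/*
 * Editorial illustration (not part of elevator.c): how an io scheduler's
 * ->elevator_dispatch_fn typically feeds requests to the dispatch queue
 * through the two helpers above.  A minimal sketch loosely modelled on
 * noop-iosched; "example_data" and "example_dispatch" are made-up names,
 * and the private queue is assumed to be a plain list_head kept in
 * elevator_data:
 *
 *	static int example_dispatch(request_queue_t *q, int force)
 *	{
 *		struct example_data *ed = q->elevator->elevator_data;
 *		struct request *rq;
 *
 *		if (list_empty(&ed->queue))
 *			return 0;
 *
 *		rq = list_entry(ed->queue.next, struct request, queuelist);
 *		list_del_init(&rq->queuelist);
 *		elv_dispatch_sort(q, rq);
 *		return 1;
 *	}
 *
 * A scheduler that wants strict FIFO ordering at the dispatch level can
 * call elv_dispatch_add_tail() instead of elv_dispatch_sort().
 */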
354
355 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
356 {
357         elevator_t *e = q->elevator;
358         struct request *__rq;
359         int ret;
360
361         /*
362          * First try one-hit cache.
363          */
364         if (q->last_merge) {
365                 ret = elv_try_merge(q->last_merge, bio);
366                 if (ret != ELEVATOR_NO_MERGE) {
367                         *req = q->last_merge;
368                         return ret;
369                 }
370         }
371
372         /*
373          * See if our hash lookup can find a potential backmerge.
374          */
375         __rq = elv_rqhash_find(q, bio->bi_sector);
376         if (__rq && elv_rq_merge_ok(__rq, bio)) {
377                 *req = __rq;
378                 return ELEVATOR_BACK_MERGE;
379         }
380
381         if (e->ops->elevator_merge_fn)
382                 return e->ops->elevator_merge_fn(q, req, bio);
383
384         return ELEVATOR_NO_MERGE;
385 }
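/*
 * Editorial illustration (not part of elevator.c): what the submission
 * path does with elv_merge()'s answer.  A grossly simplified sketch of
 * the caller's side; the real logic lives in __make_request() in
 * ll_rw_blk.c and additionally applies the queue's merge-size limits,
 * updates io accounting and tries to coalesce neighbouring requests:
 *
 *	struct request *req;
 *
 *	switch (elv_merge(q, &req, bio)) {
 *	case ELEVATOR_BACK_MERGE:
 *		req->biotail->bi_next = bio;
 *		req->biotail = bio;
 *		req->nr_sectors = req->hard_nr_sectors += bio_sectors(bio);
 *		elv_merged_request(q, req);
 *		break;
 *	case ELEVATOR_FRONT_MERGE:
 *		bio->bi_next = req->bio;
 *		req->bio = bio;
 *		req->sector = req->hard_sector = bio->bi_sector;
 *		req->nr_sectors = req->hard_nr_sectors += bio_sectors(bio);
 *		elv_merged_request(q, req);
 *		break;
 *	default:
 *		break;	(ELEVATOR_NO_MERGE: fall through to request allocation)
 *	}
 */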
386
387 void elv_merged_request(request_queue_t *q, struct request *rq)
388 {
389         elevator_t *e = q->elevator;
390
391         if (e->ops->elevator_merged_fn)
392                 e->ops->elevator_merged_fn(q, rq);
393
394         elv_rqhash_reposition(q, rq);
395
396         q->last_merge = rq;
397 }
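/*
 * Editorial note (not part of elevator.c): a back merge changes the
 * sector a request ends at, which is exactly what the merge hash is
 * keyed on, hence the reposition above.  E.g. if rq covered [100, 108)
 * it was hashed under key 108; after merging an 8-sector bio starting
 * at 108 it covers [100, 116) and must be rehashed under key 116.
 * Caching it in q->last_merge means the next contiguous bio (starting
 * at sector 116) merges without even touching the hash.
 */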
398
399 void elv_merge_requests(request_queue_t *q, struct request *rq,
400                              struct request *next)
401 {
402         elevator_t *e = q->elevator;
403
404         if (e->ops->elevator_merge_req_fn)
405                 e->ops->elevator_merge_req_fn(q, rq, next);
406
407         elv_rqhash_reposition(q, rq);
408         elv_rqhash_del(q, next);
409
410         q->nr_sorted--;
411         q->last_merge = rq;
412 }
413
414 void elv_requeue_request(request_queue_t *q, struct request *rq)
415 {
416         elevator_t *e = q->elevator;
417
418         /*
419          * it already went through dequeue, we need to decrement the
420          * in_flight count again
421          */
422         if (blk_account_rq(rq)) {
423                 q->in_flight--;
424                 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
425                         e->ops->elevator_deactivate_req_fn(q, rq);
426         }
427
428         rq->cmd_flags &= ~REQ_STARTED;
429
430         elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
431 }
432
433 static void elv_drain_elevator(request_queue_t *q)
434 {
435         static int printed;
436         while (q->elevator->ops->elevator_dispatch_fn(q, 1))
437                 ;
438         if (q->nr_sorted == 0)
439                 return;
440         if (printed++ < 10) {
441                 printk(KERN_ERR "%s: forced dispatching is broken "
442                        "(nr_sorted=%u), please report this\n",
443                        q->elevator->elevator_type->elevator_name, q->nr_sorted);
444         }
445 }
446
447 void elv_insert(request_queue_t *q, struct request *rq, int where)
448 {
449         struct list_head *pos;
450         unsigned ordseq;
451         int unplug_it = 1;
452
453         blk_add_trace_rq(q, rq, BLK_TA_INSERT);
454
455         rq->q = q;
456
457         switch (where) {
458         case ELEVATOR_INSERT_FRONT:
459                 rq->cmd_flags |= REQ_SOFTBARRIER;
460
461                 list_add(&rq->queuelist, &q->queue_head);
462                 break;
463
464         case ELEVATOR_INSERT_BACK:
465                 rq->cmd_flags |= REQ_SOFTBARRIER;
466                 elv_drain_elevator(q);
467                 list_add_tail(&rq->queuelist, &q->queue_head);
468                 /*
469                  * We kick the queue here for the following reasons.
470                  * - The elevator might have returned NULL previously
471                  *   to delay requests and be returning them now.  As the
472                  *   queue wasn't empty before this request, ll_rw_blk
473                  *   won't run the queue on return, resulting in a hang.
474                  * - Usually, back inserted requests won't be merged
475                  *   with anything.  There's no point in delaying queue
476                  *   processing.
477                  */
478                 blk_remove_plug(q);
479                 q->request_fn(q);
480                 break;
481
482         case ELEVATOR_INSERT_SORT:
483                 BUG_ON(!blk_fs_request(rq));
484                 rq->cmd_flags |= REQ_SORTED;
485                 q->nr_sorted++;
486                 if (rq_mergeable(rq)) {
487                         elv_rqhash_add(q, rq);
488                         if (!q->last_merge)
489                                 q->last_merge = rq;
490                 }
491
492                 /*
493                  * Some ioscheds (cfq) run q->request_fn directly, so
494                  * rq cannot be accessed after calling
495                  * elevator_add_req_fn.
496                  */
497                 q->elevator->ops->elevator_add_req_fn(q, rq);
498                 break;
499
500         case ELEVATOR_INSERT_REQUEUE:
501                 /*
502                  * If ordered flush isn't in progress, we do front
503                  * insertion; otherwise, requests should be requeued
504                  * in ordseq order.
505                  */
506                 rq->cmd_flags |= REQ_SOFTBARRIER;
507
508                 if (q->ordseq == 0) {
509                         list_add(&rq->queuelist, &q->queue_head);
510                         break;
511                 }
512
513                 ordseq = blk_ordered_req_seq(rq);
514
515                 list_for_each(pos, &q->queue_head) {
516                         struct request *pos_rq = list_entry_rq(pos);
517                         if (ordseq <= blk_ordered_req_seq(pos_rq))
518                                 break;
519                 }
520
521                 list_add_tail(&rq->queuelist, pos);
522                 /*
523                  * most requeues happen because of a busy condition, don't
524                  * force unplug of the queue for that case.
525                  */
526                 unplug_it = 0;
527                 break;
528
529         default:
530                 printk(KERN_ERR "%s: bad insertion point %d\n",
531                        __FUNCTION__, where);
532                 BUG();
533         }
534
535         if (unplug_it && blk_queue_plugged(q)) {
536                 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
537                         - q->in_flight;
538
539                 if (nrq >= q->unplug_thresh)
540                         __generic_unplug_device(q);
541         }
542 }
543
544 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
545                        int plug)
546 {
547         if (q->ordcolor)
548                 rq->cmd_flags |= REQ_ORDERED_COLOR;
549
550         if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
551                 /*
552                  * toggle ordered color
553                  */
554                 if (blk_barrier_rq(rq))
555                         q->ordcolor ^= 1;
556
557                 /*
558                  * barriers implicitly indicate back insertion
559                  */
560                 if (where == ELEVATOR_INSERT_SORT)
561                         where = ELEVATOR_INSERT_BACK;
562
563                 /*
564                  * this request is a scheduling boundary, update
565                  * end_sector
566                  */
567                 if (blk_fs_request(rq)) {
568                         q->end_sector = rq_end_sector(rq);
569                         q->boundary_rq = rq;
570                 }
571         } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
572                 where = ELEVATOR_INSERT_BACK;
573
574         if (plug)
575                 blk_plug_device(q);
576
577         elv_insert(q, rq, where);
578 }
579
580 void elv_add_request(request_queue_t *q, struct request *rq, int where,
581                      int plug)
582 {
583         unsigned long flags;
584
585         spin_lock_irqsave(q->queue_lock, flags);
586         __elv_add_request(q, rq, where, plug);
587         spin_unlock_irqrestore(q->queue_lock, flags);
588 }
589
590 static inline struct request *__elv_next_request(request_queue_t *q)
591 {
592         struct request *rq;
593
594         while (1) {
595                 while (!list_empty(&q->queue_head)) {
596                         rq = list_entry_rq(q->queue_head.next);
597                         if (blk_do_ordered(q, &rq))
598                                 return rq;
599                 }
600
601                 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
602                         return NULL;
603         }
604 }
605
606 struct request *elv_next_request(request_queue_t *q)
607 {
608         struct request *rq;
609         int ret;
610
611         while ((rq = __elv_next_request(q)) != NULL) {
612                 if (!(rq->cmd_flags & REQ_STARTED)) {
613                         elevator_t *e = q->elevator;
614
615                         /*
616                          * This is the first time the device driver
617                          * sees this request (possibly after
618                          * requeueing).  Notify IO scheduler.
619                          */
620                         if (blk_sorted_rq(rq) &&
621                             e->ops->elevator_activate_req_fn)
622                                 e->ops->elevator_activate_req_fn(q, rq);
623
624                         /*
625                          * just mark as started even if we don't start
626                          * it, a request that has been delayed should
627                          * not be passed by new incoming requests
628                          */
629                         rq->cmd_flags |= REQ_STARTED;
630                         blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
631                 }
632
633                 if (!q->boundary_rq || q->boundary_rq == rq) {
634                         q->end_sector = rq_end_sector(rq);
635                         q->boundary_rq = NULL;
636                 }
637
638                 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
639                         break;
640
641                 ret = q->prep_rq_fn(q, rq);
642                 if (ret == BLKPREP_OK) {
643                         break;
644                 } else if (ret == BLKPREP_DEFER) {
645                         /*
646                          * the request may have been (partially) prepped.
647                          * we need to keep this request in the front to
648                          * avoid resource deadlock.  REQ_STARTED will
649                          * prevent other fs requests from passing this one.
650                          */
651                         rq = NULL;
652                         break;
653                 } else if (ret == BLKPREP_KILL) {
654                         int nr_bytes = rq->hard_nr_sectors << 9;
655
656                         if (!nr_bytes)
657                                 nr_bytes = rq->data_len;
658
659                         blkdev_dequeue_request(rq);
660                         rq->cmd_flags |= REQ_QUIET;
661                         end_that_request_chunk(rq, 0, nr_bytes);
662                         end_that_request_last(rq, 0);
663                 } else {
664                         printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
665                                                                 ret);
666                         break;
667                 }
668         }
669
670         return rq;
671 }
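/*
 * Editorial illustration (not part of elevator.c): the BLKPREP_* return
 * values handled above come from a driver's prep_rq_fn, installed with
 * blk_queue_prep_rq().  A hypothetical sketch ("example_cmd" and
 * "example_prep_fn" are made-up names); it runs with the queue lock
 * held, hence GFP_ATOMIC, and sets REQ_DONTPREP so a requeued request
 * is not prepared twice:
 *
 *	static int example_prep_fn(request_queue_t *q, struct request *rq)
 *	{
 *		struct example_cmd *cmd;
 *
 *		cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
 *		if (!cmd)
 *			return BLKPREP_DEFER;	(out of resources, retry later)
 *
 *		rq->special = cmd;
 *		rq->cmd_flags |= REQ_DONTPREP;
 *		return BLKPREP_OK;
 *	}
 *
 * Returning BLKPREP_KILL instead makes elv_next_request() complete the
 * request with an error, as in the branch above.
 */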
672
673 void elv_dequeue_request(request_queue_t *q, struct request *rq)
674 {
675         BUG_ON(list_empty(&rq->queuelist));
676         BUG_ON(ELV_ON_HASH(rq));
677
678         list_del_init(&rq->queuelist);
679
680         /*
681          * the time frame between a request being removed from the lists
682          * and the time it is freed is accounted as io in progress on
683          * the driver side.
684          */
685         if (blk_account_rq(rq))
686                 q->in_flight++;
687 }
688
689 int elv_queue_empty(request_queue_t *q)
690 {
691         elevator_t *e = q->elevator;
692
693         if (!list_empty(&q->queue_head))
694                 return 0;
695
696         if (e->ops->elevator_queue_empty_fn)
697                 return e->ops->elevator_queue_empty_fn(q);
698
699         return 1;
700 }
701
702 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
703 {
704         elevator_t *e = q->elevator;
705
706         if (e->ops->elevator_latter_req_fn)
707                 return e->ops->elevator_latter_req_fn(q, rq);
708         return NULL;
709 }
710
711 struct request *elv_former_request(request_queue_t *q, struct request *rq)
712 {
713         elevator_t *e = q->elevator;
714
715         if (e->ops->elevator_former_req_fn)
716                 return e->ops->elevator_former_req_fn(q, rq);
717         return NULL;
718 }
719
720 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
721                     gfp_t gfp_mask)
722 {
723         elevator_t *e = q->elevator;
724
725         if (e->ops->elevator_set_req_fn)
726                 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
727
728         rq->elevator_private = NULL;
729         return 0;
730 }
731
732 void elv_put_request(request_queue_t *q, struct request *rq)
733 {
734         elevator_t *e = q->elevator;
735
736         if (e->ops->elevator_put_req_fn)
737                 e->ops->elevator_put_req_fn(q, rq);
738 }
739
740 int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
741 {
742         elevator_t *e = q->elevator;
743
744         if (e->ops->elevator_may_queue_fn)
745                 return e->ops->elevator_may_queue_fn(q, rw, bio);
746
747         return ELV_MQUEUE_MAY;
748 }
749
750 void elv_completed_request(request_queue_t *q, struct request *rq)
751 {
752         elevator_t *e = q->elevator;
753
754         /*
755          * request is released from the driver, io must be done
756          */
757         if (blk_account_rq(rq)) {
758                 q->in_flight--;
759                 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
760                         e->ops->elevator_completed_req_fn(q, rq);
761         }
762
763         /*
764          * Check if the queue is waiting for fs requests to be
765          * drained for the flush sequence.
766          */
767         if (unlikely(q->ordseq)) {
768                 struct request *first_rq = list_entry_rq(q->queue_head.next);
769                 if (q->in_flight == 0 &&
770                     blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
771                     blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
772                         blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
773                         q->request_fn(q);
774                 }
775         }
776 }
777
778 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
779
780 static ssize_t
781 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
782 {
783         elevator_t *e = container_of(kobj, elevator_t, kobj);
784         struct elv_fs_entry *entry = to_elv(attr);
785         ssize_t error;
786
787         if (!entry->show)
788                 return -EIO;
789
790         mutex_lock(&e->sysfs_lock);
791         error = e->ops ? entry->show(e, page) : -ENOENT;
792         mutex_unlock(&e->sysfs_lock);
793         return error;
794 }
795
796 static ssize_t
797 elv_attr_store(struct kobject *kobj, struct attribute *attr,
798                const char *page, size_t length)
799 {
800         elevator_t *e = container_of(kobj, elevator_t, kobj);
801         struct elv_fs_entry *entry = to_elv(attr);
802         ssize_t error;
803
804         if (!entry->store)
805                 return -EIO;
806
807         mutex_lock(&e->sysfs_lock);
808         error = e->ops ? entry->store(e, page, length) : -ENOENT;
809         mutex_unlock(&e->sysfs_lock);
810         return error;
811 }
812
813 static struct sysfs_ops elv_sysfs_ops = {
814         .show   = elv_attr_show,
815         .store  = elv_attr_store,
816 };
817
818 static struct kobj_type elv_ktype = {
819         .sysfs_ops      = &elv_sysfs_ops,
820         .release        = elevator_release,
821 };
822
823 int elv_register_queue(struct request_queue *q)
824 {
825         elevator_t *e = q->elevator;
826         int error;
827
828         e->kobj.parent = &q->kobj;
829
830         error = kobject_add(&e->kobj);
831         if (!error) {
832                 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
833                 if (attr) {
834                         while (attr->attr.name) {
835                                 if (sysfs_create_file(&e->kobj, &attr->attr))
836                                         break;
837                                 attr++;
838                         }
839                 }
840                 kobject_uevent(&e->kobj, KOBJ_ADD);
841         }
842         return error;
843 }
844
845 static void __elv_unregister_queue(elevator_t *e)
846 {
847         kobject_uevent(&e->kobj, KOBJ_REMOVE);
848         kobject_del(&e->kobj);
849 }
850
851 void elv_unregister_queue(struct request_queue *q)
852 {
853         if (q)
854                 __elv_unregister_queue(q->elevator);
855 }
856
857 int elv_register(struct elevator_type *e)
858 {
859         spin_lock_irq(&elv_list_lock);
860         BUG_ON(elevator_find(e->elevator_name));
861         list_add_tail(&e->list, &elv_list);
862         spin_unlock_irq(&elv_list_lock);
863
864         printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
865         if (!strcmp(e->elevator_name, chosen_elevator) ||
866                         (!*chosen_elevator &&
867                          !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
868                                 printk(" (default)");
869         printk("\n");
870         return 0;
871 }
872 EXPORT_SYMBOL_GPL(elv_register);
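/*
 * Editorial illustration (not part of elevator.c): how an io scheduler
 * module registers itself with the functions above.  Loosely modelled
 * on noop-iosched; all "example_*" names are made up, and only a subset
 * of the elevator_ops hooks is shown:
 *
 *	static struct elevator_type iosched_example = {
 *		.ops = {
 *			.elevator_merge_req_fn	 = example_merged_requests,
 *			.elevator_dispatch_fn	 = example_dispatch,
 *			.elevator_add_req_fn	 = example_add_request,
 *			.elevator_queue_empty_fn = example_queue_empty,
 *			.elevator_init_fn	 = example_init_queue,
 *			.elevator_exit_fn	 = example_exit_queue,
 *		},
 *		.elevator_name	= "example",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return elv_register(&iosched_example);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		elv_unregister(&iosched_example);
 *	}
 *
 *	module_init(example_init);
 *	module_exit(example_exit);
 */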
873
874 void elv_unregister(struct elevator_type *e)
875 {
876         struct task_struct *g, *p;
877
878         /*
879  * Iterate over every thread in the system and trim its io context.
880          */
881         if (e->ops.trim) {
882                 read_lock(&tasklist_lock);
883                 do_each_thread(g, p) {
884                         task_lock(p);
885                         if (p->io_context)
886                                 e->ops.trim(p->io_context);
887                         task_unlock(p);
888                 } while_each_thread(g, p);
889                 read_unlock(&tasklist_lock);
890         }
891
892         spin_lock_irq(&elv_list_lock);
893         list_del_init(&e->list);
894         spin_unlock_irq(&elv_list_lock);
895 }
896 EXPORT_SYMBOL_GPL(elv_unregister);
897
898 /*
899  * switch to new_e io scheduler. be careful not to introduce deadlocks -
900  * we don't free the old io scheduler before we have allocated what we
901  * need for the new one.  This way we have a chance of going back to the
902  * old one if the new one fails init for some reason.
903  */
904 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
905 {
906         elevator_t *old_elevator, *e;
907         void *data;
908
909         /*
910          * Allocate new elevator
911          */
912         e = elevator_alloc(new_e);
913         if (!e)
914                 return 0;
915
916         data = elevator_init_queue(q, e);
917         if (!data) {
918                 kobject_put(&e->kobj);
919                 return 0;
920         }
921
922         /*
923          * Turn on BYPASS and drain all requests w/ elevator private data
924          */
925         spin_lock_irq(q->queue_lock);
926
927         set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
928
929         elv_drain_elevator(q);
930
931         while (q->rq.elvpriv) {
932                 blk_remove_plug(q);
933                 q->request_fn(q);
934                 spin_unlock_irq(q->queue_lock);
935                 msleep(10);
936                 spin_lock_irq(q->queue_lock);
937                 elv_drain_elevator(q);
938         }
939
940         /*
941          * Remember old elevator.
942          */
943         old_elevator = q->elevator;
944
945         /*
946          * attach and start new elevator
947          */
948         elevator_attach(q, e, data);
949
950         spin_unlock_irq(q->queue_lock);
951
952         __elv_unregister_queue(old_elevator);
953
954         if (elv_register_queue(q))
955                 goto fail_register;
956
957         /*
958          * finally exit old elevator and turn off BYPASS.
959          */
960         elevator_exit(old_elevator);
961         clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
962         return 1;
963
964 fail_register:
965         /*
966          * switch failed, exit the new io scheduler and reattach the old
967          * one again (along with re-adding the sysfs dir)
968          */
969         elevator_exit(e);
970         q->elevator = old_elevator;
971         elv_register_queue(q);
972         clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
973         return 0;
974 }
975
976 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
977 {
978         char elevator_name[ELV_NAME_MAX];
979         size_t len;
980         struct elevator_type *e;
981
982         elevator_name[sizeof(elevator_name) - 1] = '\0';
983         strncpy(elevator_name, name, sizeof(elevator_name) - 1);
984         len = strlen(elevator_name);
985
986         if (len && elevator_name[len - 1] == '\n')
987                 elevator_name[len - 1] = '\0';
988
989         e = elevator_get(elevator_name);
990         if (!e) {
991                 printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
992                 return -EINVAL;
993         }
994
995         if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
996                 elevator_put(e);
997                 return count;
998         }
999
1000         if (!elevator_switch(q, e))
1001                 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
1002         return count;
1003 }
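/*
 * Editorial note (not part of elevator.c): this store routine backs the
 * per-queue "scheduler" sysfs attribute, so a switch is normally
 * triggered from userspace with something like
 *
 *	echo deadline > /sys/block/hda/queue/scheduler
 *
 * (device name chosen for illustration).  The trailing newline that echo
 * appends is what the strlen/'\n' handling above strips off.
 */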
1004
1005 ssize_t elv_iosched_show(request_queue_t *q, char *name)
1006 {
1007         elevator_t *e = q->elevator;
1008         struct elevator_type *elv = e->elevator_type;
1009         struct list_head *entry;
1010         int len = 0;
1011
1012         spin_lock_irq(q->queue_lock);
1013         list_for_each(entry, &elv_list) {
1014                 struct elevator_type *__e;
1015
1016                 __e = list_entry(entry, struct elevator_type, list);
1017                 if (!strcmp(elv->elevator_name, __e->elevator_name))
1018                         len += sprintf(name+len, "[%s] ", elv->elevator_name);
1019                 else
1020                         len += sprintf(name+len, "%s ", __e->elevator_name);
1021         }
1022         spin_unlock_irq(q->queue_lock);
1023
1024         len += sprintf(name+len, "\n");
1025         return len;
1026 }
1027
1028 EXPORT_SYMBOL(elv_dispatch_sort);
1029 EXPORT_SYMBOL(elv_add_request);
1030 EXPORT_SYMBOL(__elv_add_request);
1031 EXPORT_SYMBOL(elv_next_request);
1032 EXPORT_SYMBOL(elv_dequeue_request);
1033 EXPORT_SYMBOL(elv_queue_empty);
1034 EXPORT_SYMBOL(elevator_exit);
1035 EXPORT_SYMBOL(elevator_init);