block: add blk-iopoll, a NAPI like approach for block devices
[safe/jmp/linux-2.6] / block / blk-iopoll.c
1 /*
2  * Functions related to interrupt-poll handling in the block layer. This
3  * is similar to NAPI for network devices.
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/init.h>
8 #include <linux/bio.h>
9 #include <linux/blkdev.h>
10 #include <linux/interrupt.h>
11 #include <linux/cpu.h>
12 #include <linux/blk-iopoll.h>
13 #include <linux/delay.h>
14
15 #include "blk.h"
16
17 int blk_iopoll_enabled = 1;
18 EXPORT_SYMBOL(blk_iopoll_enabled);
19
20 static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
21
22 /**
23  * blk_iopoll_sched - Schedule a run of the iopoll handler
24  * @iop:      The parent iopoll structure
25  *
26  * Description:
27  *     Add this blk_iopoll structure to the pending poll list and trigger the raise
28  *     of the blk iopoll softirq. The driver must already have gotten a succesful
29  *     return from blk_iopoll_sched_prep() before calling this.
30  **/
31 void blk_iopoll_sched(struct blk_iopoll *iop)
32 {
33         unsigned long flags;
34
35         local_irq_save(flags);
36         list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
37         __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
38         local_irq_restore(flags);
39 }
40 EXPORT_SYMBOL(blk_iopoll_sched);
41
42 /**
43  * __blk_iopoll_complete - Mark this @iop as un-polled again
44  * @iop:      The parent iopoll structure
45  *
46  * Description:
47  *     See blk_iopoll_complete(). This function must be called with interrupts disabled.
48  **/
49 void __blk_iopoll_complete(struct blk_iopoll *iop)
50 {
51         list_del(&iop->list);
52         smp_mb__before_clear_bit();
53         clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
54 }
55 EXPORT_SYMBOL(__blk_iopoll_complete);
56
/**
 * blk_iopoll_complete - Mark this @iopoll as un-polled again
 * @iopoll:   The parent iopoll structure
 *
 * Description:
 *     A driver that consumes less than the assigned budget in its run of
 *     the iopoll handler ends the polled mode by calling this function.
 *     The iopoll handler will not be invoked again before
 *     blk_iopoll_sched_prep() is called.
 **/
void blk_iopoll_complete(struct blk_iopoll *iopoll)
{
	unsigned long irq_state;

	/* __blk_iopoll_complete() must run with interrupts disabled. */
	local_irq_save(irq_state);
	__blk_iopoll_complete(iopoll);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(blk_iopoll_complete);
75
76 static void blk_iopoll_softirq(struct softirq_action *h)
77 {
78         struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
79         unsigned long start_time = jiffies;
80         int rearm = 0, budget = 64;
81
82         local_irq_disable();
83
84         while (!list_empty(list)) {
85                 struct blk_iopoll *iop;
86                 int work, weight;
87
88                 /*
89                  * If softirq window is exhausted then punt.
90                  */
91                 if (budget <= 0 || time_after(jiffies, start_time)) {
92                         rearm = 1;
93                         break;
94                 }
95
96                 local_irq_enable();
97
98                 /* Even though interrupts have been re-enabled, this
99                  * access is safe because interrupts can only add new
100                  * entries to the tail of this list, and only ->poll()
101                  * calls can remove this head entry from the list.
102                  */
103                 iop = list_entry(list->next, struct blk_iopoll, list);
104
105                 weight = iop->weight;
106                 work = 0;
107                 if (test_bit(IOPOLL_F_SCHED, &iop->state))
108                         work = iop->poll(iop, weight);
109
110                 budget -= work;
111
112                 local_irq_disable();
113
114                 /* Drivers must not modify the NAPI state if they
115                  * consume the entire weight.  In such cases this code
116                  * still "owns" the NAPI instance and therefore can
117                  * move the instance around on the list at-will.
118                  */
119                 if (work >= weight) {
120                         if (blk_iopoll_disable_pending(iop))
121                                 __blk_iopoll_complete(iop);
122                         else
123                                 list_move_tail(&iop->list, list);
124                 }
125         }
126
127         if (rearm)
128                 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
129
130         local_irq_enable();
131 }
132
133 /**
134  * blk_iopoll_disable - Disable iopoll on this @iop
135  * @iop:      The parent iopoll structure
136  *
137  * Description:
138  *     Disable io polling and wait for any pending callbacks to have completed.
139  **/
140 void blk_iopoll_disable(struct blk_iopoll *iop)
141 {
142         set_bit(IOPOLL_F_DISABLE, &iop->state);
143         while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
144                 msleep(1);
145         clear_bit(IOPOLL_F_DISABLE, &iop->state);
146 }
147 EXPORT_SYMBOL(blk_iopoll_disable);
148
149 /**
150  * blk_iopoll_enable - Enable iopoll on this @iop
151  * @iop:      The parent iopoll structure
152  *
153  * Description:
154  *     Enable iopoll on this @iop. Note that the handler run will not be scheduled, it
155  *     will only mark it as active.
156  **/
157 void blk_iopoll_enable(struct blk_iopoll *iop)
158 {
159         BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
160         smp_mb__before_clear_bit();
161         clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
162 }
163 EXPORT_SYMBOL(blk_iopoll_enable);
164
165 /**
166  * blk_iopoll_init - Initialize this @iop
167  * @iop:      The parent iopoll structure
168  * @weight:   The default weight (or command completion budget)
169  * @poll_fn:  The handler to invoke
170  *
171  * Description:
172  *     Initialize this blk_iopoll structure. Before being actively used, the driver
173  *     must call blk_iopoll_enable().
174  **/
175 void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
176 {
177         memset(iop, 0, sizeof(*iop));
178         INIT_LIST_HEAD(&iop->list);
179         iop->weight = weight;
180         iop->poll = poll_fn;
181         set_bit(IOPOLL_F_SCHED, &iop->state);
182 }
183 EXPORT_SYMBOL(blk_iopoll_init);
184
185 static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
186                                           unsigned long action, void *hcpu)
187 {
188         /*
189          * If a CPU goes away, splice its entries to the current CPU
190          * and trigger a run of the softirq
191          */
192         if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
193                 int cpu = (unsigned long) hcpu;
194
195                 local_irq_disable();
196                 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
197                                  &__get_cpu_var(blk_cpu_iopoll));
198                 raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
199                 local_irq_enable();
200         }
201
202         return NOTIFY_OK;
203 }
204
205 static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
206         .notifier_call  = blk_iopoll_cpu_notify,
207 };
208
209 static __init int blk_iopoll_setup(void)
210 {
211         int i;
212
213         for_each_possible_cpu(i)
214                 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
215
216         open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
217         register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
218         return 0;
219 }
220 subsys_initcall(blk_iopoll_setup);