[PATCH] uninline irq_enter()
/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>

#include <asm/irq.h>
/*
   - No shared variables; all data are CPU-local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution, so we get a sort of weak CPU binding. It is still not
     clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

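/* Registered softirq handlers, indexed by softirq number. */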
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for the pending events, so we let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        raw_local_irq_save(flags);
        add_preempt_count(SOFTIRQ_OFFSET);
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
        add_preempt_count(SOFTIRQ_OFFSET);
        barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
        __local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());

        /*
         * softirqs should never be enabled by __local_bh_enable(),
         * it always nests inside local_bh_enable() sections:
         */
        WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

        sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(!irqs_disabled());

        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

void local_bh_enable(void)
{
#ifdef CONFIG_TRACE_IRQFLAGS
        unsigned long flags;

        WARN_ON_ONCE(in_irq());
#endif
        WARN_ON_ONCE(irqs_disabled());

#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_save(flags);
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_restore(flags);
#endif
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
#ifdef CONFIG_TRACE_IRQFLAGS
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        local_irq_save(flags);
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_restore(flags);
#endif
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable_ip);

/*
 * We restart softirq processing at most MAX_SOFTIRQ_RESTART times,
 * and fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness:
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();
        account_system_vtime(current);

        __local_bh_disable((unsigned long)__builtin_return_address(0));
        trace_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
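        /*
         * More softirqs were raised while interrupts were enabled:
         * restart the loop, but at most MAX_SOFTIRQ_RESTART times
         * before handing the remainder over to ksoftirqd.
         */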
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        trace_softirq_exit();

        account_system_vtime(current);
        _local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
        account_system_vtime(current);
        add_preempt_count(HARDIRQ_OFFSET);
        trace_hardirq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
        preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

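/*
 * Raise a softirq from any context: interrupts are disabled locally,
 * since raise_softirq_irqoff() must be called with irqs off.
 */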
void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

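/*
 * Register the handler for softirq @nr.  There is no locking here, so
 * handlers are expected to be registered during initialization; see
 * softirq_init() below, e.g.
 *
 *      open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
 */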
void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

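                /*
                 * The tasklet is running on another CPU or has been
                 * disabled: put it back on this CPU's list and re-raise
                 * the softirq so it is retried on a later pass.
                 */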
                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


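/*
 * Initialize a tasklet at run time.  A minimal usage sketch (the names
 * below are placeholders, not part of this file):
 *
 *      tasklet_init(&t, my_func, (unsigned long)my_data);
 *      tasklet_schedule(&t);
 */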
void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

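        /*
         * Wait until the tasklet is no longer scheduled, then wait for
         * any currently running instance to finish before returning.
         */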
        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

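/*
 * Per-CPU softirq daemon: runs softirqs that could not be processed in
 * interrupt context (see wakeup_softirqd() above).
 */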
static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Disabling preemption stops the CPU from going
                           offline.  If it is already offline, we are on
                           the wrong CPU: don't process. */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which may already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

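/*
 * CPU hotplug callback: create the per-CPU ksoftirqd thread when a CPU is
 * being prepared, wake it once the CPU is online, and stop it (taking over
 * any pending tasklets) when the CPU is cancelled or goes dead.
 */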
static int __cpuinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             any_online_cpu(cpu_online_map));
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

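/*
 * Called early at boot: create and start ksoftirqd for the boot CPU, then
 * register the notifier so the same happens for CPUs brought up later.
 */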
__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

        BUG_ON(err == NOTIFY_BAD);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
        int ret = 0;

        preempt_disable();
        ret = smp_call_function(func, info, retry, wait);
        local_irq_disable();
        func(info);
        local_irq_enable();
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif