[PATCH] lockdep: irqtrace subsystem, core
[safe/jmp/linux-2.6] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7  */
8
9 #include <linux/module.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/interrupt.h>
12 #include <linux/init.h>
13 #include <linux/mm.h>
14 #include <linux/notifier.h>
15 #include <linux/percpu.h>
16 #include <linux/cpu.h>
17 #include <linux/kthread.h>
18 #include <linux/rcupdate.h>
19 #include <linux/smp.h>
20
21 #include <asm/irq.h>
/*
   - No shared variables; all data is CPU-local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence we get a sort of weak CPU binding, though it is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
39
/* Generic irq statistics array; only defined here when __ARCH_IRQ_STAT is not set. */
40 #ifndef __ARCH_IRQ_STAT
41 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
42 EXPORT_SYMBOL(irq_stat);
43 #endif
44
/* Softirq handler table: filled in by open_softirq(), walked by __do_softirq(). */
45 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
46
/* Per-CPU softirq daemon task: set/cleared in cpu_callback(), woken by wakeup_softirqd(). */
47 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48
49 /*
50  * we cannot loop indefinitely here to avoid userspace starvation,
51  * but we also don't want to introduce a worst case 1/HZ latency
52  * to the pending events, so lets the scheduler to balance
53  * the softirq load for us.
54  */
55 static inline void wakeup_softirqd(void)
56 {
57         /* Interrupts are disabled: no need to stop preemption */
58         struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59
60         if (tsk && tsk->state != TASK_RUNNING)
61                 wake_up_process(tsk);
62 }
63
64 /*
65  * This one is for softirq.c-internal use,
66  * where hardirqs are disabled legitimately:
67  */
68 static void __local_bh_disable(unsigned long ip)
69 {
70         unsigned long flags;
71
72         WARN_ON_ONCE(in_irq());
73
74         raw_local_irq_save(flags);
75         add_preempt_count(SOFTIRQ_OFFSET);
76         /*
77          * Were softirqs turned off above:
78          */
79         if (softirq_count() == SOFTIRQ_OFFSET)
80                 trace_softirqs_off(ip);
81         raw_local_irq_restore(flags);
82 }
83
/*
 * Disable softirq processing on this CPU, recording our caller's
 * address for the softirq tracing hook.
 */
void local_bh_disable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	__local_bh_disable(caller);
}

EXPORT_SYMBOL(local_bh_disable);
90
91 void __local_bh_enable(void)
92 {
93         WARN_ON_ONCE(in_irq());
94
95         /*
96          * softirqs should never be enabled by __local_bh_enable(),
97          * it always nests inside local_bh_enable() sections:
98          */
99         WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101         sub_preempt_count(SOFTIRQ_OFFSET);
102 }
103 EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105 /*
106  * Special-case - softirqs can safely be enabled in
107  * cond_resched_softirq(), or by __do_softirq(),
108  * without processing still-pending softirqs:
109  */
110 void _local_bh_enable(void)
111 {
112         WARN_ON_ONCE(in_irq());
113         WARN_ON_ONCE(!irqs_disabled());
114
115         if (softirq_count() == SOFTIRQ_OFFSET)
116                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117         sub_preempt_count(SOFTIRQ_OFFSET);
118 }
119
120 EXPORT_SYMBOL(_local_bh_enable);
121
122 void local_bh_enable(void)
123 {
124         unsigned long flags;
125
126         WARN_ON_ONCE(in_irq());
127         WARN_ON_ONCE(irqs_disabled());
128
129         local_irq_save(flags);
130         /*
131          * Are softirqs going to be turned on now:
132          */
133         if (softirq_count() == SOFTIRQ_OFFSET)
134                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135         /*
136          * Keep preemption disabled until we are done with
137          * softirq processing:
138          */
139         sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141         if (unlikely(!in_interrupt() && local_softirq_pending()))
142                 do_softirq();
143
144         dec_preempt_count();
145         local_irq_restore(flags);
146         preempt_check_resched();
147 }
148 EXPORT_SYMBOL(local_bh_enable);
149
150 void local_bh_enable_ip(unsigned long ip)
151 {
152         unsigned long flags;
153
154         WARN_ON_ONCE(in_irq());
155
156         local_irq_save(flags);
157         /*
158          * Are softirqs going to be turned on now:
159          */
160         if (softirq_count() == SOFTIRQ_OFFSET)
161                 trace_softirqs_on(ip);
162         /*
163          * Keep preemption disabled until we are done with
164          * softirq processing:
165          */
166         sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168         if (unlikely(!in_interrupt() && local_softirq_pending()))
169                 do_softirq();
170
171         dec_preempt_count();
172         local_irq_restore(flags);
173         preempt_check_resched();
174 }
175 EXPORT_SYMBOL(local_bh_enable_ip);
176
177 /*
178  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
179  * and we fall back to softirqd after that.
180  *
181  * This number has been established via experimentation.
182  * The two things to balance is latency against fairness -
183  * we want to handle softirqs as soon as possible, but they
184  * should not be able to lock up the box.
185  */
186 #define MAX_SOFTIRQ_RESTART 10
187
188 asmlinkage void __do_softirq(void)
189 {
190         struct softirq_action *h;
191         __u32 pending;
192         int max_restart = MAX_SOFTIRQ_RESTART;
193         int cpu;
194
195         pending = local_softirq_pending();
196         __local_bh_disable((unsigned long)__builtin_return_address(0));
197         trace_softirq_enter();
198
199         cpu = smp_processor_id();
200 restart:
201         /* Reset the pending bitmask before enabling irqs */
202         set_softirq_pending(0);
203
204         local_irq_enable();
205
206         h = softirq_vec;
207
208         do {
209                 if (pending & 1) {
210                         h->action(h);
211                         rcu_bh_qsctr_inc(cpu);
212                 }
213                 h++;
214                 pending >>= 1;
215         } while (pending);
216
217         local_irq_disable();
218
219         pending = local_softirq_pending();
220         if (pending && --max_restart)
221                 goto restart;
222
223         if (pending)
224                 wakeup_softirqd();
225
226         trace_softirq_exit();
227         _local_bh_enable();
228 }
229
230 #ifndef __ARCH_HAS_DO_SOFTIRQ
231
232 asmlinkage void do_softirq(void)
233 {
234         __u32 pending;
235         unsigned long flags;
236
237         if (in_interrupt())
238                 return;
239
240         local_irq_save(flags);
241
242         pending = local_softirq_pending();
243
244         if (pending)
245                 __do_softirq();
246
247         local_irq_restore(flags);
248 }
249
250 EXPORT_SYMBOL(do_softirq);
251
252 #endif
253
254 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
255 # define invoke_softirq()       __do_softirq()
256 #else
257 # define invoke_softirq()       do_softirq()
258 #endif
259
260 /*
261  * Exit an interrupt context. Process softirqs if needed and possible:
262  */
263 void irq_exit(void)
264 {
265         account_system_vtime(current);
266         trace_hardirq_exit();
267         sub_preempt_count(IRQ_EXIT_OFFSET);
268         if (!in_interrupt() && local_softirq_pending())
269                 invoke_softirq();
270         preempt_enable_no_resched();
271 }
272
273 /*
274  * This function must run with irqs disabled!
275  */
276 inline fastcall void raise_softirq_irqoff(unsigned int nr)
277 {
278         __raise_softirq_irqoff(nr);
279
280         /*
281          * If we're in an interrupt or softirq, we're done
282          * (this also catches softirq-disabled code). We will
283          * actually run the softirq once we return from
284          * the irq or softirq.
285          *
286          * Otherwise we wake up ksoftirqd to make sure we
287          * schedule the softirq soon.
288          */
289         if (!in_interrupt())
290                 wakeup_softirqd();
291 }
292
293 EXPORT_SYMBOL(raise_softirq_irqoff);
294
295 void fastcall raise_softirq(unsigned int nr)
296 {
297         unsigned long flags;
298
299         local_irq_save(flags);
300         raise_softirq_irqoff(nr);
301         local_irq_restore(flags);
302 }
303
304 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
305 {
306         softirq_vec[nr].data = data;
307         softirq_vec[nr].action = action;
308 }
309
310 EXPORT_SYMBOL(open_softirq);
311
312 /* Tasklets */
313 struct tasklet_head
314 {
315         struct tasklet_struct *list;
316 };
317
318 /* Some compilers disobey section attribute on statics when not
319    initialized -- RR */
320 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
321 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
322
323 void fastcall __tasklet_schedule(struct tasklet_struct *t)
324 {
325         unsigned long flags;
326
327         local_irq_save(flags);
328         t->next = __get_cpu_var(tasklet_vec).list;
329         __get_cpu_var(tasklet_vec).list = t;
330         raise_softirq_irqoff(TASKLET_SOFTIRQ);
331         local_irq_restore(flags);
332 }
333
334 EXPORT_SYMBOL(__tasklet_schedule);
335
336 void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
337 {
338         unsigned long flags;
339
340         local_irq_save(flags);
341         t->next = __get_cpu_var(tasklet_hi_vec).list;
342         __get_cpu_var(tasklet_hi_vec).list = t;
343         raise_softirq_irqoff(HI_SOFTIRQ);
344         local_irq_restore(flags);
345 }
346
347 EXPORT_SYMBOL(__tasklet_hi_schedule);
348
349 static void tasklet_action(struct softirq_action *a)
350 {
351         struct tasklet_struct *list;
352
353         local_irq_disable();
354         list = __get_cpu_var(tasklet_vec).list;
355         __get_cpu_var(tasklet_vec).list = NULL;
356         local_irq_enable();
357
358         while (list) {
359                 struct tasklet_struct *t = list;
360
361                 list = list->next;
362
363                 if (tasklet_trylock(t)) {
364                         if (!atomic_read(&t->count)) {
365                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
366                                         BUG();
367                                 t->func(t->data);
368                                 tasklet_unlock(t);
369                                 continue;
370                         }
371                         tasklet_unlock(t);
372                 }
373
374                 local_irq_disable();
375                 t->next = __get_cpu_var(tasklet_vec).list;
376                 __get_cpu_var(tasklet_vec).list = t;
377                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
378                 local_irq_enable();
379         }
380 }
381
382 static void tasklet_hi_action(struct softirq_action *a)
383 {
384         struct tasklet_struct *list;
385
386         local_irq_disable();
387         list = __get_cpu_var(tasklet_hi_vec).list;
388         __get_cpu_var(tasklet_hi_vec).list = NULL;
389         local_irq_enable();
390
391         while (list) {
392                 struct tasklet_struct *t = list;
393
394                 list = list->next;
395
396                 if (tasklet_trylock(t)) {
397                         if (!atomic_read(&t->count)) {
398                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
399                                         BUG();
400                                 t->func(t->data);
401                                 tasklet_unlock(t);
402                                 continue;
403                         }
404                         tasklet_unlock(t);
405                 }
406
407                 local_irq_disable();
408                 t->next = __get_cpu_var(tasklet_hi_vec).list;
409                 __get_cpu_var(tasklet_hi_vec).list = t;
410                 __raise_softirq_irqoff(HI_SOFTIRQ);
411                 local_irq_enable();
412         }
413 }
414
415
416 void tasklet_init(struct tasklet_struct *t,
417                   void (*func)(unsigned long), unsigned long data)
418 {
419         t->next = NULL;
420         t->state = 0;
421         atomic_set(&t->count, 0);
422         t->func = func;
423         t->data = data;
424 }
425
426 EXPORT_SYMBOL(tasklet_init);
427
428 void tasklet_kill(struct tasklet_struct *t)
429 {
430         if (in_interrupt())
431                 printk("Attempt to kill tasklet from interrupt\n");
432
433         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
434                 do
435                         yield();
436                 while (test_bit(TASKLET_STATE_SCHED, &t->state));
437         }
438         tasklet_unlock_wait(t);
439         clear_bit(TASKLET_STATE_SCHED, &t->state);
440 }
441
442 EXPORT_SYMBOL(tasklet_kill);
443
444 void __init softirq_init(void)
445 {
446         open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
447         open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
448 }
449
450 static int ksoftirqd(void * __bind_cpu)
451 {
452         set_user_nice(current, 19);
453         current->flags |= PF_NOFREEZE;
454
455         set_current_state(TASK_INTERRUPTIBLE);
456
457         while (!kthread_should_stop()) {
458                 preempt_disable();
459                 if (!local_softirq_pending()) {
460                         preempt_enable_no_resched();
461                         schedule();
462                         preempt_disable();
463                 }
464
465                 __set_current_state(TASK_RUNNING);
466
467                 while (local_softirq_pending()) {
468                         /* Preempt disable stops cpu going offline.
469                            If already offline, we'll be on wrong CPU:
470                            don't process */
471                         if (cpu_is_offline((long)__bind_cpu))
472                                 goto wait_to_die;
473                         do_softirq();
474                         preempt_enable_no_resched();
475                         cond_resched();
476                         preempt_disable();
477                 }
478                 preempt_enable();
479                 set_current_state(TASK_INTERRUPTIBLE);
480         }
481         __set_current_state(TASK_RUNNING);
482         return 0;
483
484 wait_to_die:
485         preempt_enable();
486         /* Wait for kthread_stop */
487         set_current_state(TASK_INTERRUPTIBLE);
488         while (!kthread_should_stop()) {
489                 schedule();
490                 set_current_state(TASK_INTERRUPTIBLE);
491         }
492         __set_current_state(TASK_RUNNING);
493         return 0;
494 }
495
496 #ifdef CONFIG_HOTPLUG_CPU
497 /*
498  * tasklet_kill_immediate is called to remove a tasklet which can already be
499  * scheduled for execution on @cpu.
500  *
501  * Unlike tasklet_kill, this function removes the tasklet
502  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
503  *
504  * When this function is called, @cpu must be in the CPU_DEAD state.
505  */
506 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
507 {
508         struct tasklet_struct **i;
509
510         BUG_ON(cpu_online(cpu));
511         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
512
513         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
514                 return;
515
516         /* CPU is dead, so no lock needed. */
517         for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
518                 if (*i == t) {
519                         *i = t->next;
520                         return;
521                 }
522         }
523         BUG();
524 }
525
526 static void takeover_tasklets(unsigned int cpu)
527 {
528         struct tasklet_struct **i;
529
530         /* CPU is dead, so no lock needed. */
531         local_irq_disable();
532
533         /* Find end, append list for that CPU. */
534         for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
535         *i = per_cpu(tasklet_vec, cpu).list;
536         per_cpu(tasklet_vec, cpu).list = NULL;
537         raise_softirq_irqoff(TASKLET_SOFTIRQ);
538
539         for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
540         *i = per_cpu(tasklet_hi_vec, cpu).list;
541         per_cpu(tasklet_hi_vec, cpu).list = NULL;
542         raise_softirq_irqoff(HI_SOFTIRQ);
543
544         local_irq_enable();
545 }
546 #endif /* CONFIG_HOTPLUG_CPU */
547
548 static int __devinit cpu_callback(struct notifier_block *nfb,
549                                   unsigned long action,
550                                   void *hcpu)
551 {
552         int hotcpu = (unsigned long)hcpu;
553         struct task_struct *p;
554
555         switch (action) {
556         case CPU_UP_PREPARE:
557                 BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
558                 BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
559                 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
560                 if (IS_ERR(p)) {
561                         printk("ksoftirqd for %i failed\n", hotcpu);
562                         return NOTIFY_BAD;
563                 }
564                 kthread_bind(p, hotcpu);
565                 per_cpu(ksoftirqd, hotcpu) = p;
566                 break;
567         case CPU_ONLINE:
568                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
569                 break;
570 #ifdef CONFIG_HOTPLUG_CPU
571         case CPU_UP_CANCELED:
572                 if (!per_cpu(ksoftirqd, hotcpu))
573                         break;
574                 /* Unbind so it can run.  Fall thru. */
575                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
576                              any_online_cpu(cpu_online_map));
577         case CPU_DEAD:
578                 p = per_cpu(ksoftirqd, hotcpu);
579                 per_cpu(ksoftirqd, hotcpu) = NULL;
580                 kthread_stop(p);
581                 takeover_tasklets(hotcpu);
582                 break;
583 #endif /* CONFIG_HOTPLUG_CPU */
584         }
585         return NOTIFY_OK;
586 }
587
588 static struct notifier_block __devinitdata cpu_nfb = {
589         .notifier_call = cpu_callback
590 };
591
592 __init int spawn_ksoftirqd(void)
593 {
594         void *cpu = (void *)(long)smp_processor_id();
595         cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
596         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
597         register_cpu_notifier(&cpu_nfb);
598         return 0;
599 }
600
601 #ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func:  function to run; called as func(@info)
 * @info:  opaque argument passed to @func
 * @retry: forwarded unchanged to smp_call_function()
 * @wait:  forwarded unchanged to smp_call_function()
 *
 * The other CPUs are reached through smp_call_function(); @func is then
 * run on the local CPU with interrupts disabled. Returns whatever
 * smp_call_function() returned.
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	unsigned long flags;
	int ret;

	preempt_disable();
	ret = smp_call_function(func, info, retry, wait);

	/*
	 * Save and restore the interrupt state rather than blindly
	 * re-enabling, so the caller's state is left exactly as found.
	 */
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);

	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
618 #endif