[PATCH] lockdep: irqtrace subsystem, move account_system_vtime() calls into kernel...
[safe/jmp/linux-2.6] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7  */
8
9 #include <linux/module.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/interrupt.h>
12 #include <linux/init.h>
13 #include <linux/mm.h>
14 #include <linux/notifier.h>
15 #include <linux/percpu.h>
16 #include <linux/cpu.h>
17 #include <linux/kthread.h>
18 #include <linux/rcupdate.h>
19 #include <linux/smp.h>
20
21 #include <asm/irq.h>
/*
   - No shared variables; all the data is CPU local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding.
     It is still not clear, though, whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */
39
/*
 * Generic definition of the irq_stat array: one irq_cpustat_t slot per
 * CPU index (NR_CPUS entries).  It is only compiled in when the
 * architecture has not defined __ARCH_IRQ_STAT.
 */
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif
44
/*
 * Handler table, indexed by softirq number.  Entries are filled in by
 * open_softirq() and walked by __do_softirq(), which consumes one bit of
 * its 32-bit pending mask per entry.
 */
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

/*
 * Per-CPU pointer to that CPU's ksoftirqd thread.  It is written by
 * cpu_callback() and read by wakeup_softirqd(), which tolerates NULL.
 */
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48
49 /*
50  * we cannot loop indefinitely here to avoid userspace starvation,
51  * but we also don't want to introduce a worst case 1/HZ latency
52  * to the pending events, so lets the scheduler to balance
53  * the softirq load for us.
54  */
55 static inline void wakeup_softirqd(void)
56 {
57         /* Interrupts are disabled: no need to stop preemption */
58         struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59
60         if (tsk && tsk->state != TASK_RUNNING)
61                 wake_up_process(tsk);
62 }
63
64 /*
65  * This one is for softirq.c-internal use,
66  * where hardirqs are disabled legitimately:
67  */
68 static void __local_bh_disable(unsigned long ip)
69 {
70         unsigned long flags;
71
72         WARN_ON_ONCE(in_irq());
73
74         raw_local_irq_save(flags);
75         add_preempt_count(SOFTIRQ_OFFSET);
76         /*
77          * Were softirqs turned off above:
78          */
79         if (softirq_count() == SOFTIRQ_OFFSET)
80                 trace_softirqs_off(ip);
81         raw_local_irq_restore(flags);
82 }
83
/*
 * Disable softirq processing on this CPU, recording our caller's address
 * for the softirq-off trace hook.
 */
void local_bh_disable(void)
{
        unsigned long caller = (unsigned long)__builtin_return_address(0);

        __local_bh_disable(caller);
}
EXPORT_SYMBOL(local_bh_disable);
90
91 void __local_bh_enable(void)
92 {
93         WARN_ON_ONCE(in_irq());
94
95         /*
96          * softirqs should never be enabled by __local_bh_enable(),
97          * it always nests inside local_bh_enable() sections:
98          */
99         WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101         sub_preempt_count(SOFTIRQ_OFFSET);
102 }
103 EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105 /*
106  * Special-case - softirqs can safely be enabled in
107  * cond_resched_softirq(), or by __do_softirq(),
108  * without processing still-pending softirqs:
109  */
110 void _local_bh_enable(void)
111 {
112         WARN_ON_ONCE(in_irq());
113         WARN_ON_ONCE(!irqs_disabled());
114
115         if (softirq_count() == SOFTIRQ_OFFSET)
116                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117         sub_preempt_count(SOFTIRQ_OFFSET);
118 }
119
120 EXPORT_SYMBOL(_local_bh_enable);
121
122 void local_bh_enable(void)
123 {
124         unsigned long flags;
125
126         WARN_ON_ONCE(in_irq());
127         WARN_ON_ONCE(irqs_disabled());
128
129         local_irq_save(flags);
130         /*
131          * Are softirqs going to be turned on now:
132          */
133         if (softirq_count() == SOFTIRQ_OFFSET)
134                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135         /*
136          * Keep preemption disabled until we are done with
137          * softirq processing:
138          */
139         sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141         if (unlikely(!in_interrupt() && local_softirq_pending()))
142                 do_softirq();
143
144         dec_preempt_count();
145         local_irq_restore(flags);
146         preempt_check_resched();
147 }
148 EXPORT_SYMBOL(local_bh_enable);
149
150 void local_bh_enable_ip(unsigned long ip)
151 {
152         unsigned long flags;
153
154         WARN_ON_ONCE(in_irq());
155
156         local_irq_save(flags);
157         /*
158          * Are softirqs going to be turned on now:
159          */
160         if (softirq_count() == SOFTIRQ_OFFSET)
161                 trace_softirqs_on(ip);
162         /*
163          * Keep preemption disabled until we are done with
164          * softirq processing:
165          */
166         sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168         if (unlikely(!in_interrupt() && local_softirq_pending()))
169                 do_softirq();
170
171         dec_preempt_count();
172         local_irq_restore(flags);
173         preempt_check_resched();
174 }
175 EXPORT_SYMBOL(local_bh_enable_ip);
176
/*
 * __do_softirq() makes at most MAX_SOFTIRQ_RESTART passes over the
 * pending softirqs; whatever is still pending after that is handed to
 * ksoftirqd.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10
187
/*
 * Run the handlers of all softirqs pending on this CPU.
 *
 * The callers in this file enter with hardirqs disabled; the function
 * enables them while handlers run and disables them again before
 * returning.  Softirqs raised while handlers were running are picked up
 * by restarting the scan, for at most MAX_SOFTIRQ_RESTART passes in
 * total; anything still pending after that is left for ksoftirqd.
 */
asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        /* Snapshot of the pending mask; it is consumed bit by bit below. */
        pending = local_softirq_pending();
        account_system_vtime(current);

        /* Mark this CPU as being in softirq context for the duration. */
        __local_bh_disable((unsigned long)__builtin_return_address(0));
        trace_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        /*
         * Bit N of the snapshot corresponds to softirq_vec[N]: advance
         * the table pointer and shift the mask in step until no set
         * bits remain.
         */
        do {
                if (pending & 1) {
                        h->action(h);
                        /* NOTE(review): presumably reports a quiescent
                           state to RCU-bh - confirm in rcupdate. */
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        /* Pick up whatever was raised while the handlers were running. */
        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        /* Restart budget used up with work remaining: defer to ksoftirqd. */
        if (pending)
                wakeup_softirqd();

        trace_softirq_exit();

        account_system_vtime(current);
        /* Leave softirq context without processing still-pending softirqs. */
        _local_bh_enable();
}
233
234 #ifndef __ARCH_HAS_DO_SOFTIRQ
235
236 asmlinkage void do_softirq(void)
237 {
238         __u32 pending;
239         unsigned long flags;
240
241         if (in_interrupt())
242                 return;
243
244         local_irq_save(flags);
245
246         pending = local_softirq_pending();
247
248         if (pending)
249                 __do_softirq();
250
251         local_irq_restore(flags);
252 }
253
254 EXPORT_SYMBOL(do_softirq);
255
256 #endif
257
/*
 * How irq_exit() runs pending softirqs.  Architectures that define
 * __ARCH_IRQ_EXIT_IRQS_DISABLED call __do_softirq() directly
 * (presumably because irq_exit() is reached with hardirqs already
 * disabled there - confirm per architecture).  Everyone else goes
 * through do_softirq(), which saves and disables hardirqs itself.
 */
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif
263
264 /*
265  * Exit an interrupt context. Process softirqs if needed and possible:
266  */
267 void irq_exit(void)
268 {
269         account_system_vtime(current);
270         trace_hardirq_exit();
271         sub_preempt_count(IRQ_EXIT_OFFSET);
272         if (!in_interrupt() && local_softirq_pending())
273                 invoke_softirq();
274         preempt_enable_no_resched();
275 }
276
277 /*
278  * This function must run with irqs disabled!
279  */
280 inline fastcall void raise_softirq_irqoff(unsigned int nr)
281 {
282         __raise_softirq_irqoff(nr);
283
284         /*
285          * If we're in an interrupt or softirq, we're done
286          * (this also catches softirq-disabled code). We will
287          * actually run the softirq once we return from
288          * the irq or softirq.
289          *
290          * Otherwise we wake up ksoftirqd to make sure we
291          * schedule the softirq soon.
292          */
293         if (!in_interrupt())
294                 wakeup_softirqd();
295 }
296
297 EXPORT_SYMBOL(raise_softirq_irqoff);
298
299 void fastcall raise_softirq(unsigned int nr)
300 {
301         unsigned long flags;
302
303         local_irq_save(flags);
304         raise_softirq_irqoff(nr);
305         local_irq_restore(flags);
306 }
307
308 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
309 {
310         softirq_vec[nr].data = data;
311         softirq_vec[nr].action = action;
312 }
313
314 EXPORT_SYMBOL(open_softirq);
315
/* Tasklets */

/* Head of a singly linked list of tasklets, chained through ->next. */
struct tasklet_head
{
        struct tasklet_struct *list;
};
321
/*
 * Per-CPU lists of scheduled tasklets: tasklet_vec is drained by
 * tasklet_action() (TASKLET_SOFTIRQ), tasklet_hi_vec by
 * tasklet_hi_action() (HI_SOFTIRQ).
 *
 * The explicit initializers are deliberate: some compilers disobey the
 * section attribute on statics when not initialized -- RR
 */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
326
327 void fastcall __tasklet_schedule(struct tasklet_struct *t)
328 {
329         unsigned long flags;
330
331         local_irq_save(flags);
332         t->next = __get_cpu_var(tasklet_vec).list;
333         __get_cpu_var(tasklet_vec).list = t;
334         raise_softirq_irqoff(TASKLET_SOFTIRQ);
335         local_irq_restore(flags);
336 }
337
338 EXPORT_SYMBOL(__tasklet_schedule);
339
340 void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
341 {
342         unsigned long flags;
343
344         local_irq_save(flags);
345         t->next = __get_cpu_var(tasklet_hi_vec).list;
346         __get_cpu_var(tasklet_hi_vec).list = t;
347         raise_softirq_irqoff(HI_SOFTIRQ);
348         local_irq_restore(flags);
349 }
350
351 EXPORT_SYMBOL(__tasklet_hi_schedule);
352
353 static void tasklet_action(struct softirq_action *a)
354 {
355         struct tasklet_struct *list;
356
357         local_irq_disable();
358         list = __get_cpu_var(tasklet_vec).list;
359         __get_cpu_var(tasklet_vec).list = NULL;
360         local_irq_enable();
361
362         while (list) {
363                 struct tasklet_struct *t = list;
364
365                 list = list->next;
366
367                 if (tasklet_trylock(t)) {
368                         if (!atomic_read(&t->count)) {
369                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
370                                         BUG();
371                                 t->func(t->data);
372                                 tasklet_unlock(t);
373                                 continue;
374                         }
375                         tasklet_unlock(t);
376                 }
377
378                 local_irq_disable();
379                 t->next = __get_cpu_var(tasklet_vec).list;
380                 __get_cpu_var(tasklet_vec).list = t;
381                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
382                 local_irq_enable();
383         }
384 }
385
386 static void tasklet_hi_action(struct softirq_action *a)
387 {
388         struct tasklet_struct *list;
389
390         local_irq_disable();
391         list = __get_cpu_var(tasklet_hi_vec).list;
392         __get_cpu_var(tasklet_hi_vec).list = NULL;
393         local_irq_enable();
394
395         while (list) {
396                 struct tasklet_struct *t = list;
397
398                 list = list->next;
399
400                 if (tasklet_trylock(t)) {
401                         if (!atomic_read(&t->count)) {
402                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
403                                         BUG();
404                                 t->func(t->data);
405                                 tasklet_unlock(t);
406                                 continue;
407                         }
408                         tasklet_unlock(t);
409                 }
410
411                 local_irq_disable();
412                 t->next = __get_cpu_var(tasklet_hi_vec).list;
413                 __get_cpu_var(tasklet_hi_vec).list = t;
414                 __raise_softirq_irqoff(HI_SOFTIRQ);
415                 local_irq_enable();
416         }
417 }
418
419
420 void tasklet_init(struct tasklet_struct *t,
421                   void (*func)(unsigned long), unsigned long data)
422 {
423         t->next = NULL;
424         t->state = 0;
425         atomic_set(&t->count, 0);
426         t->func = func;
427         t->data = data;
428 }
429
430 EXPORT_SYMBOL(tasklet_init);
431
/*
 * Wait until tasklet @t is neither scheduled nor (per
 * tasklet_unlock_wait()) running, and leave it unscheduled.
 *
 * Calling this from interrupt context only prints a message; the
 * function then carries on regardless, even though it may yield().
 */
void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        /*
         * Claim the SCHED bit for ourselves.  While somebody else holds
         * it (the tasklet is queued), yield the CPU until the bit has
         * been cleared, then try to claim it again.
         */
        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        /* NOTE(review): presumably waits for a running instance to
           finish - confirm against tasklet_unlock_wait(). */
        tasklet_unlock_wait(t);
        /* Release the SCHED bit we claimed above. */
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
447
448 void __init softirq_init(void)
449 {
450         open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
451         open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
452 }
453
/*
 * Thread function of the per-CPU ksoftirqd kernel thread.
 *
 * @__bind_cpu: number of the CPU this thread serves, cast to a pointer
 *              (cpu_callback() passes its hcpu argument through).
 *
 * Sleeps while no softirqs are pending; when woken, runs do_softirq()
 * until nothing is pending, giving the scheduler a chance between
 * rounds.  Always returns 0, once kthread_should_stop() is true.
 */
static int ksoftirqd(void * __bind_cpu)
{
        /* Run at nice 19. */
        set_user_nice(current, 19);
        /* NOTE(review): presumably exempts the thread from the freezer -
           confirm against PF_NOFREEZE. */
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                /* Nothing to do: sleep until somebody wakes us up. */
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        /* Open a window for rescheduling between rounds. */
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                /* Go back to sleepable state before re-checking for stop. */
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        /* Reached with preemption disabled from the inner loop above. */
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}
499
500 #ifdef CONFIG_HOTPLUG_CPU
501 /*
502  * tasklet_kill_immediate is called to remove a tasklet which can already be
503  * scheduled for execution on @cpu.
504  *
505  * Unlike tasklet_kill, this function removes the tasklet
506  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
507  *
508  * When this function is called, @cpu must be in the CPU_DEAD state.
509  */
510 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
511 {
512         struct tasklet_struct **i;
513
514         BUG_ON(cpu_online(cpu));
515         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
516
517         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
518                 return;
519
520         /* CPU is dead, so no lock needed. */
521         for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
522                 if (*i == t) {
523                         *i = t->next;
524                         return;
525                 }
526         }
527         BUG();
528 }
529
530 static void takeover_tasklets(unsigned int cpu)
531 {
532         struct tasklet_struct **i;
533
534         /* CPU is dead, so no lock needed. */
535         local_irq_disable();
536
537         /* Find end, append list for that CPU. */
538         for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
539         *i = per_cpu(tasklet_vec, cpu).list;
540         per_cpu(tasklet_vec, cpu).list = NULL;
541         raise_softirq_irqoff(TASKLET_SOFTIRQ);
542
543         for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
544         *i = per_cpu(tasklet_hi_vec, cpu).list;
545         per_cpu(tasklet_hi_vec, cpu).list = NULL;
546         raise_softirq_irqoff(HI_SOFTIRQ);
547
548         local_irq_enable();
549 }
550 #endif /* CONFIG_HOTPLUG_CPU */
551
/*
 * CPU notifier callback that manages the per-CPU ksoftirqd threads.
 *
 * @nfb:    notifier block (unused here).
 * @action: CPU event being reported.
 * @hcpu:   number of the affected CPU, cast to a pointer.
 *
 * Returns NOTIFY_BAD when the thread cannot be created during
 * CPU_UP_PREPARE, NOTIFY_OK in every other case (including events this
 * callback does not handle).
 */
static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                /* A CPU coming up must not have tasklets left over. */
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                /* Create the thread now; it is first woken in CPU_ONLINE. */
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* No thread was created for this CPU: nothing to undo. */
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             any_online_cpu(cpu_online_map));
        case CPU_DEAD:
                /*
                 * Clear the per-CPU pointer before stopping the thread,
                 * then adopt whatever tasklets the CPU left behind.
                 */
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}
591
/*
 * Notifier block that routes CPU events to cpu_callback(); registered
 * by spawn_ksoftirqd().
 */
static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};
595
596 __init int spawn_ksoftirqd(void)
597 {
598         void *cpu = (void *)(long)smp_processor_id();
599         cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
600         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
601         register_cpu_notifier(&cpu_nfb);
602         return 0;
603 }
604
605 #ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func:  function to run; it is called with hardirqs disabled on the
 *         local CPU.
 * @info:  argument passed to @func.
 * @retry: passed through to smp_call_function().
 * @wait:  passed through to smp_call_function().
 *
 * The other CPUs are reached via smp_call_function(); @func is then run
 * locally.  Returns whatever smp_call_function() returned.
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
        unsigned long flags;
        int ret;

        preempt_disable();
        ret = smp_call_function(func, info, retry, wait);

        /*
         * Save and restore the irq state rather than enabling irqs
         * unconditionally, so that a caller that entered with irqs
         * disabled does not get them switched on behind its back.
         */
        local_irq_save(flags);
        func(info);
        local_irq_restore(flags);

        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
622 #endif