xfs: remove nr_to_write writeback windup.
[safe/jmp/linux-2.6] / kernel / softlockup.c
1 /*
2  * Detect Soft Lockups
3  *
4  * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
5  *
6  * This code detects soft lockups: incidents where, on a CPU, the kernel
7  * does not reschedule for softlockup_thresh (default 60) seconds or more.
8  */
9 #include <linux/mm.h>
10 #include <linux/cpu.h>
11 #include <linux/nmi.h>
12 #include <linux/init.h>
13 #include <linux/delay.h>
14 #include <linux/freezer.h>
15 #include <linux/kthread.h>
16 #include <linux/lockdep.h>
17 #include <linux/notifier.h>
18 #include <linux/module.h>
19 #include <linux/sysctl.h>
20
21 #include <asm/irq_regs.h>
22
23 static DEFINE_SPINLOCK(print_lock);
24
25 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28 static DEFINE_PER_CPU(bool, softlock_touch_sync);
29
30 static int __read_mostly did_panic;
31 int __read_mostly softlockup_thresh = 60;
32
33 /*
34  * Should we panic (and reboot, if panic_timeout= is set) when a
35  * soft-lockup occurs:
36  */
37 unsigned int __read_mostly softlockup_panic =
38                                 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
39
40 static int __init softlockup_panic_setup(char *str)
41 {
42         softlockup_panic = simple_strtoul(str, NULL, 0);
43
44         return 1;
45 }
46 __setup("softlockup_panic=", softlockup_panic_setup);
47
48 static int
49 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
50 {
51         did_panic = 1;
52
53         return NOTIFY_DONE;
54 }
55
56 static struct notifier_block panic_block = {
57         .notifier_call = softlock_panic,
58 };
59
/*
 * Coarse per-CPU timestamp in units of 2^30 ns (about 1.074 s), i.e.
 * "seconds, approximately".  Nanosecond resolution is not needed here,
 * and a shift avoids a 64-bit divide.
 */
static unsigned long get_timestamp(int this_cpu)
{
	unsigned long long ns = cpu_clock(this_cpu);

	return ns >> 30;	/* 2^30 ~= 10^9 */
}
69
70 static void __touch_softlockup_watchdog(void)
71 {
72         int this_cpu = raw_smp_processor_id();
73
74         __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
75 }
76
77 void touch_softlockup_watchdog(void)
78 {
79         __raw_get_cpu_var(softlockup_touch_ts) = 0;
80 }
81 EXPORT_SYMBOL(touch_softlockup_watchdog);
82
83 void touch_softlockup_watchdog_sync(void)
84 {
85         __raw_get_cpu_var(softlock_touch_sync) = true;
86         __raw_get_cpu_var(softlockup_touch_ts) = 0;
87 }
88
89 void touch_all_softlockup_watchdogs(void)
90 {
91         int cpu;
92
93         /* Cause each CPU to re-update its timestamp rather than complain */
94         for_each_online_cpu(cpu)
95                 per_cpu(softlockup_touch_ts, cpu) = 0;
96 }
97 EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
98
99 int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
100                              void __user *buffer,
101                              size_t *lenp, loff_t *ppos)
102 {
103         touch_all_softlockup_watchdogs();
104         return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
105 }
106
107 /*
108  * This callback runs from the timer interrupt, and checks
109  * whether the watchdog thread has hung or not:
110  */
111 void softlockup_tick(void)
112 {
113         int this_cpu = smp_processor_id();
114         unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
115         unsigned long print_ts;
116         struct pt_regs *regs = get_irq_regs();
117         unsigned long now;
118
119         /* Is detection switched off? */
120         if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
121                 /* Be sure we don't false trigger if switched back on */
122                 if (touch_ts)
123                         per_cpu(softlockup_touch_ts, this_cpu) = 0;
124                 return;
125         }
126
127         if (touch_ts == 0) {
128                 if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
129                         /*
130                          * If the time stamp was touched atomically
131                          * make sure the scheduler tick is up to date.
132                          */
133                         per_cpu(softlock_touch_sync, this_cpu) = false;
134                         sched_clock_tick();
135                 }
136                 __touch_softlockup_watchdog();
137                 return;
138         }
139
140         print_ts = per_cpu(softlockup_print_ts, this_cpu);
141
142         /* report at most once a second */
143         if (print_ts == touch_ts || did_panic)
144                 return;
145
146         /* do not print during early bootup: */
147         if (unlikely(system_state != SYSTEM_RUNNING)) {
148                 __touch_softlockup_watchdog();
149                 return;
150         }
151
152         now = get_timestamp(this_cpu);
153
154         /*
155          * Wake up the high-prio watchdog task twice per
156          * threshold timespan.
157          */
158         if (time_after(now - softlockup_thresh/2, touch_ts))
159                 wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
160
161         /* Warn about unreasonable delays: */
162         if (time_before_eq(now - softlockup_thresh, touch_ts))
163                 return;
164
165         per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
166
167         spin_lock(&print_lock);
168         printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
169                         this_cpu, now - touch_ts,
170                         current->comm, task_pid_nr(current));
171         print_modules();
172         print_irqtrace_events(current);
173         if (regs)
174                 show_regs(regs);
175         else
176                 dump_stack();
177         spin_unlock(&print_lock);
178
179         if (softlockup_panic)
180                 panic("softlockup: hung tasks");
181 }
182
183 /*
184  * The watchdog thread - runs every second and touches the timestamp.
185  */
186 static int watchdog(void *__bind_cpu)
187 {
188         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
189
190         sched_setscheduler(current, SCHED_FIFO, &param);
191
192         /* initialize timestamp */
193         __touch_softlockup_watchdog();
194
195         set_current_state(TASK_INTERRUPTIBLE);
196         /*
197          * Run briefly once per second to reset the softlockup timestamp.
198          * If this gets delayed for more than 60 seconds then the
199          * debug-printout triggers in softlockup_tick().
200          */
201         while (!kthread_should_stop()) {
202                 __touch_softlockup_watchdog();
203                 schedule();
204
205                 if (kthread_should_stop())
206                         break;
207
208                 set_current_state(TASK_INTERRUPTIBLE);
209         }
210         __set_current_state(TASK_RUNNING);
211
212         return 0;
213 }
214
215 /*
216  * Create/destroy watchdog threads as CPUs come and go:
217  */
218 static int __cpuinit
219 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
220 {
221         int hotcpu = (unsigned long)hcpu;
222         struct task_struct *p;
223
224         switch (action) {
225         case CPU_UP_PREPARE:
226         case CPU_UP_PREPARE_FROZEN:
227                 BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
228                 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
229                 if (IS_ERR(p)) {
230                         printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
231                         return NOTIFY_BAD;
232                 }
233                 per_cpu(softlockup_touch_ts, hotcpu) = 0;
234                 per_cpu(softlockup_watchdog, hotcpu) = p;
235                 kthread_bind(p, hotcpu);
236                 break;
237         case CPU_ONLINE:
238         case CPU_ONLINE_FROZEN:
239                 wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
240                 break;
241 #ifdef CONFIG_HOTPLUG_CPU
242         case CPU_UP_CANCELED:
243         case CPU_UP_CANCELED_FROZEN:
244                 if (!per_cpu(softlockup_watchdog, hotcpu))
245                         break;
246                 /* Unbind so it can run.  Fall thru. */
247                 kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
248                              cpumask_any(cpu_online_mask));
249         case CPU_DEAD:
250         case CPU_DEAD_FROZEN:
251                 p = per_cpu(softlockup_watchdog, hotcpu);
252                 per_cpu(softlockup_watchdog, hotcpu) = NULL;
253                 kthread_stop(p);
254                 break;
255 #endif /* CONFIG_HOTPLUG_CPU */
256         }
257         return NOTIFY_OK;
258 }
259
260 static struct notifier_block __cpuinitdata cpu_nfb = {
261         .notifier_call = cpu_callback
262 };
263
264 static int __initdata nosoftlockup;
265
266 static int __init nosoftlockup_setup(char *str)
267 {
268         nosoftlockup = 1;
269         return 1;
270 }
271 __setup("nosoftlockup", nosoftlockup_setup);
272
273 static int __init spawn_softlockup_task(void)
274 {
275         void *cpu = (void *)(long)smp_processor_id();
276         int err;
277
278         if (nosoftlockup)
279                 return 0;
280
281         err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
282         if (err == NOTIFY_BAD) {
283                 BUG();
284                 return 1;
285         }
286         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
287         register_cpu_notifier(&cpu_nfb);
288
289         atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
290
291         return 0;
292 }
293 early_initcall(spawn_softlockup_task);