PM/Suspend: Do not shrink memory before suspend
[safe/jmp/linux-2.6] / kernel / power / main.c
1 /*
2  * kernel/power/main.c - PM subsystem core functionality.
3  *
4  * Copyright (c) 2003 Patrick Mochel
5  * Copyright (c) 2003 Open Source Development Lab
6  * 
7  * This file is released under the GPLv2
8  *
9  */
10
11 #include <linux/module.h>
12 #include <linux/suspend.h>
13 #include <linux/kobject.h>
14 #include <linux/string.h>
15 #include <linux/delay.h>
16 #include <linux/errno.h>
17 #include <linux/kmod.h>
18 #include <linux/init.h>
19 #include <linux/console.h>
20 #include <linux/cpu.h>
21 #include <linux/resume-trace.h>
22 #include <linux/freezer.h>
23 #include <linux/vmstat.h>
24 #include <linux/syscalls.h>
25
26 #include "power.h"
27
/*
 * In this file pm_mutex is held while suspend_ops or pm_test_level is
 * updated and for the whole duration of enter_state().
 */
DEFINE_MUTEX(pm_mutex);

/* Exported flag word; the visible part of this file never reads or writes it. */
unsigned int pm_flags;
EXPORT_SYMBOL(pm_flags);
32
33 #ifdef CONFIG_PM_SLEEP
34
/* Routines for PM-transition notifications */

/* Blocking notifier chain used by the register/unregister/call helpers below. */
static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
38
39 int register_pm_notifier(struct notifier_block *nb)
40 {
41         return blocking_notifier_chain_register(&pm_chain_head, nb);
42 }
43 EXPORT_SYMBOL_GPL(register_pm_notifier);
44
45 int unregister_pm_notifier(struct notifier_block *nb)
46 {
47         return blocking_notifier_chain_unregister(&pm_chain_head, nb);
48 }
49 EXPORT_SYMBOL_GPL(unregister_pm_notifier);
50
51 int pm_notifier_call_chain(unsigned long val)
52 {
53         return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
54                         == NOTIFY_BAD) ? -EINVAL : 0;
55 }
56
57 #ifdef CONFIG_PM_DEBUG
/* Currently selected test level; set by pm_test_store(), checked by suspend_test(). */
int pm_test_level = TEST_NONE;

/*
 * Test level names, indexed by test level.  pm_test_show() prints them and
 * pm_test_store() matches its input against them; unnamed slots are skipped.
 */
static const char * const pm_tests[__TEST_AFTER_LAST] = {
        [TEST_NONE] = "none",
        [TEST_CORE] = "core",
        [TEST_CPUS] = "processors",
        [TEST_PLATFORM] = "platform",
        [TEST_DEVICES] = "devices",
        [TEST_FREEZER] = "freezer",
};
68
69 static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
70                                 char *buf)
71 {
72         char *s = buf;
73         int level;
74
75         for (level = TEST_FIRST; level <= TEST_MAX; level++)
76                 if (pm_tests[level]) {
77                         if (level == pm_test_level)
78                                 s += sprintf(s, "[%s] ", pm_tests[level]);
79                         else
80                                 s += sprintf(s, "%s ", pm_tests[level]);
81                 }
82
83         if (s != buf)
84                 /* convert the last space to a newline */
85                 *(s-1) = '\n';
86
87         return (s - buf);
88 }
89
90 static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
91                                 const char *buf, size_t n)
92 {
93         const char * const *s;
94         int level;
95         char *p;
96         int len;
97         int error = -EINVAL;
98
99         p = memchr(buf, '\n', n);
100         len = p ? p - buf : n;
101
102         mutex_lock(&pm_mutex);
103
104         level = TEST_FIRST;
105         for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
106                 if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
107                         pm_test_level = level;
108                         error = 0;
109                         break;
110                 }
111
112         mutex_unlock(&pm_mutex);
113
114         return error ? error : n;
115 }
116
117 power_attr(pm_test);
118 #endif /* CONFIG_PM_DEBUG */
119
120 #endif /* CONFIG_PM_SLEEP */
121
122 #ifdef CONFIG_SUSPEND
123
/*
 * suspend_test - check whether the suspend sequence should stop at @level.
 *
 * With CONFIG_PM_DEBUG, if @level is the selected pm_test_level, log a
 * message, busy-wait for five seconds and return 1.  In every other case
 * return 0.
 */
static int suspend_test(int level)
{
#ifdef CONFIG_PM_DEBUG
        if (pm_test_level != level)
                return 0;

        printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
        mdelay(5000);
        return 1;
#else
        return 0;
#endif /* CONFIG_PM_DEBUG */
}
135
136 #ifdef CONFIG_PM_TEST_SUSPEND
137
138 /*
139  * We test the system suspend code by setting an RTC wakealarm a short
140  * time in the future, then suspending.  Suspending the devices won't
141  * normally take long ... some systems only need a few milliseconds.
142  *
143  * The time it takes is system-specific though, so when we test this
144  * during system bootup we allow a LOT of time.
145  */
/* Wakealarm delay in seconds; also the limit suspend_test_finish() warns about. */
#define TEST_SUSPEND_SECONDS    5

/* jiffies value recorded by the most recent suspend_test_start() call. */
static unsigned long suspend_test_start_time;

/* Record the current jiffies value as the start of a timed phase. */
static void suspend_test_start(void)
{
        /* FIXME Use better timebase than "jiffies", ideally a clocksource.
         * What we want is a hardware counter that will work correctly even
         * during the irqs-are-off stages of the suspend/resume cycle...
         */
        suspend_test_start_time = jiffies;
}
158
159 static void suspend_test_finish(const char *label)
160 {
161         long nj = jiffies - suspend_test_start_time;
162         unsigned msec;
163
164         msec = jiffies_to_msecs(abs(nj));
165         pr_info("PM: %s took %d.%03d seconds\n", label,
166                         msec / 1000, msec % 1000);
167
168         /* Warning on suspend means the RTC alarm period needs to be
169          * larger -- the system was sooo slooowwww to suspend that the
170          * alarm (should have) fired before the system went to sleep!
171          *
172          * Warning on either suspend or resume also means the system
173          * has some performance issues.  The stack dump of a WARN_ON
174          * is more likely to get the right attention than a printk...
175          */
176         WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
177 }
178
179 #else
180
/* Timing is compiled out without CONFIG_PM_TEST_SUSPEND: nothing to record. */
static void suspend_test_start(void)
{
}

/* Timing is compiled out without CONFIG_PM_TEST_SUSPEND: nothing to report. */
static void suspend_test_finish(const char *label)
{
}
188
189 #endif
190
/* Platform suspend callbacks installed by suspend_set_ops(); NULL until then. */
static struct platform_suspend_ops *suspend_ops;

/**
 *      suspend_set_ops - Set the global suspend method table.
 *      @ops:   Pointer to ops structure.
 *
 *      The pointer is replaced while holding pm_mutex, which enter_state()
 *      also holds for the duration of a suspend transition.
 */

void suspend_set_ops(struct platform_suspend_ops *ops)
{
        mutex_lock(&pm_mutex);
        suspend_ops = ops;
        mutex_unlock(&pm_mutex);
}
204
205 /**
206  * suspend_valid_only_mem - generic memory-only valid callback
207  *
208  * Platform drivers that implement mem suspend only and only need
209  * to check for that in their .valid callback can use this instead
210  * of rolling their own .valid callback.
211  */
212 int suspend_valid_only_mem(suspend_state_t state)
213 {
214         return state == PM_SUSPEND_MEM;
215 }
216
217 /**
218  *      suspend_prepare - Do prep work before entering low-power state.
219  *
220  *      This is common code that is called for each state that we're entering.
221  *      Run suspend notifiers, allocate a console and stop all processes.
222  */
223 static int suspend_prepare(void)
224 {
225         int error;
226
227         if (!suspend_ops || !suspend_ops->enter)
228                 return -EPERM;
229
230         pm_prepare_console();
231
232         error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
233         if (error)
234                 goto Finish;
235
236         error = usermodehelper_disable();
237         if (error)
238                 goto Finish;
239
240         error = suspend_freeze_processes();
241         if (!error)
242                 return 0;
243
244         suspend_thaw_processes();
245         usermodehelper_enable();
246  Finish:
247         pm_notifier_call_chain(PM_POST_SUSPEND);
248         pm_restore_console();
249         return error;
250 }
251
/*
 * Default implementation: disable interrupts on the local CPU.  Declared
 * weak so that another definition can replace it at link time.
 */
void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
{
        local_irq_disable();
}
257
/*
 * Default implementation: enable interrupts on the local CPU.  Declared
 * weak so that another definition can replace it at link time.
 */
void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
{
        local_irq_enable();
}
263
264 /**
265  *      suspend_enter - enter the desired system sleep state.
266  *      @state:         state to enter
267  *
268  *      This function should be called after devices have been suspended.
269  */
270 static int suspend_enter(suspend_state_t state)
271 {
272         int error;
273
274         if (suspend_ops->prepare) {
275                 error = suspend_ops->prepare();
276                 if (error)
277                         return error;
278         }
279
280         error = dpm_suspend_noirq(PMSG_SUSPEND);
281         if (error) {
282                 printk(KERN_ERR "PM: Some devices failed to power down\n");
283                 goto Platfrom_finish;
284         }
285
286         if (suspend_ops->prepare_late) {
287                 error = suspend_ops->prepare_late();
288                 if (error)
289                         goto Power_up_devices;
290         }
291
292         if (suspend_test(TEST_PLATFORM))
293                 goto Platform_wake;
294
295         error = disable_nonboot_cpus();
296         if (error || suspend_test(TEST_CPUS))
297                 goto Enable_cpus;
298
299         arch_suspend_disable_irqs();
300         BUG_ON(!irqs_disabled());
301
302         error = sysdev_suspend(PMSG_SUSPEND);
303         if (!error) {
304                 if (!suspend_test(TEST_CORE))
305                         error = suspend_ops->enter(state);
306                 sysdev_resume();
307         }
308
309         arch_suspend_enable_irqs();
310         BUG_ON(irqs_disabled());
311
312  Enable_cpus:
313         enable_nonboot_cpus();
314
315  Platform_wake:
316         if (suspend_ops->wake)
317                 suspend_ops->wake();
318
319  Power_up_devices:
320         dpm_resume_noirq(PMSG_RESUME);
321
322  Platfrom_finish:
323         if (suspend_ops->finish)
324                 suspend_ops->finish();
325
326         return error;
327 }
328
329 /**
330  *      suspend_devices_and_enter - suspend devices and enter the desired system
331  *                                  sleep state.
332  *      @state:           state to enter
333  */
334 int suspend_devices_and_enter(suspend_state_t state)
335 {
336         int error;
337
338         if (!suspend_ops)
339                 return -ENOSYS;
340
341         if (suspend_ops->begin) {
342                 error = suspend_ops->begin(state);
343                 if (error)
344                         goto Close;
345         }
346         suspend_console();
347         suspend_test_start();
348         error = dpm_suspend_start(PMSG_SUSPEND);
349         if (error) {
350                 printk(KERN_ERR "PM: Some devices failed to suspend\n");
351                 goto Recover_platform;
352         }
353         suspend_test_finish("suspend devices");
354         if (suspend_test(TEST_DEVICES))
355                 goto Recover_platform;
356
357         suspend_enter(state);
358
359  Resume_devices:
360         suspend_test_start();
361         dpm_resume_end(PMSG_RESUME);
362         suspend_test_finish("resume devices");
363         resume_console();
364  Close:
365         if (suspend_ops->end)
366                 suspend_ops->end();
367         return error;
368
369  Recover_platform:
370         if (suspend_ops->recover)
371                 suspend_ops->recover();
372         goto Resume_devices;
373 }
374
/**
 *      suspend_finish - Do final work before exiting suspend sequence.
 *
 *      Undoes suspend_prepare(): thaws processes, re-enables usermode
 *      helpers, runs the PM_POST_SUSPEND notifiers and restores the
 *      console. This is not called for suspend-to-disk.
 */
static void suspend_finish(void)
{
        suspend_thaw_processes();
        usermodehelper_enable();
        pm_notifier_call_chain(PM_POST_SUSPEND);
        pm_restore_console();
}
388
389
390
391
/*
 * Names of the sleep states handled in this file, indexed by
 * suspend_state_t.  States without a name here are left as NULL.
 */
static const char * const pm_states[PM_SUSPEND_MAX] = {
        [PM_SUSPEND_STANDBY]    = "standby",
        [PM_SUSPEND_MEM]        = "mem",
};
396
397 static inline int valid_state(suspend_state_t state)
398 {
399         /* All states need lowlevel support and need to be valid
400          * to the lowlevel implementation, no valid callback
401          * implies that none are valid. */
402         if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state))
403                 return 0;
404         return 1;
405 }
406
407
408 /**
409  *      enter_state - Do common work of entering low-power state.
410  *      @state:         pm_state structure for state we're entering.
411  *
412  *      Make sure we're the only ones trying to enter a sleep state. Fail
413  *      if someone has beat us to it, since we don't want anything weird to
414  *      happen when we wake up.
415  *      Then, do the setup for suspend, enter the state, and cleaup (after
416  *      we've woken up).
417  */
418 static int enter_state(suspend_state_t state)
419 {
420         int error;
421
422         if (!valid_state(state))
423                 return -ENODEV;
424
425         if (!mutex_trylock(&pm_mutex))
426                 return -EBUSY;
427
428         printk(KERN_INFO "PM: Syncing filesystems ... ");
429         sys_sync();
430         printk("done.\n");
431
432         pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
433         error = suspend_prepare();
434         if (error)
435                 goto Unlock;
436
437         if (suspend_test(TEST_FREEZER))
438                 goto Finish;
439
440         pr_debug("PM: Entering %s sleep\n", pm_states[state]);
441         error = suspend_devices_and_enter(state);
442
443  Finish:
444         pr_debug("PM: Finishing wakeup.\n");
445         suspend_finish();
446  Unlock:
447         mutex_unlock(&pm_mutex);
448         return error;
449 }
450
451
452 /**
453  *      pm_suspend - Externally visible function for suspending system.
454  *      @state:         Enumerated value of state to enter.
455  *
456  *      Determine whether or not value is within range, get state 
457  *      structure, and enter (above).
458  */
459
460 int pm_suspend(suspend_state_t state)
461 {
462         if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
463                 return enter_state(state);
464         return -EINVAL;
465 }
466
467 EXPORT_SYMBOL(pm_suspend);
468
469 #endif /* CONFIG_SUSPEND */
470
/* The "power" kobject; created by pm_init(), which attaches attr_group to it. */
struct kobject *power_kobj;
472
473 /**
474  *      state - control system power state.
475  *
476  *      show() returns what states are supported, which is hard-coded to
477  *      'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
478  *      'disk' (Suspend-to-Disk).
479  *
480  *      store() accepts one of those strings, translates it into the 
481  *      proper enumerated value, and initiates a suspend transition.
482  */
483
484 static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
485                           char *buf)
486 {
487         char *s = buf;
488 #ifdef CONFIG_SUSPEND
489         int i;
490
491         for (i = 0; i < PM_SUSPEND_MAX; i++) {
492                 if (pm_states[i] && valid_state(i))
493                         s += sprintf(s,"%s ", pm_states[i]);
494         }
495 #endif
496 #ifdef CONFIG_HIBERNATION
497         s += sprintf(s, "%s\n", "disk");
498 #else
499         if (s != buf)
500                 /* convert the last space to a newline */
501                 *(s-1) = '\n';
502 #endif
503         return (s - buf);
504 }
505
506 static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
507                            const char *buf, size_t n)
508 {
509 #ifdef CONFIG_SUSPEND
510         suspend_state_t state = PM_SUSPEND_STANDBY;
511         const char * const *s;
512 #endif
513         char *p;
514         int len;
515         int error = -EINVAL;
516
517         p = memchr(buf, '\n', n);
518         len = p ? p - buf : n;
519
520         /* First, check if we are requested to hibernate */
521         if (len == 4 && !strncmp(buf, "disk", len)) {
522                 error = hibernate();
523   goto Exit;
524         }
525
526 #ifdef CONFIG_SUSPEND
527         for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
528                 if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
529                         break;
530         }
531         if (state < PM_SUSPEND_MAX && *s)
532                 error = enter_state(state);
533 #endif
534
535  Exit:
536         return error ? error : n;
537 }
538
539 power_attr(state);
540
541 #ifdef CONFIG_PM_TRACE
542 int pm_trace_enabled;
543
544 static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr,
545                              char *buf)
546 {
547         return sprintf(buf, "%d\n", pm_trace_enabled);
548 }
549
550 static ssize_t
551 pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
552                const char *buf, size_t n)
553 {
554         int val;
555
556         if (sscanf(buf, "%d", &val) == 1) {
557                 pm_trace_enabled = !!val;
558                 return n;
559         }
560         return -EINVAL;
561 }
562
563 power_attr(pm_trace);
564 #endif /* CONFIG_PM_TRACE */
565
/*
 * NULL-terminated list of the attributes defined in this file; the
 * optional entries follow the same config options as their definitions.
 */
static struct attribute * g[] = {
        &state_attr.attr,
#ifdef CONFIG_PM_TRACE
        &pm_trace_attr.attr,
#endif
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
        &pm_test_attr.attr,
#endif
        NULL,
};

/* Attribute group that pm_init() registers on power_kobj. */
static struct attribute_group attr_group = {
        .attrs = g,
};
580
581
582 static int __init pm_init(void)
583 {
584         power_kobj = kobject_create_and_add("power", NULL);
585         if (!power_kobj)
586                 return -ENOMEM;
587         return sysfs_create_group(power_kobj, &attr_group);
588 }
589
590 core_initcall(pm_init);
591
592
593 #ifdef CONFIG_PM_TEST_SUSPEND
594
595 #include <linux/rtc.h>
596
/*
 * To test system suspend, we need a hands-off mechanism to resume the
 * system.  RTCs wake alarms are a common self-contained mechanism.
 *
 * test_wakealarm() programs @rtc to fire TEST_SUSPEND_SECONDS from now,
 * suspends to @state (falling back from "mem" to "standby" when "mem"
 * yields -ENODEV), logs any failure and finally disables the alarm again.
 */

static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
{
        static char err_readtime[] __initdata =
                KERN_ERR "PM: can't read %s time, err %d\n";
        static char err_wakealarm [] __initdata =
                KERN_ERR "PM: can't set %s wakealarm, err %d\n";
        static char err_suspend[] __initdata =
                KERN_ERR "PM: suspend test failed, error %d\n";
        static char info_test[] __initdata =
                KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";

        unsigned long           now;
        struct rtc_wkalrm       alm;
        int                     status;

        /* this may fail if the RTC hasn't been initialized */
        status = rtc_read_time(rtc, &alm.time);
        if (status < 0) {
                printk(err_readtime, dev_name(&rtc->dev), status);
                return;
        }
        rtc_tm_to_time(&alm.time, &now);

        /* start from a clean alarm; only the time and enable flag are set */
        memset(&alm, 0, sizeof alm);
        rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
        alm.enabled = true;

        status = rtc_set_alarm(rtc, &alm);
        if (status < 0) {
                printk(err_wakealarm, dev_name(&rtc->dev), status);
                return;
        }

        if (state == PM_SUSPEND_MEM) {
                printk(info_test, pm_states[state]);
                status = pm_suspend(state);
                /* "mem" not supported here: retry with "standby" below */
                if (status == -ENODEV)
                        state = PM_SUSPEND_STANDBY;
        }
        if (state == PM_SUSPEND_STANDBY) {
                printk(info_test, pm_states[state]);
                status = pm_suspend(state);
        }
        if (status < 0)
                printk(err_suspend, status);

        /* Some platforms can't detect that the alarm triggered the
         * wakeup, or (accordingly) disable it afterwards.
         * It's supposed to give oneshot behavior; cope.
         */
        alm.enabled = false;
        rtc_set_alarm(rtc, &alm);
}
655
656 static int __init has_wakealarm(struct device *dev, void *name_ptr)
657 {
658         struct rtc_device *candidate = to_rtc_device(dev);
659
660         if (!candidate->ops->set_alarm)
661                 return 0;
662         if (!device_may_wakeup(candidate->dev.parent))
663                 return 0;
664
665         *(const char **)name_ptr = dev_name(dev);
666         return 1;
667 }
668
/*
 * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
 * at startup time.  They're normally disabled, for faster boot and because
 * we can't know which states really work on this particular system.
 *
 * test_state stays at PM_SUSPEND_ON (no test requested) unless
 * setup_test_suspend() recognises a state name.
 */
static suspend_state_t test_state __initdata = PM_SUSPEND_ON;

/* Warning format shared by setup_test_suspend() and test_suspend(). */
static char warn_bad_state[] __initdata =
        KERN_WARNING "PM: can't test '%s' suspend state\n";
678
679 static int __init setup_test_suspend(char *value)
680 {
681         unsigned i;
682
683         /* "=mem" ==> "mem" */
684         value++;
685         for (i = 0; i < PM_SUSPEND_MAX; i++) {
686                 if (!pm_states[i])
687                         continue;
688                 if (strcmp(pm_states[i], value) != 0)
689                         continue;
690                 test_state = (__force suspend_state_t) i;
691                 return 0;
692         }
693         printk(warn_bad_state, value);
694         return 0;
695 }
696 __setup("test_suspend", setup_test_suspend);
697
698 static int __init test_suspend(void)
699 {
700         static char             warn_no_rtc[] __initdata =
701                 KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
702
703         char                    *pony = NULL;
704         struct rtc_device       *rtc = NULL;
705
706         /* PM is initialized by now; is that state testable? */
707         if (test_state == PM_SUSPEND_ON)
708                 goto done;
709         if (!valid_state(test_state)) {
710                 printk(warn_bad_state, pm_states[test_state]);
711                 goto done;
712         }
713
714         /* RTCs have initialized by now too ... can we use one? */
715         class_find_device(rtc_class, NULL, &pony, has_wakealarm);
716         if (pony)
717                 rtc = rtc_class_open(pony);
718         if (!rtc) {
719                 printk(warn_no_rtc);
720                 goto done;
721         }
722
723         /* go for it */
724         test_wakealarm(rtc, test_state);
725         rtc_class_close(rtc);
726 done:
727         return 0;
728 }
729 late_initcall(test_suspend);
730
731 #endif /* CONFIG_PM_TEST_SUSPEND */