nfsd: track last inode only in use_wgather case
[safe/jmp/linux-2.6] / kernel / power / disk.c
index 324ac01..5cb080e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/reboot.h>
 #include <linux/string.h>
 #include <linux/device.h>
+#include <linux/kmod.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
+#include <scsi/scsi_scan.h>
+#include <asm/suspend.h>
 
 #include "power.h"
 
 
 static int noresume = 0;
-char resume_file[256] = CONFIG_PM_STD_PARTITION;
+static char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
 
@@ -45,17 +48,18 @@ enum {
 
 static int hibernation_mode = HIBERNATION_SHUTDOWN;
 
-static struct hibernation_ops *hibernation_ops;
+static struct platform_hibernation_ops *hibernation_ops;
 
 /**
  * hibernation_set_ops - set the global hibernate operations
  * @ops: the hibernation operations to use in subsequent hibernation transitions
  */
 
-void hibernation_set_ops(struct hibernation_ops *ops)
+void hibernation_set_ops(struct platform_hibernation_ops *ops)
 {
-       if (ops && !(ops->prepare && ops->enter && ops->finish
-           && ops->pre_restore && ops->restore_cleanup)) {
+       if (ops && !(ops->begin && ops->end &&  ops->pre_snapshot
+           && ops->prepare && ops->finish && ops->enter && ops->pre_restore
+           && ops->restore_cleanup)) {
                WARN_ON(1);
                return;
        }
@@ -69,16 +73,85 @@ void hibernation_set_ops(struct hibernation_ops *ops)
        mutex_unlock(&pm_mutex);
 }
 
+static bool entering_platform_hibernation;
+/* Flag is set in hibernation_platform_enter() and cleared if that fails. */
+bool system_entering_hibernation(void)
+{
+       return entering_platform_hibernation;
+}
+EXPORT_SYMBOL(system_entering_hibernation);
+
+#ifdef CONFIG_PM_DEBUG
+static void hibernation_debug_sleep(void)
+{
+       printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
+       mdelay(5000);
+}
+
+static int hibernation_testmode(int mode)
+{
+       if (hibernation_mode != mode)
+               return 0;
+       /* @mode is the selected hibernation mode: do the debug wait. */
+       hibernation_debug_sleep();
+       return 1;
+}
+
+static int hibernation_test(int level)
+{
+       if (pm_test_level != level)
+               return 0;
+       /* @level is the selected PM test level: do the debug wait. */
+       hibernation_debug_sleep();
+       return 1;
+}
+#else /* !CONFIG_PM_DEBUG */
+static int hibernation_testmode(int mode) { return 0; }
+static int hibernation_test(int level) { return 0; }
+#endif /* !CONFIG_PM_DEBUG */
+
+/**
+ *     platform_begin - tell the platform driver that we're starting
+ *     hibernation; returns ->begin()'s result, or 0 if it is not called
+ */
+
+static int platform_begin(int platform_mode)
+{
+       return (platform_mode && hibernation_ops) ?
+               hibernation_ops->begin() : 0;
+}
+
+/**
+ *     platform_end - tell the platform driver that we've entered the
+ *     working state; skipped unless platform mode is on and ops are set
+ */
+
+static void platform_end(int platform_mode)
+{
+       if (platform_mode && hibernation_ops)
+               hibernation_ops->end();
+}
 
 /**
- *     platform_prepare - prepare the machine for hibernation using the
+ *     platform_pre_snapshot - prepare the machine for hibernation using the
  *     platform driver if so configured and return an error code if it fails
  */
 
-static int platform_prepare(int platform_mode)
+static int platform_pre_snapshot(int platform_mode)
 {
        return (platform_mode && hibernation_ops) ?
-               hibernation_ops->prepare() : 0;
+               hibernation_ops->pre_snapshot() : 0;
+}
+
+/**
+ *     platform_leave - let the platform driver, if it provides ->leave(),
+ *     prepare the machine for normal operation (called with interrupts off)
+ */
+
+static void platform_leave(int platform_mode)
+{
+       if (platform_mode && hibernation_ops && hibernation_ops->leave)
+               hibernation_ops->leave();
 }
 
 /**
@@ -118,10 +191,101 @@ static void platform_restore_cleanup(int platform_mode)
 }
 
 /**
+ *     platform_recover - recover the platform from a failure to suspend
+ *     devices (->recover() may be unset, so it is checked before the call).
+ */
+
+static void platform_recover(int platform_mode)
+{
+       if (platform_mode && hibernation_ops && hibernation_ops->recover)
+               hibernation_ops->recover();
+}
+
+/**
+ *     create_image - freeze devices that need to be frozen with interrupts
+ *     off, create the hibernation image and thaw those devices.  Control
+ *     reappears in this routine after a restore.
+ */
+
+static int create_image(int platform_mode)
+{
+       int error;
+
+       error = arch_prepare_suspend();
+       if (error)
+               return error;
+
+       /* At this point, device_suspend() has been called, but *not*
+        * device_power_down(). We *must* call device_power_down() now.
+        * Otherwise, drivers for some devices (e.g. interrupt controllers)
+        * become desynchronized with the actual state of the hardware
+        * at resume time, and evil weirdness ensues.
+        */
+       error = device_power_down(PMSG_FREEZE);
+       if (error) {
+               printk(KERN_ERR "PM: Some devices failed to power down, "
+                       "aborting hibernation\n");
+               return error;
+       }
+       /* From here on a matching debug test point bails out with error == 0. */
+       error = platform_pre_snapshot(platform_mode);
+       if (error || hibernation_test(TEST_PLATFORM))
+               goto Platform_finish;
+
+       error = disable_nonboot_cpus();
+       if (error || hibernation_test(TEST_CPUS)
+           || hibernation_testmode(HIBERNATION_TEST))
+               goto Enable_cpus;
+
+       local_irq_disable();
+
+       error = sysdev_suspend(PMSG_FREEZE);
+       if (error) {
+               printk(KERN_ERR "PM: Some system devices failed to power down, "
+                       "aborting hibernation\n");
+               goto Enable_irqs;
+       }
+
+       if (hibernation_test(TEST_CORE))
+               goto Power_up;
+
+       in_suspend = 1;
+       save_processor_state();
+       error = swsusp_arch_suspend();
+       if (error)
+               printk(KERN_ERR "PM: Error %d creating hibernation image\n",
+                       error);
+       /* Restore control flow magically appears here */
+       restore_processor_state();
+       if (!in_suspend)
+               platform_leave(platform_mode);
+
+ Power_up:
+       sysdev_resume();
+       /* NOTE:  device_power_up() is just a resume() for devices
+        * that suspended with irqs off ... no overall powerup.
+        */
+
+ Enable_irqs:
+       local_irq_enable();
+
+ Enable_cpus:
+       enable_nonboot_cpus();
+
+ Platform_finish:
+       platform_finish(platform_mode);
+       /* in_suspend set: RECOVER on error, else THAW; clear: RESTORE. */
+       device_power_up(in_suspend ?
+               (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+
+       return error;
+}
+
+/**
  *     hibernation_snapshot - quiesce devices and create the hibernation
  *     snapshot image.
  *     @platform_mode - if set, use the platform driver, if available, to
- *                      prepare the platform frimware for the power transition.
+ *                      prepare the platform firmware for the power transition.
  *
  *     Must be called with pm_mutex held
  */
@@ -130,37 +294,108 @@ int hibernation_snapshot(int platform_mode)
 {
        int error;
 
+       error = platform_begin(platform_mode);
+       if (error)
+               return error;
+
        /* Free memory before shutting down devices. */
        error = swsusp_shrink_memory();
        if (error)
-               return error;
+               goto Close;
 
        suspend_console();
        error = device_suspend(PMSG_FREEZE);
        if (error)
-               goto Resume_console;
+               goto Recover_platform;
 
-       error = platform_prepare(platform_mode);
+       if (hibernation_test(TEST_DEVICES))
+               goto Recover_platform;
+
+       error = create_image(platform_mode);
+       /* Control returns here after successful restore */
+
+ Resume_devices:
+       device_resume(in_suspend ?
+               (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+       resume_console();
+ Close:
+       platform_end(platform_mode);
+       return error;
+
+ Recover_platform:
+       platform_recover(platform_mode);
+       goto Resume_devices;
+}
+
+/**
+ *     resume_target_kernel - prepare devices that need to be suspended with
+ *     interrupts off, restore the contents of highmem that have not been
+ *     restored yet from the image and run the low level code that will restore
+ *     the remaining contents of memory and switch to the just restored target
+ *     kernel.
+ */
+
+static int resume_target_kernel(bool platform_mode)
+{
+       int error;
+
+       error = device_power_down(PMSG_QUIESCE);
+       if (error) {
+               printk(KERN_ERR "PM: Some devices failed to power down, "
+                       "aborting resume\n");
+               return error;
+       }
+
+       error = platform_pre_restore(platform_mode);
        if (error)
-               goto Resume_devices;
+               goto Cleanup;
 
        error = disable_nonboot_cpus();
+       if (error)
+               goto Enable_cpus;
+
+       local_irq_disable();
+
+       error = sysdev_suspend(PMSG_QUIESCE);
+       if (error)
+               goto Enable_irqs;
+
+       /* We'll ignore saved state, but this gets preempt count (etc) right */
+       save_processor_state();
+       error = restore_highmem();
        if (!error) {
-               if (hibernation_mode != HIBERNATION_TEST) {
-                       in_suspend = 1;
-                       error = swsusp_suspend();
-                       /* Control returns here after successful restore */
-               } else {
-                       printk("swsusp debug: Waiting for 5 seconds.\n");
-                       mdelay(5000);
-               }
+               error = swsusp_arch_resume();
+               /*
+                * The code below is only ever reached in case of a failure.
+                * Otherwise execution continues at place where
+                * swsusp_arch_suspend() was called
+                */
+               BUG_ON(!error);
+               /* This call to restore_highmem() undoes the previous one */
+               restore_highmem();
        }
+       /*
+        * The only reason why swsusp_arch_resume() can fail is memory being
+        * very tight, so we have to free it as soon as we can to avoid
+        * subsequent failures
+        */
+       swsusp_free();
+       restore_processor_state();
+       touch_softlockup_watchdog();
+
+       sysdev_resume();
+
+ Enable_irqs:
+       local_irq_enable();
+
+ Enable_cpus:
        enable_nonboot_cpus();
- Resume_devices:
-       platform_finish(platform_mode);
-       device_resume();
- Resume_console:
-       resume_console();
+
+ Cleanup:
+       platform_restore_cleanup(platform_mode);
+
+       device_power_up(PMSG_RECOVER);
+
        return error;
 }
 
@@ -168,7 +403,7 @@ int hibernation_snapshot(int platform_mode)
  *     hibernation_restore - quiesce devices and restore the hibernation
  *     snapshot image.  If successful, control returns in hibernation_snaphot()
  *     @platform_mode - if set, use the platform driver, if available, to
- *                      prepare the platform frimware for the transition.
+ *                      prepare the platform firmware for the transition.
  *
  *     Must be called with pm_mutex held
  */
@@ -179,20 +414,11 @@ int hibernation_restore(int platform_mode)
 
        pm_prepare_console();
        suspend_console();
-       error = device_suspend(PMSG_PRETHAW);
-       if (error)
-               goto Finish;
-
-       error = platform_pre_restore(platform_mode);
+       error = device_suspend(PMSG_QUIESCE);
        if (!error) {
-               error = disable_nonboot_cpus();
-               if (!error)
-                       error = swsusp_resume();
-               enable_nonboot_cpus();
+               error = resume_target_kernel(platform_mode);
+               device_resume(PMSG_RECOVER);
        }
-       platform_restore_cleanup(platform_mode);
-       device_resume();
- Finish:
        resume_console();
        pm_restore_console();
        return error;
@@ -207,20 +433,62 @@ int hibernation_platform_enter(void)
 {
        int error;
 
-       if (hibernation_ops) {
-               kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
-               /*
-                * We have cancelled the power transition by running
-                * hibernation_ops->finish() before saving the image, so we
-                * should let the firmware know that we're going to enter the
-                * sleep state after all
-                */
-               error = hibernation_ops->prepare();
-               if (!error)
-                       error = hibernation_ops->enter();
-       } else {
-               error = -ENOSYS;
+       if (!hibernation_ops)
+               return -ENOSYS;
+
+       /*
+        * We have cancelled the power transition by running
+        * hibernation_ops->finish() before saving the image, so we should let
+        * the firmware know that we're going to enter the sleep state after all
+        */
+       error = hibernation_ops->begin();
+       if (error)
+               goto Close;
+
+       entering_platform_hibernation = true;
+       suspend_console();
+       error = device_suspend(PMSG_HIBERNATE);
+       if (error) {
+               if (hibernation_ops->recover)
+                       hibernation_ops->recover();
+               goto Resume_devices;
        }
+
+       error = device_power_down(PMSG_HIBERNATE);
+       if (error)
+               goto Resume_devices;
+
+       error = hibernation_ops->prepare();
+       if (error)
+               goto Platofrm_finish;
+
+       error = disable_nonboot_cpus();
+       if (error)
+               goto Platofrm_finish;
+
+       local_irq_disable();
+       sysdev_suspend(PMSG_HIBERNATE);
+       hibernation_ops->enter();
+       /* We should never get here */
+       while (1);
+
+       /*
+        * We don't need to reenable the nonboot CPUs or resume consoles, since
+        * the system is going to be halted anyway.
+        */
+ Platofrm_finish:
+       hibernation_ops->finish();
+
+       device_power_up(PMSG_RESTORE);
+
+ Resume_devices:
+       entering_platform_hibernation = false;
+       device_resume(PMSG_RESTORE);
+       resume_console();
+
+ Close:
+       hibernation_ops->end();
+
        return error;
 }
 
@@ -237,38 +505,31 @@ static void power_down(void)
        case HIBERNATION_TEST:
        case HIBERNATION_TESTPROC:
                break;
-       case HIBERNATION_SHUTDOWN:
-               kernel_power_off();
-               break;
        case HIBERNATION_REBOOT:
                kernel_restart(NULL);
                break;
        case HIBERNATION_PLATFORM:
                hibernation_platform_enter();
+       case HIBERNATION_SHUTDOWN: /* PLATFORM falls through here on failure */
+               kernel_power_off();
+               break;
        }
        kernel_halt();
        /*
         * Valid image is on the disk, if we continue we risk serious data
         * corruption after resume.
         */
-       printk(KERN_CRIT "Please power me down manually\n");
+       printk(KERN_CRIT "PM: Please power down manually\n");
        while(1);
 }
 
-static void unprepare_processes(void)
-{
-       thaw_processes();
-       pm_restore_console();
-}
-
 static int prepare_processes(void)
 {
        int error = 0;
 
-       pm_prepare_console();
        if (freeze_processes()) {
                error = -EBUSY;
-               unprepare_processes();
+               thaw_processes();
        }
        return error;
 }
@@ -288,24 +549,34 @@ int hibernate(void)
                goto Unlock;
        }
 
+       pm_prepare_console();
        error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
        if (error)
                goto Exit;
 
+       error = usermodehelper_disable();
+       if (error)
+               goto Exit;
+
        /* Allocate memory management structures */
        error = create_basic_memory_bitmaps();
        if (error)
                goto Exit;
 
+       printk(KERN_INFO "PM: Syncing filesystems ... ");
+       sys_sync();
+       printk("done.\n");
+
        error = prepare_processes();
        if (error)
                goto Finish;
 
-       if (hibernation_mode == HIBERNATION_TESTPROC) {
-               printk("swsusp debug: Waiting for 5 seconds.\n");
-               mdelay(5000);
+       if (hibernation_test(TEST_FREEZER))
                goto Thaw;
-       }
+
+       if (hibernation_testmode(HIBERNATION_TESTPROC))
+               goto Thaw;
+
        error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
        if (in_suspend && !error) {
                unsigned int flags = 0;
@@ -322,11 +593,13 @@ int hibernate(void)
                swsusp_free();
        }
  Thaw:
-       unprepare_processes();
+       thaw_processes();
  Finish:
        free_basic_memory_bitmaps();
+       usermodehelper_enable();
  Exit:
        pm_notifier_call_chain(PM_POST_HIBERNATION);
+       pm_restore_console();
        atomic_inc(&snapshot_device_available);
  Unlock:
        mutex_unlock(&pm_mutex);
@@ -351,29 +624,61 @@ static int software_resume(void)
        int error;
        unsigned int flags;
 
-       mutex_lock(&pm_mutex);
-       if (!swsusp_resume_device) {
-               if (!strlen(resume_file)) {
-                       mutex_unlock(&pm_mutex);
-                       return -ENOENT;
-               }
-               swsusp_resume_device = name_to_dev_t(resume_file);
-               pr_debug("swsusp: Resume From Partition %s\n", resume_file);
-       } else {
-               pr_debug("swsusp: Resume From Partition %d:%d\n",
-                        MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
+       /*
+        * If the user said "noresume".. bail out early.
+        */
+       if (noresume)
+               return 0;
+
+       /*
+        * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
+        * is configured into the kernel. Since the regular hibernate
+        * trigger path is via sysfs which takes a buffer mutex before
+        * calling hibernate functions (which take pm_mutex) this can
+        * cause lockdep to complain about a possible ABBA deadlock
+        * which cannot happen since we're in the boot code here and
+        * sysfs can't be invoked yet. Therefore, we use a subclass
+        * here to avoid lockdep complaining.
+        */
+       mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+
+       if (swsusp_resume_device)
+               goto Check_image;
+
+       if (!strlen(resume_file)) {
+               error = -ENOENT;
+               goto Unlock;
        }
 
-       if (noresume) {
-               /**
-                * FIXME: If noresume is specified, we need to find the partition
-                * and reset it back to normal swap space.
+       pr_debug("PM: Checking image partition %s\n", resume_file);
+
+       /* Check if the device is there */
+       swsusp_resume_device = name_to_dev_t(resume_file);
+       if (!swsusp_resume_device) {
+               /*
+                * Some device discovery might still be in progress; we need
+                * to wait for this to finish.
                 */
-               mutex_unlock(&pm_mutex);
-               return 0;
+               wait_for_device_probe();
+               /*
+                * We can't depend on SCSI devices being available after loading
+                * one of their modules until scsi_complete_async_scans() is
+                * called and the resume device usually is a SCSI one.
+                */
+               scsi_complete_async_scans();
+
+               swsusp_resume_device = name_to_dev_t(resume_file);
+               if (!swsusp_resume_device) {
+                       error = -ENODEV;
+                       goto Unlock;
+               }
        }
 
-       pr_debug("PM: Checking swsusp image.\n");
+ Check_image:
+       pr_debug("PM: Resume from partition %d:%d\n",
+               MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
+
+       pr_debug("PM: Checking hibernation image.\n");
        error = swsusp_check();
        if (error)
                goto Unlock;
@@ -384,6 +689,15 @@ static int software_resume(void)
                goto Unlock;
        }
 
+       pm_prepare_console();
+       error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+       if (error)
+               goto Finish;
+
+       error = usermodehelper_disable();
+       if (error)
+               goto Finish;
+
        error = create_basic_memory_bitmaps();
        if (error)
                goto Finish;
@@ -391,11 +705,11 @@ static int software_resume(void)
        pr_debug("PM: Preparing processes for restore.\n");
        error = prepare_processes();
        if (error) {
-               swsusp_close();
+               swsusp_close(FMODE_READ);
                goto Done;
        }
 
-       pr_debug("PM: Reading swsusp image.\n");
+       pr_debug("PM: Reading hibernation image.\n");
 
        error = swsusp_read(&flags);
        if (!error)
@@ -403,10 +717,13 @@ static int software_resume(void)
 
        printk(KERN_ERR "PM: Restore failed, recovering.\n");
        swsusp_free();
-       unprepare_processes();
+       thaw_processes();
  Done:
        free_basic_memory_bitmaps();
+       usermodehelper_enable();
  Finish:
+       pm_notifier_call_chain(PM_POST_RESTORE);
+       pm_restore_console();
        atomic_inc(&snapshot_device_available);
        /* For success case, the suspend path will release the lock */
  Unlock:
@@ -452,7 +769,8 @@ static const char * const hibernation_modes[] = {
  *     supports it (as determined by having hibernation_ops).
  */
 
-static ssize_t disk_show(struct kset *kset, char *buf)
+static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
+                        char *buf)
 {
        int i;
        char *start = buf;
@@ -482,7 +800,8 @@ static ssize_t disk_show(struct kset *kset, char *buf)
 }
 
 
-static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
+static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
+                         const char *buf, size_t n)
 {
        int error = 0;
        int i;
@@ -519,7 +838,7 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
                error = -EINVAL;
 
        if (!error)
-               pr_debug("PM: suspend-to-disk mode set to '%s'\n",
+               pr_debug("PM: Hibernation mode set to '%s'\n",
                         hibernation_modes[mode]);
        mutex_unlock(&pm_mutex);
        return error ? error : n;
@@ -527,13 +846,15 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
 
 power_attr(disk);
 
-static ssize_t resume_show(struct kset *kset, char *buf)
+static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
+                          char *buf)
 {
        return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
                       MINOR(swsusp_resume_device));
 }
 
-static ssize_t resume_store(struct kset *kset, const char *buf, size_t n)
+static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
+                           const char *buf, size_t n)
 {
        unsigned int maj, min;
        dev_t res;
@@ -549,7 +870,7 @@ static ssize_t resume_store(struct kset *kset, const char *buf, size_t n)
        mutex_lock(&pm_mutex);
        swsusp_resume_device = res;
        mutex_unlock(&pm_mutex);
-       printk("Attempting manual resume\n");
+       printk(KERN_INFO "PM: Starting manual resume from disk\n");
        noresume = 0;
        software_resume();
        ret = n;
@@ -559,12 +880,14 @@ static ssize_t resume_store(struct kset *kset, const char *buf, size_t n)
 
 power_attr(resume);
 
-static ssize_t image_size_show(struct kset *kset, char *buf)
+static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr,
+                              char *buf)
 {
        return sprintf(buf, "%lu\n", image_size);
 }
 
-static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n)
+static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr,
+                               const char *buf, size_t n)
 {
        unsigned long size;
 
@@ -593,7 +916,7 @@ static struct attribute_group attr_group = {
 
 static int __init pm_disk_init(void)
 {
-       return sysfs_create_group(&power_subsys.kobj, &attr_group);
+       return sysfs_create_group(power_kobj, &attr_group);
 }
 
 core_initcall(pm_disk_init);