Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Sep 2009 03:03:54 +0000 (20:03 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Sep 2009 03:03:54 +0000 (20:03 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/suspend-2.6: (23 commits)
  at_hdmac: Rework suspend_late()/resume_early()
  PM: Reset transition_started at dpm_resume_noirq
  PM: Update kerneldoc comments in drivers/base/power/main.c
  PM: Add convenience macro to make switching to dev_pm_ops less error-prone
  hp-wmi: Switch driver to dev_pm_ops
  floppy: Switch driver to dev_pm_ops
  PM: Trivial fixes
  PM / Hibernate / Memory hotplug: Always use for_each_populated_zone()
  PM/Hibernate: Do not try to allocate too much memory too hard (rev. 2)
  PM/Hibernate: Do not release preallocated memory unnecessarily (rev. 2)
  PM/Hibernate: Rework shrinking of memory
  PM: Fix typo in label name s/Platofrm_finish/Platform_finish/
  PM: Run-time PM platform device bus support
  PM: Introduce core framework for run-time PM of I/O devices (rev. 17)
  Driver Core: Make PM operations a const pointer
  PM: Remove platform device suspend_late()/resume_early() V2
  USB: Rework musb suspend()/resume_early()
  I2C: Rework i2c-s3c2410 suspend_late()/resume() V2
  I2C: Rework i2c-pxa suspend_late()/resume_early()
  DMA: Rework txx9dmac suspend_late()/resume_early()
  ...

Fix trivial conflict in drivers/base/platform.c (due to same
constification patch being merged in both sides, along with some other
PM work in the PM branch)

32 files changed:
Documentation/power/runtime_pm.txt [new file with mode: 0644]
arch/arm/include/asm/device.h
arch/arm/plat-omap/debug-leds.c
arch/arm/plat-omap/gpio.c
arch/ia64/include/asm/device.h
arch/microblaze/include/asm/device.h
arch/powerpc/include/asm/device.h
arch/sparc/include/asm/device.h
arch/x86/include/asm/device.h
drivers/base/dd.c
drivers/base/platform.c
drivers/base/power/Makefile
drivers/base/power/main.c
drivers/base/power/power.h
drivers/base/power/runtime.c [new file with mode: 0644]
drivers/block/floppy.c
drivers/dma/at_hdmac.c
drivers/dma/dw_dmac.c
drivers/dma/txx9dmac.c
drivers/i2c/busses/i2c-pxa.c
drivers/i2c/busses/i2c-s3c2410.c
drivers/platform/x86/hp-wmi.c
drivers/usb/musb/musb_core.c
include/asm-generic/device.h
include/linux/platform_device.h
include/linux/pm.h
include/linux/pm_runtime.h [new file with mode: 0644]
kernel/power/Kconfig
kernel/power/hibernate.c
kernel/power/main.c
kernel/power/power.h
kernel/power/snapshot.c

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
new file mode 100644 (file)
index 0000000..f49a33b
--- /dev/null
@@ -0,0 +1,378 @@
+Run-time Power Management Framework for I/O Devices
+
+(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+
+1. Introduction
+
+Support for run-time power management (run-time PM) of I/O devices is provided
+at the power management core (PM core) level by means of:
+
+* The power management workqueue pm_wq in which bus types and device drivers can
+  put their PM-related work items.  It is strongly recommended that pm_wq be
+  used for queuing all work items related to run-time PM, because this allows
+  them to be synchronized with system-wide power transitions (suspend to RAM,
+  hibernation and resume from system sleep states).  pm_wq is declared in
+  include/linux/pm_runtime.h and defined in kernel/power/main.c.
+
+* A number of run-time PM fields in the 'power' member of 'struct device' (which
+  is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
+  be used for synchronizing run-time PM operations with one another.
+
+* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in
+  include/linux/pm.h).
+
+* A set of helper functions defined in drivers/base/power/runtime.c that can be
+  used for carrying out run-time PM operations in such a way that the
+  synchronization between them is taken care of by the PM core.  Bus types and
+  device drivers are encouraged to use these functions.
+
+The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM
+fields of 'struct dev_pm_info' and the core helper functions provided for
+run-time PM are described below.
+
+2. Device Run-time PM Callbacks
+
+There are three device run-time PM callbacks defined in 'struct dev_pm_ops':
+
+struct dev_pm_ops {
+       ...
+       int (*runtime_suspend)(struct device *dev);
+       int (*runtime_resume)(struct device *dev);
+       void (*runtime_idle)(struct device *dev);
+       ...
+};
+
+The ->runtime_suspend() callback is executed by the PM core for the bus type of
+the device being suspended.  The bus type's callback is then _entirely_
+_responsible_ for handling the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_suspend() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_suspend()
+callback in a device driver as long as the bus type's ->runtime_suspend() knows
+what to do to handle the device).
+
+  * Once the bus type's ->runtime_suspend() callback has completed successfully
+    for a given device, the PM core regards the device as suspended, which need
+    not mean that the device has been put into a low power state.  It is
+    supposed to mean, however, that the device will not process data and will
+    not communicate with the CPU(s) and RAM until its bus type's
+    ->runtime_resume() callback is executed for it.  The run-time PM status of
+    a device after successful execution of its bus type's ->runtime_suspend()
+    callback is 'suspended'.
+
+  * If the bus type's ->runtime_suspend() callback returns -EBUSY or -EAGAIN,
+    the device's run-time PM status is supposed to be 'active', which means that
+    the device _must_ be fully operational afterwards.
+
+  * If the bus type's ->runtime_suspend() callback returns an error code
+    different from -EBUSY or -EAGAIN, the PM core regards this as a fatal
+    error and will refuse to run the helper functions described in Section 4
+    for the device, until the status of it is directly set either to 'active'
+    or to 'suspended' (the PM core provides special helper functions for this
+    purpose).
+
+In particular, if the driver requires remote wakeup capability for proper
+functioning and device_may_wakeup() returns 'false' for the device, then
+->runtime_suspend() should return -EBUSY.  On the other hand, if
+device_may_wakeup() returns 'true' for the device and the device is put
+into a low power state during the execution of its bus type's
+->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism
+allowing the device to request a change of its power state, such as PCI PME)
+will be enabled for the device.  Generally, remote wake-up should be enabled
+for all input devices put into a low power state at run time.
+
+The ->runtime_resume() callback is executed by the PM core for the bus type of
+the device being woken up.  The bus type's callback is then _entirely_
+_responsible_ for handling the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_resume() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_resume()
+callback in a device driver as long as the bus type's ->runtime_resume() knows
+what to do to handle the device).
+
+  * Once the bus type's ->runtime_resume() callback has completed successfully,
+    the PM core regards the device as fully operational, which means that the
+    device _must_ be able to complete I/O operations as needed.  The run-time
+    PM status of the device is then 'active'.
+
+  * If the bus type's ->runtime_resume() callback returns an error code, the PM
+    core regards this as a fatal error and will refuse to run the helper
+    functions described in Section 4 for the device, until its status is
+    directly set either to 'active' or to 'suspended' (the PM core provides
+    special helper functions for this purpose).
+
+The ->runtime_idle() callback is executed by the PM core for the bus type of
+given device whenever the device appears to be idle, which is indicated to the
+PM core by two counters, the device's usage counter and the counter of 'active'
+children of the device.
+
+  * If any of these counters is decreased using a helper function provided by
+    the PM core and it turns out to be equal to zero, the other counter is
+    checked.  If that counter also is equal to zero, the PM core executes the
+    device bus type's ->runtime_idle() callback (with the device as an
+    argument).
+
+The action performed by a bus type's ->runtime_idle() callback is totally
+dependent on the bus type in question, but the expected and recommended action
+is to check if the device can be suspended (i.e. if all of the conditions
+necessary for suspending the device are satisfied) and to queue up a suspend
+request for the device in that case.
+
+The helper functions provided by the PM core, described in Section 4, guarantee
+that the following constraints are met with respect to the bus type's run-time
+PM callbacks:
+
+(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
+    ->runtime_suspend() in parallel with ->runtime_resume() or with another
+    instance of ->runtime_suspend() for the same device) with the exception that
+    ->runtime_suspend() or ->runtime_resume() can be executed in parallel with
+    ->runtime_idle() (although ->runtime_idle() will not be started while any
+    of the other callbacks is being executed for the same device).
+
+(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
+    devices (i.e. the PM core will only execute ->runtime_idle() or
+    ->runtime_suspend() for the devices the run-time PM status of which is
+    'active').
+
+(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
+    the usage counter of which is equal to zero _and_ either the counter of
+    'active' children of which is equal to zero, or the 'power.ignore_children'
+    flag of which is set.
+
+(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the
+    PM core will only execute ->runtime_resume() for the devices the run-time
+    PM status of which is 'suspended').
+
+Additionally, the helper functions provided by the PM core obey the following
+rules:
+
+  * If ->runtime_suspend() is about to be executed or there's a pending request
+    to execute it, ->runtime_idle() will not be executed for the same device.
+
+  * A request to execute or to schedule the execution of ->runtime_suspend()
+    will cancel any pending requests to execute ->runtime_idle() for the same
+    device.
+
+  * If ->runtime_resume() is about to be executed or there's a pending request
+    to execute it, the other callbacks will not be executed for the same device.
+
+  * A request to execute ->runtime_resume() will cancel any pending or
+    scheduled requests to execute the other callbacks for the same device.
+
+3. Run-time PM Device Fields
+
+The following device run-time PM fields are present in 'struct dev_pm_info', as
+defined in include/linux/pm.h:
+
+  struct timer_list suspend_timer;
+    - timer used for scheduling (delayed) suspend request
+
+  unsigned long timer_expires;
+    - timer expiration time, in jiffies (if this is different from zero, the
+      timer is running and will expire at that time, otherwise the timer is not
+      running)
+
+  struct work_struct work;
+    - work structure used for queuing up requests (i.e. work items in pm_wq)
+
+  wait_queue_head_t wait_queue;
+    - wait queue used if any of the helper functions needs to wait for another
+      one to complete
+
+  spinlock_t lock;
+    - lock used for synchronisation
+
+  atomic_t usage_count;
+    - the usage counter of the device
+
+  atomic_t child_count;
+    - the count of 'active' children of the device
+
+  unsigned int ignore_children;
+    - if set, the value of child_count is ignored (but still updated)
+
+  unsigned int disable_depth;
+    - used for disabling the helper functions (they work normally if this is
+      equal to zero); the initial value of it is 1 (i.e. run-time PM is
+      initially disabled for all devices)
+
+  unsigned int runtime_error;
+    - if set, there was a fatal error (one of the callbacks returned error code
+      as described in Section 2), so the helper functions will not work until
+      this flag is cleared; this is the error code returned by the failing
+      callback
+
+  unsigned int idle_notification;
+    - if set, ->runtime_idle() is being executed
+
+  unsigned int request_pending;
+    - if set, there's a pending request (i.e. a work item queued up into pm_wq)
+
+  enum rpm_request request;
+    - type of request that's pending (valid if request_pending is set)
+
+  unsigned int deferred_resume;
+    - set if ->runtime_resume() is about to be run while ->runtime_suspend() is
+      being executed for that device and it is not practical to wait for the
+      suspend to complete; means "start a resume as soon as you've suspended"
+
+  enum rpm_status runtime_status;
+    - the run-time PM status of the device; this field's initial value is
+      RPM_SUSPENDED, which means that each device is initially regarded by the
+      PM core as 'suspended', regardless of its real hardware status
+
+All of the above fields are members of the 'power' member of 'struct device'.
+
+4. Run-time PM Device Helper Functions
+
+The following run-time PM helper functions are defined in
+drivers/base/power/runtime.c and include/linux/pm_runtime.h:
+
+  void pm_runtime_init(struct device *dev);
+    - initialize the device run-time PM fields in 'struct dev_pm_info'
+
+  void pm_runtime_remove(struct device *dev);
+    - make sure that the run-time PM of the device will be disabled after
+      removing the device from device hierarchy
+
+  int pm_runtime_idle(struct device *dev);
+    - execute ->runtime_idle() for the device's bus type; returns 0 on success
+      or error code on failure, where -EINPROGRESS means that ->runtime_idle()
+      is already being executed
+
+  int pm_runtime_suspend(struct device *dev);
+    - execute ->runtime_suspend() for the device's bus type; returns 0 on
+      success, 1 if the device's run-time PM status was already 'suspended', or
+      error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
+      to suspend the device again in future
+
+  int pm_runtime_resume(struct device *dev);
+    - execute ->runtime_resume() for the device's bus type; returns 0 on
+      success, 1 if the device's run-time PM status was already 'active' or
+      error code on failure, where -EAGAIN means it may be safe to attempt to
+      resume the device again in future, but 'power.runtime_error' should be
+      checked additionally
+
+  int pm_request_idle(struct device *dev);
+    - submit a request to execute ->runtime_idle() for the device's bus type
+      (the request is represented by a work item in pm_wq); returns 0 on success
+      or error code if the request has not been queued up
+
+  int pm_schedule_suspend(struct device *dev, unsigned int delay);
+    - schedule the execution of ->runtime_suspend() for the device's bus type
+      in future, where 'delay' is the time to wait before queuing up a suspend
+      work item in pm_wq, in milliseconds (if 'delay' is zero, the work item is
+      queued up immediately); returns 0 on success, 1 if the device's PM
+      run-time status was already 'suspended', or error code if the request
+      hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
+      ->runtime_suspend() is already scheduled and not yet expired, the new
+      value of 'delay' will be used as the time to wait
+
+  int pm_request_resume(struct device *dev);
+    - submit a request to execute ->runtime_resume() for the device's bus type
+      (the request is represented by a work item in pm_wq); returns 0 on
+      success, 1 if the device's run-time PM status was already 'active', or
+      error code if the request hasn't been queued up
+
+  void pm_runtime_get_noresume(struct device *dev);
+    - increment the device's usage counter
+
+  int pm_runtime_get(struct device *dev);
+    - increment the device's usage counter, run pm_request_resume(dev) and
+      return its result
+
+  int pm_runtime_get_sync(struct device *dev);
+    - increment the device's usage counter, run pm_runtime_resume(dev) and
+      return its result
+
+  void pm_runtime_put_noidle(struct device *dev);
+    - decrement the device's usage counter
+
+  int pm_runtime_put(struct device *dev);
+    - decrement the device's usage counter, run pm_request_idle(dev) and return
+      its result
+
+  int pm_runtime_put_sync(struct device *dev);
+    - decrement the device's usage counter, run pm_runtime_idle(dev) and return
+      its result
+
+  void pm_runtime_enable(struct device *dev);
+    - enable the run-time PM helper functions to run the device bus type's
+      run-time PM callbacks described in Section 2
+
+  int pm_runtime_disable(struct device *dev);
+    - prevent the run-time PM helper functions from running the device bus
+      type's run-time PM callbacks, make sure that all of the pending run-time
+      PM operations on the device are either completed or canceled; returns
+      1 if there was a resume request pending and it was necessary to execute
+      ->runtime_resume() for the device's bus type to satisfy that request,
+      otherwise 0 is returned
+
+  void pm_suspend_ignore_children(struct device *dev, bool enable);
+    - set/unset the power.ignore_children flag of the device
+
+  int pm_runtime_set_active(struct device *dev);
+    - clear the device's 'power.runtime_error' flag, set the device's run-time
+      PM status to 'active' and update its parent's counter of 'active'
+      children as appropriate (it is only valid to use this function if
+      'power.runtime_error' is set or 'power.disable_depth' is greater than
+      zero); it will fail and return error code if the device has a parent
+      which is not active and the 'power.ignore_children' flag of which is unset
+
+  void pm_runtime_set_suspended(struct device *dev);
+    - clear the device's 'power.runtime_error' flag, set the device's run-time
+      PM status to 'suspended' and update its parent's counter of 'active'
+      children as appropriate (it is only valid to use this function if
+      'power.runtime_error' is set or 'power.disable_depth' is greater than
+      zero)
+
+It is safe to execute the following helper functions from interrupt context:
+
+pm_request_idle()
+pm_schedule_suspend()
+pm_request_resume()
+pm_runtime_get_noresume()
+pm_runtime_get()
+pm_runtime_put_noidle()
+pm_runtime_put()
+pm_suspend_ignore_children()
+pm_runtime_set_active()
+pm_runtime_set_suspended()
+pm_runtime_enable()
+
+5. Run-time PM Initialization, Device Probing and Removal
+
+Initially, the run-time PM is disabled for all devices, which means that the
+majority of the run-time PM helper functions described in Section 4 will return
+-EAGAIN until pm_runtime_enable() is called for the device.
+
+In addition to that, the initial run-time PM status of all devices is
+'suspended', but it need not reflect the actual physical state of the device.
+Thus, if the device is initially active (i.e. it is able to process I/O), its
+run-time PM status must be changed to 'active', with the help of
+pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
+
+However, if the device has a parent and the parent's run-time PM is enabled,
+calling pm_runtime_set_active() for the device will affect the parent, unless
+the parent's 'power.ignore_children' flag is set.  Namely, in that case the
+parent won't be able to suspend at run time, using the PM core's helper
+functions, as long as the child's status is 'active', even if the child's
+run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
+the child yet or pm_runtime_disable() has been called for it).  For this reason,
+once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
+should be called for it too as soon as reasonably possible or its run-time PM
+status should be changed back to 'suspended' with the help of
+pm_runtime_set_suspended().
+
+If the default initial run-time PM status of the device (i.e. 'suspended')
+reflects the actual state of the device, its bus type's or its driver's
+->probe() callback will likely need to wake it up using one of the PM core's
+helper functions described in Section 4.  In that case, pm_runtime_resume()
+should be used.  Of course, for this purpose the device's run-time PM has to be
+enabled earlier by calling pm_runtime_enable().
+
+If the device bus type's or driver's ->probe() or ->remove() callback runs
+pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
+they will fail returning -EAGAIN, because the device's usage counter is
+incremented by the core before executing ->probe() and ->remove().  Still, it
+may be desirable to suspend the device as soon as ->probe() or ->remove() has
+finished, so the PM core uses pm_runtime_put_sync() to invoke the device bus
+type's ->runtime_idle() callback at that time.
index c61642b..9f390ce 100644 (file)
@@ -12,4 +12,7 @@ struct dev_archdata {
 #endif
 };
 
+struct pdev_archdata {
+};
+
 #endif
index be4eefd..9395898 100644 (file)
@@ -281,24 +281,27 @@ static int /* __init */ fpga_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int fpga_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+static int fpga_suspend_noirq(struct device *dev)
 {
        __raw_writew(~0, &fpga->leds);
        return 0;
 }
 
-static int fpga_resume_early(struct platform_device *pdev)
+static int fpga_resume_noirq(struct device *dev)
 {
        __raw_writew(~hw_led_state, &fpga->leds);
        return 0;
 }
 
+static struct dev_pm_ops fpga_dev_pm_ops = {
+       .suspend_noirq = fpga_suspend_noirq,
+       .resume_noirq = fpga_resume_noirq,
+};
 
 static struct platform_driver led_driver = {
        .driver.name    = "omap_dbg_led",
+       .driver.pm      = &fpga_dev_pm_ops,
        .probe          = fpga_probe,
-       .suspend_late   = fpga_suspend_late,
-       .resume_early   = fpga_resume_early,
 };
 
 static int __init fpga_init(void)
index fd21937..176c86e 100644 (file)
@@ -1418,8 +1418,9 @@ static struct irq_chip mpuio_irq_chip = {
 
 #include <linux/platform_device.h>
 
-static int omap_mpuio_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+static int omap_mpuio_suspend_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct gpio_bank        *bank = platform_get_drvdata(pdev);
        void __iomem            *mask_reg = bank->base + OMAP_MPUIO_GPIO_MASKIT;
        unsigned long           flags;
@@ -1432,8 +1433,9 @@ static int omap_mpuio_suspend_late(struct platform_device *pdev, pm_message_t me
        return 0;
 }
 
-static int omap_mpuio_resume_early(struct platform_device *pdev)
+static int omap_mpuio_resume_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct gpio_bank        *bank = platform_get_drvdata(pdev);
        void __iomem            *mask_reg = bank->base + OMAP_MPUIO_GPIO_MASKIT;
        unsigned long           flags;
@@ -1445,14 +1447,18 @@ static int omap_mpuio_resume_early(struct platform_device *pdev)
        return 0;
 }
 
+static struct dev_pm_ops omap_mpuio_dev_pm_ops = {
+       .suspend_noirq = omap_mpuio_suspend_noirq,
+       .resume_noirq = omap_mpuio_resume_noirq,
+};
+
 /* use platform_driver for this, now that there's no longer any
  * point to sys_device (other than not disturbing old code).
  */
 static struct platform_driver omap_mpuio_driver = {
-       .suspend_late   = omap_mpuio_suspend_late,
-       .resume_early   = omap_mpuio_resume_early,
        .driver         = {
                .name   = "mpuio",
+               .pm     = &omap_mpuio_dev_pm_ops,
        },
 };
 
index 41ab85d..d66d446 100644 (file)
@@ -15,4 +15,7 @@ struct dev_archdata {
 #endif
 };
 
+struct pdev_archdata {
+};
+
 #endif /* _ASM_IA64_DEVICE_H */
index c042830..30286db 100644 (file)
@@ -16,6 +16,9 @@ struct dev_archdata {
        struct device_node      *of_node;
 };
 
+struct pdev_archdata {
+};
+
 #endif /* _ASM_MICROBLAZE_DEVICE_H */
 
 
index 7d2277c..e3e06e0 100644 (file)
@@ -30,4 +30,7 @@ dev_archdata_get_node(const struct dev_archdata *ad)
        return ad->of_node;
 }
 
+struct pdev_archdata {
+};
+
 #endif /* _ASM_POWERPC_DEVICE_H */
index 3702e08..f3b85b6 100644 (file)
@@ -32,4 +32,7 @@ dev_archdata_get_node(const struct dev_archdata *ad)
        return ad->prom_node;
 }
 
+struct pdev_archdata {
+};
+
 #endif /* _ASM_SPARC_DEVICE_H */
index 4994a20..cee34e9 100644 (file)
@@ -13,4 +13,7 @@ struct dma_map_ops *dma_ops;
 #endif
 };
 
+struct pdev_archdata {
+};
+
 #endif /* _ASM_X86_DEVICE_H */
index f010687..7b34b3a 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/async.h>
+#include <linux/pm_runtime.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -202,7 +203,10 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
        pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
                 drv->bus->name, __func__, dev_name(dev), drv->name);
 
+       pm_runtime_get_noresume(dev);
+       pm_runtime_barrier(dev);
        ret = really_probe(dev, drv);
+       pm_runtime_put_sync(dev);
 
        return ret;
 }
@@ -245,7 +249,9 @@ int device_attach(struct device *dev)
                        ret = 0;
                }
        } else {
+               pm_runtime_get_noresume(dev);
                ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
+               pm_runtime_put_sync(dev);
        }
        up(&dev->sem);
        return ret;
@@ -306,6 +312,9 @@ static void __device_release_driver(struct device *dev)
 
        drv = dev->driver;
        if (drv) {
+               pm_runtime_get_noresume(dev);
+               pm_runtime_barrier(dev);
+
                driver_sysfs_remove(dev);
 
                if (dev->bus)
@@ -324,6 +333,8 @@ static void __device_release_driver(struct device *dev)
                        blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
                                                     BUS_NOTIFY_UNBOUND_DRIVER,
                                                     dev);
+
+               pm_runtime_put_sync(dev);
        }
 }
 
index 0b111e8..0f7d434 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/bootmem.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 
 #include "base.h"
 
@@ -625,30 +626,6 @@ static int platform_legacy_suspend(struct device *dev, pm_message_t mesg)
        return ret;
 }
 
-static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg)
-{
-       struct platform_driver *pdrv = to_platform_driver(dev->driver);
-       struct platform_device *pdev = to_platform_device(dev);
-       int ret = 0;
-
-       if (dev->driver && pdrv->suspend_late)
-               ret = pdrv->suspend_late(pdev, mesg);
-
-       return ret;
-}
-
-static int platform_legacy_resume_early(struct device *dev)
-{
-       struct platform_driver *pdrv = to_platform_driver(dev->driver);
-       struct platform_device *pdev = to_platform_device(dev);
-       int ret = 0;
-
-       if (dev->driver && pdrv->resume_early)
-               ret = pdrv->resume_early(pdev);
-
-       return ret;
-}
-
 static int platform_legacy_resume(struct device *dev)
 {
        struct platform_driver *pdrv = to_platform_driver(dev->driver);
@@ -680,6 +657,13 @@ static void platform_pm_complete(struct device *dev)
                drv->pm->complete(dev);
 }
 
+#else /* !CONFIG_PM_SLEEP */
+
+#define platform_pm_prepare            NULL
+#define platform_pm_complete           NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
 #ifdef CONFIG_SUSPEND
 
 static int platform_pm_suspend(struct device *dev)
@@ -711,8 +695,6 @@ static int platform_pm_suspend_noirq(struct device *dev)
        if (drv->pm) {
                if (drv->pm->suspend_noirq)
                        ret = drv->pm->suspend_noirq(dev);
-       } else {
-               ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND);
        }
 
        return ret;
@@ -747,8 +729,6 @@ static int platform_pm_resume_noirq(struct device *dev)
        if (drv->pm) {
                if (drv->pm->resume_noirq)
                        ret = drv->pm->resume_noirq(dev);
-       } else {
-               ret = platform_legacy_resume_early(dev);
        }
 
        return ret;
@@ -794,8 +774,6 @@ static int platform_pm_freeze_noirq(struct device *dev)
        if (drv->pm) {
                if (drv->pm->freeze_noirq)
                        ret = drv->pm->freeze_noirq(dev);
-       } else {
-               ret = platform_legacy_suspend_late(dev, PMSG_FREEZE);
        }
 
        return ret;
@@ -830,8 +808,6 @@ static int platform_pm_thaw_noirq(struct device *dev)
        if (drv->pm) {
                if (drv->pm->thaw_noirq)
                        ret = drv->pm->thaw_noirq(dev);
-       } else {
-               ret = platform_legacy_resume_early(dev);
        }
 
        return ret;
@@ -866,8 +842,6 @@ static int platform_pm_poweroff_noirq(struct device *dev)
        if (drv->pm) {
                if (drv->pm->poweroff_noirq)
                        ret = drv->pm->poweroff_noirq(dev);
-       } else {
-               ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE);
        }
 
        return ret;
@@ -902,8 +876,6 @@ static int platform_pm_restore_noirq(struct device *dev)
        if (drv->pm) {
                if (drv->pm->restore_noirq)
                        ret = drv->pm->restore_noirq(dev);
-       } else {
-               ret = platform_legacy_resume_early(dev);
        }
 
        return ret;
@@ -922,6 +894,31 @@ static int platform_pm_restore_noirq(struct device *dev)
 
 #endif /* !CONFIG_HIBERNATION */
 
+#ifdef CONFIG_PM_RUNTIME
+
+int __weak platform_pm_runtime_suspend(struct device *dev)
+{
+       return -ENOSYS;
+};
+
+int __weak platform_pm_runtime_resume(struct device *dev)
+{
+       return -ENOSYS;
+};
+
+int __weak platform_pm_runtime_idle(struct device *dev)
+{
+       return -ENOSYS;
+};
+
+#else /* !CONFIG_PM_RUNTIME */
+
+#define platform_pm_runtime_suspend NULL
+#define platform_pm_runtime_resume NULL
+#define platform_pm_runtime_idle NULL
+
+#endif /* !CONFIG_PM_RUNTIME */
+
 static const struct dev_pm_ops platform_dev_pm_ops = {
        .prepare = platform_pm_prepare,
        .complete = platform_pm_complete,
@@ -937,22 +934,17 @@ static const struct dev_pm_ops platform_dev_pm_ops = {
        .thaw_noirq = platform_pm_thaw_noirq,
        .poweroff_noirq = platform_pm_poweroff_noirq,
        .restore_noirq = platform_pm_restore_noirq,
+       .runtime_suspend = platform_pm_runtime_suspend,
+       .runtime_resume = platform_pm_runtime_resume,
+       .runtime_idle = platform_pm_runtime_idle,
 };
 
-#define PLATFORM_PM_OPS_PTR    (&platform_dev_pm_ops)
-
-#else /* !CONFIG_PM_SLEEP */
-
-#define PLATFORM_PM_OPS_PTR    NULL
-
-#endif /* !CONFIG_PM_SLEEP */
-
 struct bus_type platform_bus_type = {
        .name           = "platform",
        .dev_attrs      = platform_dev_attrs,
        .match          = platform_match,
        .uevent         = platform_uevent,
-       .pm             = PLATFORM_PM_OPS_PTR,
+       .pm             = &platform_dev_pm_ops,
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
 
index 911208b..3ce3519 100644 (file)
@@ -1,5 +1,6 @@
 obj-$(CONFIG_PM)       += sysfs.o
 obj-$(CONFIG_PM_SLEEP) += main.o
+obj-$(CONFIG_PM_RUNTIME)       += runtime.o
 obj-$(CONFIG_PM_TRACE_RTC)     += trace.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
index 1b1a786..e0dc407 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/kallsyms.h>
 #include <linux/mutex.h>
 #include <linux/pm.h>
+#include <linux/pm_runtime.h>
 #include <linux/resume-trace.h>
 #include <linux/rwsem.h>
 #include <linux/interrupt.h>
@@ -49,7 +50,17 @@ static DEFINE_MUTEX(dpm_list_mtx);
 static bool transition_started;
 
 /**
- *     device_pm_lock - lock the list of active devices used by the PM core
+ * device_pm_init - Initialize the PM-related part of a device object.
+ * @dev: Device object being initialized.
+ */
+void device_pm_init(struct device *dev)
+{
+       dev->power.status = DPM_ON;     /* initial system-sleep PM status */
+       pm_runtime_init(dev);           /* then initialize the run-time PM part */
+}
+
+/**
+ * device_pm_lock - Lock the list of active devices used by the PM core.
  */
 void device_pm_lock(void)
 {
@@ -57,7 +68,7 @@ void device_pm_lock(void)
 }
 
 /**
- *     device_pm_unlock - unlock the list of active devices used by the PM core
+ * device_pm_unlock - Unlock the list of active devices used by the PM core.
  */
 void device_pm_unlock(void)
 {
@@ -65,8 +76,8 @@ void device_pm_unlock(void)
 }
 
 /**
- *     device_pm_add - add a device to the list of active devices
- *     @dev:   Device to be added to the list
+ * device_pm_add - Add a device to the PM core's list of active devices.
+ * @dev: Device to add to the list.
  */
 void device_pm_add(struct device *dev)
 {
@@ -92,10 +103,8 @@ void device_pm_add(struct device *dev)
 }
 
 /**
- *     device_pm_remove - remove a device from the list of active devices
- *     @dev:   Device to be removed from the list
- *
- *     This function also removes the device's PM-related sysfs attributes.
+ * device_pm_remove - Remove a device from the PM core's list of active devices.
+ * @dev: Device to be removed from the list.
  */
 void device_pm_remove(struct device *dev)
 {
@@ -105,12 +114,13 @@ void device_pm_remove(struct device *dev)
        mutex_lock(&dpm_list_mtx);
        list_del_init(&dev->power.entry);
        mutex_unlock(&dpm_list_mtx);
+       pm_runtime_remove(dev);
 }
 
 /**
- *     device_pm_move_before - move device in dpm_list
- *     @deva:  Device to move in dpm_list
- *     @devb:  Device @deva should come before
+ * device_pm_move_before - Move device in the PM core's list of active devices.
+ * @deva: Device to move in dpm_list.
+ * @devb: Device @deva should come before.
  */
 void device_pm_move_before(struct device *deva, struct device *devb)
 {
@@ -124,9 +134,9 @@ void device_pm_move_before(struct device *deva, struct device *devb)
 }
 
 /**
- *     device_pm_move_after - move device in dpm_list
- *     @deva:  Device to move in dpm_list
- *     @devb:  Device @deva should come after
+ * device_pm_move_after - Move device in the PM core's list of active devices.
+ * @deva: Device to move in dpm_list.
+ * @devb: Device @deva should come after.
  */
 void device_pm_move_after(struct device *deva, struct device *devb)
 {
@@ -140,8 +150,8 @@ void device_pm_move_after(struct device *deva, struct device *devb)
 }
 
 /**
- *     device_pm_move_last - move device to end of dpm_list
- *     @dev:   Device to move in dpm_list
+ * device_pm_move_last - Move device to end of the PM core's list of devices.
+ * @dev: Device to move in dpm_list.
  */
 void device_pm_move_last(struct device *dev)
 {
@@ -152,10 +162,10 @@ void device_pm_move_last(struct device *dev)
 }
 
 /**
- *     pm_op - execute the PM operation appropiate for given PM event
- *     @dev:   Device.
- *     @ops:   PM operations to choose from.
- *     @state: PM transition of the system being carried out.
+ * pm_op - Execute the PM operation appropriate for given PM event.
+ * @dev: Device to handle.
+ * @ops: PM operations to choose from.
+ * @state: PM transition of the system being carried out.
  */
 static int pm_op(struct device *dev,
                 const struct dev_pm_ops *ops,
@@ -213,13 +223,13 @@ static int pm_op(struct device *dev,
 }
 
 /**
- *     pm_noirq_op - execute the PM operation appropiate for given PM event
- *     @dev:   Device.
- *     @ops:   PM operations to choose from.
- *     @state: PM transition of the system being carried out.
+ * pm_noirq_op - Execute the PM operation appropriate for given PM event.
+ * @dev: Device to handle.
+ * @ops: PM operations to choose from.
+ * @state: PM transition of the system being carried out.
  *
- *     The operation is executed with interrupts disabled by the only remaining
- *     functional CPU in the system.
+ * The driver of @dev will not receive interrupts while this function is being
+ * executed.
  */
 static int pm_noirq_op(struct device *dev,
                        const struct dev_pm_ops *ops,
@@ -317,11 +327,12 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *     device_resume_noirq - Power on one device (early resume).
- *     @dev:   Device.
- *     @state: PM transition of the system being carried out.
+ * device_resume_noirq - Execute an "early resume" callback for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
  *
- *     Must be called with interrupts disabled.
+ * The driver of @dev will not receive interrupts while this function is being
+ * executed.
  */
 static int device_resume_noirq(struct device *dev, pm_message_t state)
 {
@@ -343,20 +354,18 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *     dpm_resume_noirq - Power on all regular (non-sysdev) devices.
- *     @state: PM transition of the system being carried out.
- *
- *     Call the "noirq" resume handlers for all devices marked as
- *     DPM_OFF_IRQ and enable device drivers to receive interrupts.
+ * dpm_resume_noirq - Execute "early resume" callbacks for non-sysdev devices.
+ * @state: PM transition of the system being carried out.
  *
- *     Must be called under dpm_list_mtx.  Device drivers should not receive
- *     interrupts while it's being executed.
+ * Call the "noirq" resume handlers for all devices marked as DPM_OFF_IRQ and
+ * enable device drivers to receive interrupts.
  */
 void dpm_resume_noirq(pm_message_t state)
 {
        struct device *dev;
 
        mutex_lock(&dpm_list_mtx);
+       transition_started = false;
        list_for_each_entry(dev, &dpm_list, power.entry)
                if (dev->power.status > DPM_OFF) {
                        int error;
@@ -372,9 +381,9 @@ void dpm_resume_noirq(pm_message_t state)
 EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- *     device_resume - Restore state for one device.
- *     @dev:   Device.
- *     @state: PM transition of the system being carried out.
+ * device_resume - Execute "resume" callbacks for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
  */
 static int device_resume(struct device *dev, pm_message_t state)
 {
@@ -423,11 +432,11 @@ static int device_resume(struct device *dev, pm_message_t state)
 }
 
 /**
- *     dpm_resume - Resume every device.
- *     @state: PM transition of the system being carried out.
+ * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
+ * @state: PM transition of the system being carried out.
  *
- *     Execute the appropriate "resume" callback for all devices the status of
- *     which indicates that they are inactive.
+ * Execute the appropriate "resume" callback for all devices whose status
+ * indicates that they are suspended.
  */
 static void dpm_resume(pm_message_t state)
 {
@@ -435,7 +444,6 @@ static void dpm_resume(pm_message_t state)
 
        INIT_LIST_HEAD(&list);
        mutex_lock(&dpm_list_mtx);
-       transition_started = false;
        while (!list_empty(&dpm_list)) {
                struct device *dev = to_device(dpm_list.next);
 
@@ -464,9 +472,9 @@ static void dpm_resume(pm_message_t state)
 }
 
 /**
- *     device_complete - Complete a PM transition for given device
- *     @dev:   Device.
- *     @state: PM transition of the system being carried out.
+ * device_complete - Complete a PM transition for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
  */
 static void device_complete(struct device *dev, pm_message_t state)
 {
@@ -491,11 +499,11 @@ static void device_complete(struct device *dev, pm_message_t state)
 }
 
 /**
- *     dpm_complete - Complete a PM transition for all devices.
- *     @state: PM transition of the system being carried out.
+ * dpm_complete - Complete a PM transition for all non-sysdev devices.
+ * @state: PM transition of the system being carried out.
  *
- *     Execute the ->complete() callbacks for all devices that are not marked
- *     as DPM_ON.
+ * Execute the ->complete() callbacks for all devices whose PM status is not
+ * DPM_ON (this allows new devices to be registered).
  */
 static void dpm_complete(pm_message_t state)
 {
@@ -512,6 +520,7 @@ static void dpm_complete(pm_message_t state)
                        mutex_unlock(&dpm_list_mtx);
 
                        device_complete(dev, state);
+                       pm_runtime_put_noidle(dev);
 
                        mutex_lock(&dpm_list_mtx);
                }
@@ -524,11 +533,11 @@ static void dpm_complete(pm_message_t state)
 }
 
 /**
- *     dpm_resume_end - Restore state of each device in system.
- *     @state: PM transition of the system being carried out.
+ * dpm_resume_end - Execute "resume" callbacks and complete system transition.
+ * @state: PM transition of the system being carried out.
  *
- *     Resume all the devices, unlock them all, and allow new
- *     devices to be registered once again.
+ * Execute "resume" callbacks for all devices and complete the PM transition of
+ * the system.
  */
 void dpm_resume_end(pm_message_t state)
 {
@@ -542,9 +551,11 @@ EXPORT_SYMBOL_GPL(dpm_resume_end);
 /*------------------------- Suspend routines -------------------------*/
 
 /**
- *     resume_event - return a PM message representing the resume event
- *                    corresponding to given sleep state.
- *     @sleep_state: PM message representing a sleep state.
+ * resume_event - Return a "resume" message for given "suspend" sleep state.
+ * @sleep_state: PM message representing a sleep state.
+ *
+ * Return a PM message representing the resume event corresponding to given
+ * sleep state.
  */
 static pm_message_t resume_event(pm_message_t sleep_state)
 {
@@ -561,11 +572,12 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 }
 
 /**
- *     device_suspend_noirq - Shut down one device (late suspend).
- *     @dev:   Device.
- *     @state: PM transition of the system being carried out.
+ * device_suspend_noirq - Execute a "late suspend" callback for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
  *
- *     This is called with interrupts off and only a single CPU running.
+ * The driver of @dev will not receive interrupts while this function is being
+ * executed.
  */
 static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
@@ -582,13 +594,11 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *     dpm_suspend_noirq - Power down all regular (non-sysdev) devices.
- *     @state: PM transition of the system being carried out.
- *
- *     Prevent device drivers from receiving interrupts and call the "noirq"
- *     suspend handlers.
+ * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
+ * @state: PM transition of the system being carried out.
  *
- *     Must be called under dpm_list_mtx.
+ * Prevent device drivers from receiving interrupts and call the "noirq" suspend
+ * handlers for all non-sysdev devices.
  */
 int dpm_suspend_noirq(pm_message_t state)
 {
@@ -613,9 +623,9 @@ int dpm_suspend_noirq(pm_message_t state)
 EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
 
 /**
- *     device_suspend - Save state of one device.
- *     @dev:   Device.
- *     @state: PM transition of the system being carried out.
+ * device_suspend - Execute "suspend" callbacks for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
  */
 static int device_suspend(struct device *dev, pm_message_t state)
 {
@@ -662,10 +672,8 @@ static int device_suspend(struct device *dev, pm_message_t state)
 }
 
 /**
- *     dpm_suspend - Suspend every device.
- *     @state: PM transition of the system being carried out.
- *
- *     Execute the appropriate "suspend" callbacks for all devices.
+ * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
+ * @state: PM transition of the system being carried out.
  */
 static int dpm_suspend(pm_message_t state)
 {
@@ -699,9 +707,12 @@ static int dpm_suspend(pm_message_t state)
 }
 
 /**
- *     device_prepare - Execute the ->prepare() callback(s) for given device.
- *     @dev:   Device.
- *     @state: PM transition of the system being carried out.
+ * device_prepare - Prepare a device for system power transition.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
+ *
+ * Execute the ->prepare() callback(s) for given device.  No new children of the
+ * device may be registered after this function has returned.
  */
 static int device_prepare(struct device *dev, pm_message_t state)
 {
@@ -737,10 +748,10 @@ static int device_prepare(struct device *dev, pm_message_t state)
 }
 
 /**
- *     dpm_prepare - Prepare all devices for a PM transition.
- *     @state: PM transition of the system being carried out.
+ * dpm_prepare - Prepare all non-sysdev devices for a system PM transition.
+ * @state: PM transition of the system being carried out.
  *
- *     Execute the ->prepare() callback for all devices.
+ * Execute the ->prepare() callback(s) for all devices.
  */
 static int dpm_prepare(pm_message_t state)
 {
@@ -757,7 +768,14 @@ static int dpm_prepare(pm_message_t state)
                dev->power.status = DPM_PREPARING;
                mutex_unlock(&dpm_list_mtx);
 
-               error = device_prepare(dev, state);
+               pm_runtime_get_noresume(dev);
+               if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) {
+                       /* Wake-up requested during system sleep transition. */
+                       pm_runtime_put_noidle(dev);
+                       error = -EBUSY;
+               } else {
+                       error = device_prepare(dev, state);
+               }
 
                mutex_lock(&dpm_list_mtx);
                if (error) {
@@ -784,10 +802,11 @@ static int dpm_prepare(pm_message_t state)
 }
 
 /**
- *     dpm_suspend_start - Save state and stop all devices in system.
- *     @state: PM transition of the system being carried out.
+ * dpm_suspend_start - Prepare devices for PM transition and suspend them.
+ * @state: PM transition of the system being carried out.
  *
- *     Prepare and suspend all devices.
+ * Prepare all non-sysdev devices for system PM transition and execute "suspend"
+ * callbacks for them.
  */
 int dpm_suspend_start(pm_message_t state)
 {
index c7cb4fc..b8fa1aa 100644 (file)
@@ -1,7 +1,14 @@
-static inline void device_pm_init(struct device *dev)
-{
-       dev->power.status = DPM_ON;
-}
+#ifdef CONFIG_PM_RUNTIME
+
+extern void pm_runtime_init(struct device *dev);
+extern void pm_runtime_remove(struct device *dev);
+
+#else /* !CONFIG_PM_RUNTIME */
+
+static inline void pm_runtime_init(struct device *dev) {} /* empty stub */
+static inline void pm_runtime_remove(struct device *dev) {} /* empty stub */
+
+#endif /* !CONFIG_PM_RUNTIME */
 
 #ifdef CONFIG_PM_SLEEP
 
@@ -16,23 +23,33 @@ static inline struct device *to_device(struct list_head *entry)
        return container_of(entry, struct device, power.entry);
 }
 
+extern void device_pm_init(struct device *dev);
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
 extern void device_pm_move_before(struct device *, struct device *);
 extern void device_pm_move_after(struct device *, struct device *);
 extern void device_pm_move_last(struct device *);
 
-#else /* CONFIG_PM_SLEEP */
+#else /* !CONFIG_PM_SLEEP */
+
+static inline void device_pm_init(struct device *dev)
+{
+       pm_runtime_init(dev);   /* this variant only forwards to run-time PM */
+}
+
+static inline void device_pm_remove(struct device *dev)
+{
+       pm_runtime_remove(dev); /* this variant only forwards to run-time PM */
+}
 
 static inline void device_pm_add(struct device *dev) {}
-static inline void device_pm_remove(struct device *dev) {}
 static inline void device_pm_move_before(struct device *deva,
                                         struct device *devb) {}
 static inline void device_pm_move_after(struct device *deva,
                                        struct device *devb) {}
 static inline void device_pm_move_last(struct device *dev) {}
 
-#endif
+#endif /* !CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
new file mode 100644 (file)
index 0000000..38556f6
--- /dev/null
@@ -0,0 +1,1011 @@
+/*
+ * drivers/base/power/runtime.c - Helper functions for device run-time PM
+ *
+ * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/sched.h>
+#include <linux/pm_runtime.h>
+#include <linux/jiffies.h>
+
+static int __pm_runtime_resume(struct device *dev, bool from_wq);
+static int __pm_request_idle(struct device *dev);
+static int __pm_request_resume(struct device *dev);
+
+/**
+ * pm_runtime_deactivate_timer - Stop the suspend timer of a device, if armed.
+ * @dev: Device whose suspend timer is to be stopped.
+ *
+ * A positive dev->power.timer_expires is taken to mean that the timer has been
+ * set up.  In that case the timer is deleted and timer_expires is cleared;
+ * otherwise nothing is touched.
+ */
+static void pm_runtime_deactivate_timer(struct device *dev)
+{
+       /* No expiration time recorded, so there is nothing to cancel. */
+       if (!(dev->power.timer_expires > 0))
+               return;
+
+       del_timer(&dev->power.suspend_timer);
+       dev->power.timer_expires = 0;
+}
+
+/**
+ * pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests.
+ * @dev: Device to handle.
+ *
+ * Only dev->power.request is reset to RPM_REQ_NONE here;
+ * dev->power.request_pending is left as it is, so an already queued work item
+ * is not removed by this function.
+ */
+static void pm_runtime_cancel_pending(struct device *dev)
+{
+       pm_runtime_deactivate_timer(dev);
+       /*
+        * In case there's a request pending, make sure its work function will
+        * return without doing anything.
+        */
+       dev->power.request = RPM_REQ_NONE;
+}
+
+/**
+ * __pm_runtime_idle - Notify device bus type if the device can be suspended.
+ * @dev: Device to notify the bus type about.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ *
+ * The lock is released with spin_unlock_irq() around the bus type's
+ * ->runtime_idle() callback and taken again afterwards.  The value returned by
+ * the callback is ignored.  dev->power.idle_notification is set for the
+ * duration of the notification and waiters on dev->power.wait_queue are woken
+ * up once it has been cleared.
+ *
+ * Return values:
+ *   0            - the notification has been carried out (also when the bus
+ *                  type provides no ->runtime_idle() callback),
+ *   -EINVAL      - dev->power.runtime_error is set,
+ *   -EINPROGRESS - another idle notification is in progress,
+ *   -EAGAIN      - the usage count or disable depth is positive, the device is
+ *                  not RPM_ACTIVE, or a pending request other than an idle
+ *                  notification takes precedence,
+ *   -EBUSY       - pm_children_suspended() reports false.
+ */
+static int __pm_runtime_idle(struct device *dev)
+       __releases(&dev->power.lock) __acquires(&dev->power.lock)
+{
+       int retval = 0;
+
+       dev_dbg(dev, "__pm_runtime_idle()!\n");
+
+       if (dev->power.runtime_error)
+               retval = -EINVAL;
+       else if (dev->power.idle_notification)
+               retval = -EINPROGRESS;
+       else if (atomic_read(&dev->power.usage_count) > 0
+           || dev->power.disable_depth > 0
+           || dev->power.runtime_status != RPM_ACTIVE)
+               retval = -EAGAIN;
+       else if (!pm_children_suspended(dev))
+               retval = -EBUSY;
+       if (retval)
+               goto out;
+
+       if (dev->power.request_pending) {
+               /*
+                * If an idle notification request is pending, cancel it.  Any
+                * other pending request takes precedence over us.
+                */
+               if (dev->power.request == RPM_REQ_IDLE) {
+                       dev->power.request = RPM_REQ_NONE;
+               } else if (dev->power.request != RPM_REQ_NONE) {
+                       retval = -EAGAIN;
+                       goto out;
+               }
+       }
+
+       dev->power.idle_notification = true;
+
+       if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) {
+               spin_unlock_irq(&dev->power.lock);
+
+               dev->bus->pm->runtime_idle(dev);
+
+               spin_lock_irq(&dev->power.lock);
+       }
+
+       dev->power.idle_notification = false;
+       wake_up_all(&dev->power.wait_queue);
+
+ out:
+       dev_dbg(dev, "__pm_runtime_idle() returns %d!\n", retval);
+
+       return retval;
+}
+
+/**
+ * pm_runtime_idle - Notify device bus type if the device can be suspended.
+ * @dev: Device to notify the bus type about.
+ *
+ * Locked wrapper around __pm_runtime_idle(): takes dev->power.lock with
+ * spin_lock_irq(), runs the notification and hands back its return value.
+ * Since spin_unlock_irq() is used, this is presumably not meant to be called
+ * with interrupts disabled -- TODO confirm.
+ */
+int pm_runtime_idle(struct device *dev)
+{
+       int retval;
+
+       spin_lock_irq(&dev->power.lock);
+       retval = __pm_runtime_idle(dev);
+       spin_unlock_irq(&dev->power.lock);
+
+       return retval;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_idle);
+
+/**
+ * __pm_runtime_suspend - Carry out run-time suspend of given device.
+ * @dev: Device to suspend.
+ * @from_wq: If set, the function has been called via pm_wq.
+ *
+ * Check if the device can be suspended and run the ->runtime_suspend() callback
+ * provided by its bus type.  If another suspend has been started earlier, wait
+ * for it to finish.  If an idle notification or suspend request is pending or
+ * scheduled, cancel it.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ *
+ * The lock is dropped around the ->runtime_suspend() callback, while waiting
+ * for a suspend running in parallel, and while an idle notification is being
+ * requested for the parent.
+ *
+ * Return values:
+ *   1            - the device was already RPM_SUSPENDED,
+ *   -EINVAL      - dev->power.runtime_error is set,
+ *   -EAGAIN      - a resume request is pending, the device is RPM_RESUMING,
+ *                  its disable depth or usage count is positive, or a resume
+ *                  deferred during the suspend has just been carried out,
+ *   -EBUSY       - pm_children_suspended() reports false,
+ *   -EINPROGRESS - called via pm_wq while another suspend is in progress,
+ *   -ENOSYS      - the bus type provides no ->runtime_suspend() callback,
+ *   otherwise the value returned by ->runtime_suspend() (0 on success).
+ *
+ * A callback error other than -EAGAIN or -EBUSY stays recorded in
+ * dev->power.runtime_error; for -EAGAIN and -EBUSY the error is cleared and
+ * an idle notification is run instead.
+ */
+int __pm_runtime_suspend(struct device *dev, bool from_wq)
+       __releases(&dev->power.lock) __acquires(&dev->power.lock)
+{
+       struct device *parent = NULL;
+       bool notify = false;
+       int retval = 0;
+
+       dev_dbg(dev, "__pm_runtime_suspend()%s!\n",
+               from_wq ? " from workqueue" : "");
+
+ repeat:
+       if (dev->power.runtime_error) {
+               retval = -EINVAL;
+               goto out;
+       }
+
+       /* Pending resume requests take precedence over us. */
+       if (dev->power.request_pending
+           && dev->power.request == RPM_REQ_RESUME) {
+               retval = -EAGAIN;
+               goto out;
+       }
+
+       /* Other scheduled or pending requests need to be canceled. */
+       pm_runtime_cancel_pending(dev);
+
+       if (dev->power.runtime_status == RPM_SUSPENDED)
+               retval = 1;
+       else if (dev->power.runtime_status == RPM_RESUMING
+           || dev->power.disable_depth > 0
+           || atomic_read(&dev->power.usage_count) > 0)
+               retval = -EAGAIN;
+       else if (!pm_children_suspended(dev))
+               retval = -EBUSY;
+       if (retval)
+               goto out;
+
+       if (dev->power.runtime_status == RPM_SUSPENDING) {
+               DEFINE_WAIT(wait);
+
+               if (from_wq) {
+                       retval = -EINPROGRESS;
+                       goto out;
+               }
+
+               /* Wait for the other suspend running in parallel with us. */
+               for (;;) {
+                       prepare_to_wait(&dev->power.wait_queue, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       if (dev->power.runtime_status != RPM_SUSPENDING)
+                               break;
+
+                       spin_unlock_irq(&dev->power.lock);
+
+                       schedule();
+
+                       spin_lock_irq(&dev->power.lock);
+               }
+               finish_wait(&dev->power.wait_queue, &wait);
+               goto repeat;
+       }
+
+       dev->power.runtime_status = RPM_SUSPENDING;
+
+       if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) {
+               spin_unlock_irq(&dev->power.lock);
+
+               retval = dev->bus->pm->runtime_suspend(dev);
+
+               spin_lock_irq(&dev->power.lock);
+               dev->power.runtime_error = retval;
+       } else {
+               retval = -ENOSYS;
+       }
+
+       if (retval) {
+               dev->power.runtime_status = RPM_ACTIVE;
+               pm_runtime_cancel_pending(dev);
+               dev->power.deferred_resume = false;
+
+               if (retval == -EAGAIN || retval == -EBUSY) {
+                       notify = true;
+                       dev->power.runtime_error = 0;
+               }
+       } else {
+               dev->power.runtime_status = RPM_SUSPENDED;
+
+               if (dev->parent) {
+                       parent = dev->parent;
+                       atomic_add_unless(&parent->power.child_count, -1, 0);
+               }
+       }
+       wake_up_all(&dev->power.wait_queue);
+
+       if (dev->power.deferred_resume) {
+               dev->power.deferred_resume = false;
+               __pm_runtime_resume(dev, false);
+               retval = -EAGAIN;
+               goto out;
+       }
+
+       if (notify)
+               __pm_runtime_idle(dev);
+
+       if (parent && !parent->power.ignore_children) {
+               spin_unlock_irq(&dev->power.lock);
+
+               pm_request_idle(parent);
+
+               spin_lock_irq(&dev->power.lock);
+       }
+
+ out:
+       dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval);
+
+       return retval;
+}
+
+/**
+ * pm_runtime_suspend - Carry out run-time suspend of given device.
+ * @dev: Device to suspend.
+ *
+ * Locked wrapper around __pm_runtime_suspend() with @from_wq unset: takes
+ * dev->power.lock with spin_lock_irq() and hands back the return value of
+ * __pm_runtime_suspend().  Since spin_unlock_irq() is used, this is presumably
+ * not meant to be called with interrupts disabled -- TODO confirm.
+ */
+int pm_runtime_suspend(struct device *dev)
+{
+       int retval;
+
+       spin_lock_irq(&dev->power.lock);
+       retval = __pm_runtime_suspend(dev, false);
+       spin_unlock_irq(&dev->power.lock);
+
+       return retval;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_suspend);
+
+/**
+ * __pm_runtime_resume - Carry out run-time resume of given device.
+ * @dev: Device to resume.
+ * @from_wq: If set, the function has been called via pm_wq.
+ *
+ * Check if the device can be woken up and run the ->runtime_resume() callback
+ * provided by its bus type.  If another resume has been started earlier, wait
+ * for it to finish.  If there's a suspend running in parallel with this
+ * function, wait for it to finish and resume the device.  Cancel any scheduled
+ * or pending requests.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ *
+ * The lock is dropped around the ->runtime_resume() callback, while waiting
+ * for a suspend or resume running in parallel, while the parent is being
+ * handled under its own lock, and while the reference taken on the parent is
+ * released at the end.
+ *
+ * Return values:
+ *   1            - the device was already RPM_ACTIVE,
+ *   -EINVAL      - dev->power.runtime_error is set,
+ *   -EAGAIN      - the disable depth is positive,
+ *   -EINPROGRESS - called via pm_wq while another suspend or resume is in
+ *                  progress (for a suspend, dev->power.deferred_resume is set
+ *                  so that the suspend code resumes the device afterwards),
+ *   -EBUSY       - the parent is not RPM_ACTIVE after the attempt to resume it,
+ *   -ENOSYS      - the bus type provides no ->runtime_resume() callback,
+ *   otherwise the value returned by ->runtime_resume() (0 on success).
+ *
+ * The value returned by the callback is stored in dev->power.runtime_error.
+ */
+int __pm_runtime_resume(struct device *dev, bool from_wq)
+       __releases(&dev->power.lock) __acquires(&dev->power.lock)
+{
+       struct device *parent = NULL;
+       int retval = 0;
+
+       dev_dbg(dev, "__pm_runtime_resume()%s!\n",
+               from_wq ? " from workqueue" : "");
+
+ repeat:
+       if (dev->power.runtime_error) {
+               retval = -EINVAL;
+               goto out;
+       }
+
+       pm_runtime_cancel_pending(dev);
+
+       if (dev->power.runtime_status == RPM_ACTIVE)
+               retval = 1;
+       else if (dev->power.disable_depth > 0)
+               retval = -EAGAIN;
+       if (retval)
+               goto out;
+
+       if (dev->power.runtime_status == RPM_RESUMING
+           || dev->power.runtime_status == RPM_SUSPENDING) {
+               DEFINE_WAIT(wait);
+
+               if (from_wq) {
+                       if (dev->power.runtime_status == RPM_SUSPENDING)
+                               dev->power.deferred_resume = true;
+                       retval = -EINPROGRESS;
+                       goto out;
+               }
+
+               /* Wait for the operation carried out in parallel with us. */
+               for (;;) {
+                       prepare_to_wait(&dev->power.wait_queue, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       if (dev->power.runtime_status != RPM_RESUMING
+                           && dev->power.runtime_status != RPM_SUSPENDING)
+                               break;
+
+                       spin_unlock_irq(&dev->power.lock);
+
+                       schedule();
+
+                       spin_lock_irq(&dev->power.lock);
+               }
+               finish_wait(&dev->power.wait_queue, &wait);
+               goto repeat;
+       }
+
+       if (!parent && dev->parent) {
+               /*
+                * Increment the parent's resume counter and resume it if
+                * necessary.
+                */
+               parent = dev->parent;
+               spin_unlock_irq(&dev->power.lock);
+
+               pm_runtime_get_noresume(parent);
+
+               spin_lock_irq(&parent->power.lock);
+               /*
+                * We can resume if the parent's run-time PM is disabled or it
+                * is set to ignore children.
+                */
+               if (!parent->power.disable_depth
+                   && !parent->power.ignore_children) {
+                       __pm_runtime_resume(parent, false);
+                       if (parent->power.runtime_status != RPM_ACTIVE)
+                               retval = -EBUSY;
+               }
+               spin_unlock_irq(&parent->power.lock);
+
+               spin_lock_irq(&dev->power.lock);
+               if (retval)
+                       goto out;
+               goto repeat;
+       }
+
+       dev->power.runtime_status = RPM_RESUMING;
+
+       if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) {
+               spin_unlock_irq(&dev->power.lock);
+
+               retval = dev->bus->pm->runtime_resume(dev);
+
+               spin_lock_irq(&dev->power.lock);
+               dev->power.runtime_error = retval;
+       } else {
+               retval = -ENOSYS;
+       }
+
+       if (retval) {
+               dev->power.runtime_status = RPM_SUSPENDED;
+               pm_runtime_cancel_pending(dev);
+       } else {
+               dev->power.runtime_status = RPM_ACTIVE;
+               if (parent)
+                       atomic_inc(&parent->power.child_count);
+       }
+       wake_up_all(&dev->power.wait_queue);
+
+       if (!retval)
+               __pm_request_idle(dev);
+
+ out:
+       if (parent) {
+               spin_unlock_irq(&dev->power.lock);
+
+               pm_runtime_put(parent);
+
+               spin_lock_irq(&dev->power.lock);
+       }
+
+       dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval);
+
+       return retval;
+}
+
+/**
+ * pm_runtime_resume - Carry out run-time resume of given device.
+ * @dev: Device to resume.
+ *
+ * Locked wrapper around __pm_runtime_resume() with @from_wq unset: takes
+ * dev->power.lock with spin_lock_irq() and hands back the return value of
+ * __pm_runtime_resume().  Since spin_unlock_irq() is used, this is presumably
+ * not meant to be called with interrupts disabled -- TODO confirm.
+ */
+int pm_runtime_resume(struct device *dev)
+{
+       int retval;
+
+       spin_lock_irq(&dev->power.lock);
+       retval = __pm_runtime_resume(dev, false);
+       spin_unlock_irq(&dev->power.lock);
+
+       return retval;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_resume);
+
+/**
+ * pm_runtime_work - Work function shared by all run-time PM requests.
+ * @work: Work item embedded in the device the request was queued for.
+ *
+ * Look up the device @work belongs to and, under dev->power.lock, consume the
+ * request stored in it (if one is still pending) and run the matching
+ * run-time PM helper with its "from workqueue" argument set.  A request that
+ * has been reset to RPM_REQ_NONE in the meantime is consumed without any
+ * further action.
+ */
+static void pm_runtime_work(struct work_struct *work)
+{
+       struct device *dev = container_of(work, struct device, power.work);
+       enum rpm_request pending;
+
+       spin_lock_irq(&dev->power.lock);
+
+       if (dev->power.request_pending) {
+               /* Take the request over before acting on it. */
+               pending = dev->power.request;
+               dev->power.request = RPM_REQ_NONE;
+               dev->power.request_pending = false;
+
+               if (pending == RPM_REQ_IDLE)
+                       __pm_runtime_idle(dev);
+               else if (pending == RPM_REQ_SUSPEND)
+                       __pm_runtime_suspend(dev, true);
+               else if (pending == RPM_REQ_RESUME)
+                       __pm_runtime_resume(dev, true);
+       }
+
+       spin_unlock_irq(&dev->power.lock);
+}
+
+/**
+ * __pm_request_idle - Queue up an idle notification request for a device.
+ * @dev: Device to handle.
+ *
+ * Verify that the device's run-time PM state allows an idle notification and,
+ * if so, arrange for __pm_runtime_idle() to be run for it from pm_wq.
+ *
+ * Returns 0 if the request has been queued or merged with a pending one,
+ * -EINVAL if dev->power.runtime_error is set, -EAGAIN if the state of the
+ * device or a different pending request rules the notification out, and
+ * -EBUSY if pm_children_suspended() reports false.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ */
+static int __pm_request_idle(struct device *dev)
+{
+       if (dev->power.runtime_error)
+               return -EINVAL;
+
+       if (atomic_read(&dev->power.usage_count) > 0
+           || dev->power.disable_depth > 0
+           || dev->power.runtime_status == RPM_SUSPENDED
+           || dev->power.runtime_status == RPM_SUSPENDING)
+               return -EAGAIN;
+
+       if (!pm_children_suspended(dev))
+               return -EBUSY;
+
+       if (!dev->power.request_pending) {
+               /* Nothing queued yet, so submit a new work item. */
+               dev->power.request = RPM_REQ_IDLE;
+               dev->power.request_pending = true;
+               queue_work(pm_wq, &dev->power.work);
+               return 0;
+       }
+
+       /* A canceled request still has its work item queued; reuse it. */
+       if (dev->power.request == RPM_REQ_NONE) {
+               dev->power.request = RPM_REQ_IDLE;
+               return 0;
+       }
+
+       /* Any request other than RPM_REQ_IDLE takes precedence over us. */
+       return dev->power.request == RPM_REQ_IDLE ? 0 : -EAGAIN;
+}
+
+/**
+ * pm_request_idle - Submit an idle notification request for given device.
+ * @dev: Device to handle.
+ *
+ * Locked wrapper around __pm_request_idle(): dev->power.lock is taken with the
+ * caller's interrupt state saved and the helper's result is returned as is.
+ */
+int pm_request_idle(struct device *dev)
+{
+       unsigned long irq_flags;
+       int ret;
+
+       spin_lock_irqsave(&dev->power.lock, irq_flags);
+       ret = __pm_request_idle(dev);
+       spin_unlock_irqrestore(&dev->power.lock, irq_flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(pm_request_idle);
+
+/**
+ * __pm_request_suspend - Submit a suspend request for given device.
+ * @dev: Device to suspend.
+ *
+ * Return value:
+ * 0, if a suspend request is now pending for the device,
+ * 1, if the device's run-time PM status already is RPM_SUSPENDED (and no
+ *     resume request is pending),
+ * -EINVAL, if power.runtime_error is set,
+ * -EAGAIN, if the usage count or power.disable_depth is positive, or if a
+ *     resume request is pending,
+ * -EINPROGRESS, if the device's status is RPM_SUSPENDING,
+ * -EBUSY, if pm_children_suspended() fails for the device.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ */
+static int __pm_request_suspend(struct device *dev)
+{
+       int retval = 0;
+
+       if (dev->power.runtime_error)
+               return -EINVAL;
+
+       if (dev->power.runtime_status == RPM_SUSPENDED)
+               retval = 1;
+       else if (atomic_read(&dev->power.usage_count) > 0
+           || dev->power.disable_depth > 0)
+               retval = -EAGAIN;
+       else if (dev->power.runtime_status == RPM_SUSPENDING)
+               retval = -EINPROGRESS;
+       else if (!pm_children_suspended(dev))
+               retval = -EBUSY;
+       /*
+        * Only real errors bail out here.  retval == 1 (already suspended)
+        * continues, so that the timer and stale requests are still cleaned up.
+        */
+       if (retval < 0)
+               return retval;
+
+       pm_runtime_deactivate_timer(dev);
+
+       if (dev->power.request_pending) {
+               /*
+                * Pending resume requests take precedence over us, but we can
+                * overtake any other pending request.
+                */
+               if (dev->power.request == RPM_REQ_RESUME)
+                       retval = -EAGAIN;
+               else if (dev->power.request != RPM_REQ_SUSPEND)
+                       /*
+                        * Already suspended (retval == 1): just cancel the
+                        * other request.  Otherwise replace it with ours.
+                        */
+                       dev->power.request = retval ?
+                                               RPM_REQ_NONE : RPM_REQ_SUSPEND;
+               return retval;
+       } else if (retval) {
+               /* Already suspended and nothing pending: nothing to queue. */
+               return retval;
+       }
+
+       dev->power.request = RPM_REQ_SUSPEND;
+       dev->power.request_pending = true;
+       queue_work(pm_wq, &dev->power.work);
+
+       return 0;
+}
+
+/**
+ * pm_suspend_timer_fn - Timer function for pm_schedule_suspend().
+ * @data: Device pointer passed by pm_schedule_suspend().
+ *
+ * Check if the time is right and execute __pm_request_suspend() in that case.
+ * The return value of __pm_request_suspend() is discarded.
+ */
+static void pm_suspend_timer_fn(unsigned long data)
+{
+       struct device *dev = (struct device *)data;
+       unsigned long flags;
+       unsigned long expires;
+
+       spin_lock_irqsave(&dev->power.lock, flags);
+
+       expires = dev->power.timer_expires;
+       /*
+        * A zero 'expires' is treated as "no suspend scheduled".  If 'expires'
+        * is after 'jiffies' we've been called too early.
+        */
+       if (expires > 0 && !time_after(expires, jiffies)) {
+               dev->power.timer_expires = 0;
+               __pm_request_suspend(dev);
+       }
+
+       spin_unlock_irqrestore(&dev->power.lock, flags);
+}
+
+/**
+ * pm_schedule_suspend - Set up a timer to submit a suspend request in future.
+ * @dev: Device to suspend.
+ * @delay: Time to wait before submitting a suspend request, in milliseconds.
+ *
+ * If @delay is zero, the request is submitted right away and the result of
+ * __pm_request_suspend() is returned.  Otherwise:
+ *
+ * Return value:
+ * 0, if the suspend timer has been armed,
+ * 1, if the device's run-time PM status already is RPM_SUSPENDED,
+ * -EINVAL, if power.runtime_error is set,
+ * -EAGAIN, if a resume request is pending, or if the usage count or
+ *     power.disable_depth is positive,
+ * -EINPROGRESS, if the device's status is RPM_SUSPENDING,
+ * -EBUSY, if pm_children_suspended() fails for the device.
+ */
+int pm_schedule_suspend(struct device *dev, unsigned int delay)
+{
+       unsigned long flags;
+       int retval = 0;
+
+       spin_lock_irqsave(&dev->power.lock, flags);
+
+       if (dev->power.runtime_error) {
+               retval = -EINVAL;
+               goto out;
+       }
+
+       if (!delay) {
+               retval = __pm_request_suspend(dev);
+               goto out;
+       }
+
+       pm_runtime_deactivate_timer(dev);
+
+       if (dev->power.request_pending) {
+               /*
+                * Pending resume requests take precedence over us, but any
+                * other pending requests have to be canceled.
+                */
+               if (dev->power.request == RPM_REQ_RESUME) {
+                       retval = -EAGAIN;
+                       goto out;
+               }
+               dev->power.request = RPM_REQ_NONE;
+       }
+
+       if (dev->power.runtime_status == RPM_SUSPENDED)
+               retval = 1;
+       else if (dev->power.runtime_status == RPM_SUSPENDING)
+               retval = -EINPROGRESS;
+       else if (atomic_read(&dev->power.usage_count) > 0
+           || dev->power.disable_depth > 0)
+               retval = -EAGAIN;
+       else if (!pm_children_suspended(dev))
+               retval = -EBUSY;
+       if (retval)
+               goto out;
+
+       dev->power.timer_expires = jiffies + msecs_to_jiffies(delay);
+       /*
+        * pm_suspend_timer_fn() treats timer_expires == 0 as "no suspend
+        * scheduled".  If the sum above wraps around to exactly zero, move the
+        * expiration one jiffy later so that the timer isn't silently ignored.
+        */
+       if (!dev->power.timer_expires)
+               dev->power.timer_expires = 1;
+       mod_timer(&dev->power.suspend_timer, dev->power.timer_expires);
+
+ out:
+       spin_unlock_irqrestore(&dev->power.lock, flags);
+
+       return retval;
+}
+EXPORT_SYMBOL_GPL(pm_schedule_suspend);
+
+/**
+ * __pm_request_resume - Submit a resume request for given device.
+ * @dev: Device to resume.
+ *
+ * Return value:
+ * 0, if a resume request is now pending for the device,
+ * 1, if the device's run-time PM status already is RPM_ACTIVE,
+ * -EINVAL, if power.runtime_error is set,
+ * -EINPROGRESS, if the device's status is RPM_RESUMING,
+ * -EAGAIN, if power.disable_depth is positive.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ */
+static int __pm_request_resume(struct device *dev)
+{
+       int retval = 0;
+
+       if (dev->power.runtime_error)
+               return -EINVAL;
+
+       if (dev->power.runtime_status == RPM_ACTIVE)
+               retval = 1;
+       else if (dev->power.runtime_status == RPM_RESUMING)
+               retval = -EINPROGRESS;
+       else if (dev->power.disable_depth > 0)
+               retval = -EAGAIN;
+       /*
+        * Only real errors bail out here.  retval == 1 (already active)
+        * continues, so that the timer and stale requests are still cleaned up.
+        */
+       if (retval < 0)
+               return retval;
+
+       pm_runtime_deactivate_timer(dev);
+
+       if (dev->power.request_pending) {
+               /*
+                * If non-resume request is pending, we can overtake it.  If the
+                * device already is active (retval == 1), the pending request
+                * is canceled instead.
+                */
+               dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME;
+               return retval;
+       } else if (retval) {
+               /* Already active and nothing pending: nothing to queue. */
+               return retval;
+       }
+
+       dev->power.request = RPM_REQ_RESUME;
+       dev->power.request_pending = true;
+       queue_work(pm_wq, &dev->power.work);
+
+       return retval;
+}
+
+/**
+ * pm_request_resume - Submit a resume request for given device.
+ * @dev: Device to resume.
+ *
+ * Locked wrapper around __pm_request_resume(): dev->power.lock is taken with
+ * the caller's interrupt state saved and the helper's result is returned as is.
+ */
+int pm_request_resume(struct device *dev)
+{
+       unsigned long irq_flags;
+       int ret;
+
+       spin_lock_irqsave(&dev->power.lock, irq_flags);
+       ret = __pm_request_resume(dev);
+       spin_unlock_irqrestore(&dev->power.lock, irq_flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(pm_request_resume);
+
+/**
+ * __pm_runtime_get - Reference count a device and wake it up, if necessary.
+ * @dev: Device to handle.
+ * @sync: If set and the device is suspended, resume it synchronously.
+ *
+ * Bump the device's usage count.  If that takes the count from zero to one,
+ * either resume the device right away (@sync set) or queue a resume request
+ * for it (@sync unset) and return the result of that operation.  Otherwise
+ * return 1 without touching the device.
+ */
+int __pm_runtime_get(struct device *dev, bool sync)
+{
+       /* Only the 0 -> 1 transition of the usage count triggers a resume. */
+       if (atomic_add_return(1, &dev->power.usage_count) != 1)
+               return 1;
+
+       if (sync)
+               return pm_runtime_resume(dev);
+
+       return pm_request_resume(dev);
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_get);
+
+/**
+ * __pm_runtime_put - Decrement the device's usage counter and notify its bus.
+ * @dev: Device to handle.
+ * @sync: If the device's bus type is to be notified, do that synchronously.
+ *
+ * Drop one usage reference.  If the count reaches zero, either carry out an
+ * idle notification right away (@sync set) or queue an idle notification
+ * request (@sync unset) and return the result of that operation.  Otherwise
+ * return 0.
+ */
+int __pm_runtime_put(struct device *dev, bool sync)
+{
+       /* Only the drop to zero makes the device a candidate for idling. */
+       if (!atomic_dec_and_test(&dev->power.usage_count))
+               return 0;
+
+       if (sync)
+               return pm_runtime_idle(dev);
+
+       return pm_request_idle(dev);
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_put);
+
+/**
+ * __pm_runtime_set_status - Set run-time PM status of a device.
+ * @dev: Device to handle.
+ * @status: New run-time PM status of the device.
+ *
+ * If run-time PM of the device is disabled or its power.runtime_error field is
+ * different from zero, the status may be changed either to RPM_ACTIVE, or to
+ * RPM_SUSPENDED, as long as that reflects the actual state of the device.
+ * However, if the device has a parent and the parent is not active, and the
+ * parent's power.ignore_children flag is unset, the device's status cannot be
+ * set to RPM_ACTIVE, so -EBUSY is returned in that case.
+ *
+ * If successful, __pm_runtime_set_status() clears the power.runtime_error field
+ * and the device parent's counter of unsuspended children is modified to
+ * reflect the new status.  If the new status is RPM_SUSPENDED, an idle
+ * notification request for the parent is submitted.
+ *
+ * Return value:
+ * 0, on success,
+ * -EINVAL, if @status is neither RPM_ACTIVE nor RPM_SUSPENDED,
+ * -EAGAIN, if run-time PM is enabled for the device and no error is recorded,
+ * -EBUSY, if the parent's state doesn't allow the device to become active.
+ */
+int __pm_runtime_set_status(struct device *dev, unsigned int status)
+{
+       struct device *parent = dev->parent;
+       unsigned long flags;
+       bool notify_parent = false;
+       int error = 0;
+
+       if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
+               return -EINVAL;
+
+       spin_lock_irqsave(&dev->power.lock, flags);
+
+       if (!dev->power.runtime_error && !dev->power.disable_depth) {
+               error = -EAGAIN;
+               goto out;
+       }
+
+       if (dev->power.runtime_status == status)
+               goto out_set;
+
+       if (status == RPM_SUSPENDED) {
+               /* It always is possible to set the status to 'suspended'. */
+               if (parent) {
+                       atomic_add_unless(&parent->power.child_count, -1, 0);
+                       notify_parent = !parent->power.ignore_children;
+               }
+               goto out_set;
+       }
+
+       if (parent) {
+               /*
+                * Interrupts already are disabled by spin_lock_irqsave() above
+                * and must stay that way until dev->power.lock is released, so
+                * the _irq lock variants must not be used here: the unlock
+                * would re-enable interrupts with dev->power.lock still held.
+                * The parent's lock belongs to the same lock class as ours,
+                * hence the nesting annotation.
+                */
+               spin_lock_nested(&parent->power.lock, SINGLE_DEPTH_NESTING);
+
+               /*
+                * It is invalid to put an active child under a parent that is
+                * not active, has run-time PM enabled and the
+                * 'power.ignore_children' flag unset.
+                */
+               if (!parent->power.disable_depth
+                   && !parent->power.ignore_children
+                   && parent->power.runtime_status != RPM_ACTIVE)
+                       error = -EBUSY;
+               else if (dev->power.runtime_status == RPM_SUSPENDED)
+                       atomic_inc(&parent->power.child_count);
+
+               spin_unlock(&parent->power.lock);
+
+               if (error)
+                       goto out;
+       }
+
+ out_set:
+       dev->power.runtime_status = status;
+       dev->power.runtime_error = 0;
+ out:
+       spin_unlock_irqrestore(&dev->power.lock, flags);
+
+       /* Done outside of the lock, because pm_request_idle() takes its own. */
+       if (notify_parent)
+               pm_request_idle(parent);
+
+       return error;
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_set_status);
+
+/**
+ * __pm_runtime_barrier - Cancel pending requests and wait for completions.
+ * @dev: Device to handle.
+ *
+ * Flush all pending requests for the device from pm_wq and wait for all
+ * run-time PM operations involving the device in progress to complete.
+ *
+ * Should be called under dev->power.lock with interrupts disabled.  The lock
+ * is released (and interrupts are enabled) temporarily while the function is
+ * waiting, and it is held again on return.
+ */
+static void __pm_runtime_barrier(struct device *dev)
+{
+       pm_runtime_deactivate_timer(dev);
+
+       if (dev->power.request_pending) {
+               /*
+                * Neutralize the request first, so that pm_runtime_work() does
+                * nothing if it gets to run before being canceled.
+                */
+               dev->power.request = RPM_REQ_NONE;
+               /*
+                * pm_runtime_work() acquires dev->power.lock, so the lock has
+                * to be dropped while we are waiting for the work item.
+                */
+               spin_unlock_irq(&dev->power.lock);
+
+               cancel_work_sync(&dev->power.work);
+
+               spin_lock_irq(&dev->power.lock);
+               dev->power.request_pending = false;
+       }
+
+       if (dev->power.runtime_status == RPM_SUSPENDING
+           || dev->power.runtime_status == RPM_RESUMING
+           || dev->power.idle_notification) {
+               DEFINE_WAIT(wait);
+
+               /* Suspend, wake-up or idle notification in progress. */
+               for (;;) {
+                       /*
+                        * Get on the wait queue before re-checking the
+                        * condition under the lock.
+                        */
+                       prepare_to_wait(&dev->power.wait_queue, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       if (dev->power.runtime_status != RPM_SUSPENDING
+                           && dev->power.runtime_status != RPM_RESUMING
+                           && !dev->power.idle_notification)
+                               break;
+                       spin_unlock_irq(&dev->power.lock);
+
+                       schedule();
+
+                       spin_lock_irq(&dev->power.lock);
+               }
+               finish_wait(&dev->power.wait_queue, &wait);
+       }
+}
+
+/**
+ * pm_runtime_barrier - Flush pending requests and wait for completions.
+ * @dev: Device to handle.
+ *
+ * Prevent the device from being suspended by incrementing its usage counter and
+ * if there's a pending resume request for the device, wake the device up.
+ * Next, make sure that all pending requests for the device have been flushed
+ * from pm_wq and wait for all run-time PM operations involving the device in
+ * progress to complete.
+ *
+ * Return value:
+ * 1, if there was a resume request pending and the device had to be woken up,
+ * 0, otherwise
+ */
+int pm_runtime_barrier(struct device *dev)
+{
+       int retval = 0;
+
+       pm_runtime_get_noresume(dev);
+       spin_lock_irq(&dev->power.lock);
+
+       if (dev->power.request_pending
+           && dev->power.request == RPM_REQ_RESUME) {
+               /*
+                * NOTE(review): the result of __pm_runtime_resume() is not
+                * checked, so 1 is returned even if the resume has failed.
+                */
+               __pm_runtime_resume(dev, false);
+               retval = 1;
+       }
+
+       __pm_runtime_barrier(dev);
+
+       spin_unlock_irq(&dev->power.lock);
+       /* Balances the pm_runtime_get_noresume() at the top. */
+       pm_runtime_put_noidle(dev);
+
+       return retval;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_barrier);
+
+/**
+ * __pm_runtime_disable - Disable run-time PM of a device.
+ * @dev: Device to handle.
+ * @check_resume: If set, check if there's a resume request for the device.
+ *
+ * Increment power.disable_depth for the device and if it was zero previously,
+ * cancel all pending run-time PM requests for the device and wait for all
+ * operations in progress to complete.  The device can be either active or
+ * suspended after its run-time PM has been disabled.
+ *
+ * If @check_resume is set and there's a resume request pending when
+ * __pm_runtime_disable() is called and power.disable_depth is zero, the
+ * function will wake up the device before disabling its run-time PM.
+ */
+void __pm_runtime_disable(struct device *dev, bool check_resume)
+{
+       spin_lock_irq(&dev->power.lock);
+
+       /* Already disabled: only the nesting depth has to be updated. */
+       if (dev->power.disable_depth > 0) {
+               dev->power.disable_depth++;
+               goto out;
+       }
+
+       /*
+        * Wake up the device if there's a resume request pending, because that
+        * means there probably is some I/O to process and disabling run-time PM
+        * shouldn't prevent the device from processing the I/O.
+        */
+       if (check_resume && dev->power.request_pending
+           && dev->power.request == RPM_REQ_RESUME) {
+               /*
+                * Prevent suspends and idle notifications from being carried
+                * out after we have woken up the device.
+                */
+               pm_runtime_get_noresume(dev);
+
+               __pm_runtime_resume(dev, false);
+
+               pm_runtime_put_noidle(dev);
+       }
+
+       /*
+        * NOTE(review): power.disable_depth is tested again although it was
+        * found to be zero above; presumably __pm_runtime_resume() may release
+        * the lock, so the value can change in the meantime -- confirm.
+        */
+       if (!dev->power.disable_depth++)
+               __pm_runtime_barrier(dev);
+
+ out:
+       spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_disable);
+
+/**
+ * pm_runtime_enable - Enable run-time PM of a device.
+ * @dev: Device to handle.
+ *
+ * Undo one level of run-time PM disabling by decrementing power.disable_depth.
+ * A call that is not matched by a previous disable is reported with a warning
+ * and otherwise ignored.
+ */
+void pm_runtime_enable(struct device *dev)
+{
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&dev->power.lock, irq_flags);
+
+       if (!dev->power.disable_depth)
+               dev_warn(dev, "Unbalanced %s!\n", __func__);
+       else
+               dev->power.disable_depth--;
+
+       spin_unlock_irqrestore(&dev->power.lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_enable);
+
+/**
+ * pm_runtime_init - Initialize run-time PM fields in given device object.
+ * @dev: Device object to initialize.
+ *
+ * The device starts out with the RPM_SUSPENDED status, run-time PM disabled
+ * (power.disable_depth equal to 1), no users, no unsuspended children, no
+ * recorded error, no pending request and no suspend timer scheduled.
+ */
+void pm_runtime_init(struct device *dev)
+{
+       spin_lock_init(&dev->power.lock);
+       init_waitqueue_head(&dev->power.wait_queue);
+
+       /* Status and error state. */
+       dev->power.runtime_status = RPM_SUSPENDED;
+       dev->power.runtime_error = 0;
+       dev->power.idle_notification = false;
+       dev->power.deferred_resume = false;
+
+       /* Counters; run-time PM stays disabled until pm_runtime_enable(). */
+       dev->power.disable_depth = 1;
+       atomic_set(&dev->power.usage_count, 0);
+       atomic_set(&dev->power.child_count, 0);
+       pm_suspend_ignore_children(dev, false);
+
+       /* Request handling through pm_runtime_work(). */
+       dev->power.request = RPM_REQ_NONE;
+       dev->power.request_pending = false;
+       INIT_WORK(&dev->power.work, pm_runtime_work);
+
+       /* Suspend timer; timer_expires == 0 means that it isn't scheduled. */
+       dev->power.timer_expires = 0;
+       setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn,
+                       (unsigned long)dev);
+}
+
+/**
+ * pm_runtime_remove - Prepare for removing a device from device hierarchy.
+ * @dev: Device object being removed from device hierarchy.
+ *
+ * Disable run-time PM of the device without checking for pending resume
+ * requests and, if the device is left with the RPM_ACTIVE status, mark it as
+ * suspended.
+ */
+void pm_runtime_remove(struct device *dev)
+{
+       __pm_runtime_disable(dev, false);
+
+       if (dev->power.runtime_status != RPM_ACTIVE)
+               return;
+
+       /* Go back to 'suspended', the status set up by pm_runtime_init(). */
+       pm_runtime_set_suspended(dev);
+}
index 91b7530..2b387c2 100644 (file)
@@ -4151,7 +4151,7 @@ static void floppy_device_release(struct device *dev)
 {
 }
 
-static int floppy_resume(struct platform_device *dev)
+static int floppy_resume(struct device *dev)
 {
        int fdc;
 
@@ -4162,10 +4162,15 @@ static int floppy_resume(struct platform_device *dev)
        return 0;
 }
 
-static struct platform_driver floppy_driver = {
+static struct dev_pm_ops floppy_pm_ops = {
        .resume = floppy_resume,
+       .restore = floppy_resume,
+};
+
+static struct platform_driver floppy_driver = {
        .driver = {
                .name = "floppy",
+               .pm = &floppy_pm_ops,
        },
 };
 
index 9a1e5fb..c8522e6 100644 (file)
@@ -1166,32 +1166,37 @@ static void at_dma_shutdown(struct platform_device *pdev)
        clk_disable(atdma->clk);
 }
 
-static int at_dma_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+static int at_dma_suspend_noirq(struct device *dev)
 {
-       struct at_dma   *atdma = platform_get_drvdata(pdev);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct at_dma *atdma = platform_get_drvdata(pdev);
 
        at_dma_off(platform_get_drvdata(pdev));
        clk_disable(atdma->clk);
        return 0;
 }
 
-static int at_dma_resume_early(struct platform_device *pdev)
+static int at_dma_resume_noirq(struct device *dev)
 {
-       struct at_dma   *atdma = platform_get_drvdata(pdev);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct at_dma *atdma = platform_get_drvdata(pdev);
 
        clk_enable(atdma->clk);
        dma_writel(atdma, EN, AT_DMA_ENABLE);
        return 0;
-
 }
 
+static struct dev_pm_ops at_dma_dev_pm_ops = {
+       .suspend_noirq = at_dma_suspend_noirq,
+       .resume_noirq = at_dma_resume_noirq,
+};
+
 static struct platform_driver at_dma_driver = {
        .remove         = __exit_p(at_dma_remove),
        .shutdown       = at_dma_shutdown,
-       .suspend_late   = at_dma_suspend_late,
-       .resume_early   = at_dma_resume_early,
        .driver = {
                .name   = "at_hdmac",
+               .pm     = &at_dma_dev_pm_ops,
        },
 };
 
index 98c9a84..933c143 100644 (file)
@@ -1399,8 +1399,9 @@ static void dw_shutdown(struct platform_device *pdev)
        clk_disable(dw->clk);
 }
 
-static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+static int dw_suspend_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct dw_dma   *dw = platform_get_drvdata(pdev);
 
        dw_dma_off(platform_get_drvdata(pdev));
@@ -1408,23 +1409,27 @@ static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg)
        return 0;
 }
 
-static int dw_resume_early(struct platform_device *pdev)
+static int dw_resume_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct dw_dma   *dw = platform_get_drvdata(pdev);
 
        clk_enable(dw->clk);
        dma_writel(dw, CFG, DW_CFG_DMA_EN);
        return 0;
-
 }
 
+static struct dev_pm_ops dw_dev_pm_ops = {
+       .suspend_noirq = dw_suspend_noirq,
+       .resume_noirq = dw_resume_noirq,
+};
+
 static struct platform_driver dw_driver = {
        .remove         = __exit_p(dw_remove),
        .shutdown       = dw_shutdown,
-       .suspend_late   = dw_suspend_late,
-       .resume_early   = dw_resume_early,
        .driver = {
                .name   = "dw_dmac",
+               .pm     = &dw_dev_pm_ops,
        },
 };
 
index 88dab52..7837930 100644 (file)
@@ -1291,17 +1291,18 @@ static void txx9dmac_shutdown(struct platform_device *pdev)
        txx9dmac_off(ddev);
 }
 
-static int txx9dmac_suspend_late(struct platform_device *pdev,
-                                pm_message_t mesg)
+static int txx9dmac_suspend_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
 
        txx9dmac_off(ddev);
        return 0;
 }
 
-static int txx9dmac_resume_early(struct platform_device *pdev)
+static int txx9dmac_resume_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
        struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
        u32 mcr;
@@ -1314,6 +1315,11 @@ static int txx9dmac_resume_early(struct platform_device *pdev)
 
 }
 
+static struct dev_pm_ops txx9dmac_dev_pm_ops = {
+       .suspend_noirq = txx9dmac_suspend_noirq,
+       .resume_noirq = txx9dmac_resume_noirq,
+};
+
 static struct platform_driver txx9dmac_chan_driver = {
        .remove         = __exit_p(txx9dmac_chan_remove),
        .driver = {
@@ -1324,10 +1330,9 @@ static struct platform_driver txx9dmac_chan_driver = {
 static struct platform_driver txx9dmac_driver = {
        .remove         = __exit_p(txx9dmac_remove),
        .shutdown       = txx9dmac_shutdown,
-       .suspend_late   = txx9dmac_suspend_late,
-       .resume_early   = txx9dmac_resume_early,
        .driver = {
                .name   = "txx9dmac",
+               .pm     = &txx9dmac_dev_pm_ops,
        },
 };
 
index 762e1e5..0495557 100644 (file)
@@ -1134,35 +1134,44 @@ static int __exit i2c_pxa_remove(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM
-static int i2c_pxa_suspend_late(struct platform_device *dev, pm_message_t state)
+static int i2c_pxa_suspend_noirq(struct device *dev)
 {
-       struct pxa_i2c *i2c = platform_get_drvdata(dev);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct pxa_i2c *i2c = platform_get_drvdata(pdev);
+
        clk_disable(i2c->clk);
+
        return 0;
 }
 
-static int i2c_pxa_resume_early(struct platform_device *dev)
+static int i2c_pxa_resume_noirq(struct device *dev)
 {
-       struct pxa_i2c *i2c = platform_get_drvdata(dev);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct pxa_i2c *i2c = platform_get_drvdata(pdev);
 
        clk_enable(i2c->clk);
        i2c_pxa_reset(i2c);
 
        return 0;
 }
+
+static struct dev_pm_ops i2c_pxa_dev_pm_ops = {
+       .suspend_noirq = i2c_pxa_suspend_noirq,
+       .resume_noirq = i2c_pxa_resume_noirq,
+};
+
+#define I2C_PXA_DEV_PM_OPS (&i2c_pxa_dev_pm_ops)
 #else
-#define i2c_pxa_suspend_late NULL
-#define i2c_pxa_resume_early NULL
+#define I2C_PXA_DEV_PM_OPS NULL
 #endif
 
 static struct platform_driver i2c_pxa_driver = {
        .probe          = i2c_pxa_probe,
        .remove         = __exit_p(i2c_pxa_remove),
-       .suspend_late   = i2c_pxa_suspend_late,
-       .resume_early   = i2c_pxa_resume_early,
        .driver         = {
                .name   = "pxa2xx-i2c",
                .owner  = THIS_MODULE,
+               .pm     = I2C_PXA_DEV_PM_OPS,
        },
        .id_table       = i2c_pxa_id_table,
 };
index 20bb0ce..96aafb9 100644 (file)
@@ -946,17 +946,20 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int s3c24xx_i2c_suspend_late(struct platform_device *dev,
-                                   pm_message_t msg)
+static int s3c24xx_i2c_suspend_noirq(struct device *dev)
 {
-       struct s3c24xx_i2c *i2c = platform_get_drvdata(dev);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev);
+
        i2c->suspended = 1;
+
        return 0;
 }
 
-static int s3c24xx_i2c_resume(struct platform_device *dev)
+static int s3c24xx_i2c_resume(struct device *dev)
 {
-       struct s3c24xx_i2c *i2c = platform_get_drvdata(dev);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev);
 
        i2c->suspended = 0;
        s3c24xx_i2c_init(i2c);
@@ -964,9 +967,14 @@ static int s3c24xx_i2c_resume(struct platform_device *dev)
        return 0;
 }
 
+static struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = {
+       .suspend_noirq = s3c24xx_i2c_suspend_noirq,
+       .resume = s3c24xx_i2c_resume,
+};
+
+#define S3C24XX_DEV_PM_OPS (&s3c24xx_i2c_dev_pm_ops)
 #else
-#define s3c24xx_i2c_suspend_late NULL
-#define s3c24xx_i2c_resume NULL
+#define S3C24XX_DEV_PM_OPS NULL
 #endif
 
 /* device driver for platform bus bits */
@@ -985,12 +993,11 @@ MODULE_DEVICE_TABLE(platform, s3c24xx_driver_ids);
 static struct platform_driver s3c24xx_i2c_driver = {
        .probe          = s3c24xx_i2c_probe,
        .remove         = s3c24xx_i2c_remove,
-       .suspend_late   = s3c24xx_i2c_suspend_late,
-       .resume         = s3c24xx_i2c_resume,
        .id_table       = s3c24xx_driver_ids,
        .driver         = {
                .owner  = THIS_MODULE,
                .name   = "s3c-i2c",
+               .pm     = S3C24XX_DEV_PM_OPS,
        },
 };
 
index a2ad53e..af04f5b 100644 (file)
@@ -53,7 +53,7 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
 
 static int __init hp_wmi_bios_setup(struct platform_device *device);
 static int __exit hp_wmi_bios_remove(struct platform_device *device);
-static int hp_wmi_resume_handler(struct platform_device *device);
+static int hp_wmi_resume_handler(struct device *device);
 
 struct bios_args {
        u32 signature;
@@ -94,14 +94,19 @@ static struct rfkill *wifi_rfkill;
 static struct rfkill *bluetooth_rfkill;
 static struct rfkill *wwan_rfkill;
 
+static struct dev_pm_ops hp_wmi_pm_ops = {
+       .resume  = hp_wmi_resume_handler,
+       .restore  = hp_wmi_resume_handler,
+};
+
 static struct platform_driver hp_wmi_driver = {
        .driver = {
-                  .name = "hp-wmi",
-                  .owner = THIS_MODULE,
+               .name = "hp-wmi",
+               .owner = THIS_MODULE,
+               .pm = &hp_wmi_pm_ops,
        },
        .probe = hp_wmi_bios_setup,
        .remove = hp_wmi_bios_remove,
-       .resume = hp_wmi_resume_handler,
 };
 
 static int hp_wmi_perform_query(int query, int write, int value)
@@ -512,7 +517,7 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
        return 0;
 }
 
-static int hp_wmi_resume_handler(struct platform_device *device)
+static int hp_wmi_resume_handler(struct device *device)
 {
        /*
         * Hardware state may have changed while suspended, so trigger
index c7c1ca0..1d26bed 100644 (file)
@@ -2167,8 +2167,9 @@ static int __devexit musb_remove(struct platform_device *pdev)
 
 #ifdef CONFIG_PM
 
-static int musb_suspend(struct platform_device *pdev, pm_message_t message)
+static int musb_suspend(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        unsigned long   flags;
        struct musb     *musb = dev_to_musb(&pdev->dev);
 
@@ -2195,8 +2196,9 @@ static int musb_suspend(struct platform_device *pdev, pm_message_t message)
        return 0;
 }
 
-static int musb_resume_early(struct platform_device *pdev)
+static int musb_resume_noirq(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct musb     *musb = dev_to_musb(&pdev->dev);
 
        if (!musb->clock)
@@ -2214,9 +2216,14 @@ static int musb_resume_early(struct platform_device *pdev)
        return 0;
 }
 
+static struct dev_pm_ops musb_dev_pm_ops = {
+       .suspend        = musb_suspend,
+       .resume_noirq   = musb_resume_noirq,
+};
+
+#define MUSB_DEV_PM_OPS (&musb_dev_pm_ops)
 #else
-#define        musb_suspend    NULL
-#define        musb_resume_early       NULL
+#define        MUSB_DEV_PM_OPS NULL
 #endif
 
 static struct platform_driver musb_driver = {
@@ -2224,11 +2231,10 @@ static struct platform_driver musb_driver = {
                .name           = (char *)musb_driver_name,
                .bus            = &platform_bus_type,
                .owner          = THIS_MODULE,
+               .pm             = MUSB_DEV_PM_OPS,
        },
        .remove         = __devexit_p(musb_remove),
        .shutdown       = musb_shutdown,
-       .suspend        = musb_suspend,
-       .resume_early   = musb_resume_early,
 };
 
 /*-------------------------------------------------------------------------*/
index c17c960..d7c76bb 100644 (file)
@@ -9,4 +9,7 @@
 struct dev_archdata {
 };
 
+struct pdev_archdata {
+};
+
 #endif /* _ASM_GENERIC_DEVICE_H */
index 8dc5123..3c6675c 100644 (file)
@@ -22,6 +22,9 @@ struct platform_device {
        struct resource * resource;
 
        struct platform_device_id       *id_entry;
+
+       /* arch specific additions */
+       struct pdev_archdata    archdata;
 };
 
 #define platform_get_device_id(pdev)   ((pdev)->id_entry)
@@ -57,8 +60,6 @@ struct platform_driver {
        int (*remove)(struct platform_device *);
        void (*shutdown)(struct platform_device *);
        int (*suspend)(struct platform_device *, pm_message_t state);
-       int (*suspend_late)(struct platform_device *, pm_message_t state);
-       int (*resume_early)(struct platform_device *);
        int (*resume)(struct platform_device *);
        struct device_driver driver;
        struct platform_device_id *id_table;
index b3f7476..3b7e04b 100644 (file)
 #define _LINUX_PM_H
 
 #include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/timer.h>
 
 /*
  * Callbacks for platform drivers to implement.
@@ -165,6 +169,28 @@ typedef struct pm_message {
  * It is allowed to unregister devices while the above callbacks are being
  * executed.  However, it is not allowed to unregister a device from within any
  * of its own callbacks.
+ *
+ * There also are the following callbacks related to run-time power management
+ * of devices:
+ *
+ * @runtime_suspend: Prepare the device for a condition in which it won't be
+ *     able to communicate with the CPU(s) and RAM due to power management.
+ *     This need not mean that the device should be put into a low power state.
+ *     For example, if the device is behind a link which is about to be turned
+ *     off, the device may remain at full power.  If the device does go to low
+ *     power and if device_may_wakeup(dev) is true, remote wake-up (i.e., a
+ *     hardware mechanism allowing the device to request a change of its power
+ *     state, such as PCI PME) should be enabled for it.
+ *
+ * @runtime_resume: Put the device into the fully active state in response to a
+ *     wake-up event generated by hardware or at the request of software.  If
+ *     necessary, put the device into the full power state and restore its
+ *     registers, so that it is fully operational.
+ *
+ * @runtime_idle: Device appears to be inactive and it might be put into a low
+ *     power state if all of the necessary conditions are satisfied.  Check
+ *     these conditions and handle the device as appropriate, possibly queueing
+ *     a suspend request for it.  The return value is ignored by the PM core.
  */
 
 struct dev_pm_ops {
@@ -182,8 +208,25 @@ struct dev_pm_ops {
        int (*thaw_noirq)(struct device *dev);
        int (*poweroff_noirq)(struct device *dev);
        int (*restore_noirq)(struct device *dev);
+       int (*runtime_suspend)(struct device *dev);
+       int (*runtime_resume)(struct device *dev);
+       int (*runtime_idle)(struct device *dev);
 };
 
+/*
+ * Use this if you want to use the same suspend and resume callbacks for suspend
+ * to RAM and hibernation.
+ *
+ * The macro expands to the definition of a struct dev_pm_ops object called
+ * @name in which @suspend_fn is installed as ->suspend, ->freeze and
+ * ->poweroff, and @resume_fn as ->resume, ->thaw and ->restore.  No other
+ * callback is initialized.  The expansion carries neither a storage-class
+ * specifier nor a trailing semicolon, so the user supplies both.
+ */
+#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+struct dev_pm_ops name = { \
+       .suspend = suspend_fn, \
+       .resume = resume_fn, \
+       .freeze = suspend_fn, \
+       .thaw = resume_fn, \
+       .poweroff = suspend_fn, \
+       .restore = resume_fn, \
+}
+
 /**
  * PM_EVENT_ messages
  *
@@ -315,14 +358,80 @@ enum dpm_state {
        DPM_OFF_IRQ,
 };
 
+/**
+ * Device run-time power management status.
+ *
+ * These status labels are used internally by the PM core to indicate the
+ * current status of a device with respect to the PM core operations.  They do
+ * not reflect the actual power state of the device or its status as seen by the
+ * driver.
+ *
+ * RPM_ACTIVE          Device is fully operational.  Indicates that the device
+ *                     bus type's ->runtime_resume() callback has completed
+ *                     successfully.
+ *
+ * RPM_SUSPENDED       Device bus type's ->runtime_suspend() callback has
+ *                     completed successfully.  The device is regarded as
+ *                     suspended.
+ *
+ * RPM_RESUMING                Device bus type's ->runtime_resume() callback is being
+ *                     executed.
+ *
+ * RPM_SUSPENDING      Device bus type's ->runtime_suspend() callback is being
+ *                     executed.
+ */
+
+enum rpm_status {
+       RPM_ACTIVE = 0,         /* ->runtime_resume() completed successfully */
+       RPM_RESUMING,           /* ->runtime_resume() is being executed */
+       RPM_SUSPENDED,          /* ->runtime_suspend() completed successfully */
+       RPM_SUSPENDING,         /* ->runtime_suspend() is being executed */
+};
+
+/**
+ * Device run-time power management request types.
+ *
+ * RPM_REQ_NONE                Do nothing.
+ *
+ * RPM_REQ_IDLE                Run the device bus type's ->runtime_idle() callback
+ *
+ * RPM_REQ_SUSPEND     Run the device bus type's ->runtime_suspend() callback
+ *
+ * RPM_REQ_RESUME      Run the device bus type's ->runtime_resume() callback
+ */
+
+enum rpm_request {
+       RPM_REQ_NONE = 0,       /* do nothing */
+       RPM_REQ_IDLE,           /* run the bus type's ->runtime_idle() */
+       RPM_REQ_SUSPEND,        /* run the bus type's ->runtime_suspend() */
+       RPM_REQ_RESUME,         /* run the bus type's ->runtime_resume() */
+};
+
 struct dev_pm_info {
        pm_message_t            power_state;
-       unsigned                can_wakeup:1;
-       unsigned                should_wakeup:1;
+       unsigned int            can_wakeup:1;
+       unsigned int            should_wakeup:1;
        enum dpm_state          status;         /* Owned by the PM core */
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_PM_SLEEP
        struct list_head        entry;
 #endif
+#ifdef CONFIG_PM_RUNTIME
+       struct timer_list       suspend_timer;
+       unsigned long           timer_expires;
+       struct work_struct      work;
+       wait_queue_head_t       wait_queue;
+       spinlock_t              lock;
+       atomic_t                usage_count;
+       atomic_t                child_count;
+       unsigned int            disable_depth:3;
+       unsigned int            ignore_children:1;
+       unsigned int            idle_notification:1;
+       unsigned int            request_pending:1;
+       unsigned int            deferred_resume:1;
+       enum rpm_request        request;
+       enum rpm_status         runtime_status;
+       int                     runtime_error;
+#endif
 };
 
 /*
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
new file mode 100644 (file)
index 0000000..4408704
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * pm_runtime.h - Device run-time power management helper functions.
+ *
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef _LINUX_PM_RUNTIME_H
+#define _LINUX_PM_RUNTIME_H
+
+#include <linux/device.h>
+#include <linux/pm.h>
+
+#ifdef CONFIG_PM_RUNTIME
+
+extern struct workqueue_struct *pm_wq;
+
+extern int pm_runtime_idle(struct device *dev);
+extern int pm_runtime_suspend(struct device *dev);
+extern int pm_runtime_resume(struct device *dev);
+extern int pm_request_idle(struct device *dev);
+extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
+extern int pm_request_resume(struct device *dev);
+extern int __pm_runtime_get(struct device *dev, bool sync);
+extern int __pm_runtime_put(struct device *dev, bool sync);
+extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
+extern int pm_runtime_barrier(struct device *dev);
+extern void pm_runtime_enable(struct device *dev);
+extern void __pm_runtime_disable(struct device *dev, bool check_resume);
+
+/*
+ * pm_children_suspended - Check whether children block suspending @dev.
+ *
+ * Returns true when the device's power.ignore_children flag is set or when
+ * its power.child_count reads as zero; false otherwise.
+ */
+static inline bool pm_children_suspended(struct device *dev)
+{
+       if (dev->power.ignore_children)
+               return true;
+
+       return atomic_read(&dev->power.child_count) == 0;
+}
+
+/*
+ * pm_suspend_ignore_children - Set or clear power.ignore_children of @dev.
+ *
+ * The flag is the one tested first by pm_children_suspended(); @enable is
+ * stored into a one-bit field without taking any lock here.
+ */
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
+{
+       dev->power.ignore_children = enable;
+}
+
+/*
+ * pm_runtime_get_noresume - Increment the usage counter of @dev.
+ *
+ * Only power.usage_count is touched (atomically); nothing else is called from
+ * here, so the device is not resumed by this helper.
+ */
+static inline void pm_runtime_get_noresume(struct device *dev)
+{
+       atomic_inc(&dev->power.usage_count);
+}
+
+/*
+ * pm_runtime_put_noidle - Decrement the usage counter of @dev if it is nonzero.
+ *
+ * atomic_add_unless(..., -1, 0) adds -1 unless the counter already equals 0,
+ * so the counter is never taken below zero.  Nothing else is called from here.
+ */
+static inline void pm_runtime_put_noidle(struct device *dev)
+{
+       atomic_add_unless(&dev->power.usage_count, -1, 0);
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+
+/*
+ * Stubs used when CONFIG_PM_RUNTIME is not set.
+ *
+ * The idle/suspend entry points (pm_runtime_idle(), pm_runtime_suspend(),
+ * pm_request_idle(), pm_schedule_suspend()) return -ENOSYS.  The resume,
+ * put, set-status and barrier entry points return 0, __pm_runtime_get()
+ * returns 1, pm_children_suspended() returns false and the void helpers are
+ * empty.
+ */
+static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; }
+static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; }
+static inline int pm_runtime_resume(struct device *dev) { return 0; }
+static inline int pm_request_idle(struct device *dev) { return -ENOSYS; }
+static inline int pm_schedule_suspend(struct device *dev, unsigned int delay)
+{
+       return -ENOSYS;
+}
+static inline int pm_request_resume(struct device *dev) { return 0; }
+static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; }
+static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; }
+static inline int __pm_runtime_set_status(struct device *dev,
+                                           unsigned int status) { return 0; }
+static inline int pm_runtime_barrier(struct device *dev) { return 0; }
+static inline void pm_runtime_enable(struct device *dev) {}
+static inline void __pm_runtime_disable(struct device *dev, bool c) {}
+
+static inline bool pm_children_suspended(struct device *dev) { return false; }
+static inline void pm_suspend_ignore_children(struct device *dev, bool en) {}
+static inline void pm_runtime_get_noresume(struct device *dev) {}
+static inline void pm_runtime_put_noidle(struct device *dev) {}
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+/*
+ * pm_runtime_get - Call __pm_runtime_get() for @dev with sync == false.
+ *
+ * Returns whatever __pm_runtime_get() returns.
+ * NOTE(review): sync == false presumably selects the asynchronous variant;
+ * confirm against the __pm_runtime_get() implementation.
+ */
+static inline int pm_runtime_get(struct device *dev)
+{
+       return __pm_runtime_get(dev, false);
+}
+
+/*
+ * pm_runtime_get_sync - Call __pm_runtime_get() for @dev with sync == true.
+ *
+ * Returns whatever __pm_runtime_get() returns.
+ */
+static inline int pm_runtime_get_sync(struct device *dev)
+{
+       return __pm_runtime_get(dev, true);
+}
+
+/*
+ * pm_runtime_put - Call __pm_runtime_put() for @dev with sync == false.
+ *
+ * Returns whatever __pm_runtime_put() returns.
+ * NOTE(review): sync == false presumably selects the asynchronous variant;
+ * confirm against the __pm_runtime_put() implementation.
+ */
+static inline int pm_runtime_put(struct device *dev)
+{
+       return __pm_runtime_put(dev, false);
+}
+
+/*
+ * pm_runtime_put_sync - Call __pm_runtime_put() for @dev with sync == true.
+ *
+ * Returns whatever __pm_runtime_put() returns.
+ */
+static inline int pm_runtime_put_sync(struct device *dev)
+{
+       return __pm_runtime_put(dev, true);
+}
+
+/*
+ * pm_runtime_set_active - Request RPM_ACTIVE as the run-time PM status of @dev.
+ *
+ * Returns the result of __pm_runtime_set_status(dev, RPM_ACTIVE).
+ */
+static inline int pm_runtime_set_active(struct device *dev)
+{
+       return __pm_runtime_set_status(dev, RPM_ACTIVE);
+}
+
+/*
+ * pm_runtime_set_suspended - Request RPM_SUSPENDED as the run-time PM status
+ * of @dev.
+ *
+ * Returns the result of __pm_runtime_set_status(dev, RPM_SUSPENDED), like
+ * pm_runtime_set_active() does for RPM_ACTIVE, so that a failure to change
+ * the status is not silently dropped.  Callers that used this helper as a
+ * statement are unaffected by the return type.
+ */
+static inline int pm_runtime_set_suspended(struct device *dev)
+{
+       return __pm_runtime_set_status(dev, RPM_SUSPENDED);
+}
+
+/*
+ * pm_runtime_disable - Call __pm_runtime_disable() for @dev with
+ * check_resume == true.
+ */
+static inline void pm_runtime_disable(struct device *dev)
+{
+       __pm_runtime_disable(dev, true);
+}
+
+#endif
index 72067cb..91e09d3 100644 (file)
@@ -208,3 +208,17 @@ config APM_EMULATION
          random kernel OOPSes or reboots that don't seem to be related to
          anything, try disabling/enabling this option (or disabling/enabling
          APM in your BIOS).
+
+config PM_RUNTIME
+       bool "Run-time PM core functionality"
+       depends on PM
+       ---help---
+         Enable functionality allowing I/O devices to be put into energy-saving
+         (low power) states at run time (or autosuspended) after a specified
+         period of inactivity and woken up in response to a hardware-generated
+         wake-up event or a driver's request.
+
+         Hardware support is generally required for this functionality to work
+         and the bus type drivers of the buses the devices are on are
+         responsible for the actual handling of the autosuspend requests and
+         wake-up events.
index 81d2e74..04b3a83 100644 (file)
@@ -298,8 +298,8 @@ int hibernation_snapshot(int platform_mode)
        if (error)
                return error;
 
-       /* Free memory before shutting down devices. */
-       error = swsusp_shrink_memory();
+       /* Preallocate image memory before shutting down devices. */
+       error = hibernate_preallocate_memory();
        if (error)
                goto Close;
 
@@ -315,6 +315,10 @@ int hibernation_snapshot(int platform_mode)
        /* Control returns here after successful restore */
 
  Resume_devices:
+       /* We may need to release the preallocated image pages here. */
+       if (error || !in_suspend)
+               swsusp_free();
+
        dpm_resume_end(in_suspend ?
                (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
        resume_console();
@@ -460,11 +464,11 @@ int hibernation_platform_enter(void)
 
        error = hibernation_ops->prepare();
        if (error)
-               goto Platofrm_finish;
+               goto Platform_finish;
 
        error = disable_nonboot_cpus();
        if (error)
-               goto Platofrm_finish;
+               goto Platform_finish;
 
        local_irq_disable();
        sysdev_suspend(PMSG_HIBERNATE);
@@ -476,7 +480,7 @@ int hibernation_platform_enter(void)
         * We don't need to reenable the nonboot CPUs or resume consoles, since
         * the system is going to be halted anyway.
         */
- Platofrm_finish:
+ Platform_finish:
        hibernation_ops->finish();
 
        dpm_suspend_noirq(PMSG_RESTORE);
@@ -578,7 +582,10 @@ int hibernate(void)
                goto Thaw;
 
        error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
-       if (in_suspend && !error) {
+       if (error)
+               goto Thaw;
+
+       if (in_suspend) {
                unsigned int flags = 0;
 
                if (hibernation_mode == HIBERNATION_PLATFORM)
@@ -590,8 +597,8 @@ int hibernate(void)
                        power_down();
        } else {
                pr_debug("PM: Image restored successfully.\n");
-               swsusp_free();
        }
+
  Thaw:
        thaw_processes();
  Finish:
index f710e36..347d2cc 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/string.h>
 #include <linux/resume-trace.h>
+#include <linux/workqueue.h>
 
 #include "power.h"
 
@@ -217,8 +218,24 @@ static struct attribute_group attr_group = {
        .attrs = g,
 };
 
+#ifdef CONFIG_PM_RUNTIME
+struct workqueue_struct *pm_wq;
+
+/*
+ * Create the freezeable "pm" workqueue and publish it through pm_wq.
+ * Returns 0 on success or -ENOMEM if the workqueue could not be created.
+ */
+static int __init pm_start_workqueue(void)
+{
+       struct workqueue_struct *wq = create_freezeable_workqueue("pm");
+
+       pm_wq = wq;
+       if (!wq)
+               return -ENOMEM;
+
+       return 0;
+}
+#else
+static inline int pm_start_workqueue(void) { return 0; }
+#endif
+
 static int __init pm_init(void)
 {
+       int error = pm_start_workqueue();
+       if (error)
+               return error;
        power_kobj = kobject_create_and_add("power", NULL);
        if (!power_kobj)
                return -ENOMEM;
index 26d5a26..46c5a26 100644 (file)
@@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void);
 
 extern int create_basic_memory_bitmaps(void);
 extern void free_basic_memory_bitmaps(void);
-extern int swsusp_shrink_memory(void);
+extern int hibernate_preallocate_memory(void);
 
 /**
  *     Auxiliary structure used for reading the snapshot image data and
index 523a451..97955b0 100644 (file)
@@ -233,7 +233,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 
 #define BM_END_OF_MAP  (~0UL)
 
-#define BM_BITS_PER_BLOCK      (PAGE_SIZE << 3)
+#define BM_BITS_PER_BLOCK      (PAGE_SIZE * BITS_PER_BYTE)
 
 struct bm_block {
        struct list_head hook;  /* hook into a list of bitmap blocks */
@@ -275,7 +275,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 
 /**
  *     create_bm_block_list - create a list of block bitmap objects
- *     @nr_blocks - number of blocks to allocate
+ *     @pages - number of pages to track
  *     @list - list to put the allocated blocks into
  *     @ca - chain allocator to be used for allocating memory
  */
@@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(void)
        struct zone *zone;
        unsigned int n = 0;
 
-       for_each_zone(zone) {
+       for_each_populated_zone(zone) {
                unsigned long pfn, max_zone_pfn;
 
                if (!is_highmem(zone))
@@ -916,7 +916,7 @@ static unsigned int count_data_pages(void)
        unsigned long pfn, max_zone_pfn;
        unsigned int n = 0;
 
-       for_each_zone(zone) {
+       for_each_populated_zone(zone) {
                if (is_highmem(zone))
                        continue;
 
@@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
        struct zone *zone;
        unsigned long pfn;
 
-       for_each_zone(zone) {
+       for_each_populated_zone(zone) {
                unsigned long max_zone_pfn;
 
                mark_free_pages(zone);
@@ -1033,6 +1033,25 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/*
+ * Numbers of normal and highmem page frames allocated for hibernation image
+ * before suspending devices.  Only used within this file, hence static.
+ */
+static unsigned int alloc_normal, alloc_highmem;
+/*
+ * Memory bitmap used for marking saveable pages (during hibernation) or
+ * hibernation image pages (during restore)
+ */
+static struct memory_bitmap orig_bm;
+/*
+ * Memory bitmap used during hibernation for marking allocated page frames that
+ * will contain copies of saveable pages.  During restore it is initially used
+ * for marking hibernation image pages, but then the set bits from it are
+ * duplicated in @orig_bm and it is released.  On highmem systems it is next
+ * used for marking "safe" highmem pages, but it has to be reinitialized for
+ * this purpose.
+ */
+static struct memory_bitmap copy_bm;
 
 /**
  *     swsusp_free - free pages allocated for the suspend.
@@ -1046,7 +1065,7 @@ void swsusp_free(void)
        struct zone *zone;
        unsigned long pfn, max_zone_pfn;
 
-       for_each_zone(zone) {
+       for_each_populated_zone(zone) {
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (pfn_valid(pfn)) {
@@ -1064,74 +1083,286 @@ void swsusp_free(void)
        nr_meta_pages = 0;
        restore_pblist = NULL;
        buffer = NULL;
+       alloc_normal = 0;
+       alloc_highmem = 0;
 }
 
+/* Helper functions used for the shrinking of memory. */
+
+#define GFP_IMAGE      (GFP_KERNEL | __GFP_NOWARN)
+
 /**
- *     swsusp_shrink_memory -  Try to free as much memory as needed
- *
- *     ... but do not OOM-kill anyone
+ * preallocate_image_pages - Allocate a number of pages for hibernation image
+ * @nr_pages: Number of page frames to allocate.
+ * @mask: GFP flags to use for the allocation.
  *
- *     Notice: all userland should be stopped before it is called, or
- *     livelock is possible.
+ * Return value: Number of page frames actually allocated
+ */
+static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
+{
+       unsigned long done;
+
+       /* Stop early, reporting the partial count, if an allocation fails. */
+       for (done = 0; done < nr_pages; done++) {
+               struct page *pg = alloc_image_page(mask);
+
+               if (!pg)
+                       break;
+
+               /* Record the frame in copy_bm and account for its zone type. */
+               memory_bm_set_bit(&copy_bm, page_to_pfn(pg));
+               if (PageHighMem(pg))
+                       alloc_highmem++;
+               else
+                       alloc_normal++;
+       }
+
+       return done;
+}
+
+/*
+ * preallocate_image_memory - Allocate up to @nr_pages page frames with
+ * GFP_IMAGE (no __GFP_HIGHMEM) and return the number actually allocated.
+ */
+static unsigned long preallocate_image_memory(unsigned long nr_pages)
+{
+       return preallocate_image_pages(nr_pages, GFP_IMAGE);
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * preallocate_image_highmem - Allocate up to @nr_pages page frames with
+ * GFP_IMAGE | __GFP_HIGHMEM and return the number actually allocated.
+ */
+static unsigned long preallocate_image_highmem(unsigned long nr_pages)
+{
+       return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
+}
+
+/**
+ *  __fraction - Compute (an approximation of) x * (multiplier / base)
+ *
+ *  The product x * multiplier is formed first in 64 bits and then divided by
+ *  @base with do_div(), so the quotient is truncated.  The result is cast to
+ *  unsigned long on return.
  */
+static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
+{
+       x *= multiplier;
+       /* do_div() divides x in place. */
+       do_div(x, base);
+       return (unsigned long)x;
+}
+
+static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+                                               unsigned long highmem,
+                                               unsigned long total)
+{
+       unsigned long alloc = __fraction(nr_pages, highmem, total);
 
-#define SHRINK_BITE    10000
-static inline unsigned long __shrink_memory(long tmp)
+       return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
+}
+#else /* CONFIG_HIGHMEM */
+static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
 {
-       if (tmp > SHRINK_BITE)
-               tmp = SHRINK_BITE;
-       return shrink_all_memory(tmp);
+       return 0;
 }
 
-int swsusp_shrink_memory(void)
+static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+                                               unsigned long highmem,
+                                               unsigned long total)
+{
+       return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * free_unnecessary_pages - Release preallocated pages not needed for the image
+ *
+ * Compare the numbers of preallocated normal and highmem page frames
+ * (alloc_normal, alloc_highmem) with the current numbers of saveable data and
+ * highmem pages, and give back the surplus.  A shortage of one kind of page
+ * frames is covered by the surplus of the other kind before anything is
+ * freed, and all subtractions are guarded so that the unsigned counters
+ * cannot wrap around.
+ */
+static void free_unnecessary_pages(void)
+{
+       unsigned long save, to_free_normal, to_free_highmem;
+
+       save = count_data_pages();
+       if (alloc_normal >= save) {
+               to_free_normal = alloc_normal - save;
+               save = 0;
+       } else {
+               to_free_normal = 0;
+               save -= alloc_normal;
+       }
+       /* 'save' now holds the data pages not covered by normal page frames. */
+       save += count_highmem_pages();
+       if (alloc_highmem >= save) {
+               to_free_highmem = alloc_highmem - save;
+       } else {
+               to_free_highmem = 0;
+               save -= alloc_highmem;
+               /* Keep enough normal page frames to cover the shortage. */
+               if (to_free_normal > save)
+                       to_free_normal -= save;
+               else
+                       to_free_normal = 0;
+       }
+
+       memory_bm_position_reset(&copy_bm);
+
+       /*
+        * Keep walking while there is anything left to free of either kind;
+        * pages of a kind that is already exhausted are skipped below.
+        */
+       while (to_free_normal > 0 || to_free_highmem > 0) {
+               unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+               struct page *page;
+
+               /* Defensive: never walk past the last bit set in copy_bm. */
+               if (pfn == BM_END_OF_MAP)
+                       break;
+
+               page = pfn_to_page(pfn);
+               if (PageHighMem(page)) {
+                       if (!to_free_highmem)
+                               continue;
+                       to_free_highmem--;
+                       alloc_highmem--;
+               } else {
+                       if (!to_free_normal)
+                               continue;
+                       to_free_normal--;
+                       alloc_normal--;
+               }
+               memory_bm_clear_bit(&copy_bm, pfn);
+               swsusp_unset_page_forbidden(page);
+               swsusp_unset_page_free(page);
+               __free_page(page);
+       }
+}
+
+/**
+ * minimum_image_size - Estimate the minimum acceptable size of an image
+ * @saveable: Number of saveable pages in the system.
+ *
+ * To avoid trying too hard to free too much memory, estimate the smallest
+ * acceptable hibernation image size; it serves as the lower limit for the
+ * preallocation of memory.
+ *
+ * The estimate is
+ *
+ * [number of saveable pages] - [number of pages that can be freed in theory]
+ *
+ * where the second term is the sum of (1) reclaimable slab pages, (2) active
+ * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
+ * minus mapped file pages.  Zero is returned if that term is not smaller
+ * than @saveable.
+ */
+static unsigned long minimum_image_size(unsigned long saveable)
+{
+       unsigned long freeable;
+
+       freeable = global_page_state(NR_SLAB_RECLAIMABLE);
+       freeable += global_page_state(NR_ACTIVE_ANON);
+       freeable += global_page_state(NR_INACTIVE_ANON);
+       freeable += global_page_state(NR_ACTIVE_FILE);
+       freeable += global_page_state(NR_INACTIVE_FILE);
+       freeable -= global_page_state(NR_FILE_MAPPED);
+
+       if (saveable <= freeable)
+               return 0;
+
+       return saveable - freeable;
+}
+
+/**
+ * hibernate_preallocate_memory - Preallocate memory for hibernation image
+ *
+ * To create a hibernation image it is necessary to make a copy of every page
+ * frame in use.  We also need a number of page frames to be free during
+ * hibernation for allocations made while saving the image and for device
+ * drivers, in case they need to allocate memory from their hibernation
+ * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES,
+ * respectively, both of which are rough estimates).  To make this happen, we
+ * compute the total number of available page frames and allocate at least
+ *
+ * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES
+ *
+ * of them, which corresponds to the maximum size of a hibernation image.
+ *
+ * If image_size is set below the number following from the above formula,
+ * the preallocation of memory is continued until the total number of saveable
+ * pages in the system is below the requested image size or the minimum
+ * acceptable image size returned by minimum_image_size(), whichever is greater.
+ */
+int hibernate_preallocate_memory(void)
 {
-       long tmp;
        struct zone *zone;
-       unsigned long pages = 0;
-       unsigned int i = 0;
-       char *p = "-\\|/";
+       unsigned long saveable, size, max_size, count, highmem, pages = 0;
+       unsigned long alloc, save_highmem, pages_highmem;
        struct timeval start, stop;
+       int error;
 
-       printk(KERN_INFO "PM: Shrinking memory...  ");
+       printk(KERN_INFO "PM: Preallocating image memory... ");
        do_gettimeofday(&start);
-       do {
-               long size, highmem_size;
-
-               highmem_size = count_highmem_pages();
-               size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
-               tmp = size;
-               size += highmem_size;
-               for_each_populated_zone(zone) {
-                       tmp += snapshot_additional_pages(zone);
-                       if (is_highmem(zone)) {
-                               highmem_size -=
-                                       zone_page_state(zone, NR_FREE_PAGES);
-                       } else {
-                               tmp -= zone_page_state(zone, NR_FREE_PAGES);
-                               tmp += zone->lowmem_reserve[ZONE_NORMAL];
-                       }
-               }
 
-               if (highmem_size < 0)
-                       highmem_size = 0;
+       error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
+       if (error)
+               goto err_out;
 
-               tmp += highmem_size;
-               if (tmp > 0) {
-                       tmp = __shrink_memory(tmp);
-                       if (!tmp)
-                               return -ENOMEM;
-                       pages += tmp;
-               } else if (size > image_size / PAGE_SIZE) {
-                       tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
-                       pages += tmp;
-               }
-               printk("\b%c", p[i++%4]);
-       } while (tmp > 0);
+       error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
+       if (error)
+               goto err_out;
+
+       alloc_normal = 0;
+       alloc_highmem = 0;
+
+       /* Count the number of saveable data pages. */
+       save_highmem = count_highmem_pages();
+       saveable = count_data_pages();
+
+       /*
+        * Compute the total number of page frames we can use (count) and the
+        * number of pages needed for image metadata (size).
+        */
+       count = saveable;
+       saveable += save_highmem;
+       highmem = save_highmem;
+       size = 0;
+       for_each_populated_zone(zone) {
+               size += snapshot_additional_pages(zone);
+               if (is_highmem(zone))
+                       highmem += zone_page_state(zone, NR_FREE_PAGES);
+               else
+                       count += zone_page_state(zone, NR_FREE_PAGES);
+       }
+       count += highmem;
+       count -= totalreserve_pages;
+
+       /* Compute the maximum number of saveable pages to leave in memory. */
+       max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
+       size = DIV_ROUND_UP(image_size, PAGE_SIZE);
+       if (size > max_size)
+               size = max_size;
+       /*
+        * If the maximum is not less than the current number of saveable pages
+        * in memory, allocate page frames for the image and we're done.
+        */
+       if (size >= saveable) {
+               pages = preallocate_image_highmem(save_highmem);
+               pages += preallocate_image_memory(saveable - pages);
+               goto out;
+       }
+
+       /* Estimate the minimum size of the image. */
+       pages = minimum_image_size(saveable);
+       if (size < pages)
+               size = min_t(unsigned long, pages, max_size);
+
+       /*
+        * Let the memory management subsystem know that we're going to need a
+        * large number of page frames to allocate and make it free some memory.
+        * NOTE: If this is not done, performance will be hurt badly in some
+        * test cases.
+        */
+       shrink_all_memory(saveable - size);
+
+       /*
+        * The number of saveable pages in memory was too high, so apply some
+        * pressure to decrease it.  First, make room for the largest possible
+        * image and fail if that doesn't work.  Next, try to decrease the size
+        * of the image as much as indicated by 'size' using allocations from
+        * highmem and non-highmem zones separately.
+        */
+       pages_highmem = preallocate_image_highmem(highmem / 2);
+       alloc = (count - max_size) - pages_highmem;
+       pages = preallocate_image_memory(alloc);
+       if (pages < alloc)
+               goto err_out;
+       size = max_size - size;
+       alloc = size;
+       size = preallocate_highmem_fraction(size, highmem, count);
+       pages_highmem += size;
+       alloc -= size;
+       pages += preallocate_image_memory(alloc);
+       pages += pages_highmem;
+
+       /*
+        * We only need as many page frames for the image as there are saveable
+        * pages in memory, but we have allocated more.  Release the excessive
+        * ones now.
+        */
+       free_unnecessary_pages();
+
+ out:
        do_gettimeofday(&stop);
-       printk("\bdone (%lu pages freed)\n", pages);
-       swsusp_show_speed(&start, &stop, pages, "Freed");
+       printk(KERN_CONT "done (allocated %lu pages)\n", pages);
+       swsusp_show_speed(&start, &stop, pages, "Allocated");
 
        return 0;
+
+ err_out:
+       printk(KERN_CONT "\n");
+       swsusp_free();
+       return -ENOMEM;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1142,7 +1373,7 @@ int swsusp_shrink_memory(void)
 
 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
 {
-       unsigned int free_highmem = count_free_highmem_pages();
+       unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
 
        if (free_highmem >= nr_highmem)
                nr_highmem = 0;
@@ -1164,19 +1395,17 @@ count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
 {
        struct zone *zone;
-       unsigned int free = 0, meta = 0;
+       unsigned int free = alloc_normal;
 
-       for_each_zone(zone) {
-               meta += snapshot_additional_pages(zone);
+       for_each_populated_zone(zone)
                if (!is_highmem(zone))
                        free += zone_page_state(zone, NR_FREE_PAGES);
-       }
 
        nr_pages += count_pages_for_highmem(nr_highmem);
-       pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
-               nr_pages, PAGES_FOR_IO, meta, free);
+       pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
+               nr_pages, PAGES_FOR_IO, free);
 
-       return free > nr_pages + PAGES_FOR_IO + meta;
+       return free > nr_pages + PAGES_FOR_IO;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1198,7 +1427,7 @@ static inline int get_highmem_buffer(int safe_needed)
  */
 
 static inline unsigned int
-alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
+alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 {
        unsigned int to_alloc = count_free_highmem_pages();
 
@@ -1218,7 +1447,7 @@ alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 static inline int get_highmem_buffer(int safe_needed) { return 0; }
 
 static inline unsigned int
-alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
+alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
 #endif /* CONFIG_HIGHMEM */
 
 /**
@@ -1237,51 +1466,36 @@ static int
 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
                unsigned int nr_pages, unsigned int nr_highmem)
 {
-       int error;
-
-       error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
-       if (error)
-               goto Free;
-
-       error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
-       if (error)
-               goto Free;
+       int error = 0;
 
        if (nr_highmem > 0) {
                error = get_highmem_buffer(PG_ANY);
                if (error)
-                       goto Free;
-
-               nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
+                       goto err_out;
+               if (nr_highmem > alloc_highmem) {
+                       nr_highmem -= alloc_highmem;
+                       nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
+               }
        }
-       while (nr_pages-- > 0) {
-               struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
-
-               if (!page)
-                       goto Free;
+       if (nr_pages > alloc_normal) {
+               nr_pages -= alloc_normal;
+               while (nr_pages-- > 0) {
+                       struct page *page;
 
-               memory_bm_set_bit(copy_bm, page_to_pfn(page));
+                       page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
+                       if (!page)
+                               goto err_out;
+                       memory_bm_set_bit(copy_bm, page_to_pfn(page));
+               }
        }
+
        return 0;
 
Free:
err_out:
        swsusp_free();
-       return -ENOMEM;
+       return error;
 }
 
-/* Memory bitmap used for marking saveable pages (during suspend) or the
- * suspend image pages (during resume)
- */
-static struct memory_bitmap orig_bm;
-/* Memory bitmap used on suspend for marking allocated pages that will contain
- * the copies of saveable pages.  During resume it is initially used for
- * marking the suspend image pages, but then its set bits are duplicated in
- * @orig_bm and it is released.  Next, on systems with high memory, it may be
- * used for marking "safe" highmem pages, but it has to be reinitialized for
- * this purpose.
- */
-static struct memory_bitmap copy_bm;
-
 asmlinkage int swsusp_save(void)
 {
        unsigned int nr_pages, nr_highmem;
@@ -1474,7 +1688,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
        unsigned long pfn, max_zone_pfn;
 
        /* Clear page flags */
-       for_each_zone(zone) {
+       for_each_populated_zone(zone) {
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (pfn_valid(pfn))