drm/nv50: fix iommu errors caused by device reading from address 0
[safe/jmp/linux-2.6] / drivers / cpufreq / cpufreq.c
index 89a29cd..063b218 100644 (file)
  * also protects the cpufreq_cpu_data array.
  */
 static struct cpufreq_driver *cpufreq_driver;
-static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
+static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 #ifdef CONFIG_HOTPLUG_CPU
 /* This one keeps track of the previously set governor of a removed CPU */
-static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
+static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
 #endif
 static DEFINE_SPINLOCK(cpufreq_driver_lock);
 
@@ -61,15 +61,17 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock);
  *   are concerned with are online after they get the lock.
  * - Governor routines that can be called in cpufreq hotplug path should not
  *   take this sem as top level hotplug notifier handler takes this.
+ * - Lock should not be held across
+ *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
  */
-static DEFINE_PER_CPU(int, policy_cpu);
+static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
 
 #define lock_policy_rwsem(mode, cpu)                                   \
 int lock_policy_rwsem_##mode                                           \
 (int cpu)                                                              \
 {                                                                      \
-       int policy_cpu = per_cpu(policy_cpu, cpu);                      \
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
@@ -88,7 +90,7 @@ EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
 
 void unlock_policy_rwsem_read(int cpu)
 {
-       int policy_cpu = per_cpu(policy_cpu, cpu);
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 }
@@ -96,7 +98,7 @@ EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
 
 void unlock_policy_rwsem_write(int cpu)
 {
-       int policy_cpu = per_cpu(policy_cpu, cpu);
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
 }
@@ -104,7 +106,8 @@ EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
 
 
 /* internal prototypes */
-static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
+static int __cpufreq_governor(struct cpufreq_policy *policy,
+               unsigned int event);
 static unsigned int __cpufreq_get(unsigned int cpu);
 static void handle_update(struct work_struct *work);
 
@@ -118,22 +121,24 @@ static void handle_update(struct work_struct *work);
 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
 static struct srcu_notifier_head cpufreq_transition_notifier_list;
 
+static bool init_cpufreq_transition_notifier_list_called;
 static int __init init_cpufreq_transition_notifier_list(void)
 {
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
+       init_cpufreq_transition_notifier_list_called = true;
        return 0;
 }
 pure_initcall(init_cpufreq_transition_notifier_list);
 
 static LIST_HEAD(cpufreq_governor_list);
-static DEFINE_MUTEX (cpufreq_governor_mutex);
+static DEFINE_MUTEX(cpufreq_governor_mutex);
 
 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 {
        struct cpufreq_policy *data;
        unsigned long flags;
 
-       if (cpu >= NR_CPUS)
+       if (cpu >= nr_cpu_ids)
                goto err_out;
 
        /* get the cpufreq driver */
@@ -147,7 +152,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 
 
        /* get the CPU */
-       data = cpufreq_cpu_data[cpu];
+       data = per_cpu(cpufreq_cpu_data, cpu);
 
        if (!data)
                goto err_out_put_module;
@@ -216,7 +221,7 @@ static void cpufreq_debug_disable_ratelimit(void)
 }
 
 void cpufreq_debug_printk(unsigned int type, const char *prefix,
-                                                       const char *fmt, ...)
+                       const char *fmt, ...)
 {
        char s[256];
        va_list args;
@@ -325,7 +330,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
        dprintk("notification %u of frequency transition to %u kHz\n",
                state, freqs->new);
 
-       policy = cpufreq_cpu_data[freqs->cpu];
+       policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
        switch (state) {
 
        case CPUFREQ_PRECHANGE:
@@ -369,7 +374,7 @@ static struct cpufreq_governor *__find_governor(const char *str_governor)
        struct cpufreq_governor *t;
 
        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
-               if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN))
+               if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;
 
        return NULL;
@@ -378,7 +383,7 @@ static struct cpufreq_governor *__find_governor(const char *str_governor)
 /**
  * cpufreq_parse_governor - parse a governor string
  */
-static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
+static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
 {
        int err = -EINVAL;
@@ -410,7 +415,7 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
                                int ret;
 
                                mutex_unlock(&cpufreq_governor_mutex);
-                               ret = request_module(name);
+                               ret = request_module("%s", name);
                                mutex_lock(&cpufreq_governor_mutex);
 
                                if (ret == 0)
@@ -427,15 +432,11 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
 
                mutex_unlock(&cpufreq_governor_mutex);
        }
-  out:
+out:
        return err;
 }
 
 
-/* drivers/base/cpu.c */
-extern struct sysdev_class cpu_sysdev_class;
-
-
 /**
  * cpufreq_per_cpu_attr_read() / show_##file_name() -
  * print out cpufreq information
@@ -446,13 +447,14 @@ extern struct sysdev_class cpu_sysdev_class;
 
 #define show_one(file_name, object)                    \
 static ssize_t show_##file_name                                \
-(struct cpufreq_policy * policy, char *buf)            \
+(struct cpufreq_policy *policy, char *buf)             \
 {                                                      \
-       return sprintf (buf, "%u\n", policy->object);   \
+       return sprintf(buf, "%u\n", policy->object);    \
 }
 
 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
+show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
 show_one(scaling_min_freq, min);
 show_one(scaling_max_freq, max);
 show_one(scaling_cur_freq, cur);
@@ -465,7 +467,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
  */
 #define store_one(file_name, object)                   \
 static ssize_t store_##file_name                                       \
-(struct cpufreq_policy * policy, const char *buf, size_t count)                \
+(struct cpufreq_policy *policy, const char *buf, size_t count)         \
 {                                                                      \
        unsigned int ret = -EINVAL;                                     \
        struct cpufreq_policy new_policy;                               \
@@ -474,7 +476,7 @@ static ssize_t store_##file_name                                    \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
-       ret = sscanf (buf, "%u", &new_policy.object);                   \
+       ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
@@ -484,14 +486,14 @@ static ssize_t store_##file_name                                  \
        return ret ? ret : count;                                       \
 }
 
-store_one(scaling_min_freq,min);
-store_one(scaling_max_freq,max);
+store_one(scaling_min_freq, min);
+store_one(scaling_max_freq, max);
 
 /**
  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
  */
-static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
-                                                       char *buf)
+static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
+                                       char *buf)
 {
        unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
@@ -503,15 +505,15 @@ static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
 /**
  * show_scaling_governor - show the current policy for the specified CPU
  */
-static ssize_t show_scaling_governor (struct cpufreq_policy * policy,
-                                                       char *buf)
+static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
 {
-       if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
+       if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
-               return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
+               return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
+                               policy->governor->name);
        return -EINVAL;
 }
 
@@ -519,8 +521,8 @@ static ssize_t show_scaling_governor (struct cpufreq_policy * policy,
 /**
  * store_scaling_governor - store policy for the specified CPU
  */
-static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
-                                      const char *buf, size_t count)
+static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
+                                       const char *buf, size_t count)
 {
        unsigned int ret = -EINVAL;
        char    str_governor[16];
@@ -530,7 +532,7 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
        if (ret)
                return ret;
 
-       ret = sscanf (buf, "%15s", str_governor);
+       ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;
 
@@ -554,7 +556,7 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
 /**
  * show_scaling_driver - show the cpufreq driver currently loaded
  */
-static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf)
+static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
 {
        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
 }
@@ -562,8 +564,8 @@ static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf)
 /**
  * show_scaling_available_governors - show the available CPUfreq governors
  */
-static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy,
-                               char *buf)
+static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
+                                               char *buf)
 {
        ssize_t i = 0;
        struct cpufreq_governor *t;
@@ -574,7 +576,8 @@ static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy,
        }
 
        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
-               if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
+               if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
+                   - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
        }
@@ -582,32 +585,49 @@ out:
        i += sprintf(&buf[i], "\n");
        return i;
 }
-/**
- * show_affected_cpus - show the CPUs affected by each transition
- */
-static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf)
+
+static ssize_t show_cpus(const struct cpumask *mask, char *buf)
 {
        ssize_t i = 0;
        unsigned int cpu;
 
-       for_each_cpu_mask(cpu, policy->cpus) {
+       for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
-                   break;
+                       break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
 }
 
+/**
+ * show_related_cpus - show the CPUs affected by each transition even if
+ * hw coordination is in use
+ */
+static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
+{
+       if (cpumask_empty(policy->related_cpus))
+               return show_cpus(policy->cpus, buf);
+       return show_cpus(policy->related_cpus, buf);
+}
+
+/**
+ * show_affected_cpus - show the CPUs affected by each transition
+ */
+static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
+{
+       return show_cpus(policy->cpus, buf);
+}
+
 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
-               const char *buf, size_t count)
+                                       const char *buf, size_t count)
 {
        unsigned int freq = 0;
        unsigned int ret;
 
-       if (!policy->governor->store_setspeed)
+       if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;
 
        ret = sscanf(buf, "%u", &freq);
@@ -621,42 +641,50 @@ static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
 
 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
 {
-       if (!policy->governor->show_setspeed)
+       if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");
 
        return policy->governor->show_setspeed(policy, buf);
 }
 
-#define define_one_ro(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0444, show_##_name, NULL)
-
-#define define_one_ro0400(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0400, show_##_name, NULL)
-
-#define define_one_rw(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_ro0400(cpuinfo_cur_freq);
-define_one_ro(cpuinfo_min_freq);
-define_one_ro(cpuinfo_max_freq);
-define_one_ro(scaling_available_governors);
-define_one_ro(scaling_driver);
-define_one_ro(scaling_cur_freq);
-define_one_ro(affected_cpus);
-define_one_rw(scaling_min_freq);
-define_one_rw(scaling_max_freq);
-define_one_rw(scaling_governor);
-define_one_rw(scaling_setspeed);
-
-static struct attribute * default_attrs[] = {
+/**
+ * show_bios_limit - show the current cpufreq HW/BIOS limitation
+ */
+static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
+{
+       unsigned int limit;
+       int ret;
+       if (cpufreq_driver->bios_limit) {
+               ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
+               if (!ret)
+                       return sprintf(buf, "%u\n", limit);
+       }
+       return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
+}
+
+cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
+cpufreq_freq_attr_ro(cpuinfo_min_freq);
+cpufreq_freq_attr_ro(cpuinfo_max_freq);
+cpufreq_freq_attr_ro(cpuinfo_transition_latency);
+cpufreq_freq_attr_ro(scaling_available_governors);
+cpufreq_freq_attr_ro(scaling_driver);
+cpufreq_freq_attr_ro(scaling_cur_freq);
+cpufreq_freq_attr_ro(bios_limit);
+cpufreq_freq_attr_ro(related_cpus);
+cpufreq_freq_attr_ro(affected_cpus);
+cpufreq_freq_attr_rw(scaling_min_freq);
+cpufreq_freq_attr_rw(scaling_max_freq);
+cpufreq_freq_attr_rw(scaling_governor);
+cpufreq_freq_attr_rw(scaling_setspeed);
+
+static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
+       &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
+       &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
@@ -664,20 +692,23 @@ static struct attribute * default_attrs[] = {
        NULL
 };
 
-#define to_policy(k) container_of(k,struct cpufreq_policy,kobj)
-#define to_attr(a) container_of(a,struct freq_attr,attr)
+struct kobject *cpufreq_global_kobject;
+EXPORT_SYMBOL(cpufreq_global_kobject);
 
-static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
+#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
+#define to_attr(a) container_of(a, struct freq_attr, attr)
+
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
-       struct cpufreq_policy * policy = to_policy(kobj);
-       struct freq_attr * fattr = to_attr(attr);
-       ssize_t ret;
+       struct cpufreq_policy *policy = to_policy(kobj);
+       struct freq_attr *fattr = to_attr(attr);
+       ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
-               return -EINVAL;
+               goto no_policy;
 
        if (lock_policy_rwsem_read(policy->cpu) < 0)
-               return -EINVAL;
+               goto fail;
 
        if (fattr->show)
                ret = fattr->show(policy, buf);
@@ -685,23 +716,24 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
                ret = -EIO;
 
        unlock_policy_rwsem_read(policy->cpu);
-
+fail:
        cpufreq_cpu_put(policy);
+no_policy:
        return ret;
 }
 
-static ssize_t store(struct kobject * kobj, struct attribute * attr,
-                    const char * buf, size_t count)
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+                    const char *buf, size_t count)
 {
-       struct cpufreq_policy * policy = to_policy(kobj);
-       struct freq_attr * fattr = to_attr(attr);
-       ssize_t ret;
+       struct cpufreq_policy *policy = to_policy(kobj);
+       struct freq_attr *fattr = to_attr(attr);
+       ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
-               return -EINVAL;
+               goto no_policy;
 
        if (lock_policy_rwsem_write(policy->cpu) < 0)
-               return -EINVAL;
+               goto fail;
 
        if (fattr->store)
                ret = fattr->store(policy, buf, count);
@@ -709,19 +741,20 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr,
                ret = -EIO;
 
        unlock_policy_rwsem_write(policy->cpu);
-
+fail:
        cpufreq_cpu_put(policy);
+no_policy:
        return ret;
 }
 
-static void cpufreq_sysfs_release(struct kobject * kobj)
+static void cpufreq_sysfs_release(struct kobject *kobj)
 {
-       struct cpufreq_policy * policy = to_policy(kobj);
+       struct cpufreq_policy *policy = to_policy(kobj);
        dprintk("last reference is dropped\n");
        complete(&policy->kobj_unregister);
 }
 
-static struct sysfs_ops sysfs_ops = {
+static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
 };
@@ -732,24 +765,210 @@ static struct kobj_type ktype_cpufreq = {
        .release        = cpufreq_sysfs_release,
 };
 
+/*
+ * Returns:
+ *   Negative: Failure
+ *   0:        Success
+ *   Positive: When we have a managed CPU and the sysfs got symlinked
+ */
+static int cpufreq_add_dev_policy(unsigned int cpu,
+                                 struct cpufreq_policy *policy,
+                                 struct sys_device *sys_dev)
+{
+       int ret = 0;
+#ifdef CONFIG_SMP
+       unsigned long flags;
+       unsigned int j;
+#ifdef CONFIG_HOTPLUG_CPU
+       struct cpufreq_governor *gov;
+
+       gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
+       if (gov) {
+               policy->governor = gov;
+               dprintk("Restoring governor %s for cpu %d\n",
+                      policy->governor->name, cpu);
+       }
+#endif
+
+       for_each_cpu(j, policy->cpus) {
+               struct cpufreq_policy *managed_policy;
+
+               if (cpu == j)
+                       continue;
+
+               /* Check for existing affected CPUs.
+                * They may not be aware of it due to CPU Hotplug.
+                * cpufreq_cpu_put is called when the device is removed
+                * in __cpufreq_remove_dev()
+                */
+               managed_policy = cpufreq_cpu_get(j);
+               if (unlikely(managed_policy)) {
+
+                       /* Set proper policy_cpu */
+                       unlock_policy_rwsem_write(cpu);
+                       per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
+
+                       if (lock_policy_rwsem_write(cpu) < 0) {
+                               /* Should not go through policy unlock path */
+                               if (cpufreq_driver->exit)
+                                       cpufreq_driver->exit(policy);
+                               cpufreq_cpu_put(managed_policy);
+                               return -EBUSY;
+                       }
+
+                       spin_lock_irqsave(&cpufreq_driver_lock, flags);
+                       cpumask_copy(managed_policy->cpus, policy->cpus);
+                       per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
+                       spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+                       dprintk("CPU already managed, adding link\n");
+                       ret = sysfs_create_link(&sys_dev->kobj,
+                                               &managed_policy->kobj,
+                                               "cpufreq");
+                       if (ret)
+                               cpufreq_cpu_put(managed_policy);
+                       /*
+                        * Success. We only needed to be added to the mask.
+                        * Call driver->exit() because only the cpu parent of
+                        * the kobj needed to call init().
+                        */
+                       if (cpufreq_driver->exit)
+                               cpufreq_driver->exit(policy);
+
+                       if (!ret)
+                               return 1;
+                       else
+                               return ret;
+               }
+       }
+#endif
+       return ret;
+}
+
+
+/* symlink affected CPUs */
+static int cpufreq_add_dev_symlink(unsigned int cpu,
+                                  struct cpufreq_policy *policy)
+{
+       unsigned int j;
+       int ret = 0;
+
+       for_each_cpu(j, policy->cpus) {
+               struct cpufreq_policy *managed_policy;
+               struct sys_device *cpu_sys_dev;
+
+               if (j == cpu)
+                       continue;
+               if (!cpu_online(j))
+                       continue;
+
+               dprintk("CPU %u already managed, adding link\n", j);
+               managed_policy = cpufreq_cpu_get(cpu);
+               cpu_sys_dev = get_cpu_sysdev(j);
+               ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
+                                       "cpufreq");
+               if (ret) {
+                       cpufreq_cpu_put(managed_policy);
+                       return ret;
+               }
+       }
+       return ret;
+}
+
+static int cpufreq_add_dev_interface(unsigned int cpu,
+                                    struct cpufreq_policy *policy,
+                                    struct sys_device *sys_dev)
+{
+       struct cpufreq_policy new_policy;
+       struct freq_attr **drv_attr;
+       unsigned long flags;
+       int ret = 0;
+       unsigned int j;
+
+       /* prepare interface data */
+       ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
+                                  &sys_dev->kobj, "cpufreq");
+       if (ret)
+               return ret;
+
+       /* set up files for this cpu device */
+       drv_attr = cpufreq_driver->attr;
+       while ((drv_attr) && (*drv_attr)) {
+               ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
+               if (ret)
+                       goto err_out_kobj_put;
+               drv_attr++;
+       }
+       if (cpufreq_driver->get) {
+               ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
+               if (ret)
+                       goto err_out_kobj_put;
+       }
+       if (cpufreq_driver->target) {
+               ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+               if (ret)
+                       goto err_out_kobj_put;
+       }
+       if (cpufreq_driver->bios_limit) {
+               ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
+               if (ret)
+                       goto err_out_kobj_put;
+       }
+
+       spin_lock_irqsave(&cpufreq_driver_lock, flags);
+       for_each_cpu(j, policy->cpus) {
+       if (!cpu_online(j))
+               continue;
+               per_cpu(cpufreq_cpu_data, j) = policy;
+               per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
+       }
+       spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+       ret = cpufreq_add_dev_symlink(cpu, policy);
+       if (ret)
+               goto err_out_kobj_put;
+
+       memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+       /* assure that the starting sequence is run in __cpufreq_set_policy */
+       policy->governor = NULL;
+
+       /* set default policy */
+       ret = __cpufreq_set_policy(policy, &new_policy);
+       policy->user_policy.policy = policy->policy;
+       policy->user_policy.governor = policy->governor;
+
+       if (ret) {
+               dprintk("setting policy failed\n");
+               if (cpufreq_driver->exit)
+                       cpufreq_driver->exit(policy);
+       }
+       return ret;
+
+err_out_kobj_put:
+       kobject_put(&policy->kobj);
+       wait_for_completion(&policy->kobj_unregister);
+       return ret;
+}
+
 
 /**
  * cpufreq_add_dev - add a CPU device
  *
  * Adds the cpufreq interface for a CPU device.
+ *
+ * The Oracle says: try running cpufreq registration/unregistration concurrently
+ * with cpu hotplugging and all hell will break loose. Tried to clean this
+ * mess up, but more thorough testing is needed. - Mathieu
  */
-static int cpufreq_add_dev (struct sys_device * sys_dev)
+static int cpufreq_add_dev(struct sys_device *sys_dev)
 {
        unsigned int cpu = sys_dev->id;
-       int ret = 0;
-       struct cpufreq_policy new_policy;
+       int ret = 0, found = 0;
        struct cpufreq_policy *policy;
-       struct freq_attr **drv_attr;
-       struct sys_device *cpu_sys_dev;
        unsigned long flags;
        unsigned int j;
-#ifdef CONFIG_SMP
-       struct cpufreq_policy *managed_policy;
+#ifdef CONFIG_HOTPLUG_CPU
+       int sibling;
 #endif
 
        if (cpu_is_offline(cpu))
@@ -774,158 +993,71 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                goto module_out;
        }
 
+       ret = -ENOMEM;
        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
-       if (!policy) {
-               ret = -ENOMEM;
+       if (!policy)
                goto nomem_out;
-       }
+
+       if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
+               goto err_free_policy;
+
+       if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
+               goto err_free_cpumask;
 
        policy->cpu = cpu;
-       policy->cpus = cpumask_of_cpu(cpu);
+       cpumask_copy(policy->cpus, cpumask_of(cpu));
 
        /* Initially set CPU itself as the policy_cpu */
-       per_cpu(policy_cpu, cpu) = cpu;
-       lock_policy_rwsem_write(cpu);
+       per_cpu(cpufreq_policy_cpu, cpu) = cpu;
+       ret = (lock_policy_rwsem_write(cpu) < 0);
+       WARN_ON(ret);
 
        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);
 
        /* Set governor before ->init, so that driver could check it */
-       policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+#ifdef CONFIG_HOTPLUG_CPU
+       for_each_online_cpu(sibling) {
+               struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
+               if (cp && cp->governor &&
+                   (cpumask_test_cpu(cpu, cp->related_cpus))) {
+                       policy->governor = cp->governor;
+                       found = 1;
+                       break;
+               }
+       }
+#endif
+       if (!found)
+               policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                dprintk("initialization failed\n");
-               unlock_policy_rwsem_write(cpu);
-               goto err_out;
-       }
-       policy->user_policy.min = policy->cpuinfo.min_freq;
-       policy->user_policy.max = policy->cpuinfo.max_freq;
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_HOTPLUG_CPU
-       if (cpufreq_cpu_governor[cpu]){
-               policy->governor = cpufreq_cpu_governor[cpu];
-               dprintk("Restoring governor %s for cpu %d\n",
-                      policy->governor->name, cpu);
+               goto err_unlock_policy;
        }
-#endif
-
-       for_each_cpu_mask(j, policy->cpus) {
-               if (cpu == j)
-                       continue;
-
-               /* check for existing affected CPUs.  They may not be aware
-                * of it due to CPU Hotplug.
-                */
-               managed_policy = cpufreq_cpu_get(j);
-               if (unlikely(managed_policy)) {
-
-                       /* Set proper policy_cpu */
-                       unlock_policy_rwsem_write(cpu);
-                       per_cpu(policy_cpu, cpu) = managed_policy->cpu;
-
-                       if (lock_policy_rwsem_write(cpu) < 0)
-                               goto err_out_driver_exit;
-
-                       spin_lock_irqsave(&cpufreq_driver_lock, flags);
-                       managed_policy->cpus = policy->cpus;
-                       cpufreq_cpu_data[cpu] = managed_policy;
-                       spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-                       dprintk("CPU already managed, adding link\n");
-                       ret = sysfs_create_link(&sys_dev->kobj,
-                                               &managed_policy->kobj,
-                                               "cpufreq");
-                       if (ret) {
-                               unlock_policy_rwsem_write(cpu);
-                               goto err_out_driver_exit;
-                       }
+       policy->user_policy.min = policy->min;
+       policy->user_policy.max = policy->max;
 
-                       cpufreq_debug_enable_ratelimit();
-                       ret = 0;
-                       unlock_policy_rwsem_write(cpu);
-                       goto err_out_driver_exit; /* call driver->exit() */
-               }
-       }
-#endif
-       memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+                                    CPUFREQ_START, policy);
 
-       /* prepare interface data */
-       ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
-                                  "cpufreq");
+       ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
        if (ret) {
-               unlock_policy_rwsem_write(cpu);
-               goto err_out_driver_exit;
-       }
-       /* set up files for this cpu device */
-       drv_attr = cpufreq_driver->attr;
-       while ((drv_attr) && (*drv_attr)) {
-               ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
-               if (ret) {
-                       unlock_policy_rwsem_write(cpu);
-                       goto err_out_driver_exit;
-               }
-               drv_attr++;
-       }
-       if (cpufreq_driver->get){
-               ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
-               if (ret) {
-                       unlock_policy_rwsem_write(cpu);
-                       goto err_out_driver_exit;
-               }
-       }
-       if (cpufreq_driver->target){
-               ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
-               if (ret) {
-                       unlock_policy_rwsem_write(cpu);
-                       goto err_out_driver_exit;
-               }
-       }
-
-       spin_lock_irqsave(&cpufreq_driver_lock, flags);
-       for_each_cpu_mask(j, policy->cpus) {
-               cpufreq_cpu_data[j] = policy;
-               per_cpu(policy_cpu, j) = policy->cpu;
-       }
-       spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-       /* symlink affected CPUs */
-       for_each_cpu_mask(j, policy->cpus) {
-               if (j == cpu)
-                       continue;
-               if (!cpu_online(j))
-                       continue;
-
-               dprintk("CPU %u already managed, adding link\n", j);
-               cpufreq_cpu_get(cpu);
-               cpu_sys_dev = get_cpu_sysdev(j);
-               ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
-                                       "cpufreq");
-               if (ret) {
-                       unlock_policy_rwsem_write(cpu);
-                       goto err_out_unregister;
-               }
+               if (ret > 0)
+                       /* This is a managed cpu, symlink created,
+                          exit with 0 */
+                       ret = 0;
+               goto err_unlock_policy;
        }
 
-       policy->governor = NULL; /* to assure that the starting sequence is
-                                 * run in cpufreq_set_policy */
-
-       /* set default policy */
-       ret = __cpufreq_set_policy(policy, &new_policy);
-       policy->user_policy.policy = policy->policy;
-       policy->user_policy.governor = policy->governor;
+       ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
+       if (ret)
+               goto err_out_unregister;
 
        unlock_policy_rwsem_write(cpu);
 
-       if (ret) {
-               dprintk("setting policy failed\n");
-               goto err_out_unregister;
-       }
-
        kobject_uevent(&policy->kobj, KOBJ_ADD);
        module_put(cpufreq_driver->owner);
        dprintk("initialization complete\n");
@@ -936,20 +1068,19 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
 
 err_out_unregister:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
-       for_each_cpu_mask(j, policy->cpus)
-               cpufreq_cpu_data[j] = NULL;
+       for_each_cpu(j, policy->cpus)
+               per_cpu(cpufreq_cpu_data, j) = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);
 
-err_out_driver_exit:
-       if (cpufreq_driver->exit)
-               cpufreq_driver->exit(policy);
-
-err_out:
+err_unlock_policy:
+       unlock_policy_rwsem_write(cpu);
+err_free_cpumask:
+       free_cpumask_var(policy->cpus);
+err_free_policy:
        kfree(policy);
-
 nomem_out:
        module_put(cpufreq_driver->owner);
 module_out:
@@ -965,11 +1096,13 @@ module_out:
  * Caller should already have policy_rwsem in write mode for this CPU.
  * This routine frees the rwsem before returning.
  */
-static int __cpufreq_remove_dev (struct sys_device * sys_dev)
+static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 {
        unsigned int cpu = sys_dev->id;
        unsigned long flags;
        struct cpufreq_policy *data;
+       struct kobject *kobj;
+       struct completion *cmp;
 #ifdef CONFIG_SMP
        struct sys_device *cpu_sys_dev;
        unsigned int j;
@@ -979,7 +1112,7 @@ static int __cpufreq_remove_dev (struct sys_device * sys_dev)
        dprintk("unregistering CPU %u\n", cpu);
 
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
-       data = cpufreq_cpu_data[cpu];
+       data = per_cpu(cpufreq_cpu_data, cpu);
 
        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -987,7 +1120,7 @@ static int __cpufreq_remove_dev (struct sys_device * sys_dev)
                unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
-       cpufreq_cpu_data[cpu] = NULL;
+       per_cpu(cpufreq_cpu_data, cpu) = NULL;
 
 
 #ifdef CONFIG_SMP
@@ -996,12 +1129,13 @@ static int __cpufreq_remove_dev (struct sys_device * sys_dev)
         */
        if (unlikely(cpu != data->cpu)) {
                dprintk("removing link\n");
-               cpu_clear(cpu, data->cpus);
+               cpumask_clear_cpu(cpu, data->cpus);
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-               sysfs_remove_link(&sys_dev->kobj, "cpufreq");
+               kobj = &sys_dev->kobj;
                cpufreq_cpu_put(data);
                cpufreq_debug_enable_ratelimit();
                unlock_policy_rwsem_write(cpu);
+               sysfs_remove_link(kobj, "cpufreq");
                return 0;
        }
 #endif
@@ -1009,34 +1143,39 @@ static int __cpufreq_remove_dev (struct sys_device * sys_dev)
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_HOTPLUG_CPU
-       cpufreq_cpu_governor[cpu] = data->governor;
+       strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
+                       CPUFREQ_NAME_LEN);
 #endif
 
        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
-        * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
-        * links afterwards.
+        * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
+        * the sysfs links afterwards.
         */
-       if (unlikely(cpus_weight(data->cpus) > 1)) {
-               for_each_cpu_mask(j, data->cpus) {
+       if (unlikely(cpumask_weight(data->cpus) > 1)) {
+               for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
-                       cpufreq_cpu_data[j] = NULL;
+                       per_cpu(cpufreq_cpu_data, j) = NULL;
                }
        }
 
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-       if (unlikely(cpus_weight(data->cpus) > 1)) {
-               for_each_cpu_mask(j, data->cpus) {
+       if (unlikely(cpumask_weight(data->cpus) > 1)) {
+               for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        dprintk("removing link for cpu %u\n", j);
 #ifdef CONFIG_HOTPLUG_CPU
-                       cpufreq_cpu_governor[j] = data->governor;
+                       strncpy(per_cpu(cpufreq_cpu_governor, j),
+                               data->governor->name, CPUFREQ_NAME_LEN);
 #endif
                        cpu_sys_dev = get_cpu_sysdev(j);
-                       sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
+                       kobj = &cpu_sys_dev->kobj;
+                       unlock_policy_rwsem_write(cpu);
+                       sysfs_remove_link(kobj, "cpufreq");
+                       lock_policy_rwsem_write(cpu);
                        cpufreq_cpu_put(data);
                }
        }
@@ -1047,29 +1186,35 @@ static int __cpufreq_remove_dev (struct sys_device * sys_dev)
        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 
+       kobj = &data->kobj;
+       cmp = &data->kobj_unregister;
        unlock_policy_rwsem_write(cpu);
-
-       kobject_put(&data->kobj);
+       kobject_put(kobj);
 
        /* we need to make sure that the underlying kobj is actually
         * not referenced anymore by anybody before we proceed with
         * unloading.
         */
        dprintk("waiting for dropping of refcount\n");
-       wait_for_completion(&data->kobj_unregister);
+       wait_for_completion(cmp);
        dprintk("wait complete\n");
 
+       lock_policy_rwsem_write(cpu);
        if (cpufreq_driver->exit)
                cpufreq_driver->exit(data);
+       unlock_policy_rwsem_write(cpu);
 
+       free_cpumask_var(data->related_cpus);
+       free_cpumask_var(data->cpus);
        kfree(data);
+       per_cpu(cpufreq_cpu_data, cpu) = NULL;
 
        cpufreq_debug_enable_ratelimit();
        return 0;
 }
 
 
-static int cpufreq_remove_dev (struct sys_device * sys_dev)
+static int cpufreq_remove_dev(struct sys_device *sys_dev)
 {
        unsigned int cpu = sys_dev->id;
        int retval;
@@ -1100,8 +1245,8 @@ static void handle_update(struct work_struct *work)
  *     @old_freq: CPU frequency the kernel thinks the CPU runs at
  *     @new_freq: CPU frequency the CPU actually runs at
  *
- *     We adjust to current frequency first, and need to clean up later. So either call
- *     to cpufreq_update_policy() or schedule handle_update()).
+ *     We adjust to current frequency first, and need to clean up later.
+ *     So either call cpufreq_update_policy() or schedule handle_update().
  */
 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
                                unsigned int new_freq)
@@ -1136,18 +1281,18 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
                cpufreq_cpu_put(policy);
        }
 
-       return (ret_freq);
+       return ret_freq;
 }
 EXPORT_SYMBOL(cpufreq_quick_get);
 
 
 static unsigned int __cpufreq_get(unsigned int cpu)
 {
-       struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
+       struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
        unsigned int ret_freq = 0;
 
        if (!cpufreq_driver->get)
-               return (ret_freq);
+               return ret_freq;
 
        ret_freq = cpufreq_driver->get(cpu);
 
@@ -1161,7 +1306,7 @@ static unsigned int __cpufreq_get(unsigned int cpu)
                }
        }
 
-       return (ret_freq);
+       return ret_freq;
 }
 
 /**
@@ -1188,7 +1333,7 @@ unsigned int cpufreq_get(unsigned int cpu)
 out_policy:
        cpufreq_cpu_put(policy);
 out:
-       return (ret_freq);
+       return ret_freq;
 }
 EXPORT_SYMBOL(cpufreq_get);
 
@@ -1197,11 +1342,11 @@ EXPORT_SYMBOL(cpufreq_get);
  *     cpufreq_suspend - let the low level driver prepare for suspend
  */
 
-static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
+static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
 {
-       int cpu = sysdev->id;
        int ret = 0;
-       unsigned int cur_freq = 0;
+
+       int cpu = sysdev->id;
        struct cpufreq_policy *cpu_policy;
 
        dprintk("suspending cpu %u\n", cpu);
@@ -1219,70 +1364,36 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
                return -EINVAL;
 
        /* only handle each CPU group once */
-       if (unlikely(cpu_policy->cpu != cpu)) {
-               cpufreq_cpu_put(cpu_policy);
-               return 0;
-       }
+       if (unlikely(cpu_policy->cpu != cpu))
+               goto out;
 
        if (cpufreq_driver->suspend) {
                ret = cpufreq_driver->suspend(cpu_policy, pmsg);
-               if (ret) {
+               if (ret)
                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
                                        "step on CPU %u\n", cpu_policy->cpu);
-                       cpufreq_cpu_put(cpu_policy);
-                       return ret;
-               }
-       }
-
-
-       if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
-               goto out;
-
-       if (cpufreq_driver->get)
-               cur_freq = cpufreq_driver->get(cpu_policy->cpu);
-
-       if (!cur_freq || !cpu_policy->cur) {
-               printk(KERN_ERR "cpufreq: suspend failed to assert current "
-                      "frequency is what timing core thinks it is.\n");
-               goto out;
-       }
-
-       if (unlikely(cur_freq != cpu_policy->cur)) {
-               struct cpufreq_freqs freqs;
-
-               if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
-                       dprintk("Warning: CPU frequency is %u, "
-                              "cpufreq assumed %u kHz.\n",
-                              cur_freq, cpu_policy->cur);
-
-               freqs.cpu = cpu;
-               freqs.old = cpu_policy->cur;
-               freqs.new = cur_freq;
-
-               srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
-                                   CPUFREQ_SUSPENDCHANGE, &freqs);
-               adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
-
-               cpu_policy->cur = cur_freq;
        }
 
 out:
        cpufreq_cpu_put(cpu_policy);
-       return 0;
+       return ret;
 }
 
 /**
  *     cpufreq_resume -  restore proper CPU frequency handling after resume
  *
  *     1.) resume CPUfreq hardware support (cpufreq_driver->resume())
- *     2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
- *     3.) schedule call cpufreq_update_policy() ASAP as interrupts are
- *         restored.
+ *     2.) schedule a call to cpufreq_update_policy() ASAP as interrupts are
+ *         restored. It will verify that the current freq is in sync with
+ *         what we believe it to be. This is a bit later than when it
+ *         should be, but nonetheless it's better than calling
+ *         cpufreq_driver->get() here which might re-enable interrupts...
  */
-static int cpufreq_resume(struct sys_device * sysdev)
+static int cpufreq_resume(struct sys_device *sysdev)
 {
-       int cpu = sysdev->id;
        int ret = 0;
+
+       int cpu = sysdev->id;
        struct cpufreq_policy *cpu_policy;
 
        dprintk("resuming cpu %u\n", cpu);
@@ -1300,57 +1411,21 @@ static int cpufreq_resume(struct sys_device * sysdev)
                return -EINVAL;
 
        /* only handle each CPU group once */
-       if (unlikely(cpu_policy->cpu != cpu)) {
-               cpufreq_cpu_put(cpu_policy);
-               return 0;
-       }
+       if (unlikely(cpu_policy->cpu != cpu))
+               goto fail;
 
        if (cpufreq_driver->resume) {
                ret = cpufreq_driver->resume(cpu_policy);
                if (ret) {
                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
                                        "step on CPU %u\n", cpu_policy->cpu);
-                       cpufreq_cpu_put(cpu_policy);
-                       return ret;
+                       goto fail;
                }
        }
 
-       if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
-               unsigned int cur_freq = 0;
-
-               if (cpufreq_driver->get)
-                       cur_freq = cpufreq_driver->get(cpu_policy->cpu);
-
-               if (!cur_freq || !cpu_policy->cur) {
-                       printk(KERN_ERR "cpufreq: resume failed to assert "
-                                       "current frequency is what timing core "
-                                       "thinks it is.\n");
-                       goto out;
-               }
-
-               if (unlikely(cur_freq != cpu_policy->cur)) {
-                       struct cpufreq_freqs freqs;
-
-                       if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
-                               dprintk("Warning: CPU frequency "
-                                      "is %u, cpufreq assumed %u kHz.\n",
-                                      cur_freq, cpu_policy->cur);
-
-                       freqs.cpu = cpu;
-                       freqs.old = cpu_policy->cur;
-                       freqs.new = cur_freq;
-
-                       srcu_notifier_call_chain(
-                                       &cpufreq_transition_notifier_list,
-                                       CPUFREQ_RESUMECHANGE, &freqs);
-                       adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
-
-                       cpu_policy->cur = cur_freq;
-               }
-       }
-
-out:
        schedule_work(&cpu_policy->update);
+
+fail:
        cpufreq_cpu_put(cpu_policy);
        return ret;
 }
@@ -1384,6 +1459,8 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
 {
        int ret;
 
+       WARN_ON(!init_cpufreq_transition_notifier_list_called);
+
        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_register(
@@ -1458,25 +1535,27 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
                          unsigned int target_freq,
                          unsigned int relation)
 {
-       int ret;
+       int ret = -EINVAL;
 
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
-               return -EINVAL;
+               goto no_policy;
 
        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
-               return -EINVAL;
+               goto fail;
 
        ret = __cpufreq_driver_target(policy, target_freq, relation);
 
        unlock_policy_rwsem_write(policy->cpu);
 
+fail:
        cpufreq_cpu_put(policy);
+no_policy:
        return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
 
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
 {
        int ret = 0;
 
@@ -1484,8 +1563,8 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
        if (!policy)
                return -EINVAL;
 
-       if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
-               ret = cpufreq_driver->getavg(policy->cpu);
+       if (cpu_online(cpu) && cpufreq_driver->getavg)
+               ret = cpufreq_driver->getavg(policy, cpu);
 
        cpufreq_cpu_put(policy);
        return ret;
@@ -1567,9 +1646,22 @@ EXPORT_SYMBOL_GPL(cpufreq_register_governor);
 
 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
 {
+#ifdef CONFIG_HOTPLUG_CPU
+       int cpu;
+#endif
+
        if (!governor)
                return;
 
+#ifdef CONFIG_HOTPLUG_CPU
+       for_each_present_cpu(cpu) {
+               if (cpu_online(cpu))
+                       continue;
+               if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
+                       strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
+       }
+#endif
+
        mutex_lock(&cpufreq_governor_mutex);
        list_del(&governor->governor_list);
        mutex_unlock(&cpufreq_governor_mutex);
@@ -1585,7 +1677,8 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
 
 /**
  * cpufreq_get_policy - get the current cpufreq_policy
- * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
+ * @policy: struct cpufreq_policy into which the current cpufreq_policy
+ *     is written
  *
  * Reads the current cpufreq policy.
  */
@@ -1669,8 +1762,17 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
                        dprintk("governor switch\n");
 
                        /* end old governor */
-                       if (data->governor)
+                       if (data->governor) {
+                               /*
+                                * Need to release the rwsem around governor
+                                * stop due to lock dependency between
+                                * cancel_delayed_work_sync and the read lock
+                                * taken in the delayed work handler.
+                                */
+                               unlock_policy_rwsem_write(data->cpu);
                                __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+                               lock_policy_rwsem_write(data->cpu);
+                       }
 
                        /* start new governor */
                        data->governor = policy->governor;
@@ -1708,13 +1810,17 @@ int cpufreq_update_policy(unsigned int cpu)
 {
        struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
        struct cpufreq_policy policy;
-       int ret = 0;
+       int ret;
 
-       if (!data)
-               return -ENODEV;
+       if (!data) {
+               ret = -ENODEV;
+               goto no_policy;
+       }
 
-       if (unlikely(lock_policy_rwsem_write(cpu)))
-               return -EINVAL;
+       if (unlikely(lock_policy_rwsem_write(cpu))) {
+               ret = -EINVAL;
+               goto fail;
+       }
 
        dprintk("updating policy for CPU %u\n", cpu);
        memcpy(&policy, data, sizeof(struct cpufreq_policy));
@@ -1741,7 +1847,9 @@ int cpufreq_update_policy(unsigned int cpu)
 
        unlock_policy_rwsem_write(cpu);
 
+fail:
        cpufreq_cpu_put(data);
+no_policy:
        return ret;
 }
 EXPORT_SYMBOL(cpufreq_update_policy);
@@ -1775,7 +1883,7 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
        return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
+static struct notifier_block __refdata cpufreq_cpu_notifier =
 {
     .notifier_call = cpufreq_cpu_callback,
 };
@@ -1816,16 +1924,19 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
        cpufreq_driver = driver_data;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-       ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver);
+       ret = sysdev_driver_register(&cpu_sysdev_class,
+                                       &cpufreq_sysdev_driver);
 
        if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
                int i;
                ret = -ENODEV;
 
                /* check for at least one working CPU */
-               for (i=0; i<NR_CPUS; i++)
-                       if (cpufreq_cpu_data[i])
+               for (i = 0; i < nr_cpu_ids; i++)
+                       if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
                                ret = 0;
+                               break;
+                       }
 
                /* if all ->init() calls failed, unregister */
                if (ret) {
@@ -1846,7 +1957,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
                cpufreq_debug_enable_ratelimit();
        }
 
-       return (ret);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
 
@@ -1888,10 +1999,14 @@ static int __init cpufreq_core_init(void)
        int cpu;
 
        for_each_possible_cpu(cpu) {
-               per_cpu(policy_cpu, cpu) = -1;
+               per_cpu(cpufreq_policy_cpu, cpu) = -1;
                init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
        }
+
+       cpufreq_global_kobject = kobject_create_and_add("cpufreq",
+                                               &cpu_sysdev_class.kset.kobj);
+       BUG_ON(!cpufreq_global_kobject);
+
        return 0;
 }
-
 core_initcall(cpufreq_core_init);