kobjects: fix up improper use of the kobject name field
[safe/jmp/linux-2.6] / drivers / cpufreq / cpufreq.c
index d35a9f0..2ce3de5 100644 (file)
@@ -29,7 +29,8 @@
 #include <linux/completion.h>
 #include <linux/mutex.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
+                                               "cpufreq-core", msg)
 
 /**
  * The "cpufreq driver" - the arch- or hardware-dependent low
  */
 static struct cpufreq_driver *cpufreq_driver;
 static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
+#ifdef CONFIG_HOTPLUG_CPU
+/* This one keeps track of the previously set governor of a removed CPU */
+static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
+#endif
 static DEFINE_SPINLOCK(cpufreq_driver_lock);
 
+/*
+ * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
+ * all cpufreq/hotplug/workqueue/etc related lock issues.
+ *
+ * The rules for this semaphore:
+ * - Any routine that wants to read from the policy structure will
+ *   do a down_read on this semaphore.
+ * - Any routine that will write to the policy structure and/or may take away
+ *   the policy altogether (eg. CPU hotplug), will hold this lock in write
+ *   mode before doing so.
+ *
+ * Additional rules:
+ * - All holders of the lock should check to make sure that the CPU they
+ *   are concerned with are online after they get the lock.
+ * - Governor routines that can be called in cpufreq hotplug path should not
+ *   take this sem as top level hotplug notifier handler takes this.
+ */
+static DEFINE_PER_CPU(int, policy_cpu);
+static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
+
+#define lock_policy_rwsem(mode, cpu)                                   \
+int lock_policy_rwsem_##mode                                           \
+(int cpu)                                                              \
+{                                                                      \
+       int policy_cpu = per_cpu(policy_cpu, cpu);                      \
+       BUG_ON(policy_cpu == -1);                                       \
+       down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
+       if (unlikely(!cpu_online(cpu))) {                               \
+               up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
+               return -1;                                              \
+       }                                                               \
+                                                                       \
+       return 0;                                                       \
+}
+
+lock_policy_rwsem(read, cpu);
+EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
+
+lock_policy_rwsem(write, cpu);
+EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
+
+void unlock_policy_rwsem_read(int cpu)
+{
+       int policy_cpu = per_cpu(policy_cpu, cpu);
+       BUG_ON(policy_cpu == -1);
+       up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
+
+void unlock_policy_rwsem_write(int cpu)
+{
+       int policy_cpu = per_cpu(policy_cpu, cpu);
+       BUG_ON(policy_cpu == -1);
+       up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
+
+
 /* internal prototypes */
 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
-static void handle_update(void *data);
+static unsigned int __cpufreq_get(unsigned int cpu);
+static void handle_update(struct work_struct *work);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -52,8 +116,14 @@ static void handle_update(void *data);
  * The mutex locks both lists.
  */
 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
-static BLOCKING_NOTIFIER_HEAD(cpufreq_transition_notifier_list);
+static struct srcu_notifier_head cpufreq_transition_notifier_list;
 
+static int __init init_cpufreq_transition_notifier_list(void)
+{
+       srcu_init_notifier_head(&cpufreq_transition_notifier_list);
+       return 0;
+}
+pure_initcall(init_cpufreq_transition_notifier_list);
 
 static LIST_HEAD(cpufreq_governor_list);
 static DEFINE_MUTEX (cpufreq_governor_mutex);
@@ -145,7 +215,8 @@ static void cpufreq_debug_disable_ratelimit(void)
        spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
 }
 
-void cpufreq_debug_printk(unsigned int type, const char *prefix, const char *fmt, ...)
+void cpufreq_debug_printk(unsigned int type, const char *prefix,
+                                                       const char *fmt, ...)
 {
        char s[256];
        va_list args;
@@ -155,7 +226,8 @@ void cpufreq_debug_printk(unsigned int type, const char *prefix, const char *fmt
        WARN_ON(!prefix);
        if (type & debug) {
                spin_lock_irqsave(&disable_ratelimit_lock, flags);
-               if (!disable_ratelimit && debug_ratelimit && !printk_ratelimit()) {
+               if (!disable_ratelimit && debug_ratelimit
+                                       && !printk_ratelimit()) {
                        spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
                        return;
                }
@@ -176,10 +248,12 @@ EXPORT_SYMBOL(cpufreq_debug_printk);
 
 
 module_param(debug, uint, 0644);
-MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core, 2 to debug drivers, and 4 to debug governors.");
+MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
+                       " 2 to debug drivers, and 4 to debug governors.");
 
 module_param(debug_ratelimit, uint, 0644);
-MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging: set to 0 to disable ratelimiting.");
+MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
+                                       " set to 0 to disable ratelimiting.");
 
 #else /* !CONFIG_CPU_FREQ_DEBUG */
 
@@ -213,17 +287,23 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
-               dprintk("saving %lu as reference value for loops_per_jiffy; freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
+               dprintk("saving %lu as reference value for loops_per_jiffy;"
+                       "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
            (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
-               loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new);
-               dprintk("scaling loops_per_jiffy to %lu for frequency %u kHz\n", loops_per_jiffy, ci->new);
+               loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
+                                                               ci->new);
+               dprintk("scaling loops_per_jiffy to %lu"
+                       "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
 }
 #else
-static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) { return; }
+static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+       return;
+}
 #endif
 
 
@@ -262,14 +342,14 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
                                freqs->old = policy->cur;
                        }
                }
-               blocking_notifier_call_chain(&cpufreq_transition_notifier_list,
+               srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;
 
        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
-               blocking_notifier_call_chain(&cpufreq_transition_notifier_list,
+               srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
@@ -310,7 +390,8 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
-               } else if (!strnicmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) {
+               } else if (!strnicmp(str_governor, "powersave",
+                                               CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
@@ -322,7 +403,8 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
                t = __find_governor(str_governor);
 
                if (t == NULL) {
-                       char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", str_governor);
+                       char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
+                                                               str_governor);
 
                        if (name) {
                                int ret;
@@ -355,7 +437,8 @@ extern struct sysdev_class cpu_sysdev_class;
 
 
 /**
- * cpufreq_per_cpu_attr_read() / show_##file_name() - print out cpufreq information
+ * cpufreq_per_cpu_attr_read() / show_##file_name() -
+ * print out cpufreq information
  *
  * Write out information from cpufreq_driver->policy[cpu]; object must be
  * "unsigned int".
@@ -374,7 +457,8 @@ show_one(scaling_min_freq, min);
 show_one(scaling_max_freq, max);
 show_one(scaling_cur_freq, cur);
 
-static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy);
+static int __cpufreq_set_policy(struct cpufreq_policy *data,
+                               struct cpufreq_policy *policy);
 
 /**
  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
@@ -394,12 +478,8 @@ static ssize_t store_##file_name                                   \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
-       lock_cpu_hotplug();                                             \
-       mutex_lock(&policy->lock);                                      \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
-       mutex_unlock(&policy->lock);                                    \
-       unlock_cpu_hotplug();                                           \
                                                                        \
        return ret ? ret : count;                                       \
 }
@@ -410,9 +490,10 @@ store_one(scaling_max_freq,max);
 /**
  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
  */
-static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, char *buf)
+static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
+                                                       char *buf)
 {
-       unsigned int cur_freq = cpufreq_get(policy->cpu);
+       unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
@@ -422,7 +503,8 @@ static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, char *buf)
 /**
  * show_scaling_governor - show the current policy for the specified CPU
  */
-static ssize_t show_scaling_governor (struct cpufreq_policy * policy, char *buf)
+static ssize_t show_scaling_governor (struct cpufreq_policy * policy,
+                                                       char *buf)
 {
        if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
@@ -452,23 +534,21 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
        if (ret != 1)
                return -EINVAL;
 
-       if (cpufreq_parse_governor(str_governor, &new_policy.policy, &new_policy.governor))
+       if (cpufreq_parse_governor(str_governor, &new_policy.policy,
+                                               &new_policy.governor))
                return -EINVAL;
 
-       lock_cpu_hotplug();
-
        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
-       mutex_lock(&policy->lock);
        ret = __cpufreq_set_policy(policy, &new_policy);
 
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;
-       mutex_unlock(&policy->lock);
 
-       unlock_cpu_hotplug();
-
-       return ret ? ret : count;
+       if (ret)
+               return ret;
+       else
+               return count;
 }
 
 /**
@@ -482,7 +562,7 @@ static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf)
 /**
  * show_scaling_available_governors - show the available CPUfreq governors
  */
-static ssize_t show_scaling_available_governors (struct cpufreq_policy * policy,
+static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy,
                                char *buf)
 {
        ssize_t i = 0;
@@ -568,7 +648,17 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;
-       ret = fattr->show ? fattr->show(policy,buf) : -EIO;
+
+       if (lock_policy_rwsem_read(policy->cpu) < 0)
+               return -EINVAL;
+
+       if (fattr->show)
+               ret = fattr->show(policy, buf);
+       else
+               ret = -EIO;
+
+       unlock_policy_rwsem_read(policy->cpu);
+
        cpufreq_cpu_put(policy);
        return ret;
 }
@@ -582,7 +672,17 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr,
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;
-       ret = fattr->store ? fattr->store(policy,buf,count) : -EIO;
+
+       if (lock_policy_rwsem_write(policy->cpu) < 0)
+               return -EINVAL;
+
+       if (fattr->store)
+               ret = fattr->store(policy, buf, count);
+       else
+               ret = -EIO;
+
+       unlock_policy_rwsem_write(policy->cpu);
+
        cpufreq_cpu_put(policy);
        return ret;
 }
@@ -656,10 +756,12 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
        policy->cpu = cpu;
        policy->cpus = cpumask_of_cpu(cpu);
 
-       mutex_init(&policy->lock);
-       mutex_lock(&policy->lock);
+       /* Initially set CPU itself as the policy_cpu */
+       per_cpu(policy_cpu, cpu) = cpu;
+       lock_policy_rwsem_write(cpu);
+
        init_completion(&policy->kobj_unregister);
-       INIT_WORK(&policy->update, handle_update, (void *)(long)cpu);
+       INIT_WORK(&policy->update, handle_update);
 
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
@@ -667,11 +769,22 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
        ret = cpufreq_driver->init(policy);
        if (ret) {
                dprintk("initialization failed\n");
-               mutex_unlock(&policy->lock);
+               unlock_policy_rwsem_write(cpu);
                goto err_out;
        }
+       policy->user_policy.min = policy->cpuinfo.min_freq;
+       policy->user_policy.max = policy->cpuinfo.max_freq;
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_HOTPLUG_CPU
+       if (cpufreq_cpu_governor[cpu]){
+               policy->governor = cpufreq_cpu_governor[cpu];
+               dprintk("Restoring governor %s for cpu %d\n",
+                      policy->governor->name, cpu);
+       }
+#endif
+
        for_each_cpu_mask(j, policy->cpus) {
                if (cpu == j)
                        continue;
@@ -681,18 +794,31 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                 */
                managed_policy = cpufreq_cpu_get(j);
                if (unlikely(managed_policy)) {
+
+                       /* Set proper policy_cpu */
+                       unlock_policy_rwsem_write(cpu);
+                       per_cpu(policy_cpu, cpu) = managed_policy->cpu;
+
+                       if (lock_policy_rwsem_write(cpu) < 0)
+                               goto err_out_driver_exit;
+
                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        managed_policy->cpus = policy->cpus;
                        cpufreq_cpu_data[cpu] = managed_policy;
                        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
                        dprintk("CPU already managed, adding link\n");
-                       sysfs_create_link(&sys_dev->kobj,
-                                         &managed_policy->kobj, "cpufreq");
+                       ret = sysfs_create_link(&sys_dev->kobj,
+                                               &managed_policy->kobj,
+                                               "cpufreq");
+                       if (ret) {
+                               unlock_policy_rwsem_write(cpu);
+                               goto err_out_driver_exit;
+                       }
 
                        cpufreq_debug_enable_ratelimit();
-                       mutex_unlock(&policy->lock);
                        ret = 0;
+                       unlock_policy_rwsem_write(cpu);
                        goto err_out_driver_exit; /* call driver->exit() */
                }
        }
@@ -702,27 +828,37 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
        /* prepare interface data */
        policy->kobj.parent = &sys_dev->kobj;
        policy->kobj.ktype = &ktype_cpufreq;
-       strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN);
+       kobject_set_name(&policy->kobj, "cpufreq");
 
        ret = kobject_register(&policy->kobj);
        if (ret) {
-               mutex_unlock(&policy->lock);
+               unlock_policy_rwsem_write(cpu);
                goto err_out_driver_exit;
        }
        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while ((drv_attr) && (*drv_attr)) {
-               sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
+               ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
+               if (ret)
+                       goto err_out_driver_exit;
                drv_attr++;
        }
-       if (cpufreq_driver->get)
-               sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
-       if (cpufreq_driver->target)
-               sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+       if (cpufreq_driver->get){
+               ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
+               if (ret)
+                       goto err_out_driver_exit;
+       }
+       if (cpufreq_driver->target){
+               ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+               if (ret)
+                       goto err_out_driver_exit;
+       }
 
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
-       for_each_cpu_mask(j, policy->cpus)
+       for_each_cpu_mask(j, policy->cpus) {
                cpufreq_cpu_data[j] = policy;
+               per_cpu(policy_cpu, j) = policy->cpu;
+       }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
        /* symlink affected CPUs */
@@ -735,16 +871,24 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                dprintk("CPU %u already managed, adding link\n", j);
                cpufreq_cpu_get(cpu);
                cpu_sys_dev = get_cpu_sysdev(j);
-               sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
-                                 "cpufreq");
+               ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
+                                       "cpufreq");
+               if (ret) {
+                       unlock_policy_rwsem_write(cpu);
+                       goto err_out_unregister;
+               }
        }
 
        policy->governor = NULL; /* to assure that the starting sequence is
                                  * run in cpufreq_set_policy */
-       mutex_unlock(&policy->lock);
 
        /* set default policy */
-       ret = cpufreq_set_policy(&new_policy);
+       ret = __cpufreq_set_policy(policy, &new_policy);
+       policy->user_policy.policy = policy->policy;
+       policy->user_policy.governor = policy->governor;
+
+       unlock_policy_rwsem_write(cpu);
+
        if (ret) {
                dprintk("setting policy failed\n");
                goto err_out_unregister;
@@ -782,11 +926,13 @@ module_out:
 
 
 /**
- * cpufreq_remove_dev - remove a CPU device
+ * __cpufreq_remove_dev - remove a CPU device
  *
  * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
  */
-static int cpufreq_remove_dev (struct sys_device * sys_dev)
+static int __cpufreq_remove_dev (struct sys_device * sys_dev)
 {
        unsigned int cpu = sys_dev->id;
        unsigned long flags;
@@ -805,6 +951,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                cpufreq_debug_enable_ratelimit();
+               unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        cpufreq_cpu_data[cpu] = NULL;
@@ -821,6 +968,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
                sysfs_remove_link(&sys_dev->kobj, "cpufreq");
                cpufreq_cpu_put(data);
                cpufreq_debug_enable_ratelimit();
+               unlock_policy_rwsem_write(cpu);
                return 0;
        }
 #endif
@@ -829,10 +977,16 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
        if (!kobject_get(&data->kobj)) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                cpufreq_debug_enable_ratelimit();
+               unlock_policy_rwsem_write(cpu);
                return -EFAULT;
        }
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_HOTPLUG_CPU
+       cpufreq_cpu_governor[cpu] = data->governor;
+#endif
+
        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
         * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
@@ -853,6 +1007,9 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
                        if (j == cpu)
                                continue;
                        dprintk("removing link for cpu %u\n", j);
+#ifdef CONFIG_HOTPLUG_CPU
+                       cpufreq_cpu_governor[j] = data->governor;
+#endif
                        cpu_sys_dev = get_cpu_sysdev(j);
                        sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
                        cpufreq_cpu_put(data);
@@ -862,10 +1019,10 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 #endif
 
-       mutex_lock(&data->lock);
        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);
-       mutex_unlock(&data->lock);
+
+       unlock_policy_rwsem_write(cpu);
 
        kobject_unregister(&data->kobj);
 
@@ -889,9 +1046,27 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
 }
 
 
-static void handle_update(void *data)
+static int cpufreq_remove_dev (struct sys_device * sys_dev)
 {
-       unsigned int cpu = (unsigned int)(long)data;
+       unsigned int cpu = sys_dev->id;
+       int retval;
+
+       if (cpu_is_offline(cpu))
+               return 0;
+
+       if (unlikely(lock_policy_rwsem_write(cpu)))
+               BUG();
+
+       retval = __cpufreq_remove_dev(sys_dev);
+       return retval;
+}
+
+
+static void handle_update(struct work_struct *work)
+{
+       struct cpufreq_policy *policy =
+               container_of(work, struct cpufreq_policy, update);
+       unsigned int cpu = policy->cpu;
        dprintk("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
 }
@@ -905,7 +1080,8 @@ static void handle_update(void *data)
  *     We adjust to current frequency first, and need to clean up later. So either call
  *     to cpufreq_update_policy() or schedule handle_update()).
  */
-static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigned int new_freq)
+static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
+                               unsigned int new_freq)
 {
        struct cpufreq_freqs freqs;
 
@@ -921,7 +1097,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne
 
 
 /**
- * cpufreq_quick_get - get the CPU frequency (in kHz) frpm policy->cur
+ * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
  * @cpu: CPU number
  *
  * This is the last known freq, without actually getting it from the driver.
@@ -930,20 +1106,46 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne
 unsigned int cpufreq_quick_get(unsigned int cpu)
 {
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-       unsigned int ret = 0;
+       unsigned int ret_freq = 0;
 
        if (policy) {
-               mutex_lock(&policy->lock);
-               ret = policy->cur;
-               mutex_unlock(&policy->lock);
+               if (unlikely(lock_policy_rwsem_read(cpu)))
+                       return ret_freq;
+
+               ret_freq = policy->cur;
+
+               unlock_policy_rwsem_read(cpu);
                cpufreq_cpu_put(policy);
        }
 
-       return (ret);
+       return (ret_freq);
 }
 EXPORT_SYMBOL(cpufreq_quick_get);
 
 
+static unsigned int __cpufreq_get(unsigned int cpu)
+{
+       struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
+       unsigned int ret_freq = 0;
+
+       if (!cpufreq_driver->get)
+               return (ret_freq);
+
+       ret_freq = cpufreq_driver->get(cpu);
+
+       if (ret_freq && policy->cur &&
+               !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
+               /* verify no discrepancy between actual and
+                                       saved value exists */
+               if (unlikely(ret_freq != policy->cur)) {
+                       cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
+                       schedule_work(&policy->update);
+               }
+       }
+
+       return (ret_freq);
+}
+
 /**
  * cpufreq_get - get the current CPU frequency (in kHz)
  * @cpu: CPU number
@@ -952,33 +1154,23 @@ EXPORT_SYMBOL(cpufreq_quick_get);
  */
 unsigned int cpufreq_get(unsigned int cpu)
 {
+       unsigned int ret_freq = 0;
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-       unsigned int ret = 0;
 
        if (!policy)
-               return 0;
-
-       if (!cpufreq_driver->get)
                goto out;
 
-       mutex_lock(&policy->lock);
+       if (unlikely(lock_policy_rwsem_read(cpu)))
+               goto out_policy;
 
-       ret = cpufreq_driver->get(cpu);
+       ret_freq = __cpufreq_get(cpu);
 
-       if (ret && policy->cur && !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
-               /* verify no discrepancy between actual and saved value exists */
-               if (unlikely(ret != policy->cur)) {
-                       cpufreq_out_of_sync(cpu, policy->cur, ret);
-                       schedule_work(&policy->update);
-               }
-       }
+       unlock_policy_rwsem_read(cpu);
 
-       mutex_unlock(&policy->lock);
-
-out:
+out_policy:
        cpufreq_cpu_put(policy);
-
-       return (ret);
+out:
+       return (ret_freq);
 }
 EXPORT_SYMBOL(cpufreq_get);
 
@@ -990,11 +1182,11 @@ EXPORT_SYMBOL(cpufreq_get);
 static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
 {
        int cpu = sysdev->id;
-       unsigned int ret = 0;
+       int ret = 0;
        unsigned int cur_freq = 0;
        struct cpufreq_policy *cpu_policy;
 
-       dprintk("resuming cpu %u\n", cpu);
+       dprintk("suspending cpu %u\n", cpu);
 
        if (!cpu_online(cpu))
                return 0;
@@ -1049,7 +1241,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
                freqs.old = cpu_policy->cur;
                freqs.new = cur_freq;
 
-               blocking_notifier_call_chain(&cpufreq_transition_notifier_list,
+               srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                    CPUFREQ_SUSPENDCHANGE, &freqs);
                adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
 
@@ -1072,7 +1264,7 @@ out:
 static int cpufreq_resume(struct sys_device * sysdev)
 {
        int cpu = sysdev->id;
-       unsigned int ret = 0;
+       int ret = 0;
        struct cpufreq_policy *cpu_policy;
 
        dprintk("resuming cpu %u\n", cpu);
@@ -1130,7 +1322,7 @@ static int cpufreq_resume(struct sys_device * sysdev)
                        freqs.old = cpu_policy->cur;
                        freqs.new = cur_freq;
 
-                       blocking_notifier_call_chain(
+                       srcu_notifier_call_chain(
                                        &cpufreq_transition_notifier_list,
                                        CPUFREQ_RESUMECHANGE, &freqs);
                        adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
@@ -1176,7 +1368,7 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
 
        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
-               ret = blocking_notifier_chain_register(
+               ret = srcu_notifier_chain_register(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
@@ -1208,7 +1400,7 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
 
        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
-               ret = blocking_notifier_chain_unregister(
+               ret = srcu_notifier_chain_unregister(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
@@ -1229,7 +1421,6 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
  *********************************************************************/
 
 
-/* Must be called with lock_cpu_hotplug held */
 int __cpufreq_driver_target(struct cpufreq_policy *policy,
                            unsigned int target_freq,
                            unsigned int relation)
@@ -1255,35 +1446,52 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
        if (!policy)
                return -EINVAL;
 
-       lock_cpu_hotplug();
-       mutex_lock(&policy->lock);
+       if (unlikely(lock_policy_rwsem_write(policy->cpu)))
+               return -EINVAL;
 
        ret = __cpufreq_driver_target(policy, target_freq, relation);
 
-       mutex_unlock(&policy->lock);
-       unlock_cpu_hotplug();
+       unlock_policy_rwsem_write(policy->cpu);
 
        cpufreq_cpu_put(policy);
        return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
 
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+       int ret = 0;
+
+       policy = cpufreq_cpu_get(policy->cpu);
+       if (!policy)
+               return -EINVAL;
+
+       if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+               ret = cpufreq_driver->getavg(policy->cpu);
+
+       cpufreq_cpu_put(policy);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
+
 /*
- * Locking: Must be called with the lock_cpu_hotplug() lock held
  * when "event" is CPUFREQ_GOV_LIMITS
  */
 
-static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
+static int __cpufreq_governor(struct cpufreq_policy *policy,
+                                       unsigned int event)
 {
        int ret;
 
        if (!try_module_get(policy->governor->owner))
                return -EINVAL;
 
-       dprintk("__cpufreq_governor for CPU %u, event %u\n", policy->cpu, event);
+       dprintk("__cpufreq_governor for CPU %u, event %u\n",
+                                               policy->cpu, event);
        ret = policy->governor->governor(policy, event);
 
-       /* we keep one module reference alive for each CPU governed by this CPU */
+       /* we keep one module reference alive for
+                       each CPU governed by this CPU */
        if ((event != CPUFREQ_GOV_START) || ret)
                module_put(policy->governor->owner);
        if ((event == CPUFREQ_GOV_STOP) && !ret)
@@ -1348,9 +1556,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
        if (!cpu_policy)
                return -EINVAL;
 
-       mutex_lock(&cpu_policy->lock);
        memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
-       mutex_unlock(&cpu_policy->lock);
 
        cpufreq_cpu_put(cpu_policy);
        return 0;
@@ -1359,9 +1565,11 @@ EXPORT_SYMBOL(cpufreq_get_policy);
 
 
 /*
- * Locking: Must be called with the lock_cpu_hotplug() lock held
+ * data   : current policy.
+ * policy : policy to be set.
  */
-static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy)
+static int __cpufreq_set_policy(struct cpufreq_policy *data,
+                               struct cpufreq_policy *policy)
 {
        int ret = 0;
 
@@ -1369,7 +1577,8 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_poli
        dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
                policy->min, policy->max);
 
-       memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
+       memcpy(&policy->cpuinfo, &data->cpuinfo,
+                               sizeof(struct cpufreq_cpuinfo));
 
        if (policy->min > data->min && policy->min > policy->max) {
                ret = -EINVAL;
@@ -1402,7 +1611,8 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_poli
        data->min = policy->min;
        data->max = policy->max;
 
-       dprintk("new min and max freqs are %u - %u kHz\n", data->min, data->max);
+       dprintk("new min and max freqs are %u - %u kHz\n",
+                                       data->min, data->max);
 
        if (cpufreq_driver->setpolicy) {
                data->policy = policy->policy;
@@ -1423,10 +1633,12 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_poli
                        data->governor = policy->governor;
                        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
                                /* new governor failed, so re-start old one */
-                               dprintk("starting governor %s failed\n", data->governor->name);
+                               dprintk("starting governor %s failed\n",
+                                                       data->governor->name);
                                if (old_gov) {
                                        data->governor = old_gov;
-                                       __cpufreq_governor(data, CPUFREQ_GOV_START);
+                                       __cpufreq_governor(data,
+                                                          CPUFREQ_GOV_START);
                                }
                                ret = -EINVAL;
                                goto error_out;
@@ -1443,45 +1655,6 @@ error_out:
 }
 
 /**
- *     cpufreq_set_policy - set a new CPUFreq policy
- *     @policy: policy to be set.
- *
- *     Sets a new CPU frequency and voltage scaling policy.
- */
-int cpufreq_set_policy(struct cpufreq_policy *policy)
-{
-       int ret = 0;
-       struct cpufreq_policy *data;
-
-       if (!policy)
-               return -EINVAL;
-
-       data = cpufreq_cpu_get(policy->cpu);
-       if (!data)
-               return -EINVAL;
-
-       lock_cpu_hotplug();
-
-       /* lock this CPU */
-       mutex_lock(&data->lock);
-
-       ret = __cpufreq_set_policy(data, policy);
-       data->user_policy.min = data->min;
-       data->user_policy.max = data->max;
-       data->user_policy.policy = data->policy;
-       data->user_policy.governor = data->governor;
-
-       mutex_unlock(&data->lock);
-
-       unlock_cpu_hotplug();
-       cpufreq_cpu_put(data);
-
-       return ret;
-}
-EXPORT_SYMBOL(cpufreq_set_policy);
-
-
-/**
  *     cpufreq_update_policy - re-evaluate an existing cpufreq policy
  *     @cpu: CPU which shall be re-evaluated
  *
@@ -1497,8 +1670,8 @@ int cpufreq_update_policy(unsigned int cpu)
        if (!data)
                return -ENODEV;
 
-       lock_cpu_hotplug();
-       mutex_lock(&data->lock);
+       if (unlikely(lock_policy_rwsem_write(cpu)))
+               return -EINVAL;
 
        dprintk("updating policy for CPU %u\n", cpu);
        memcpy(&policy, data, sizeof(struct cpufreq_policy));
@@ -1516,49 +1689,43 @@ int cpufreq_update_policy(unsigned int cpu)
                        data->cur = policy.cur;
                } else {
                        if (data->cur != policy.cur)
-                               cpufreq_out_of_sync(cpu, data->cur, policy.cur);
+                               cpufreq_out_of_sync(cpu, data->cur,
+                                                               policy.cur);
                }
        }
 
        ret = __cpufreq_set_policy(data, &policy);
 
-       mutex_unlock(&data->lock);
-       unlock_cpu_hotplug();
+       unlock_policy_rwsem_write(cpu);
+
        cpufreq_cpu_put(data);
        return ret;
 }
 EXPORT_SYMBOL(cpufreq_update_policy);
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int cpufreq_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
        unsigned int cpu = (unsigned long)hcpu;
-       struct cpufreq_policy *policy;
        struct sys_device *sys_dev;
 
        sys_dev = get_cpu_sysdev(cpu);
-
        if (sys_dev) {
                switch (action) {
                case CPU_ONLINE:
+               case CPU_ONLINE_FROZEN:
                        cpufreq_add_dev(sys_dev);
                        break;
                case CPU_DOWN_PREPARE:
-                       /*
-                        * We attempt to put this cpu in lowest frequency
-                        * possible before going down. This will permit
-                        * hardware-managed P-State to switch other related
-                        * threads to min or higher speeds if possible.
-                        */
-                       policy = cpufreq_cpu_data[cpu];
-                       if (policy) {
-                               cpufreq_driver_target(policy, policy->min,
-                                               CPUFREQ_RELATION_H);
-                       }
+               case CPU_DOWN_PREPARE_FROZEN:
+                       if (unlikely(lock_policy_rwsem_write(cpu)))
+                               BUG();
+
+                       __cpufreq_remove_dev(sys_dev);
                        break;
-               case CPU_DEAD:
-                       cpufreq_remove_dev(sys_dev);
+               case CPU_DOWN_FAILED:
+               case CPU_DOWN_FAILED_FROZEN:
+                       cpufreq_add_dev(sys_dev);
                        break;
                }
        }
@@ -1569,7 +1736,6 @@ static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
 {
     .notifier_call = cpufreq_cpu_callback,
 };
-#endif /* CONFIG_HOTPLUG_CPU */
 
 /*********************************************************************
  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
@@ -1620,8 +1786,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 
                /* if all ->init() calls failed, unregister */
                if (ret) {
-                       dprintk("no CPU initialized for driver %s\n", driver_data->name);
-                       sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
+                       dprintk("no CPU initialized for driver %s\n",
+                                                       driver_data->name);
+                       sysdev_driver_unregister(&cpu_sysdev_class,
+                                               &cpufreq_sysdev_driver);
 
                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        cpufreq_driver = NULL;
@@ -1671,3 +1839,16 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
        return 0;
 }
 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
+
+static int __init cpufreq_core_init(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               per_cpu(policy_cpu, cpu) = -1;
+               init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
+       }
+       return 0;
+}
+
+core_initcall(cpufreq_core_init);