/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *      Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *      Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>

#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
                                                "cpufreq-core", msg)

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per-CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc. related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this sem, as the top-level hotplug notifier handler takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)                                    \
int lock_policy_rwsem_##mode                                            \
(int cpu)                                                               \
{                                                                       \
        int policy_cpu = per_cpu(policy_cpu, cpu);                      \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
                up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
                return -1;                                              \
        }                                                               \
                                                                        \
        return 0;                                                       \
}

lock_policy_rwsem(read, cpu);
EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);

lock_policy_rwsem(write, cpu);
EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
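
/*
 * Illustrative caller pattern (a sketch, not a caller in this file); note
 * that on failure the semaphore has already been dropped inside the helper:
 *
 *      if (lock_policy_rwsem_read(cpu) < 0)
 *              return -EINVAL;         ... CPU went offline meanwhile ...
 *      ... read the policy ...
 *      unlock_policy_rwsem_read(cpu);
 */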

void unlock_policy_rwsem_read(int cpu)
{
        int policy_cpu = per_cpu(policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);

void unlock_policy_rwsem_write(int cpu)
{
        int policy_cpu = per_cpu(policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * Each list is protected by its own notifier head (blocking
 * resp. SRCU), not by a single shared mutex.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);
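
/*
 * Unlike the BLOCKING_NOTIFIER_HEAD above, an SRCU notifier head needs
 * runtime initialization, hence the pure_initcall; the _called flag lets
 * cpufreq_register_notifier() WARN if it is used before that has run.
 */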

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
        struct cpufreq_policy *data;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                goto err_out;

        /* get the cpufreq driver */
        spin_lock_irqsave(&cpufreq_driver_lock, flags);

        if (!cpufreq_driver)
                goto err_out_unlock;

        if (!try_module_get(cpufreq_driver->owner))
                goto err_out_unlock;


        /* get the CPU */
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data)
                goto err_out_put_module;

        if (!kobject_get(&data->kobj))
                goto err_out_put_module;

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return data;

err_out_put_module:
        module_put(cpufreq_driver->owner);
err_out_unlock:
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
        return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
        kobject_put(&data->kobj);
        module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
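
/*
 * Every successful cpufreq_cpu_get() takes a kobject reference on the
 * policy and a module reference on the driver; both must be dropped with
 * cpufreq_cpu_put(). Sketch of a hypothetical caller:
 *
 *      struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *      if (policy) {
 *              pr_info("cpu%u last known freq: %u kHz\n", cpu, policy->cur);
 *              cpufreq_cpu_put(policy);
 *      }
 */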


/*********************************************************************
 *                     UNIFIED DEBUG HELPERS                         *
 *********************************************************************/
#ifdef CONFIG_CPU_FREQ_DEBUG

/* what part(s) of the CPUfreq subsystem are debugged? */
static unsigned int debug;

/* is the debug output rate-limited using printk_ratelimit? Users can
 * set or modify this value.
 */
static unsigned int debug_ratelimit = 1;

/* is printk_ratelimit'ing enabled? It is enabled after a cpufreq driver
 * has loaded successfully, temporarily disabled when a new policy is set,
 * and disabled again upon cpufreq driver removal.
 */
static unsigned int disable_ratelimit = 1;
static DEFINE_SPINLOCK(disable_ratelimit_lock);

static void cpufreq_debug_enable_ratelimit(void)
{
        unsigned long flags;

        spin_lock_irqsave(&disable_ratelimit_lock, flags);
        if (disable_ratelimit)
                disable_ratelimit--;
        spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
}

static void cpufreq_debug_disable_ratelimit(void)
{
        unsigned long flags;

        spin_lock_irqsave(&disable_ratelimit_lock, flags);
        disable_ratelimit++;
        spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
}

void cpufreq_debug_printk(unsigned int type, const char *prefix,
                        const char *fmt, ...)
{
        char s[256];
        va_list args;
        unsigned int len;
        unsigned long flags;

        WARN_ON(!prefix);
        if (type & debug) {
                spin_lock_irqsave(&disable_ratelimit_lock, flags);
                if (!disable_ratelimit && debug_ratelimit
                                        && !printk_ratelimit()) {
                        spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
                        return;
                }
                spin_unlock_irqrestore(&disable_ratelimit_lock, flags);

                len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);

                va_start(args, fmt);
                len += vsnprintf(&s[len], (256 - len), fmt, args);
                va_end(args);

                printk("%s", s);

                WARN_ON(len < 5);
        }
}
EXPORT_SYMBOL(cpufreq_debug_printk);


module_param(debug, uint, 0644);
MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
                        " 2 to debug drivers, and 4 to debug governors.");

module_param(debug_ratelimit, uint, 0644);
MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
                                        " set to 0 to disable ratelimiting.");

#else /* !CONFIG_CPU_FREQ_DEBUG */

static inline void cpufreq_debug_enable_ratelimit(void) { return; }
static inline void cpufreq_debug_disable_ratelimit(void) { return; }

#endif /* CONFIG_CPU_FREQ_DEBUG */


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                dprintk("saving %lu as reference value for loops_per_jiffy; "
                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
            (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                dprintk("scaling loops_per_jiffy to %lu "
                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        return;
}
#endif
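
/*
 * For reference: cpufreq_scale(ref, old, new) computes ref * new / old with
 * overflow protection, so with an (illustrative) l_p_j_ref of 4997120 saved
 * at 1000000 kHz, a transition to 2000000 kHz sets loops_per_jiffy to about
 * 9994240.
 */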


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
        struct cpufreq_policy *policy;

        BUG_ON(irqs_disabled());

        freqs->flags = cpufreq_driver->flags;
        dprintk("notification %u of frequency transition to %u kHz\n",
                state, freqs->new);

        policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
        switch (state) {

        case CPUFREQ_PRECHANGE:
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
                 */
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
                                dprintk("Warning: CPU frequency is"
                                        " %u, cpufreq assumed %u kHz.\n",
                                        freqs->old, policy->cur);
                                freqs->old = policy->cur;
                        }
                }
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;

        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
                break;
        }
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
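
/*
 * Driver-side calling pattern (sketch of a hypothetical scaling driver,
 * not code from this file):
 *
 *      struct cpufreq_freqs freqs = {
 *              .cpu = policy->cpu,
 *              .old = policy->cur,
 *              .new = target_khz,
 *      };
 *
 *      cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *      ... reprogram the hardware ...
 *      cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */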



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
        struct cpufreq_governor *t;

        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
                if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;

        return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strnicmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else if (cpufreq_driver->target) {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = __find_governor(str_governor);

                if (t == NULL) {
                        char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
                                                                str_governor);

                        if (name) {
                                int ret;

                                mutex_unlock(&cpufreq_governor_mutex);
                                ret = request_module("%s", name);
                                mutex_lock(&cpufreq_governor_mutex);

                                if (ret == 0)
                                        t = __find_governor(str_governor);
                        }

                        kfree(name);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);
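
/*
 * For reference, show_one(scaling_max_freq, max) expands to roughly:
 *
 *      static ssize_t show_scaling_max_freq(struct cpufreq_policy *policy,
 *                                           char *buf)
 *      {
 *              return sprintf(buf, "%u\n", policy->max);
 *      }
 */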

static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)                    \
static ssize_t store_##file_name                                        \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        int ret;                                                        \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
        ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
                                                                        \
        return ret ? ret : count;                                       \
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
{
        unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
                return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
                                policy->governor->name);
        return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        int ret;
        char    str_governor[16];
        struct cpufreq_policy new_policy;

        ret = cpufreq_get_policy(&new_policy, policy->cpu);
        if (ret)
                return ret;

        ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;

        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
                                                &new_policy.governor))
                return -EINVAL;

        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
        ret = __cpufreq_set_policy(policy, &new_policy);

        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret)
                return ret;
        else
                return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!cpufreq_driver->target) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        if (cpumask_empty(policy->related_cpus))
                return show_cpus(policy->cpus, buf);
        return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int freq = 0;
        unsigned int ret;

        if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;

        ret = sscanf(buf, "%u", &freq);
        if (ret != 1)
                return -EINVAL;

        policy->governor->store_setspeed(policy, freq);

        return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}

#define define_one_ro(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)

#define define_one_ro0400(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0400, show_##_name, NULL)

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_ro0400(cpuinfo_cur_freq);
define_one_ro(cpuinfo_min_freq);
define_one_ro(cpuinfo_max_freq);
define_one_ro(cpuinfo_transition_latency);
define_one_ro(scaling_available_governors);
define_one_ro(scaling_driver);
define_one_ro(scaling_cur_freq);
define_one_ro(related_cpus);
define_one_ro(affected_cpus);
define_one_rw(scaling_min_freq);
define_one_rw(scaling_max_freq);
define_one_rw(scaling_governor);
define_one_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        &scaling_setspeed.attr,
        NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_read(policy->cpu) < 0)
                goto fail;

        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;

        unlock_policy_rwsem_read(policy->cpu);
fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_write(policy->cpu) < 0)
                goto fail;

        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;

        unlock_policy_rwsem_write(policy->cpu);
fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        dprintk("last reference is dropped\n");
        complete(&policy->kobj_unregister);
}

static struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};
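
/*
 * With this ktype, a read of e.g.
 * /sys/devices/system/cpu/cpuN/cpufreq/scaling_governor is dispatched
 * through show() above, which takes a policy reference and the read side
 * of the policy rwsem before calling the attribute's ->show() method;
 * writes go through store() with the rwsem held in write mode.
 */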

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy,
                struct sys_device *sys_dev)
{
        int ret = 0;
#ifdef CONFIG_SMP
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        struct cpufreq_governor *gov;

        gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
        if (gov) {
                policy->governor = gov;
                dprintk("Restoring governor %s for cpu %d\n",
                       policy->governor->name, cpu);
        }
#endif

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;

                if (cpu == j)
                        continue;

                /* Check for existing affected CPUs.
                 * They may not be aware of it due to CPU Hotplug.
                 * cpufreq_cpu_put is called when the device is removed
                 * in __cpufreq_remove_dev()
                 */
                managed_policy = cpufreq_cpu_get(j);
                if (unlikely(managed_policy)) {

                        /* Set proper policy_cpu */
                        unlock_policy_rwsem_write(cpu);
                        per_cpu(policy_cpu, cpu) = managed_policy->cpu;

                        if (lock_policy_rwsem_write(cpu) < 0) {
                                /* Should not go through policy unlock path */
                                if (cpufreq_driver->exit)
                                        cpufreq_driver->exit(policy);
                                cpufreq_cpu_put(managed_policy);
                                return -EBUSY;
                        }

                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        cpumask_copy(managed_policy->cpus, policy->cpus);
                        per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
                        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

                        dprintk("CPU already managed, adding link\n");
                        ret = sysfs_create_link(&sys_dev->kobj,
                                                &managed_policy->kobj,
                                                "cpufreq");
                        if (ret)
                                cpufreq_cpu_put(managed_policy);
                        /*
                         * Success. We only needed to be added to the mask.
                         * Call driver->exit() because only the cpu parent of
                         * the kobj needed to call init().
                         */
                        if (cpufreq_driver->exit)
                                cpufreq_driver->exit(policy);

                        if (!ret)
                                return 1;
                        else
                                return ret;
                }
        }
#endif
        return ret;
}


/* symlink affected CPUs */
int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy)
{
        unsigned int j;
        int ret = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;
                struct sys_device *cpu_sys_dev;

                if (j == cpu)
                        continue;
                if (!cpu_online(j))
                        continue;

                dprintk("CPU %u already managed, adding link\n", j);
                managed_policy = cpufreq_cpu_get(cpu);
                cpu_sys_dev = get_cpu_sysdev(j);
                ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret) {
                        cpufreq_cpu_put(managed_policy);
                        return ret;
                }
        }
        return ret;
}

int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy,
                struct sys_device *sys_dev)
{
        struct cpufreq_policy new_policy;
        struct freq_attr **drv_attr;
        unsigned long flags;
        int ret = 0;
        unsigned int j;

        /* prepare interface data */
        ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
                                   &sys_dev->kobj, "cpufreq");
        if (ret)
                return ret;

        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while ((drv_attr) && (*drv_attr)) {
                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
                if (ret)
                        goto err_out_kobj_put;
                drv_attr++;
        }
        if (cpufreq_driver->get) {
                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->target) {
                ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus) {
                if (!cpu_online(j))
                        continue;
                per_cpu(cpufreq_cpu_data, j) = policy;
                per_cpu(policy_cpu, j) = policy->cpu;
        }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = cpufreq_add_dev_symlink(cpu, policy);
        if (ret)
                goto err_out_kobj_put;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        /* assure that the starting sequence is run in __cpufreq_set_policy */
        policy->governor = NULL;

        /* set default policy */
        ret = __cpufreq_set_policy(policy, &new_policy);
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret) {
                dprintk("setting policy failed\n");
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);
        }
        return ret;

err_out_kobj_put:
        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);
        return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        int ret = 0, found = 0;
        struct cpufreq_policy *policy;
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        int sibling;
#endif

        if (cpu_is_offline(cpu))
                return 0;

        cpufreq_debug_disable_ratelimit();
        dprintk("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
        /* check whether a different CPU already registered this
         * CPU because it is in the same boat. */
        policy = cpufreq_cpu_get(cpu);
        if (unlikely(policy)) {
                cpufreq_cpu_put(policy);
                cpufreq_debug_enable_ratelimit();
                return 0;
        }
#endif

        if (!try_module_get(cpufreq_driver->owner)) {
                ret = -EINVAL;
                goto module_out;
        }

        ret = -ENOMEM;
        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
        if (!policy)
                goto nomem_out;

        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
                goto err_free_policy;

        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
                goto err_free_cpumask;

        policy->cpu = cpu;
        cpumask_copy(policy->cpus, cpumask_of(cpu));

        /* Initially set CPU itself as the policy_cpu */
        per_cpu(policy_cpu, cpu) = cpu;
        ret = (lock_policy_rwsem_write(cpu) < 0);
        WARN_ON(ret);

        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);

        /* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
        for_each_online_cpu(sibling) {
                struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
                if (cp && cp->governor &&
                    (cpumask_test_cpu(cpu, cp->related_cpus))) {
                        policy->governor = cp->governor;
                        found = 1;
                        break;
                }
        }
#endif
        if (!found)
                policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                dprintk("initialization failed\n");
                goto err_unlock_policy;
        }
        policy->user_policy.min = policy->min;
        policy->user_policy.max = policy->max;

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                     CPUFREQ_START, policy);

        ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
        if (ret) {
                if (ret > 0)
                        /* This is a managed cpu, symlink created,
                           exit with 0 */
                        ret = 0;
                goto err_unlock_policy;
        }

        ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
        if (ret)
                goto err_out_unregister;

        unlock_policy_rwsem_write(cpu);

        kobject_uevent(&policy->kobj, KOBJ_ADD);
        module_put(cpufreq_driver->owner);
        dprintk("initialization complete\n");
        cpufreq_debug_enable_ratelimit();

        return 0;


err_out_unregister:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
        unlock_policy_rwsem_write(cpu);
err_free_cpumask:
        free_cpumask_var(policy->cpus);
err_free_policy:
        kfree(policy);
nomem_out:
        module_put(cpufreq_driver->owner);
module_out:
        cpufreq_debug_enable_ratelimit();
        return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        unsigned long flags;
        struct cpufreq_policy *data;
#ifdef CONFIG_SMP
        struct sys_device *cpu_sys_dev;
        unsigned int j;
#endif

        cpufreq_debug_disable_ratelimit();
        dprintk("unregistering CPU %u\n", cpu);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                cpufreq_debug_enable_ratelimit();
                unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
        /* if this isn't the CPU which is the parent of the kobj, we
         * only need to unlink, put and exit
         */
        if (unlikely(cpu != data->cpu)) {
                dprintk("removing link\n");
                cpumask_clear_cpu(cpu, data->cpus);
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                sysfs_remove_link(&sys_dev->kobj, "cpufreq");
                cpufreq_cpu_put(data);
                cpufreq_debug_enable_ratelimit();
                unlock_policy_rwsem_write(cpu);
                return 0;
        }
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
        strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
                        CPUFREQ_NAME_LEN);
#endif

        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
         * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
         * the sysfs links afterwards.
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        per_cpu(cpufreq_cpu_data, j) = NULL;
                }
        }

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        dprintk("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
                        strncpy(per_cpu(cpufreq_cpu_governor, j),
                                data->governor->name, CPUFREQ_NAME_LEN);
#endif
                        cpu_sys_dev = get_cpu_sysdev(j);
                        sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
                        cpufreq_cpu_put(data);
                }
        }
#else
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        kobject_put(&data->kobj);

        /* we need to make sure that the underlying kobj is actually
         * not referenced anymore by anybody before we proceed with
         * unloading.
         */
        dprintk("waiting for dropping of refcount\n");
        wait_for_completion(&data->kobj_unregister);
        dprintk("wait complete\n");

        if (cpufreq_driver->exit)
                cpufreq_driver->exit(data);

        unlock_policy_rwsem_write(cpu);

        free_cpumask_var(data->related_cpus);
        free_cpumask_var(data->cpus);
        kfree(data);
        per_cpu(cpufreq_cpu_data, cpu) = NULL;

        cpufreq_debug_enable_ratelimit();
        return 0;
}


static int cpufreq_remove_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        int retval;

        if (cpu_is_offline(cpu))
                return 0;

        if (unlikely(lock_policy_rwsem_write(cpu)))
                BUG();

        retval = __cpufreq_remove_dev(sys_dev);
        return retval;
}


static void handle_update(struct work_struct *work)
{
        struct cpufreq_policy *policy =
                container_of(work, struct cpufreq_policy, update);
        unsigned int cpu = policy->cpu;
        dprintk("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
}

/**
 *      cpufreq_out_of_sync - If the actual and the saved CPU frequency
 *      differ, we're in deep trouble.
 *      @cpu: cpu number
 *      @old_freq: CPU frequency the kernel thinks the CPU runs at
 *      @new_freq: CPU frequency the CPU actually runs at
 *
 *      We adjust to the current frequency first, and need to clean up
 *      later by either calling cpufreq_update_policy() or scheduling
 *      handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
                                unsigned int new_freq)
{
        struct cpufreq_freqs freqs;

        dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
               "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

        freqs.cpu = cpu;
        freqs.old = old_freq;
        freqs.new = new_freq;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->cur;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);


static unsigned int __cpufreq_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
        unsigned int ret_freq = 0;

        if (!cpufreq_driver->get)
                return ret_freq;

        ret_freq = cpufreq_driver->get(cpu);

        if (ret_freq && policy->cur &&
                !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                /* verify no discrepancy between actual and
                                        saved value exists */
                if (unlikely(ret_freq != policy->cur)) {
                        cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
                        schedule_work(&policy->update);
                }
        }

        return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current frequency of the given CPU, asking the driver if needed.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
        unsigned int ret_freq = 0;
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

        if (!policy)
                goto out;

        if (unlikely(lock_policy_rwsem_read(cpu)))
                goto out_policy;

        ret_freq = __cpufreq_get(cpu);

        unlock_policy_rwsem_read(cpu);

out_policy:
        cpufreq_cpu_put(policy);
out:
        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);


/**
 *      cpufreq_suspend - let the low level driver prepare for suspend
 */

static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
{
        int ret = 0;

        int cpu = sysdev->id;
        struct cpufreq_policy *cpu_policy;

        dprintk("suspending cpu %u\n", cpu);

        if (!cpu_online(cpu))
                return 0;

        /* we may be lax here as interrupts are off. Nonetheless
         * we need to grab the correct cpu policy, so as to check
         * whether we really run on this CPU.
         */

        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return -EINVAL;

        /* only handle each CPU group once */
        if (unlikely(cpu_policy->cpu != cpu))
                goto out;

        if (cpufreq_driver->suspend) {
                ret = cpufreq_driver->suspend(cpu_policy, pmsg);
                if (ret)
                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
                                        "step on CPU %u\n", cpu_policy->cpu);
        }

out:
        cpufreq_cpu_put(cpu_policy);
        return ret;
}

/**
 *      cpufreq_resume -  restore proper CPU frequency handling after resume
 *
 *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *      2.) schedule a call to cpufreq_update_policy() ASAP once interrupts
 *          are restored. It will verify that the current freq is in sync
 *          with what we believe it to be. This is a bit later than it
 *          should be, but nonetheless it's better than calling
 *          cpufreq_driver->get() here, which might re-enable interrupts...
 */
static int cpufreq_resume(struct sys_device *sysdev)
{
        int ret = 0;

        int cpu = sysdev->id;
        struct cpufreq_policy *cpu_policy;

        dprintk("resuming cpu %u\n", cpu);

        if (!cpu_online(cpu))
                return 0;

        /* we may be lax here as interrupts are off. Nonetheless
         * we need to grab the correct cpu policy, so as to check
         * whether we really run on this CPU.
         */

        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return -EINVAL;

        /* only handle each CPU group once */
        if (unlikely(cpu_policy->cpu != cpu))
                goto fail;

        if (cpufreq_driver->resume) {
                ret = cpufreq_driver->resume(cpu_policy);
                if (ret) {
                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
                                        "step on CPU %u\n", cpu_policy->cpu);
                        goto fail;
                }
        }

        schedule_work(&cpu_policy->update);

fail:
        cpufreq_cpu_put(cpu_policy);
        return ret;
}

static struct sysdev_driver cpufreq_sysdev_driver = {
        .add            = cpufreq_add_dev,
        .remove         = cpufreq_remove_dev,
        .suspend        = cpufreq_suspend,
        .resume         = cpufreq_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *      cpufreq_register_notifier - register a driver with cpufreq
 *      @nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Add a driver to one of two lists: either a list of drivers that
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        WARN_ON(!init_cpufreq_transition_notifier_list_called);

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_register(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_register(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
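
/*
 * Registration sketch for a hypothetical client of the transition chain
 * (illustrative, not code from this file):
 *
 *      static int my_transition(struct notifier_block *nb,
 *                               unsigned long state, void *data)
 *      {
 *              struct cpufreq_freqs *freqs = data;
 *
 *              if (state == CPUFREQ_POSTCHANGE)
 *                      printk(KERN_DEBUG "cpu%u now at %u kHz\n",
 *                             freqs->cpu, freqs->new);
 *              return NOTIFY_OK;
 *      }
 *      static struct notifier_block my_nb = {
 *              .notifier_call = my_transition,
 *      };
 *
 *      cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */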


/**
 *      cpufreq_unregister_notifier - unregister a driver with cpufreq
 *      @nb: notifier block to be unregistered
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Remove a driver from the CPU frequency notifier list.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_unregister(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_unregister(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
                            unsigned int target_freq,
                            unsigned int relation)
{
        int retval = -EINVAL;

        dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
                target_freq, relation);
        if (cpu_online(policy->cpu) && cpufreq_driver->target)
                retval = cpufreq_driver->target(policy, target_freq, relation);

        return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
                          unsigned int target_freq,
                          unsigned int relation)
{
        int ret = -EINVAL;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
                goto fail;

        ret = __cpufreq_driver_target(policy, target_freq, relation);

        unlock_policy_rwsem_write(policy->cpu);

fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
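
/*
 * Note: cpufreq_driver_target() takes a policy reference and the policy
 * rwsem in write mode around the driver call; code that already holds the
 * rwsem (e.g. a governor invoked from the core) must call
 * __cpufreq_driver_target() directly instead.
 */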
1536
1537 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1538 {
1539         int ret = 0;
1540
1541         policy = cpufreq_cpu_get(policy->cpu);
1542         if (!policy)
1543                 return -EINVAL;
1544
1545         if (cpu_online(cpu) && cpufreq_driver->getavg)
1546                 ret = cpufreq_driver->getavg(policy, cpu);
1547
1548         cpufreq_cpu_put(policy);
1549         return ret;
1550 }
1551 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
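/*
 * Usage sketch (illustrative only): sampling governors may ask for the
 * measured average frequency of a CPU; a return of 0 means the driver
 * has no ->getavg hook and the caller should fall back to policy->cur:
 *
 *	int avg = __cpufreq_driver_getavg(policy, policy->cpu);
 *	if (avg <= 0)
 *		avg = policy->cur;
 */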
1552
1553 /*
1554  * Notify the governor of "event" (CPUFREQ_GOV_START, _STOP or _LIMITS).
1555  */
1556
1557 static int __cpufreq_governor(struct cpufreq_policy *policy,
1558                                         unsigned int event)
1559 {
1560         int ret;
1561
1562         /* The performance fallback only needs to be defined when the
1563            default governor is known to have latency restrictions (e.g.
1564            conservative or ondemand); Kconfig already ensures this.
1565         */
1566 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1567         struct cpufreq_governor *gov = &cpufreq_gov_performance;
1568 #else
1569         struct cpufreq_governor *gov = NULL;
1570 #endif
1571
1572         if (policy->governor->max_transition_latency &&
1573             policy->cpuinfo.transition_latency >
1574             policy->governor->max_transition_latency) {
1575                 if (!gov)
1576                         return -EINVAL;
1577                 else {
1578                         printk(KERN_WARNING "%s governor failed: hardware"
1579                                " transition latency too long, falling back"
1580                                " to the %s governor\n",
1581                                policy->governor->name,
1582                                gov->name);
1583                         policy->governor = gov;
1584                 }
1585         }
1586
1587         if (!try_module_get(policy->governor->owner))
1588                 return -EINVAL;
1589
1590         dprintk("__cpufreq_governor for CPU %u, event %u\n",
1591                                                 policy->cpu, event);
1592         ret = policy->governor->governor(policy, event);
1593
1594         /* we keep one module reference alive for
1595            each CPU governed by this governor */
1596         if ((event != CPUFREQ_GOV_START) || ret)
1597                 module_put(policy->governor->owner);
1598         if ((event == CPUFREQ_GOV_STOP) && !ret)
1599                 module_put(policy->governor->owner);
1600
1601         return ret;
1602 }
1603
1604
1605 int cpufreq_register_governor(struct cpufreq_governor *governor)
1606 {
1607         int err;
1608
1609         if (!governor)
1610                 return -EINVAL;
1611
1612         mutex_lock(&cpufreq_governor_mutex);
1613
1614         err = -EBUSY;
1615         if (__find_governor(governor->name) == NULL) {
1616                 err = 0;
1617                 list_add(&governor->governor_list, &cpufreq_governor_list);
1618         }
1619
1620         mutex_unlock(&cpufreq_governor_mutex);
1621         return err;
1622 }
1623 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
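/*
 * Usage sketch (illustrative only; all "my_*" names are hypothetical):
 * a minimal governor that pins the policy to its maximum, in the
 * spirit of the performance governor:
 *
 *	static int my_governor_fn(struct cpufreq_policy *policy,
 *				  unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_LIMITS:
 *			__cpufreq_driver_target(policy, policy->max,
 *						CPUFREQ_RELATION_H);
 *			break;
 *		}
 *		return 0;
 *	}
 *
 *	static struct cpufreq_governor my_gov = {
 *		.name		= "mygov",
 *		.governor	= my_governor_fn,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	cpufreq_register_governor(&my_gov);
 */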
1624
1625
1626 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1627 {
1628 #ifdef CONFIG_HOTPLUG_CPU
1629         int cpu;
1630 #endif
1631
1632         if (!governor)
1633                 return;
1634
1635 #ifdef CONFIG_HOTPLUG_CPU
1636         for_each_present_cpu(cpu) {
1637                 if (cpu_online(cpu))
1638                         continue;
1639                 if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1640                         strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1641         }
1642 #endif
1643
1644         mutex_lock(&cpufreq_governor_mutex);
1645         list_del(&governor->governor_list);
1646         mutex_unlock(&cpufreq_governor_mutex);
1647         return;
1648 }
1649 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1650
1651
1652
1653 /*********************************************************************
1654  *                          POLICY INTERFACE                         *
1655  *********************************************************************/
1656
1657 /**
1658  * cpufreq_get_policy - get the current cpufreq_policy
1659  * @policy: destination into which the current cpufreq_policy is copied
1660  * @cpu: CPU whose policy is requested
1661  *
1662  * Reads the current cpufreq policy of @cpu into @policy.
1663  */
1664 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1665 {
1666         struct cpufreq_policy *cpu_policy;
1667         if (!policy)
1668                 return -EINVAL;
1669
1670         cpu_policy = cpufreq_cpu_get(cpu);
1671         if (!cpu_policy)
1672                 return -EINVAL;
1673
1674         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1675
1676         cpufreq_cpu_put(cpu_policy);
1677         return 0;
1678 }
1679 EXPORT_SYMBOL(cpufreq_get_policy);
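/*
 * Usage sketch (illustrative only, assuming a governor-based driver so
 * that pol.governor is non-NULL). The copy is private to the caller,
 * so no further locking is needed:
 *
 *	struct cpufreq_policy pol;
 *
 *	if (!cpufreq_get_policy(&pol, 0))
 *		printk(KERN_INFO "cpu0: %u - %u kHz, governor %s\n",
 *		       pol.min, pol.max, pol.governor->name);
 */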
1680
1681
1682 /*
1683  * data   : current policy.
1684  * policy : policy to be set.
1685  */
1686 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1687                                 struct cpufreq_policy *policy)
1688 {
1689         int ret = 0;
1690
1691         cpufreq_debug_disable_ratelimit();
1692         dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1693                 policy->min, policy->max);
1694
1695         memcpy(&policy->cpuinfo, &data->cpuinfo,
1696                                 sizeof(struct cpufreq_cpuinfo));
1697
1698         if (policy->min > data->max || policy->max < data->min) {
1699                 ret = -EINVAL;
1700                 goto error_out;
1701         }
1702
1703         /* verify the cpu speed can be set within this limit */
1704         ret = cpufreq_driver->verify(policy);
1705         if (ret)
1706                 goto error_out;
1707
1708         /* adjust if necessary - all reasons */
1709         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1710                         CPUFREQ_ADJUST, policy);
1711
1712         /* adjust if necessary - hardware incompatibility */
1713         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1714                         CPUFREQ_INCOMPATIBLE, policy);
1715
1716         /* verify the cpu speed can be set within this limit,
1717            which might be different from the first one */
1718         ret = cpufreq_driver->verify(policy);
1719         if (ret)
1720                 goto error_out;
1721
1722         /* notification of the new policy */
1723         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1724                         CPUFREQ_NOTIFY, policy);
1725
1726         data->min = policy->min;
1727         data->max = policy->max;
1728
1729         dprintk("new min and max freqs are %u - %u kHz\n",
1730                                         data->min, data->max);
1731
1732         if (cpufreq_driver->setpolicy) {
1733                 data->policy = policy->policy;
1734                 dprintk("setting range\n");
1735                 ret = cpufreq_driver->setpolicy(policy);
1736         } else {
1737                 if (policy->governor != data->governor) {
1738                         /* save old, working values */
1739                         struct cpufreq_governor *old_gov = data->governor;
1740
1741                         dprintk("governor switch\n");
1742
1743                         /* end old governor */
1744                         if (data->governor) {
1745                                 /*
1746                                  * Need to release the rwsem around governor
1747                                  * stop due to lock dependency between
1748                                  * cancel_delayed_work_sync and the read lock
1749                                  * taken in the delayed work handler.
1750                                  */
1751                                 unlock_policy_rwsem_write(data->cpu);
1752                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1753                                 lock_policy_rwsem_write(data->cpu);
1754                         }
1755
1756                         /* start new governor */
1757                         data->governor = policy->governor;
1758                         if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1759                                 /* new governor failed, so re-start old one */
1760                                 dprintk("starting governor %s failed\n",
1761                                                         data->governor->name);
1762                                 if (old_gov) {
1763                                         data->governor = old_gov;
1764                                         __cpufreq_governor(data,
1765                                                            CPUFREQ_GOV_START);
1766                                 }
1767                                 ret = -EINVAL;
1768                                 goto error_out;
1769                         }
1770                         /* might be a policy change, too, so fall through */
1771                 }
1772                 dprintk("governor: change or update limits\n");
1773                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1774         }
1775
1776 error_out:
1777         cpufreq_debug_enable_ratelimit();
1778         return ret;
1779 }
1780
1781 /**
1782  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1783  *      @cpu: CPU which shall be re-evaluated
1784  *
1785  *      Useful for policy notifiers which have different requirements
1786  *      at different times.
1787  */
1788 int cpufreq_update_policy(unsigned int cpu)
1789 {
1790         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1791         struct cpufreq_policy policy;
1792         int ret;
1793
1794         if (!data) {
1795                 ret = -ENODEV;
1796                 goto no_policy;
1797         }
1798
1799         if (unlikely(lock_policy_rwsem_write(cpu))) {
1800                 ret = -EINVAL;
1801                 goto fail;
1802         }
1803
1804         dprintk("updating policy for CPU %u\n", cpu);
1805         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1806         policy.min = data->user_policy.min;
1807         policy.max = data->user_policy.max;
1808         policy.policy = data->user_policy.policy;
1809         policy.governor = data->user_policy.governor;
1810
1811         /* BIOS might change freq behind our back
1812           -> ask driver for current freq and notify governors about a change */
1813         if (cpufreq_driver->get) {
1814                 policy.cur = cpufreq_driver->get(cpu);
1815                 if (!data->cur) {
1816                         dprintk("Driver did not initialize current freq\n");
1817                         data->cur = policy.cur;
1818                 } else {
1819                         if (data->cur != policy.cur)
1820                                 cpufreq_out_of_sync(cpu, data->cur,
1821                                                                 policy.cur);
1822                 }
1823         }
1824
1825         ret = __cpufreq_set_policy(data, &policy);
1826
1827         unlock_policy_rwsem_write(cpu);
1828
1829 fail:
1830         cpufreq_cpu_put(data);
1831 no_policy:
1832         return ret;
1833 }
1834 EXPORT_SYMBOL(cpufreq_update_policy);
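/*
 * Usage sketch (illustrative only): platform code that learns, e.g.
 * from an ACPI notification, that firmware changed the frequency limits
 * simply asks the core to re-evaluate the affected CPU:
 *
 *	cpufreq_update_policy(cpu);
 */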
1835
1836 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1837                                         unsigned long action, void *hcpu)
1838 {
1839         unsigned int cpu = (unsigned long)hcpu;
1840         struct sys_device *sys_dev;
1841
1842         sys_dev = get_cpu_sysdev(cpu);
1843         if (sys_dev) {
1844                 switch (action) {
1845                 case CPU_ONLINE:
1846                 case CPU_ONLINE_FROZEN:
1847                         cpufreq_add_dev(sys_dev);
1848                         break;
1849                 case CPU_DOWN_PREPARE:
1850                 case CPU_DOWN_PREPARE_FROZEN:
1851                         if (unlikely(lock_policy_rwsem_write(cpu)))
1852                                 BUG();
1853
1854                         __cpufreq_remove_dev(sys_dev);
1855                         break;
1856                 case CPU_DOWN_FAILED:
1857                 case CPU_DOWN_FAILED_FROZEN:
1858                         cpufreq_add_dev(sys_dev);
1859                         break;
1860                 }
1861         }
1862         return NOTIFY_OK;
1863 }
1864
1865 static struct notifier_block __refdata cpufreq_cpu_notifier =
1866 {
1867         .notifier_call = cpufreq_cpu_callback,
1868 };
1869
1870 /*********************************************************************
1871  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1872  *********************************************************************/
1873
1874 /**
1875  * cpufreq_register_driver - register a CPU Frequency driver
1876  * @driver_data: A struct cpufreq_driver containing the values
1877  * submitted by the CPU Frequency driver.
1878  *
1879  *   Registers a CPU Frequency driver with this core code. Returns
1880  * zero on success, or -EBUSY when another driver is already registered
1881  * (and has not been unregistered in the meantime).
1882  *
1883  */
1884 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1885 {
1886         unsigned long flags;
1887         int ret;
1888
1889         if (!driver_data || !driver_data->verify || !driver_data->init ||
1890             ((!driver_data->setpolicy) && (!driver_data->target)))
1891                 return -EINVAL;
1892
1893         dprintk("trying to register driver %s\n", driver_data->name);
1894
1895         if (driver_data->setpolicy)
1896                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1897
1898         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1899         if (cpufreq_driver) {
1900                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1901                 return -EBUSY;
1902         }
1903         cpufreq_driver = driver_data;
1904         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1905
1906         ret = sysdev_driver_register(&cpu_sysdev_class,
1907                                         &cpufreq_sysdev_driver);
1908
1909         if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1910                 int i;
1911                 ret = -ENODEV;
1912
1913                 /* check for at least one working CPU */
1914                 for (i = 0; i < nr_cpu_ids; i++)
1915                         if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1916                                 ret = 0;
1917                                 break;
1918                         }
1919
1920                 /* if all ->init() calls failed, unregister */
1921                 if (ret) {
1922                         dprintk("no CPU initialized for driver %s\n",
1923                                                         driver_data->name);
1924                         sysdev_driver_unregister(&cpu_sysdev_class,
1925                                                 &cpufreq_sysdev_driver);
1926
1927                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1928                         cpufreq_driver = NULL;
1929                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1930                 }
1931         }
1932
1933         if (!ret) {
1934                 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1935                 dprintk("driver %s up and running\n", driver_data->name);
1936                 cpufreq_debug_enable_ratelimit();
1937         }
1938
1939         return ret;
1940 }
1941 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
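/*
 * Usage sketch (illustrative only; all "my_*" symbols are hypothetical):
 * as checked above, a driver must supply ->verify, ->init and either
 * ->setpolicy or ->target before registering:
 *
 *	static struct cpufreq_driver my_driver = {
 *		.name	= "mydrv",
 *		.init	= my_cpu_init,
 *		.verify	= my_verify,
 *		.target	= my_target,
 *		.owner	= THIS_MODULE,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return cpufreq_register_driver(&my_driver);
 *	}
 */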
1942
1943
1944 /**
1945  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1946  *
1947  *    Unregister the current CPUFreq driver. Only call this if you have
1948  * the right to do so, i.e. if you successfully registered it before.
1949  * Returns zero on success, and -EINVAL if the given driver is not the
1950  * one currently registered.
1951  */
1952 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1953 {
1954         unsigned long flags;
1955
1956         cpufreq_debug_disable_ratelimit();
1957
1958         if (!cpufreq_driver || (driver != cpufreq_driver)) {
1959                 cpufreq_debug_enable_ratelimit();
1960                 return -EINVAL;
1961         }
1962
1963         dprintk("unregistering driver %s\n", driver->name);
1964
1965         sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1966         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1967
1968         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1969         cpufreq_driver = NULL;
1970         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1971
1972         return 0;
1973 }
1974 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
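/*
 * Matching unload sketch for the registration example above
 * (illustrative only):
 *
 *	static void __exit my_exit(void)
 *	{
 *		cpufreq_unregister_driver(&my_driver);
 *	}
 */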
1975
1976 static int __init cpufreq_core_init(void)
1977 {
1978         int cpu;
1979
1980         for_each_possible_cpu(cpu) {
1981                 per_cpu(policy_cpu, cpu) = -1;
1982                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1983         }
1984
1985         cpufreq_global_kobject = kobject_create_and_add("cpufreq",
1986                                                 &cpu_sysdev_class.kset.kobj);
1987         BUG_ON(!cpufreq_global_kobject);
1988
1989         return 0;
1990 }
1991 core_initcall(cpufreq_core_init);