Freezer: make kernel threads nonfreezable by default
[safe/jmp/linux-2.6] / arch / i386 / kernel / apm.c
index 45199bb..47001d5 100644 (file)
  *   (APM) BIOS Interface Specification, Revision 1.2, February 1996.
  *
  * [This document is available from Microsoft at:
- *    http://www.microsoft.com/hwdev/busbios/amp_12.htm]
+ *    http://www.microsoft.com/whdc/archive/amp_12.mspx]
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/miscdevice.h>
 #include <linux/apm_bios.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
 #include <linux/pm_legacy.h>
+#include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/freezer.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/dmi.h>
 #include <linux/suspend.h>
+#include <linux/kthread.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/i8253.h>
+#include <asm/paravirt.h>
+#include <asm/reboot.h>
 
 #include "io_ports.h"
 
-extern unsigned long get_cmos_time(void);
-extern void machine_real_restart(unsigned char *, int);
-
 #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
 extern int (*console_blank_hook)(int);
 #endif
@@ -373,24 +374,17 @@ static struct {
        unsigned short  segment;
 }                              apm_bios_entry;
 static int                     clock_slowed;
-static int                     idle_threshold = DEFAULT_IDLE_THRESHOLD;
-static int                     idle_period = DEFAULT_IDLE_PERIOD;
+static int                     idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
+static int                     idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
 static int                     set_pm_idle;
 static int                     suspends_pending;
 static int                     standbys_pending;
 static int                     ignore_sys_suspend;
 static int                     ignore_normal_resume;
-static int                     bounce_interval = DEFAULT_BOUNCE_INTERVAL;
+static int                     bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
 
-#ifdef CONFIG_APM_RTC_IS_GMT
-#      define  clock_cmos_diff 0
-#      define  got_clock_diff  1
-#else
-static long                    clock_cmos_diff;
-static int                     got_clock_diff;
-#endif
-static int                     debug;
-static int                     smp;
+static int                     debug __read_mostly;
+static int                     smp __read_mostly;
 static int                     apm_disabled = -1;
 #ifdef CONFIG_SMP
 static int                     power_off;
@@ -402,8 +396,6 @@ static int                  realmode_power_off = 1;
 #else
 static int                     realmode_power_off;
 #endif
-static int                     exit_kapmd;
-static int                     kapmd_running;
 #ifdef CONFIG_APM_ALLOW_INTS
 static int                     allow_ints = 1;
 #else
@@ -415,15 +407,17 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
 static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
 static struct apm_user *       user_list;
 static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct      bad_bios_desc = { 0, 0x00409200 };
+static const struct desc_struct        bad_bios_desc = { 0, 0x00409200 };
+
+static const char              driver_version[] = "1.16ac";    /* no spaces */
 
-static char                    driver_version[] = "1.16ac";    /* no spaces */
+static struct task_struct *kapmd_task;
 
 /*
  *     APM event names taken from the APM 1.2 specification. These are
  *     the message codes that the BIOS uses to tell us about events
  */
-static char *  apm_event_name[] = {
+static const char *    const apm_event_name[] = {
        "system standby",
        "system suspend",
        "normal resume",
@@ -539,11 +533,30 @@ static inline void apm_restore_cpus(cpumask_t mask)
  * Also, we KNOW that for the non error case of apm_bios_call, there
  * is no useful data returned in the low order 8 bits of eax.
  */
-#define APM_DO_CLI     \
-       if (apm_info.allow_ints) \
-               local_irq_enable(); \
-       else \
+
+static inline unsigned long __apm_irq_save(void)
+{
+       unsigned long flags;
+       local_save_flags(flags);
+       if (apm_info.allow_ints) {
+               if (irqs_disabled_flags(flags))
+                       local_irq_enable();
+       } else
+               local_irq_disable();
+
+       return flags;
+}
+
+#define apm_irq_save(flags) \
+       do { flags = __apm_irq_save(); } while (0)
+
+static inline void apm_irq_restore(unsigned long flags)
+{
+       if (irqs_disabled_flags(flags))
                local_irq_disable();
+       else if (irqs_disabled())
+               local_irq_enable();
+}
 
 #ifdef APM_ZERO_SEGS
 #      define APM_DECL_SEGS \
@@ -595,12 +608,11 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
        save_desc_40 = gdt[0x40 / 8];
        gdt[0x40 / 8] = bad_bios_desc;
 
-       local_save_flags(flags);
-       APM_DO_CLI;
+       apm_irq_save(flags);
        APM_DO_SAVE_SEGS;
        apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
        APM_DO_RESTORE_SEGS;
-       local_irq_restore(flags);
+       apm_irq_restore(flags);
        gdt[0x40 / 8] = save_desc_40;
        put_cpu();
        apm_restore_cpus(cpus);
@@ -615,7 +627,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
  *     @ecx_in: ECX register value for BIOS call
  *     @eax: EAX register on return from the BIOS call
  *
- *     Make a BIOS call that does only returns one value, or just status.
+ *     Make a BIOS call that returns one value only, or just status.
  *     If there is an error, then the error code is returned in AH
  *     (bits 8-15 of eax) and this function returns non-zero. This is
  *     used for simpler BIOS operations. This call may hold interrupts
@@ -639,12 +651,11 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
        save_desc_40 = gdt[0x40 / 8];
        gdt[0x40 / 8] = bad_bios_desc;
 
-       local_save_flags(flags);
-       APM_DO_CLI;
+       apm_irq_save(flags);
        APM_DO_SAVE_SEGS;
        error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
        APM_DO_RESTORE_SEGS;
-       local_irq_restore(flags);
+       apm_irq_restore(flags);
        gdt[0x40 / 8] = save_desc_40;
        put_cpu();
        apm_restore_cpus(cpus);
@@ -763,17 +774,21 @@ static int apm_do_idle(void)
        int     idled = 0;
        int     polling;
 
-       polling = test_thread_flag(TIF_POLLING_NRFLAG);
+       polling = !!(current_thread_info()->status & TS_POLLING);
        if (polling) {
-               clear_thread_flag(TIF_POLLING_NRFLAG);
-               smp_mb__after_clear_bit();
+               current_thread_info()->status &= ~TS_POLLING;
+               /*
+                * TS_POLLING-cleared state must be visible before we
+                * test NEED_RESCHED:
+                */
+               smp_mb();
        }
        if (!need_resched()) {
                idled = 1;
                ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
        }
        if (polling)
-               set_thread_flag(TIF_POLLING_NRFLAG);
+               current_thread_info()->status |= TS_POLLING;
 
        if (!idled)
                return 0;
@@ -821,9 +836,7 @@ static void apm_do_busy(void)
 #define IDLE_CALC_LIMIT   (HZ * 100)
 #define IDLE_LEAKY_MAX    16
 
-static void (*original_pm_idle)(void);
-
-extern void default_idle(void);
+static void (*original_pm_idle)(void) __read_mostly;
 
 /**
  * apm_cpu_idle                -       cpu idling for APM capable Linux
@@ -1064,21 +1077,23 @@ static int apm_engage_power_management(u_short device, int enable)
  
 static int apm_console_blank(int blank)
 {
-       int     error;
-       u_short state;
+       int error = APM_NOT_ENGAGED; /* silence gcc */
+       int i;
+       u_short state;
+       static const u_short dev[3] = { 0x100, 0x1FF, 0x101 };
 
        state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
-       /* Blank the first display device */
-       error = set_power_state(0x100, state);
-       if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
-               /* try to blank them all instead */
-               error = set_power_state(0x1ff, state);
-               if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
-                       /* try to blank device one instead */
-                       error = set_power_state(0x101, state);
+
+       for (i = 0; i < ARRAY_SIZE(dev); i++) {
+               error = set_power_state(dev[i], state);
+
+               if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
+                       return 1;
+
+               if (error == APM_NOT_ENGAGED)
+                       break;
        }
-       if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
-               return 1;
+
        if (error == APM_NOT_ENGAGED) {
                static int tried;
                int eng_error;
@@ -1104,7 +1119,8 @@ static int queue_empty(struct apm_user *as)
 
 static apm_event_t get_queued_event(struct apm_user *as)
 {
-       as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
+       if (++as->event_tail >= APM_MAX_EVENTS)
+               as->event_tail = 0;
        return as->events[as->event_tail];
 }
 
@@ -1118,13 +1134,16 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
        for (as = user_list; as != NULL; as = as->next) {
                if ((as == sender) || (!as->reader))
                        continue;
-               as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
+               if (++as->event_head >= APM_MAX_EVENTS)
+                       as->event_head = 0;
+
                if (as->event_head == as->event_tail) {
                        static int notified;
 
                        if (notified++ == 0)
                            printk(KERN_ERR "apm: an event queue overflowed\n");
-                       as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
+                       if (++as->event_tail >= APM_MAX_EVENTS)
+                               as->event_tail = 0;
                }
                as->events[as->event_head] = event;
                if ((!as->suser) || (!as->writer))
@@ -1148,33 +1167,13 @@ out:
        spin_unlock(&user_list_lock);
 }
 
-static void set_time(void)
-{
-       if (got_clock_diff) {   /* Must know time zone in order to set clock */
-               xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
-               xtime.tv_nsec = 0; 
-       } 
-}
-
-static void get_time_diff(void)
-{
-#ifndef CONFIG_APM_RTC_IS_GMT
-       /*
-        * Estimate time zone so that set_time can update the clock
-        */
-       clock_cmos_diff = -get_cmos_time();
-       clock_cmos_diff += get_seconds();
-       got_clock_diff = 1;
-#endif
-}
-
 static void reinit_timer(void)
 {
 #ifdef INIT_TIMER_AFTER_SUSPEND
        unsigned long flags;
 
        spin_lock_irqsave(&i8253_lock, flags);
-       /* set the clock to 100 Hz */
+       /* set the clock to HZ */
        outb_p(0x34, PIT_MODE);         /* binary, mode 2, LSB/MSB, ch 0 */
        udelay(10);
        outb_p(LATCH & 0xff, PIT_CH0);  /* LSB */
@@ -1207,19 +1206,6 @@ static int suspend(int vetoable)
        local_irq_disable();
        device_power_down(PMSG_SUSPEND);
 
-       /* serialize with the timer interrupt */
-       write_seqlock(&xtime_lock);
-
-       /* protect against access to timer chip registers */
-       spin_lock(&i8253_lock);
-
-       get_time_diff();
-       /*
-        * Irq spinlock must be dropped around set_system_power_state.
-        * We'll undo any timer changes due to interrupts below.
-        */
-       spin_unlock(&i8253_lock);
-       write_sequnlock(&xtime_lock);
        local_irq_enable();
 
        save_processor_state();
@@ -1228,13 +1214,7 @@ static int suspend(int vetoable)
        restore_processor_state();
 
        local_irq_disable();
-       write_seqlock(&xtime_lock);
-       spin_lock(&i8253_lock);
        reinit_timer();
-       set_time();
-
-       spin_unlock(&i8253_lock);
-       write_sequnlock(&xtime_lock);
 
        if (err == APM_NO_ERROR)
                err = APM_SUCCESS;
@@ -1263,11 +1243,6 @@ static void standby(void)
 
        local_irq_disable();
        device_power_down(PMSG_SUSPEND);
-       /* serialize with the timer interrupt */
-       write_seqlock(&xtime_lock);
-       /* If needed, notify drivers here */
-       get_time_diff();
-       write_sequnlock(&xtime_lock);
        local_irq_enable();
 
        err = set_system_power_state(APM_STATE_STANDBY);
@@ -1282,7 +1257,7 @@ static void standby(void)
 static apm_event_t get_event(void)
 {
        int             error;
-       apm_event_t     event;
+       apm_event_t     event = APM_NO_EVENTS; /* silence gcc */
        apm_eventinfo_t info;
 
        static int notified;
@@ -1361,9 +1336,6 @@ static void check_events(void)
                        ignore_bounce = 1;
                        if ((event != APM_NORMAL_RESUME)
                            || (ignore_normal_resume == 0)) {
-                               write_seqlock_irq(&xtime_lock);
-                               set_time();
-                               write_sequnlock_irq(&xtime_lock);
                                device_resume();
                                pm_send_all(PM_RESUME, (void *)0);
                                queue_event(event, NULL);
@@ -1379,9 +1351,6 @@ static void check_events(void)
                        break;
 
                case APM_UPDATE_TIME:
-                       write_seqlock_irq(&xtime_lock);
-                       set_time();
-                       write_sequnlock_irq(&xtime_lock);
                        break;
 
                case APM_CRITICAL_SUSPEND:
@@ -1426,7 +1395,7 @@ static void apm_mainloop(void)
        set_current_state(TASK_INTERRUPTIBLE);
        for (;;) {
                schedule_timeout(APM_CHECK_TIMEOUT);
-               if (exit_kapmd)
+               if (kthread_should_stop())
                        break;
                /*
                 * Ok, check all events, check for idle (and mark us sleeping
@@ -1588,7 +1557,7 @@ static int do_open(struct inode * inode, struct file * filp)
 {
        struct apm_user *       as;
 
-       as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
+       as = kmalloc(sizeof(*as), GFP_KERNEL);
        if (as == NULL) {
                printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
                       sizeof(*as));
@@ -1616,9 +1585,8 @@ static int do_open(struct inode * inode, struct file * filp)
        return 0;
 }
 
-static int apm_get_info(char *buf, char **start, off_t fpos, int length)
+static int proc_apm_show(struct seq_file *m, void *v)
 {
-       char *          p;
        unsigned short  bx;
        unsigned short  cx;
        unsigned short  dx;
@@ -1630,8 +1598,6 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
        int             time_units     = -1;
        char            *units         = "?";
 
-       p = buf;
-
        if ((num_online_cpus() == 1) &&
            !(error = apm_get_power_status(&bx, &cx, &dx))) {
                ac_line_status = (bx >> 8) & 0xff;
@@ -1685,7 +1651,7 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
              -1: Unknown
           8) min = minutes; sec = seconds */
 
-       p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
+       seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
                     driver_version,
                     (apm_info.bios.version >> 8) & 0xff,
                     apm_info.bios.version & 0xff,
@@ -1696,10 +1662,22 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
                     percentage,
                     time_units,
                     units);
+       return 0;
+}
 
-       return p - buf;
+static int proc_apm_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, proc_apm_show, NULL);
 }
 
+static const struct file_operations apm_file_ops = {
+       .owner          = THIS_MODULE,
+       .open           = proc_apm_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
 static int apm(void *unused)
 {
        unsigned short  bx;
@@ -1709,12 +1687,6 @@ static int apm(void *unused)
        char *          power_stat;
        char *          bat_stat;
 
-       kapmd_running = 1;
-
-       daemonize("kapmd");
-
-       current->flags |= PF_NOFREEZE;
-
 #ifdef CONFIG_SMP
        /* 2002/08/01 - WT
         * This is to avoid random crashes at boot time during initialization
@@ -1824,7 +1796,6 @@ static int apm(void *unused)
                console_blank_hook = NULL;
 #endif
        }
-       kapmd_running = 0;
 
        return 0;
 }
@@ -1881,7 +1852,7 @@ static int __init apm_setup(char *str)
 __setup("apm=", apm_setup);
 #endif
 
-static struct file_operations apm_bios_fops = {
+static const struct file_operations apm_bios_fops = {
        .owner          = THIS_MODULE,
        .read           = do_read,
        .poll           = do_poll,
@@ -2222,12 +2193,12 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
 static int __init apm_init(void)
 {
        struct proc_dir_entry *apm_proc;
-       int ret;
-       int i;
+       struct desc_struct *gdt;
+       int err;
 
        dmi_check_system(apm_dmi_table);
 
-       if (apm_info.bios.version == 0) {
+       if (apm_info.bios.version == 0 || paravirt_enabled()) {
                printk(KERN_INFO "apm: BIOS not found.\n");
                return -ENODEV;
        }
@@ -2290,7 +2261,9 @@ static int __init apm_init(void)
                apm_info.disabled = 1;
                return -ENODEV;
        }
+#ifdef CONFIG_PM_LEGACY
        pm_active = 1;
+#endif
 
        /*
         * Set up a segment that references the real mode segment 0x40
@@ -2314,28 +2287,32 @@ static int __init apm_init(void)
         * not restrict themselves to their claimed limit.  When this happens,
         * they will cause a segmentation violation in the kernel at boot time.
         * Most BIOS's, however, will respect a 64k limit, so we use that.
+        *
+        * Note we only set APM segments on CPU zero, since we pin the APM
+        * code to that CPU.
         */
-       for (i = 0; i < NR_CPUS; i++) {
-               struct desc_struct *gdt = get_cpu_gdt_table(i);
-               if (!gdt)
-                       continue;
-               set_base(gdt[APM_CS >> 3],
-                        __va((unsigned long)apm_info.bios.cseg << 4));
-               set_base(gdt[APM_CS_16 >> 3],
-                        __va((unsigned long)apm_info.bios.cseg_16 << 4));
-               set_base(gdt[APM_DS >> 3],
-                        __va((unsigned long)apm_info.bios.dseg << 4));
-       }
-
-       apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
+       gdt = get_cpu_gdt_table(0);
+       set_base(gdt[APM_CS >> 3],
+                __va((unsigned long)apm_info.bios.cseg << 4));
+       set_base(gdt[APM_CS_16 >> 3],
+                __va((unsigned long)apm_info.bios.cseg_16 << 4));
+       set_base(gdt[APM_DS >> 3],
+                __va((unsigned long)apm_info.bios.dseg << 4));
+
+       apm_proc = create_proc_entry("apm", 0, NULL);
        if (apm_proc)
-               apm_proc->owner = THIS_MODULE;
-
-       ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
-       if (ret < 0) {
-               printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
-               return -ENOMEM;
+               apm_proc->proc_fops = &apm_file_ops;
+
+       kapmd_task = kthread_create(apm, NULL, "kapmd");
+       if (IS_ERR(kapmd_task)) {
+               printk(KERN_ERR "apm: disabled - Unable to start kernel "
+                               "thread.\n");
+               err = PTR_ERR(kapmd_task);
+               kapmd_task = NULL;
+               remove_proc_entry("apm", NULL);
+               return err;
        }
+       wake_up_process(kapmd_task);
 
        if (num_online_cpus() > 1 && !smp ) {
                printk(KERN_NOTICE
@@ -2343,7 +2320,13 @@ static int __init apm_init(void)
                return 0;
        }
 
-       misc_register(&apm_device);
+       /*
+        * Note we don't actually care if the misc_device cannot be registered.
+        * This driver can do its job without it, even if userspace can't
+        * control it.  Just log the error.
+        */
+       if (misc_register(&apm_device))
+               printk(KERN_WARNING "apm: Could not register misc device.\n");
 
        if (HZ != 100)
                idle_period = (idle_period * HZ) / 100;
@@ -2379,10 +2362,13 @@ static void __exit apm_exit(void)
        remove_proc_entry("apm", NULL);
        if (power_off)
                pm_power_off = NULL;
-       exit_kapmd = 1;
-       while (kapmd_running)
-               schedule();
+       if (kapmd_task) {
+               kthread_stop(kapmd_task);
+               kapmd_task = NULL;
+       }
+#ifdef CONFIG_PM_LEGACY
        pm_active = 0;
+#endif
 }
 
 module_init(apm_init);