sh: convert /proc/cpu/alignment, /proc/cpu/kernel_alignment to seq_file
[safe/jmp/linux-2.6] / kernel / stop_machine.c
index a473bd0..912823e 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
+/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
  * GPL v2 and any later version.
  */
 #include <linux/cpu.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
-/* Since we effect priority and affinity (both of which are visible
- * to, and settable by outside processes) we do indirection via a
- * kthread. */
-
-/* Thread to stop each CPU in user context. */
+/* This controls the threads on each CPU. */
 enum stopmachine_state {
-       STOPMACHINE_WAIT,
+       /* Dummy starting state for thread. */
+       STOPMACHINE_NONE,
+       /* Awaiting everyone to be scheduled. */
        STOPMACHINE_PREPARE,
+       /* Disable interrupts. */
        STOPMACHINE_DISABLE_IRQ,
+       /* Run the function */
        STOPMACHINE_RUN,
+       /* Exit */
        STOPMACHINE_EXIT,
 };
+static enum stopmachine_state state;
 
 struct stop_machine_data {
        int (*fn)(void *);
        void *data;
-       struct completion done;
-       int run_all;
-} smdata;
-
-static enum stopmachine_state stopmachine_state;
-static unsigned int stopmachine_num_threads;
-static atomic_t stopmachine_thread_ack;
+       int fnret;
+};
 
-static int stopmachine(void *cpu)
+/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+static unsigned int num_threads;
+static atomic_t thread_ack;
+static DEFINE_MUTEX(lock);
+/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
+static DEFINE_MUTEX(setup_lock);
+/* Users of stop_machine. */
+static int refcount;
+static struct workqueue_struct *stop_machine_wq;
+static struct stop_machine_data active, idle;
+static const struct cpumask *active_cpus;
+static void *stop_machine_work;
+
+static void set_state(enum stopmachine_state newstate)
 {
-       int irqs_disabled = 0;
-       int prepared = 0;
-       int ran = 0;
-       cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
-
-       set_cpus_allowed_ptr(current, cpumask);
-
-       /* Ack: we are alive */
-       smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
-       atomic_inc(&stopmachine_thread_ack);
-
-       /* Simple state machine */
-       while (stopmachine_state != STOPMACHINE_EXIT) {
-               if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
-                   && !irqs_disabled) {
-                       local_irq_disable();
-                       hard_irq_disable();
-                       irqs_disabled = 1;
-                       /* Ack: irqs disabled. */
-                       smp_mb(); /* Must read state first. */
-                       atomic_inc(&stopmachine_thread_ack);
-               } else if (stopmachine_state == STOPMACHINE_PREPARE
-                          && !prepared) {
-                       /* Everyone is in place, hold CPU. */
-                       preempt_disable();
-                       prepared = 1;
-                       smp_mb(); /* Must read state first. */
-                       atomic_inc(&stopmachine_thread_ack);
-               } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
-                       smdata.fn(smdata.data);
-                       ran = 1;
-                       smp_mb(); /* Must read state first. */
-                       atomic_inc(&stopmachine_thread_ack);
-               }
-               /* Yield in first stage: migration threads need to
-                * help our sisters onto their CPUs. */
-               if (!prepared && !irqs_disabled)
-                       yield();
-               cpu_relax();
-       }
-
-       /* Ack: we are exiting. */
-       smp_mb(); /* Must read state first. */
-       atomic_inc(&stopmachine_thread_ack);
-
-       if (irqs_disabled)
-               local_irq_enable();
-       if (prepared)
-               preempt_enable();
-
-       return 0;
+       /* Reset ack counter. */
+       atomic_set(&thread_ack, num_threads);
+       smp_wmb();
+       state = newstate;
 }
 
-/* Change the thread state */
-static void stopmachine_set_state(enum stopmachine_state state)
+/* Last one to ack a state moves to the next state. */
+static void ack_state(void)
 {
-       atomic_set(&stopmachine_thread_ack, 0);
-       smp_wmb();
-       stopmachine_state = state;
-       while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-               cpu_relax();
+       if (atomic_dec_and_test(&thread_ack))
+               set_state(state + 1);
 }
 
-static int stop_machine(void)
+/* This is the actual function which stops the CPU. It runs
+ * in the context of a dedicated stopmachine workqueue. */
+static void stop_cpu(struct work_struct *unused)
 {
-       int i, ret = 0;
-
-       atomic_set(&stopmachine_thread_ack, 0);
-       stopmachine_num_threads = 0;
-       stopmachine_state = STOPMACHINE_WAIT;
-
-       for_each_online_cpu(i) {
-               if (i == raw_smp_processor_id())
-                       continue;
-               ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
-               if (ret < 0)
-                       break;
-               stopmachine_num_threads++;
+       enum stopmachine_state curstate = STOPMACHINE_NONE;
+       struct stop_machine_data *smdata = &idle;
+       int cpu = smp_processor_id();
+       int err;
+
+       if (!active_cpus) {
+               if (cpu == cpumask_first(cpu_online_mask))
+                       smdata = &active;
+       } else {
+               if (cpumask_test_cpu(cpu, active_cpus))
+                       smdata = &active;
        }
-
-       /* Wait for them all to come to life. */
-       while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
-               yield();
+       /* Simple state machine */
+       do {
+               /* Chill out and ensure we re-read state. */
                cpu_relax();
-       }
-
-       /* If some failed, kill them all. */
-       if (ret < 0) {
-               stopmachine_set_state(STOPMACHINE_EXIT);
-               return ret;
-       }
-
-       /* Now they are all started, make them hold the CPUs, ready. */
-       preempt_disable();
-       stopmachine_set_state(STOPMACHINE_PREPARE);
-
-       /* Make them disable irqs. */
-       local_irq_disable();
-       hard_irq_disable();
-       stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
-
-       return 0;
-}
+               if (state != curstate) {
+                       curstate = state;
+                       switch (curstate) {
+                       case STOPMACHINE_DISABLE_IRQ:
+                               local_irq_disable();
+                               hard_irq_disable();
+                               break;
+                       case STOPMACHINE_RUN:
+                               /* On multiple CPUs only a single error code
+                                * is needed to tell that something failed. */
+                               err = smdata->fn(smdata->data);
+                               if (err)
+                                       smdata->fnret = err;
+                               break;
+                       default:
+                               break;
+                       }
+                       ack_state();
+               }
+       } while (curstate != STOPMACHINE_EXIT);
 
-static void restart_machine(void)
-{
-       stopmachine_set_state(STOPMACHINE_EXIT);
        local_irq_enable();
-       preempt_enable_no_resched();
 }
 
-static void run_other_cpus(void)
+/* Callback for CPUs which aren't supposed to do anything. */
+static int chill(void *unused)
 {
-       stopmachine_set_state(STOPMACHINE_RUN);
+       return 0;
 }
 
-static int do_stop(void *_smdata)
+int stop_machine_create(void)
 {
-       struct stop_machine_data *smdata = _smdata;
-       int ret;
-
-       ret = stop_machine();
-       if (ret == 0) {
-               ret = smdata->fn(smdata->data);
-               if (smdata->run_all)
-                       run_other_cpus();
-               restart_machine();
-       }
-
-       /* We're done: you can kthread_stop us now */
-       complete(&smdata->done);
+       mutex_lock(&setup_lock);
+       if (refcount)
+               goto done;
+       stop_machine_wq = create_rt_workqueue("kstop");
+       if (!stop_machine_wq)
+               goto err_out;
+       stop_machine_work = alloc_percpu(struct work_struct);
+       if (!stop_machine_work)
+               goto err_out;
+done:
+       refcount++;
+       mutex_unlock(&setup_lock);
+       return 0;
 
-       /* Wait for kthread_stop */
-       set_current_state(TASK_INTERRUPTIBLE);
-       while (!kthread_should_stop()) {
-               schedule();
-               set_current_state(TASK_INTERRUPTIBLE);
-       }
-       __set_current_state(TASK_RUNNING);
-       return ret;
+err_out:
+       if (stop_machine_wq)
+               destroy_workqueue(stop_machine_wq);
+       mutex_unlock(&setup_lock);
+       return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(stop_machine_create);
 
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-                                      unsigned int cpu)
+void stop_machine_destroy(void)
 {
-       static DEFINE_MUTEX(stopmachine_mutex);
-       struct stop_machine_data smdata;
-       struct task_struct *p;
-
-       mutex_lock(&stopmachine_mutex);
-
-       smdata.fn = fn;
-       smdata.data = data;
-       smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
-       init_completion(&smdata.done);
-
-       smp_wmb(); /* make sure other cpus see smdata updates */
-
-       /* If they don't care which CPU fn runs on, bind to any online one. */
-       if (cpu == NR_CPUS || cpu == ALL_CPUS)
-               cpu = raw_smp_processor_id();
-
-       p = kthread_create(do_stop, &smdata, "kstopmachine");
-       if (!IS_ERR(p)) {
-               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+       mutex_lock(&setup_lock);
+       refcount--;
+       if (refcount)
+               goto done;
+       destroy_workqueue(stop_machine_wq);
+       free_percpu(stop_machine_work);
+done:
+       mutex_unlock(&setup_lock);
+}
+EXPORT_SYMBOL_GPL(stop_machine_destroy);
 
-               /* One high-prio thread per cpu.  We'll do this one. */
-               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-               kthread_bind(p, cpu);
-               wake_up_process(p);
-               wait_for_completion(&smdata.done);
+int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+{
+       struct work_struct *sm_work;
+       int i, ret;
+
+       /* Set up initial state. */
+       mutex_lock(&lock);
+       num_threads = num_online_cpus();
+       active_cpus = cpus;
+       active.fn = fn;
+       active.data = data;
+       active.fnret = 0;
+       idle.fn = chill;
+       idle.data = NULL;
+
+       set_state(STOPMACHINE_PREPARE);
+
+       /* Schedule the stop_cpu work on all cpus: hold this CPU so the
+        * work queued for it doesn't start running until we're ready. */
+       get_cpu();
+       for_each_online_cpu(i) {
+               sm_work = per_cpu_ptr(stop_machine_work, i);
+               INIT_WORK(sm_work, stop_cpu);
+               queue_work_on(i, stop_machine_wq, sm_work);
        }
-       mutex_unlock(&stopmachine_mutex);
-       return p;
+       /* This will release the thread on our CPU. */
+       put_cpu();
+       flush_workqueue(stop_machine_wq);
+       ret = active.fnret;
+       mutex_unlock(&lock);
+       return ret;
 }
 
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-       struct task_struct *p;
        int ret;
 
+       ret = stop_machine_create();
+       if (ret)
+               return ret;
        /* No CPUs can come up or down during this. */
        get_online_cpus();
-       p = __stop_machine_run(fn, data, cpu);
-       if (!IS_ERR(p))
-               ret = kthread_stop(p);
-       else
-               ret = PTR_ERR(p);
+       ret = __stop_machine(fn, data, cpus);
        put_online_cpus();
-
+       stop_machine_destroy();
        return ret;
 }
-EXPORT_SYMBOL_GPL(stop_machine_run);
+EXPORT_SYMBOL_GPL(stop_machine);