Merge branch 'x86-setup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
arch/powerpc/kernel/smp.c
index 30374d2..0b47de0 100644
@@ -17,7 +17,6 @@
 
 #undef DEBUG
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -31,6 +30,7 @@
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/topology.h>
 
 #include <asm/ptrace.h>
 #include <asm/atomic.h>
@@ -41,6 +41,7 @@
 #include <asm/smp.h>
 #include <asm/time.h>
 #include <asm/machdep.h>
+#include <asm/cputhreads.h>
 #include <asm/cputable.h>
 #include <asm/system.h>
 #include <asm/mpic.h>
 #define DBG(fmt...)
 #endif
 
-int smp_hw_index[NR_CPUS];
 struct thread_info *secondary_ti;
 
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
+DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
 
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
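The global cpu_sibling_map[NR_CPUS] array becomes a pair of per-CPU masks, exported for modules via EXPORT_PER_CPU_SYMBOL. A minimal sketch of how a consumer would read them under the new scheme (illustrative only; count_thread_siblings() is not part of this diff):

/* Illustrative: read one CPU's thread siblings through the new
 * per-CPU mask instead of the old global array. */
#include <linux/cpumask.h>
#include <linux/percpu.h>

DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);

static int count_thread_siblings(int cpu)
{
        int i, n = 0;

        for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
                n++;
        return n;
}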
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
 
-static volatile unsigned int cpu_callin_map[NR_CPUS];
-
-void smp_call_function_interrupt(void);
+/* Can't be static due to PowerMac hackery */
+volatile unsigned int cpu_callin_map[NR_CPUS];
 
 int smt_enabled_at_boot = 1;
 
-#ifdef CONFIG_MPIC
-int __init smp_mpic_probe(void)
-{
-       int nr_cpus;
-
-       DBG("smp_mpic_probe()...\n");
-
-       nr_cpus = cpus_weight(cpu_possible_map);
-
-       DBG("nr_cpus: %d\n", nr_cpus);
-
-       if (nr_cpus > 1)
-               mpic_request_ipis();
-
-       return nr_cpus;
-}
-
-void __devinit smp_mpic_setup_cpu(int cpu)
-{
-       mpic_setup_this_cpu();
-}
-#endif /* CONFIG_MPIC */
+static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
 
 #ifdef CONFIG_PPC64
 void __devinit smp_generic_kick_cpu(int nr)
@@ -113,21 +90,28 @@ void __devinit smp_generic_kick_cpu(int nr)
 }
 #endif
 
-void smp_message_recv(int msg, struct pt_regs *regs)
+void smp_message_recv(int msg)
 {
        switch(msg) {
        case PPC_MSG_CALL_FUNCTION:
-               smp_call_function_interrupt();
+               generic_smp_call_function_interrupt();
                break;
        case PPC_MSG_RESCHEDULE:
-               /* XXX Do we have to do this? */
-               set_need_resched();
+               /* we notice need_resched on exit */
+               break;
+       case PPC_MSG_CALL_FUNC_SINGLE:
+               generic_smp_call_function_single_interrupt();
                break;
-#ifdef CONFIG_DEBUGGER
        case PPC_MSG_DEBUGGER_BREAK:
-               debugger_ipi(regs);
+               if (crash_ipi_function_ptr) {
+                       crash_ipi_function_ptr(get_irq_regs());
+                       break;
+               }
+#ifdef CONFIG_DEBUGGER
+               debugger_ipi(get_irq_regs());
                break;
-#endif
+#endif /* CONFIG_DEBUGGER */
+               /* FALLTHROUGH */
        default:
                printk("SMP %d: smp_message_recv(): unknown msg %d\n",
                       smp_processor_id(), msg);
@@ -135,173 +119,116 @@ void smp_message_recv(int msg, struct pt_regs *regs)
        }
 }
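smp_message_recv() loses its pt_regs argument (handlers pick the registers up with get_irq_regs() instead) and routes the two call-function messages to the generic kernel/smp.c handlers; the debugger-break slot now doubles as the kdump crash IPI, falling through to the unknown-message printk when neither consumer claims it. Controllers with only one hardware IPI still demux into this function, along these lines (a sketch; my_read_pending_messages() is a hypothetical stand-in for the controller-specific register read):

/* Hypothetical demux for a controller with a single IPI and a
 * per-CPU pending word. */
#include <linux/interrupt.h>
#include <asm/smp.h>

extern unsigned long my_read_pending_messages(void);   /* hypothetical */

static irqreturn_t my_ipi_demux(int irq, void *data)
{
        unsigned long msgs = my_read_pending_messages();
        int msg;

        for (msg = 0; msg <= PPC_MSG_DEBUGGER_BREAK; msg++)
                if (msgs & (1UL << msg))
                        smp_message_recv(msg);
        return IRQ_HANDLED;
}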
 
-void smp_send_reschedule(int cpu)
+static irqreturn_t call_function_action(int irq, void *data)
 {
-       smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
+       generic_smp_call_function_interrupt();
+       return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_DEBUGGER
-void smp_send_debugger_break(int cpu)
+static irqreturn_t reschedule_action(int irq, void *data)
 {
-       smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+       /* we just need the return path side effect of checking need_resched */
+       return IRQ_HANDLED;
 }
-#endif
 
-static void stop_this_cpu(void *dummy)
+static irqreturn_t call_function_single_action(int irq, void *data)
 {
-       local_irq_disable();
-       while (1)
-               ;
+       generic_smp_call_function_single_interrupt();
+       return IRQ_HANDLED;
 }
 
-void smp_send_stop(void)
+static irqreturn_t debug_ipi_action(int irq, void *data)
 {
-       smp_call_function(stop_this_cpu, NULL, 1, 0);
+       smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+       return IRQ_HANDLED;
 }
 
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- * Stolen from the i386 version.
- */
-static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
-
-static struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       atomic_t started;
-       atomic_t finished;
-       int wait;
-} *call_data;
-
-/* delay of at least 8 seconds */
-#define SMP_CALL_TIMEOUT       8
-
-/*
- * This function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- *
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                      int wait)
-{ 
-       struct call_data_struct data;
-       int ret = -1, cpus;
-       u64 timeout;
-
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
-
-       data.func = func;
-       data.info = info;
-       atomic_set(&data.started, 0);
-       data.wait = wait;
-       if (wait)
-               atomic_set(&data.finished, 0);
-
-       spin_lock(&call_lock);
-       /* Must grab online cpu count with preempt disabled, otherwise
-        * it can change. */
-       cpus = num_online_cpus() - 1;
-       if (!cpus) {
-               ret = 0;
-               goto out;
-       }
+static irq_handler_t smp_ipi_action[] = {
+       [PPC_MSG_CALL_FUNCTION] =  call_function_action,
+       [PPC_MSG_RESCHEDULE] = reschedule_action,
+       [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+       [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+};
+
+const char *smp_ipi_name[] = {
+       [PPC_MSG_CALL_FUNCTION] =  "ipi call function",
+       [PPC_MSG_RESCHEDULE] = "ipi reschedule",
+       [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+       [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+};
+
+/* optional function to request ipi, for controllers with >= 4 ipis */
+int smp_request_message_ipi(int virq, int msg)
+{
+       int err;
 
-       call_data = &data;
-       smp_wmb();
-       /* Send a message to all other CPUs and wait for them to respond */
-       smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION);
-
-       timeout = get_tb() + (u64) SMP_CALL_TIMEOUT * tb_ticks_per_sec;
-
-       /* Wait for response */
-       while (atomic_read(&data.started) != cpus) {
-               HMT_low();
-               if (get_tb() >= timeout) {
-                       printk("smp_call_function on cpu %d: other cpus not "
-                              "responding (%d)\n", smp_processor_id(),
-                              atomic_read(&data.started));
-                       debugger(NULL);
-                       goto out;
-               }
+       if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+               return -EINVAL;
        }
-
-       if (wait) {
-               while (atomic_read(&data.finished) != cpus) {
-                       HMT_low();
-                       if (get_tb() >= timeout) {
-                               printk("smp_call_function on cpu %d: other "
-                                      "cpus not finishing (%d/%d)\n",
-                                      smp_processor_id(),
-                                      atomic_read(&data.finished),
-                                      atomic_read(&data.started));
-                               debugger(NULL);
-                               goto out;
-                       }
-               }
+#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+       if (msg == PPC_MSG_DEBUGGER_BREAK) {
+               return 1;
        }
+#endif
+       err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU,
+                         smp_ipi_name[msg], 0);
+       WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+               virq, smp_ipi_name[msg], err);
 
-       ret = 0;
-
- out:
-       call_data = NULL;
-       HMT_medium();
-       spin_unlock(&call_lock);
-       return ret;
+       return err;
 }
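The table-driven registration above replaces the MPIC-specific probe code this diff removes: a controller that exposes four or more hardware IPIs maps one virq per message and hands each to smp_request_message_ipi(). A sketch of a caller, where my_ipi_virq() is a hypothetical stand-in for the controller's irq_create_mapping() logic:

/* Sketch: wiring four hardware IPIs at probe time. */
#include <linux/init.h>
#include <asm/smp.h>

extern int my_ipi_virq(int msg);        /* hypothetical */

static void __init my_probe_ipis(void)
{
        int msg;

        for (msg = PPC_MSG_CALL_FUNCTION; msg <= PPC_MSG_DEBUGGER_BREAK; msg++)
                smp_request_message_ipi(my_ipi_virq(msg), msg);
}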
 
-EXPORT_SYMBOL(smp_call_function);
+void smp_send_reschedule(int cpu)
+{
+       if (likely(smp_ops))
+               smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
+}
 
-void smp_call_function_interrupt(void)
+void arch_send_call_function_single_ipi(int cpu)
 {
-       void (*func) (void *info);
-       void *info;
-       int wait;
+       smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+}
 
-       /* call_data will be NULL if the sender timed out while
-        * waiting on us to receive the call.
-        */
-       if (!call_data)
-               return;
+void arch_send_call_function_ipi(cpumask_t mask)
+{
+       unsigned int cpu;
 
-       func = call_data->func;
-       info = call_data->info;
-       wait = call_data->wait;
+       for_each_cpu_mask(cpu, mask)
+               smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
+}
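These two arch hooks are all the generic cross-call code needs; the queueing, completion and timeout logic that the removed smp_call_function() implemented by hand now lives in kernel/smp.c. Note the caller-visible change: smp_call_function() drops the old nonatomic argument (smp_send_stop() below already uses the three-argument form). A sketch of a caller going through the generic API:

/* Sketch: cross-CPU calls through the generic API that these two
 * hooks plug into. */
#include <linux/smp.h>
#include <linux/kernel.h>

static void remote_ping(void *info)
{
        printk(KERN_INFO "ping on cpu %d\n", smp_processor_id());
}

static void ping_everyone(void)
{
        smp_call_function(remote_ping, NULL, 1);           /* all others, wait */
        smp_call_function_single(0, remote_ping, NULL, 1); /* just cpu 0 */
}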
 
-       if (!wait)
-               smp_mb__before_atomic_inc();
+#ifdef CONFIG_DEBUGGER
+void smp_send_debugger_break(int cpu)
+{
+       if (likely(smp_ops))
+               smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+}
+#endif
 
-       /*
-        * Notify initiating CPU that I've grabbed the data and am
-        * about to execute the function
-        */
-       atomic_inc(&call_data->started);
-       /*
-        * At this point the info structure may be out of scope unless wait==1
-        */
-       (*func)(info);
-       if (wait) {
-               smp_mb__before_atomic_inc();
-               atomic_inc(&call_data->finished);
+#ifdef CONFIG_KEXEC
+void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+       crash_ipi_function_ptr = crash_ipi_callback;
+       if (crash_ipi_callback && smp_ops) {
+               mb();
+               smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
        }
 }
+#endif
 
-extern struct gettimeofday_struct do_gtod;
+static void stop_this_cpu(void *dummy)
+{
+       local_irq_disable();
+       while (1)
+               ;
+}
 
-struct thread_info *current_set[NR_CPUS];
+void smp_send_stop(void)
+{
+       smp_call_function(stop_this_cpu, NULL, 0);
+}
 
-DECLARE_PER_CPU(unsigned int, pvr);
+struct thread_info *current_set[NR_CPUS];
 
 static void __devinit smp_store_cpu_info(int id)
 {
@@ -318,9 +245,11 @@ static void __init smp_create_idle(unsigned int cpu)
                panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
 #ifdef CONFIG_PPC64
        paca[cpu].__current = p;
+       paca[cpu].kstack = (unsigned long) task_thread_info(p)
+               + THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
-       current_set[cpu] = p->thread_info;
-       p->thread_info->cpu = cpu;
+       current_set[cpu] = task_thread_info(p);
+       task_thread_info(p)->cpu = cpu;
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -339,11 +268,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
        smp_store_cpu_info(boot_cpuid);
        cpu_callin_map[boot_cpuid] = 1;
 
-       max_cpus = smp_ops->probe();
+       if (smp_ops)
+               max_cpus = smp_ops->probe();
+       else
+               max_cpus = 1;
  
        smp_space_timers(max_cpus);
 
-       for_each_cpu(cpu)
+       for_each_possible_cpu(cpu)
                if (cpu != boot_cpuid)
                        smp_create_idle(cpu);
 }
@@ -353,10 +285,12 @@ void __devinit smp_prepare_boot_cpu(void)
        BUG_ON(smp_processor_id() != boot_cpuid);
 
        cpu_set(boot_cpuid, cpu_online_map);
+       cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid));
+       cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid));
 #ifdef CONFIG_PPC64
        paca[boot_cpuid].__current = current;
 #endif
-       current_set[boot_cpuid] = current->thread_info;
+       current_set[boot_cpuid] = task_thread_info(current);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -424,10 +358,6 @@ void generic_mach_cpu_die(void)
        smp_wmb();
        while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
                cpu_relax();
-
-#ifdef CONFIG_PPC64
-       flush_tlb_pending();
-#endif
        cpu_set(cpu, cpu_online_map);
        local_irq_enable();
 }
@@ -435,13 +365,13 @@ void generic_mach_cpu_die(void)
 
 static int __devinit cpu_enable(unsigned int cpu)
 {
-       if (smp_ops->cpu_enable)
+       if (smp_ops && smp_ops->cpu_enable)
                return smp_ops->cpu_enable(cpu);
 
        return -ENOSYS;
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
        int c;
 
@@ -449,13 +379,10 @@ int __devinit __cpu_up(unsigned int cpu)
        if (!cpu_enable(cpu))
                return 0;
 
-       if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))
+       if (smp_ops == NULL ||
+           (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
                return -EINVAL;
 
-#ifdef CONFIG_PPC64
-       paca[cpu].default_decr = tb_ticks_per_jiffy;
-#endif
-
        /* Make sure callin-map entry is 0 (can be leftover a CPU
         * hotplug
         */
@@ -477,7 +404,7 @@ int __devinit __cpu_up(unsigned int cpu)
         * -- Cort
         */
        if (system_state < SYSTEM_RUNNING)
-               for (c = 5000; c && !cpu_callin_map[cpu]; c--)
+               for (c = 50000; c && !cpu_callin_map[cpu]; c--)
                        udelay(100);
 #ifdef CONFIG_HOTPLUG_CPU
        else
@@ -507,11 +434,57 @@ int __devinit __cpu_up(unsigned int cpu)
        return 0;
 }
 
+/* Return the value of the reg property corresponding to the given
+ * logical cpu.
+ */
+int cpu_to_core_id(int cpu)
+{
+       struct device_node *np;
+       const int *reg;
+       int id = -1;
+
+       np = of_get_cpu_node(cpu, NULL);
+       if (!np)
+               goto out;
+
+       reg = of_get_property(np, "reg", NULL);
+       if (!reg)
+               goto out;
+
+       id = *reg;
+out:
+       of_node_put(np);
+       return id;
+}
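cpu_to_core_id() supplies the physical core number that the sysfs topology code reports; the PPC64 asm/topology.h wiring of the era looked roughly like the following (paraphrased, not part of this diff):

/* Roughly how asm/topology.h consumed these at the time: */
#define topology_core_id(cpu)           (cpu_to_core_id(cpu))
#define topology_thread_siblings(cpu)   (per_cpu(cpu_sibling_map, cpu))
#define topology_core_siblings(cpu)     (per_cpu(cpu_core_map, cpu))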
+
+/* Must be called when no change can occur to cpu_present_map,
+ * i.e. during cpu online or offline.
+ */
+static struct device_node *cpu_to_l2cache(int cpu)
+{
+       struct device_node *np;
+       struct device_node *cache;
+
+       if (!cpu_present(cpu))
+               return NULL;
+
+       np = of_get_cpu_node(cpu, NULL);
+       if (np == NULL)
+               return NULL;
+
+       cache = of_find_next_cache_node(np);
+
+       of_node_put(np);
+
+       return cache;
+}
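cpu_to_l2cache() leans on of_find_next_cache_node() to follow the CPU node's cache phandle. Simplified, and assuming the device tree uses the usual "l2-cache"/"next-level-cache" properties, the lookup amounts to:

/* Simplified sketch of the phandle walk behind
 * of_find_next_cache_node(); fallback paths omitted. */
static struct device_node *next_cache(struct device_node *np)
{
        const phandle *h = of_get_property(np, "l2-cache", NULL);

        if (!h)
                h = of_get_property(np, "next-level-cache", NULL);
        return h ? of_find_node_by_phandle(*h) : NULL;
}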
 
 /* Activate a secondary processor. */
 int __devinit start_secondary(void *unused)
 {
        unsigned int cpu = smp_processor_id();
+       struct device_node *l2_cache;
+       int i, base;
 
        atomic_inc(&init_mm.mm_count);
        current->active_mm = &init_mm;
@@ -525,9 +498,42 @@ int __devinit start_secondary(void *unused)
        if (smp_ops->take_timebase)
                smp_ops->take_timebase();
 
-       spin_lock(&call_lock);
+       if (system_state > SYSTEM_BOOTING)
+               snapshot_timebase();
+
+       secondary_cpu_time_init();
+
+       ipi_call_lock();
+       notify_cpu_starting(cpu);
        cpu_set(cpu, cpu_online_map);
-       spin_unlock(&call_lock);
+       /* Update sibling maps */
+       base = cpu_first_thread_in_core(cpu);
+       for (i = 0; i < threads_per_core; i++) {
+               if (cpu_is_offline(base + i))
+                       continue;
+               cpu_set(cpu, per_cpu(cpu_sibling_map, base + i));
+               cpu_set(base + i, per_cpu(cpu_sibling_map, cpu));
+
+               /* cpu_core_map should be a superset of
+                * cpu_sibling_map even if we don't have cache
+                * information, so update the former here, too.
+                */
+               cpu_set(cpu, per_cpu(cpu_core_map, base + i));
+               cpu_set(base + i, per_cpu(cpu_core_map, cpu));
+       }
+       l2_cache = cpu_to_l2cache(cpu);
+       for_each_online_cpu(i) {
+               struct device_node *np = cpu_to_l2cache(i);
+               if (!np)
+                       continue;
+               if (np == l2_cache) {
+                       cpu_set(cpu, per_cpu(cpu_core_map, i));
+                       cpu_set(i, per_cpu(cpu_core_map, cpu));
+               }
+               of_node_put(np);
+       }
+       of_node_put(l2_cache);
+       ipi_call_unlock();
 
        local_irq_enable();
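The sibling loop above derives the SMT group from the new asm/cputhreads.h helpers: cpu_first_thread_in_core() masks the thread bits off the logical CPU number, so with threads_per_core == 4, CPU 6 has base 4 and siblings 4..7. Roughly:

/* The thread bits are the low bits of the logical CPU number,
 * roughly as asm/cputhreads.h defines it: */
static inline int cpu_first_thread_in_core(int cpu)
{
        return cpu & ~(threads_per_core - 1);
}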
 
@@ -551,18 +557,55 @@ void __init smp_cpus_done(unsigned int max_cpus)
        old_mask = current->cpus_allowed;
        set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
        
-       smp_ops->setup_cpu(boot_cpuid);
+       if (smp_ops)
+               smp_ops->setup_cpu(boot_cpuid);
 
        set_cpus_allowed(current, old_mask);
+
+       snapshot_timebases();
+
+       dump_numa_cpu_topology();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 int __cpu_disable(void)
 {
-       if (smp_ops->cpu_disable)
-               return smp_ops->cpu_disable();
+       struct device_node *l2_cache;
+       int cpu = smp_processor_id();
+       int base, i;
+       int err;
 
-       return -ENOSYS;
+       if (!smp_ops->cpu_disable)
+               return -ENOSYS;
+
+       err = smp_ops->cpu_disable();
+       if (err)
+               return err;
+
+       /* Update sibling maps */
+       base = cpu_first_thread_in_core(cpu);
+       for (i = 0; i < threads_per_core; i++) {
+               cpu_clear(cpu, per_cpu(cpu_sibling_map, base + i));
+               cpu_clear(base + i, per_cpu(cpu_sibling_map, cpu));
+               cpu_clear(cpu, per_cpu(cpu_core_map, base + i));
+               cpu_clear(base + i, per_cpu(cpu_core_map, cpu));
+       }
+
+       l2_cache = cpu_to_l2cache(cpu);
+       for_each_present_cpu(i) {
+               struct device_node *np = cpu_to_l2cache(i);
+               if (!np)
+                       continue;
+               if (np == l2_cache) {
+                       cpu_clear(cpu, per_cpu(cpu_core_map, i));
+                       cpu_clear(i, per_cpu(cpu_core_map, cpu));
+               }
+               of_node_put(np);
+       }
+       of_node_put(l2_cache);
+
+       return 0;
 }
 
 void __cpu_die(unsigned int cpu)