/* smp.c: Sparc64 SMP support.
*
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
*/
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
-#include <linux/bootmem.h>
+#include <linux/lmb.h>
#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
+#include <asm/hvtramp.h>
+#include <asm/io.h>
+#include <asm/timer.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
-#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
+#include <asm/ldc.h>
+#include <asm/hypervisor.h>
-extern void calibrate_delay(void);
-
-/* Please don't make this stuff initdata!!! --DaveM */
-unsigned char boot_cpu_id;
+int sparc64_multi_core __read_mostly;
+cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+EXPORT_SYMBOL(cpu_possible_map);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(cpu_core_map);
+
static cpumask_t smp_commenced_mask;
-static cpumask_t cpu_callout_map;
void smp_info(struct seq_file *m)
{
for_each_online_cpu(i)
seq_printf(m,
- "Cpu%dBogo\t: %lu.%02lu\n"
"Cpu%dClkTck\t: %016lx\n",
- i, cpu_data(i).udelay_val / (500000/HZ),
- (cpu_data(i).udelay_val / (5000/HZ)) % 100,
i, cpu_data(i).clock_tick);
}
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
+
extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
-void __init smp_callin(void)
+void __cpuinit smp_callin(void)
{
int cpuid = hard_smp_processor_id();
local_irq_enable();
- calibrate_delay();
- cpu_data(cpuid).udelay_val = loops_per_jiffy;
callin_flag = 1;
__asm__ __volatile__("membar #Sync\n\t"
"flush %%g6" : : : "memory");
while (!cpu_isset(cpuid, smp_commenced_mask))
rmb();
+ spin_lock(&call_lock);
cpu_set(cpuid, cpu_online_map);
+ spin_unlock(&call_lock);
/* idle thread is expected to have preempt disabled */
preempt_disable();
t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif
- printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles,"
- "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);
+ printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
+ "(last diff %ld cycles, maxerr %lu cycles)\n",
+ smp_processor_id(), delta, rt);
}
static void smp_start_sync_tick_client(int cpu);
spin_unlock_irqrestore(&itc_sync_lock, flags);
}
-extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+/* XXX Put this in some common place. XXX */
+static unsigned long kimage_addr_to_ra(void *p)
+{
+ unsigned long val = (unsigned long) p;
+
+ return kern_base + (val - KERNBASE);
+}
+
+static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+{
+ extern unsigned long sparc64_ttable_tl0;
+ extern unsigned long kern_locked_tte_data;
+ struct hvtramp_descr *hdesc;
+ unsigned long trampoline_ra;
+ struct trap_per_cpu *tb;
+ u64 tte_vaddr, tte_data;
+ unsigned long hv_err;
+ int i;
+
+ hdesc = kzalloc(sizeof(*hdesc) +
+ (sizeof(struct hvtramp_mapping) *
+ num_kernel_image_mappings - 1),
+ GFP_KERNEL);
+ if (!hdesc) {
+ printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
+ "hvtramp_descr.\n");
+ return;
+ }
+
+ hdesc->cpu = cpu;
+ hdesc->num_mappings = num_kernel_image_mappings;
+
+ tb = &trap_block[cpu];
+ tb->hdesc = hdesc;
+
+ hdesc->fault_info_va = (unsigned long) &tb->fault_info;
+ hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
+
+ hdesc->thread_reg = thread_reg;
+
+ tte_vaddr = (unsigned long) KERNBASE;
+ tte_data = kern_locked_tte_data;
+
+ for (i = 0; i < hdesc->num_mappings; i++) {
+ hdesc->maps[i].vaddr = tte_vaddr;
+ hdesc->maps[i].tte = tte_data;
+ tte_vaddr += 0x400000;
+ tte_data += 0x400000;
+ }
+
+ trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
+
+ hv_err = sun4v_cpu_start(cpu, trampoline_ra,
+ kimage_addr_to_ra(&sparc64_ttable_tl0),
+ __pa(hdesc));
+ if (hv_err)
+ printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
+ "gives error %lu\n", hv_err);
+}
+#endif
extern unsigned long sparc64_cpu_startup;
static int __devinit smp_boot_one_cpu(unsigned int cpu)
{
+ struct trap_per_cpu *tb = &trap_block[cpu];
unsigned long entry =
(unsigned long)(&sparc64_cpu_startup);
unsigned long cookie =
int timeout, ret;
p = fork_idle(cpu);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
callin_flag = 0;
cpu_new_thread = task_thread_info(p);
- cpu_set(cpu, cpu_callout_map);
if (tlb_type == hypervisor) {
- /* Alloc the mondo queues, cpu will load them. */
- sun4v_init_mondo_queues(0, cpu, 1, 0);
-
- prom_startcpu_cpuid(cpu, entry, cookie);
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+ if (ldom_domaining_enabled)
+ ldom_startcpu_cpuid(cpu,
+ (unsigned long) cpu_new_thread);
+ else
+#endif
+ prom_startcpu_cpuid(cpu, entry, cookie);
} else {
struct device_node *dp = of_find_node_by_cpuid(cpu);
prom_startcpu(dp->node, entry, cookie);
}
- for (timeout = 0; timeout < 5000000; timeout++) {
+ for (timeout = 0; timeout < 50000; timeout++) {
if (callin_flag)
break;
udelay(100);
ret = 0;
} else {
printk("Processor %d is stuck.\n", cpu);
- cpu_clear(cpu, cpu_callout_map);
ret = -ENODEV;
}
cpu_new_thread = NULL;
+ if (tb->hdesc) {
+ kfree(tb->hdesc);
+ tb->hdesc = NULL;
+ }
+
return ret;
}
}
}
-static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
u64 pstate;
int i;
*/
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
- u64 pstate, ver;
+ u64 pstate, ver, busy_mask;
int nack_busy_id, is_jbus, need_more;
if (cpus_empty(mask))
"i" (ASI_INTR_W));
nack_busy_id = 0;
+ busy_mask = 0;
{
int i;
for_each_cpu_mask(i, mask) {
u64 target = (i << 14) | 0x70;
- if (!is_jbus)
+ if (is_jbus) {
+ busy_mask |= (0x1UL << (i * 2));
+ } else {
target |= (nack_busy_id << 24);
+ busy_mask |= (0x1UL <<
+ (nack_busy_id * 2));
+ }
__asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t"
"membar #Sync\n\t"
/* Now, poll for completion. */
{
- u64 dispatch_stat;
+ u64 dispatch_stat, nack_mask;
long stuck;
stuck = 100000 * nack_busy_id;
+ nack_mask = busy_mask << 1;
do {
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (dispatch_stat)
: "i" (ASI_INTR_DISPATCH_STAT));
- if (dispatch_stat == 0UL) {
+ if (!(dispatch_stat & (busy_mask | nack_mask))) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
if (unlikely(need_more)) {
}
if (!--stuck)
break;
- } while (dispatch_stat & 0x5555555555555555UL);
+ } while (dispatch_stat & busy_mask);
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
- if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
+ if (dispatch_stat & busy_mask) {
/* Busy bits will not clear, continue instead
* of freezing up on this cpu.
*/
int wait;
};
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
static struct call_data_struct *call_data;
extern unsigned long xcall_call_function;
* smp_call_function(): Run a function on all other CPUs.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
* @wait: If true, wait (atomically) until function has completed on other CPUs.
*
* Returns 0 on success, else a negative status code. Does not return until
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-static int smp_call_function_mask(void (*func)(void *info), void *info,
- int nonatomic, int wait, cpumask_t mask)
+static int sparc64_smp_call_function_mask(void (*func)(void *info), void *info,
+ int wait, cpumask_t mask)
{
struct call_data_struct data;
int cpus;
return 0;
}
-int smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
+int smp_call_function(void (*func)(void *info), void *info, int wait)
{
- return smp_call_function_mask(func, info, nonatomic, wait,
- cpu_online_map);
+ return sparc64_smp_call_function_mask(func, info, wait, cpu_online_map);
}
void smp_call_function_client(int irq, struct pt_regs *regs)
void smp_tsb_sync(struct mm_struct *mm)
{
- smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+ sparc64_smp_call_function_mask(tsb_sync, mm, 1, mm->cpu_vm_mask);
}
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_report_regs;
+#ifdef CONFIG_MAGIC_SYSRQ
+extern unsigned long xcall_fetch_glob_regs;
+#endif
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
+#ifdef CONFIG_KGDB
+extern unsigned long xcall_kgdb_capture;
+#endif
#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
extern atomic_t dcpage_flushes_xcall;
#endif
-static __inline__ void __local_flush_dcache_page(struct page *page)
+static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
__flush_dcache_page(page_address(page),
smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
+}
+#endif
+
void smp_report_regs(void)
{
smp_cross_call(&xcall_report_regs, 0, 0, 0);
}
+#ifdef CONFIG_MAGIC_SYSRQ
+void smp_fetch_global_regs(void)
+{
+ smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
+}
+#endif
+
/* We know that the window frames of the user have been flushed
* to the stack before we get here because all callers of us
* are flush_tlb_*() routines, and these run after flush_cache_*()
preempt_enable();
}
-void __init smp_tick_init(void)
-{
- boot_cpu_id = hard_smp_processor_id();
-}
-
/* /proc/profile writes can call this, don't __init it please. */
int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
-static void __init smp_tune_scheduling(void)
-{
- unsigned int smallest = ~0U;
- int i;
-
- for (i = 0; i < NR_CPUS; i++) {
- unsigned int val = cpu_data(i).ecache_size;
-
- if (val && val < smallest)
- smallest = val;
- }
-
- /* Any value less than 256K is nonsense. */
- if (smallest < (256U * 1024U))
- smallest = 256 * 1024;
-
- max_cache_size = smallest;
-
- if (smallest < 1U * 1024U * 1024U)
- printk(KERN_INFO "Using max_cache_size of %uKB\n",
- smallest / 1024U);
- else
- printk(KERN_INFO "Using max_cache_size of %uMB\n",
- smallest / 1024U / 1024U);
-}
-
-/* Constrain the number of cpus to max_cpus. */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int i;
-
- if (num_possible_cpus() > max_cpus) {
- for_each_possible_cpu(i) {
- if (i != boot_cpu_id) {
- cpu_clear(i, phys_cpu_present_map);
- cpu_clear(i, cpu_present_map);
- if (num_possible_cpus() <= max_cpus)
- break;
- }
- }
- }
-
- cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
- smp_tune_scheduling();
}
void __devinit smp_prepare_boot_cpu(void)
{
unsigned int i;
- for_each_possible_cpu(i) {
+ for_each_present_cpu(i) {
unsigned int j;
+ cpus_clear(cpu_core_map[i]);
if (cpu_data(i).core_id == 0) {
cpu_set(i, cpu_core_map[i]);
continue;
}
- for_each_possible_cpu(j) {
+ for_each_present_cpu(j) {
if (cpu_data(i).core_id ==
cpu_data(j).core_id)
cpu_set(j, cpu_core_map[i]);
}
}
- for_each_possible_cpu(i) {
+ for_each_present_cpu(i) {
unsigned int j;
+ cpus_clear(per_cpu(cpu_sibling_map, i));
if (cpu_data(i).proc_id == -1) {
- cpu_set(i, cpu_sibling_map[i]);
+ cpu_set(i, per_cpu(cpu_sibling_map, i));
continue;
}
- for_each_possible_cpu(j) {
+ for_each_present_cpu(j) {
if (cpu_data(i).proc_id ==
cpu_data(j).proc_id)
- cpu_set(j, cpu_sibling_map[i]);
+ cpu_set(j, per_cpu(cpu_sibling_map, i));
}
}
}
return ret;
}
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_play_dead(void)
{
- unsigned long bogosum = 0;
+ int cpu = smp_processor_id();
+ unsigned long pstate;
+
+ idle_task_exit();
+
+ if (tlb_type == hypervisor) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
+
+ sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
+ tb->cpu_mondo_pa, 0);
+ sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
+ tb->dev_mondo_pa, 0);
+ sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
+ tb->resum_mondo_pa, 0);
+ sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
+ tb->nonresum_mondo_pa, 0);
+ }
+
+ cpu_clear(cpu, smp_commenced_mask);
+ membar_safe("#Sync");
+
+ local_irq_disable();
+
+ __asm__ __volatile__(
+ "rdpr %%pstate, %0\n\t"
+ "wrpr %0, %1, %%pstate"
+ : "=r" (pstate)
+ : "i" (PSTATE_IE));
+
+ while (1)
+ barrier();
+}
+
+int __cpu_disable(void)
+{
+ int cpu = smp_processor_id();
+ cpuinfo_sparc *c;
int i;
- for_each_online_cpu(i)
- bogosum += cpu_data(i).udelay_val;
- printk("Total of %ld processors activated "
- "(%lu.%02lu BogoMIPS).\n",
- (long) num_online_cpus(),
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
+ for_each_cpu_mask(i, cpu_core_map[cpu])
+ cpu_clear(cpu, cpu_core_map[i]);
+ cpus_clear(cpu_core_map[cpu]);
+
+ for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+
+ c = &cpu_data(cpu);
+
+ c->core_id = 0;
+ c->proc_id = -1;
+
+ spin_lock(&call_lock);
+ cpu_clear(cpu, cpu_online_map);
+ spin_unlock(&call_lock);
+
+ smp_wmb();
+
+ /* Make sure no interrupts point to this cpu. */
+ fixup_irqs();
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ int i;
+
+ for (i = 0; i < 100; i++) {
+ smp_rmb();
+ if (!cpu_isset(cpu, smp_commenced_mask))
+ break;
+ msleep(100);
+ }
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ } else {
+#if defined(CONFIG_SUN_LDOMS)
+ unsigned long hv_err;
+ int limit = 100;
+
+ do {
+ hv_err = sun4v_cpu_stop(cpu);
+ if (hv_err == HV_EOK) {
+ cpu_clear(cpu, cpu_present_map);
+ break;
+ }
+ } while (--limit > 0);
+ if (limit <= 0) {
+ printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
+ hv_err);
+ }
+#endif
+ }
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
}
void smp_send_reschedule(int cpu)
void __init real_setup_per_cpu_areas(void)
{
- unsigned long goal, size, i;
+ unsigned long paddr, goal, size, i;
char *ptr;
/* Copy section for each CPU (we discard the original) */
for (size = PAGE_SIZE; size < goal; size <<= 1UL)
__per_cpu_shift++;
- ptr = alloc_bootmem_pages(size * NR_CPUS);
+ paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
+ if (!paddr) {
+ prom_printf("Cannot allocate per-cpu memory.\n");
+ prom_halt();
+ }
+ ptr = __va(paddr);
__per_cpu_base = ptr - __per_cpu_start;
for (i = 0; i < NR_CPUS; i++, ptr += size)