/* smp.c: Sparc64 SMP support.
*
- * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
*/
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
-#include <linux/bootmem.h>
+#include <linux/lmb.h>
+#include <linux/cpu.h>
#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/cpudata.h>
#include <asm/hvtramp.h>
#include <asm/io.h>
+#include <asm/timer.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
-#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>
-extern void calibrate_delay(void);
-
int sparc64_multi_core __read_mostly;
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
EXPORT_SYMBOL(cpu_possible_map);
EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
static cpumask_t smp_commenced_mask;
i, cpu_data(i).clock_tick);
}
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
-
extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
-void __devinit smp_callin(void)
+void __cpuinit smp_callin(void)
{
int cpuid = hard_smp_processor_id();
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
+ /* inform the notifiers about the new cpu */
+ notify_cpu_starting(cpuid);
+
while (!cpu_isset(cpuid, smp_commenced_mask))
rmb();
- spin_lock(&call_lock);
+ ipi_call_lock();
cpu_set(cpuid, cpu_online_map);
- spin_unlock(&call_lock);
+ ipi_call_unlock();
/* idle thread is expected to have preempt disabled */
preempt_disable();
for (i = 0; i < NUM_ITERS; i++) {
t0 = tick_ops->get_tick();
go[MASTER] = 1;
- membar_storeload();
+ membar_safe("#StoreLoad");
while (!(tm = go[SLAVE]))
rmb();
go[SLAVE] = 0;
t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif
- printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles,"
- "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);
+ printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
+ "(last diff %ld cycles, maxerr %lu cycles)\n",
+ smp_processor_id(), delta, rt);
}
static void smp_start_sync_tick_client(int cpu);
/* now let the client proceed into his loop */
go[MASTER] = 0;
- membar_storeload();
+ membar_safe("#StoreLoad");
spin_lock_irqsave(&itc_sync_lock, flags);
{
go[MASTER] = 0;
wmb();
go[SLAVE] = tick_ops->get_tick();
- membar_storeload();
+ membar_safe("#StoreLoad");
}
}
spin_unlock_irqrestore(&itc_sync_lock, flags);
return kern_base + (val - KERNBASE);
}
-static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
{
extern unsigned long sparc64_ttable_tl0;
extern unsigned long kern_locked_tte_data;
- extern int bigkernel;
struct hvtramp_descr *hdesc;
unsigned long trampoline_ra;
struct trap_per_cpu *tb;
u64 tte_vaddr, tte_data;
unsigned long hv_err;
+ int i;
- hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
+ hdesc = kzalloc(sizeof(*hdesc) +
+ (sizeof(struct hvtramp_mapping) *
+ num_kernel_image_mappings - 1),
+ GFP_KERNEL);
if (!hdesc) {
printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
"hvtramp_descr.\n");
}
hdesc->cpu = cpu;
- hdesc->num_mappings = (bigkernel ? 2 : 1);
+ hdesc->num_mappings = num_kernel_image_mappings;
tb = &trap_block[cpu];
tb->hdesc = hdesc;
tte_vaddr = (unsigned long) KERNBASE;
tte_data = kern_locked_tte_data;
- hdesc->maps[0].vaddr = tte_vaddr;
- hdesc->maps[0].tte = tte_data;
- if (bigkernel) {
+ for (i = 0; i < hdesc->num_mappings; i++) {
+ hdesc->maps[i].vaddr = tte_vaddr;
+ hdesc->maps[i].tte = tte_data;
tte_vaddr += 0x400000;
tte_data += 0x400000;
- hdesc->maps[1].vaddr = tte_vaddr;
- hdesc->maps[1].tte = tte_data;
}
trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
*/
static struct thread_info *cpu_new_thread = NULL;
-static int __devinit smp_boot_one_cpu(unsigned int cpu)
+static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
{
struct trap_per_cpu *tb = &trap_block[cpu];
unsigned long entry =
}
}
-static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
+ u64 *mondo, data0, data1, data2;
+ u16 *cpu_list;
u64 pstate;
int i;
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
- for_each_cpu_mask(i, mask)
- spitfire_xcall_helper(data0, data1, data2, pstate, i);
+ cpu_list = __va(tb->cpu_list_pa);
+ mondo = __va(tb->cpu_mondo_block_pa);
+ data0 = mondo[0];
+ data1 = mondo[1];
+ data2 = mondo[2];
+ for (i = 0; i < cnt; i++)
+ spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
}
/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
* packet, but we have no use for that. However we do take advantage of
* the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
*/
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
- u64 pstate, ver;
int nack_busy_id, is_jbus, need_more;
+ u64 *mondo, pstate, ver, busy_mask;
+ u16 *cpu_list;
- if (cpus_empty(mask))
- return;
+ cpu_list = __va(tb->cpu_list_pa);
+ mondo = __va(tb->cpu_mondo_block_pa);
/* Unfortunately, someone at Sun had the brilliant idea to make the
* busy/nack fields hard-coded by ITID number for this Ultra-III
"stxa %2, [%5] %6\n\t"
"membar #Sync\n\t"
: /* no outputs */
- : "r" (data0), "r" (data1), "r" (data2),
+ : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
"r" (0x40), "r" (0x50), "r" (0x60),
"i" (ASI_INTR_W));
nack_busy_id = 0;
+ busy_mask = 0;
{
int i;
- for_each_cpu_mask(i, mask) {
- u64 target = (i << 14) | 0x70;
+ for (i = 0; i < cnt; i++) {
+ u64 target, nr;
+
+ nr = cpu_list[i];
+ if (nr == 0xffff)
+ continue;
- if (!is_jbus)
+ target = (nr << 14) | 0x70;
+ if (is_jbus) {
+ busy_mask |= (0x1UL << (nr * 2));
+ } else {
target |= (nack_busy_id << 24);
+ busy_mask |= (0x1UL <<
+ (nack_busy_id * 2));
+ }
__asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t"
"membar #Sync\n\t"
/* Now, poll for completion. */
{
- u64 dispatch_stat;
+ u64 dispatch_stat, nack_mask;
long stuck;
stuck = 100000 * nack_busy_id;
+ nack_mask = busy_mask << 1;
do {
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (dispatch_stat)
: "i" (ASI_INTR_DISPATCH_STAT));
- if (dispatch_stat == 0UL) {
+ if (!(dispatch_stat & (busy_mask | nack_mask))) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
if (unlikely(need_more)) {
- int i, cnt = 0;
- for_each_cpu_mask(i, mask) {
- cpu_clear(i, mask);
- cnt++;
- if (cnt == 32)
+ int i, this_cnt = 0;
+ for (i = 0; i < cnt; i++) {
+ if (cpu_list[i] == 0xffff)
+ continue;
+ cpu_list[i] = 0xffff;
+ this_cnt++;
+ if (this_cnt == 32)
break;
}
goto retry;
}
if (!--stuck)
break;
- } while (dispatch_stat & 0x5555555555555555UL);
+ } while (dispatch_stat & busy_mask);
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
- if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
+ if (dispatch_stat & busy_mask) {
/* Busy bits will not clear, continue instead
* of freezing up on this cpu.
*/
/* Clear out the mask bits for cpus which did not
* NACK us.
*/
- for_each_cpu_mask(i, mask) {
- u64 check_mask;
+ for (i = 0; i < cnt; i++) {
+ u64 check_mask, nr;
+
+ nr = cpu_list[i];
+ if (nr == 0xffff)
+ continue;
if (is_jbus)
- check_mask = (0x2UL << (2*i));
+ check_mask = (0x2UL << (2*nr));
else
check_mask = (0x2UL <<
this_busy_nack);
if ((dispatch_stat & check_mask) == 0)
- cpu_clear(i, mask);
+ cpu_list[i] = 0xffff;
this_busy_nack += 2;
if (this_busy_nack == 64)
break;
}
/* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
- struct trap_per_cpu *tb;
+ int retries, this_cpu, prev_sent, i, saw_cpu_error;
+ unsigned long status;
u16 *cpu_list;
- u64 *mondo;
- cpumask_t error_mask;
- unsigned long flags, status;
- int cnt, retries, this_cpu, prev_sent, i;
-
- if (cpus_empty(mask))
- return;
-
- /* We have to do this whole thing with interrupts fully disabled.
- * Otherwise if we send an xcall from interrupt context it will
- * corrupt both our mondo block and cpu list state.
- *
- * One consequence of this is that we cannot use timeout mechanisms
- * that depend upon interrupts being delivered locally. So, for
- * example, we cannot sample jiffies and expect it to advance.
- *
- * Fortunately, udelay() uses %stick/%tick so we can use that.
- */
- local_irq_save(flags);
this_cpu = smp_processor_id();
- tb = &trap_block[this_cpu];
-
- mondo = __va(tb->cpu_mondo_block_pa);
- mondo[0] = data0;
- mondo[1] = data1;
- mondo[2] = data2;
- wmb();
cpu_list = __va(tb->cpu_list_pa);
- /* Setup the initial cpu list. */
- cnt = 0;
- for_each_cpu_mask(i, mask)
- cpu_list[cnt++] = i;
-
- cpus_clear(error_mask);
+ saw_cpu_error = 0;
retries = 0;
prev_sent = 0;
do {
continue;
err = sun4v_cpu_state(cpu);
- if (err >= 0 &&
- err == HV_CPU_STATE_ERROR) {
+ if (err == HV_CPU_STATE_ERROR) {
+ saw_cpu_error = (cpu + 1);
cpu_list[i] = 0xffff;
- cpu_set(cpu, error_mask);
}
}
} else if (unlikely(status != HV_EWOULDBLOCK))
}
} while (1);
- local_irq_restore(flags);
-
- if (unlikely(!cpus_empty(error_mask)))
+ if (unlikely(saw_cpu_error))
goto fatal_mondo_cpu_error;
return;
fatal_mondo_cpu_error:
printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
- "were in error state\n",
- this_cpu);
- printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
- for_each_cpu_mask(i, error_mask)
- printk("%d ", i);
- printk("]\n");
+ "(including %d) were in error state\n",
+ this_cpu, saw_cpu_error - 1);
return;
fatal_mondo_timeout:
- local_irq_restore(flags);
printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
" progress after %d retries.\n",
this_cpu, retries);
goto dump_cpu_list_and_out;
fatal_mondo_error:
- local_irq_restore(flags);
printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
this_cpu, status);
printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
printk("]\n");
}
-/* Send cross call to all processors mentioned in MASK
- * except self.
- */
-static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
-{
- u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
- int this_cpu = get_cpu();
-
- cpus_and(mask, mask, cpu_online_map);
- cpu_clear(this_cpu, mask);
-
- if (tlb_type == spitfire)
- spitfire_xcall_deliver(data0, data1, data2, mask);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- cheetah_xcall_deliver(data0, data1, data2, mask);
- else
- hypervisor_xcall_deliver(data0, data1, data2, mask);
- /* NOTE: Caller runs local copy on master. */
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
- put_cpu();
-}
+static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+{
+ struct trap_per_cpu *tb;
+ int this_cpu, i, cnt;
+ unsigned long flags;
+ u16 *cpu_list;
+ u64 *mondo;
-extern unsigned long xcall_sync_tick;
+ /* We have to do this whole thing with interrupts fully disabled.
+ * Otherwise if we send an xcall from interrupt context it will
+ * corrupt both our mondo block and cpu list state.
+ *
+ * One consequence of this is that we cannot use timeout mechanisms
+ * that depend upon interrupts being delivered locally. So, for
+ * example, we cannot sample jiffies and expect it to advance.
+ *
+ * Fortunately, udelay() uses %stick/%tick so we can use that.
+ */
+ local_irq_save(flags);
-static void smp_start_sync_tick_client(int cpu)
-{
- cpumask_t mask = cpumask_of_cpu(cpu);
+ this_cpu = smp_processor_id();
+ tb = &trap_block[this_cpu];
- smp_cross_call_masked(&xcall_sync_tick,
- 0, 0, 0, mask);
-}
+ mondo = __va(tb->cpu_mondo_block_pa);
+ mondo[0] = data0;
+ mondo[1] = data1;
+ mondo[2] = data2;
+ wmb();
-/* Send cross call to all processors except self. */
-#define smp_cross_call(func, ctx, data1, data2) \
- smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
+ cpu_list = __va(tb->cpu_list_pa);
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t finished;
- int wait;
-};
+ /* Setup the initial cpu list. */
+ cnt = 0;
+ for_each_cpu_mask_nr(i, *mask) {
+ if (i == this_cpu || !cpu_online(i))
+ continue;
+ cpu_list[cnt++] = i;
+ }
-static struct call_data_struct *call_data;
+ if (cnt)
+ xcall_deliver_impl(tb, cnt);
-extern unsigned long xcall_call_function;
+ local_irq_restore(flags);
+}
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
+/* Send cross call to all processors mentioned in MASK_P
+ * except self. Really, there are only two cases currently,
+ * "&cpu_online_map" and "&mm->cpu_vm_mask".
*/
-static int smp_call_function_mask(void (*func)(void *info), void *info,
- int nonatomic, int wait, cpumask_t mask)
+static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
- struct call_data_struct data;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.finished, 0);
- data.wait = wait;
-
- spin_lock(&call_lock);
+ u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
- cpu_clear(smp_processor_id(), mask);
- cpus = cpus_weight(mask);
- if (!cpus)
- goto out_unlock;
+ xcall_deliver(data0, data1, data2, mask);
+}
- call_data = &data;
- mb();
+/* Send cross call to all processors except self. */
+static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
+{
+ smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
+}
- smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
+extern unsigned long xcall_sync_tick;
- /* Wait for response */
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+static void smp_start_sync_tick_client(int cpu)
+{
+ xcall_deliver((u64) &xcall_sync_tick, 0, 0,
+ &cpumask_of_cpu(cpu));
+}
-out_unlock:
- spin_unlock(&call_lock);
+extern unsigned long xcall_call_function;
- return 0;
+void arch_send_call_function_ipi(cpumask_t mask)
+{
+ xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
}
-int smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
+extern unsigned long xcall_call_function_single;
+
+void arch_send_call_function_single_ipi(int cpu)
{
- return smp_call_function_mask(func, info, nonatomic, wait,
- cpu_online_map);
+ xcall_deliver((u64) &xcall_call_function_single, 0, 0,
+ &cpumask_of_cpu(cpu));
}
void smp_call_function_client(int irq, struct pt_regs *regs)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
+ clear_softint(1 << irq);
+ generic_smp_call_function_interrupt();
+}
+void smp_call_function_single_client(int irq, struct pt_regs *regs)
+{
clear_softint(1 << irq);
- if (call_data->wait) {
- /* let initiator proceed only after completion */
- func(info);
- atomic_inc(&call_data->finished);
- } else {
- /* let initiator proceed after getting data */
- atomic_inc(&call_data->finished);
- func(info);
- }
+ generic_smp_call_function_single_interrupt();
}
static void tsb_sync(void *info)
void smp_tsb_sync(struct mm_struct *mm)
{
- smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+ smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
}
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
-extern unsigned long xcall_report_regs;
+extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
+#ifdef CONFIG_KGDB
+extern unsigned long xcall_kgdb_capture;
+#endif
#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
extern atomic_t dcpage_flushes_xcall;
#endif
-static __inline__ void __local_flush_dcache_page(struct page *page)
+static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
__flush_dcache_page(page_address(page),
void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
- cpumask_t mask = cpumask_of_cpu(cpu);
int this_cpu;
if (tlb_type == hypervisor)
__local_flush_dcache_page(page);
} else if (cpu_online(cpu)) {
void *pg_addr = page_address(page);
- u64 data0;
+ u64 data0 = 0;
if (tlb_type == spitfire) {
- data0 =
- ((u64)&xcall_flush_dcache_page_spitfire);
+ data0 = ((u64)&xcall_flush_dcache_page_spitfire);
if (page_mapping(page) != NULL)
data0 |= ((u64)1 << 32);
- spitfire_xcall_deliver(data0,
- __pa(pg_addr),
- (u64) pg_addr,
- mask);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
- data0 =
- ((u64)&xcall_flush_dcache_page_cheetah);
- cheetah_xcall_deliver(data0,
- __pa(pg_addr),
- 0, mask);
+ data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
}
+ if (data0) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, &cpumask_of_cpu(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ }
}
put_cpu();
void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
- void *pg_addr = page_address(page);
- cpumask_t mask = cpu_online_map;
- u64 data0;
+ void *pg_addr;
int this_cpu;
+ u64 data0;
if (tlb_type == hypervisor)
return;
this_cpu = get_cpu();
- cpu_clear(this_cpu, mask);
-
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes);
#endif
- if (cpus_empty(mask))
- goto flush_self;
+ data0 = 0;
+ pg_addr = page_address(page);
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
if (page_mapping(page) != NULL)
data0 |= ((u64)1 << 32);
- spitfire_xcall_deliver(data0,
- __pa(pg_addr),
- (u64) pg_addr,
- mask);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
data0 = ((u64)&xcall_flush_dcache_page_cheetah);
- cheetah_xcall_deliver(data0,
- __pa(pg_addr),
- 0, mask);
#endif
}
+ if (data0) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, &cpu_online_map);
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
- flush_self:
+ }
__local_flush_dcache_page(page);
put_cpu();
}
-static void __smp_receive_signal_mask(cpumask_t mask)
-{
- smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
-}
-
-void smp_receive_signal(int cpu)
-{
- cpumask_t mask = cpumask_of_cpu(cpu);
-
- if (cpu_online(cpu))
- __smp_receive_signal_mask(mask);
-}
-
-void smp_receive_signal_client(int irq, struct pt_regs *regs)
-{
- clear_softint(1 << irq);
-}
-
void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
struct mm_struct *mm;
smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}
-void smp_report_regs(void)
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(unsigned long flags)
{
- smp_cross_call(&xcall_report_regs, 0, 0, 0);
+ smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
+}
+#endif
+
+void smp_fetch_global_regs(void)
+{
+ smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
}
/* We know that the window frames of the user have been flushed
smp_cross_call_masked(&xcall_flush_tlb_mm,
ctx, 0, 0,
- mm->cpu_vm_mask);
+ &mm->cpu_vm_mask);
local_flush_and_out:
__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
else
smp_cross_call_masked(&xcall_flush_tlb_pending,
ctx, nr, (unsigned long) vaddrs,
- mm->cpu_vm_mask);
+ &mm->cpu_vm_mask);
__flush_tlb_pending(ctx, nr, vaddrs);
smp_processor_id());
#endif
penguins_are_doing_time = 1;
- membar_storestore_loadstore();
atomic_inc(&smp_capture_registry);
smp_cross_call(&xcall_capture, 0, 0, 0);
while (atomic_read(&smp_capture_registry) != ncpus)
smp_processor_id());
#endif
penguins_are_doing_time = 0;
- membar_storeload_storestore();
+ membar_safe("#StoreLoad");
atomic_dec(&smp_capture_registry);
}
}
-/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
- * can service tlb flush xcalls...
+/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
+ * set, so they can service tlb flush xcalls...
*/
extern void prom_world(int);
__asm__ __volatile__("flushw");
prom_world(1);
atomic_inc(&smp_capture_registry);
- membar_storeload_storestore();
+ membar_safe("#StoreLoad");
while (penguins_are_doing_time)
rmb();
atomic_dec(&smp_capture_registry);
{
}
+void __init smp_setup_processor_id(void)
+{
+ if (tlb_type == spitfire)
+ xcall_deliver_impl = spitfire_xcall_deliver;
+ else if (tlb_type == cheetah || tlb_type == cheetah_plus)
+ xcall_deliver_impl = cheetah_xcall_deliver;
+ else
+ xcall_deliver_impl = hypervisor_xcall_deliver;
+}
+
void __devinit smp_fill_in_sib_core_maps(void)
{
unsigned int i;
for_each_present_cpu(i) {
unsigned int j;
- cpus_clear(cpu_sibling_map[i]);
+ cpus_clear(per_cpu(cpu_sibling_map, i));
if (cpu_data(i).proc_id == -1) {
- cpu_set(i, cpu_sibling_map[i]);
+ cpu_set(i, per_cpu(cpu_sibling_map, i));
continue;
}
for_each_present_cpu(j) {
if (cpu_data(i).proc_id ==
cpu_data(j).proc_id)
- cpu_set(j, cpu_sibling_map[i]);
+ cpu_set(j, per_cpu(cpu_sibling_map, i));
}
}
}
cpu_clear(cpu, cpu_core_map[i]);
cpus_clear(cpu_core_map[cpu]);
- for_each_cpu_mask(i, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[i]);
- cpus_clear(cpu_sibling_map[cpu]);
+ for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
c = &cpu_data(cpu);
c->core_id = 0;
c->proc_id = -1;
- spin_lock(&call_lock);
- cpu_clear(cpu, cpu_online_map);
- spin_unlock(&call_lock);
-
smp_wmb();
/* Make sure no interrupts point to this cpu. */
mdelay(1);
local_irq_disable();
+ ipi_call_lock();
+ cpu_clear(cpu, cpu_online_map);
+ ipi_call_unlock();
+
return 0;
}
void smp_send_reschedule(int cpu)
{
- smp_receive_signal(cpu);
+ xcall_deliver((u64) &xcall_receive_signal, 0, 0,
+ &cpumask_of_cpu(cpu));
+}
+
+void smp_receive_signal_client(int irq, struct pt_regs *regs)
+{
+ clear_softint(1 << irq);
}
/* This is a nop because we capture all other cpus
void __init real_setup_per_cpu_areas(void)
{
- unsigned long goal, size, i;
+ unsigned long paddr, goal, size, i;
char *ptr;
/* Copy section for each CPU (we discard the original) */
for (size = PAGE_SIZE; size < goal; size <<= 1UL)
__per_cpu_shift++;
- ptr = alloc_bootmem_pages(size * NR_CPUS);
+ paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
+ if (!paddr) {
+ prom_printf("Cannot allocate per-cpu memory.\n");
+ prom_halt();
+ }
+ ptr = __va(paddr);
__per_cpu_base = ptr - __per_cpu_start;
for (i = 0; i < NR_CPUS; i++, ptr += size)