Blackfin arch: SMP supporting patchset: Blackfin kernel and memory management code
author Graf Yang <graf.yang@analog.com>
Tue, 18 Nov 2008 09:48:22 +0000 (17:48 +0800)
committer Bryan Wu <cooloney@kernel.org>
Tue, 18 Nov 2008 09:48:22 +0000 (17:48 +0800)
The Blackfin dual-core BF561 processor can support SMP-like features.
https://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:smp-like

This patch adds SMP extensions to the Blackfin kernel and memory management code.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
14 files changed:
arch/blackfin/kernel/asm-offsets.c
arch/blackfin/kernel/bfin_ksyms.c
arch/blackfin/kernel/entry.S
arch/blackfin/kernel/irqchip.c
arch/blackfin/kernel/kgdb.c
arch/blackfin/kernel/module.c
arch/blackfin/kernel/process.c
arch/blackfin/kernel/ptrace.c
arch/blackfin/kernel/reboot.c
arch/blackfin/kernel/setup.c
arch/blackfin/kernel/time.c
arch/blackfin/kernel/traps.c
arch/blackfin/mm/init.c
arch/blackfin/mm/sram-alloc.c

index 9bb85dd..b5df945 100644 (file)
@@ -56,6 +56,9 @@ int main(void)
        /* offsets into the thread struct */
        DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
        DEFINE(THREAD_USP, offsetof(struct thread_struct, usp));
+       DEFINE(THREAD_SR, offsetof(struct thread_struct, seqstat));
+       DEFINE(PT_SR, offsetof(struct thread_struct, seqstat));
+       DEFINE(THREAD_ESP0, offsetof(struct thread_struct, esp0));
        DEFINE(THREAD_PC, offsetof(struct thread_struct, pc));
        DEFINE(KERNEL_STACK_SIZE, THREAD_SIZE);
 
@@ -128,5 +131,31 @@ int main(void)
        DEFINE(SIGSEGV, SIGSEGV);
        DEFINE(SIGTRAP, SIGTRAP);
 
+       /* PDA management (in L1 scratchpad) */
+       DEFINE(PDA_SYSCFG, offsetof(struct blackfin_pda, syscfg));
+#ifdef CONFIG_SMP
+       DEFINE(PDA_IRQFLAGS, offsetof(struct blackfin_pda, imask));
+#endif
+       DEFINE(PDA_IPDT, offsetof(struct blackfin_pda, ipdt));
+       DEFINE(PDA_IPDT_SWAPCOUNT, offsetof(struct blackfin_pda, ipdt_swapcount));
+       DEFINE(PDA_DPDT, offsetof(struct blackfin_pda, dpdt));
+       DEFINE(PDA_DPDT_SWAPCOUNT, offsetof(struct blackfin_pda, dpdt_swapcount));
+       DEFINE(PDA_EXIPTR, offsetof(struct blackfin_pda, ex_iptr));
+       DEFINE(PDA_EXOPTR, offsetof(struct blackfin_pda, ex_optr));
+       DEFINE(PDA_EXBUF, offsetof(struct blackfin_pda, ex_buf));
+       DEFINE(PDA_EXIMASK, offsetof(struct blackfin_pda, ex_imask));
+       DEFINE(PDA_EXSTACK, offsetof(struct blackfin_pda, ex_stack));
+#ifdef ANOMALY_05000261
+       DEFINE(PDA_LFRETX, offsetof(struct blackfin_pda, last_cplb_fault_retx));
+#endif
+       DEFINE(PDA_DCPLB, offsetof(struct blackfin_pda, dcplb_fault_addr));
+       DEFINE(PDA_ICPLB, offsetof(struct blackfin_pda, icplb_fault_addr));
+       DEFINE(PDA_RETX, offsetof(struct blackfin_pda, retx));
+       DEFINE(PDA_SEQSTAT, offsetof(struct blackfin_pda, seqstat));
+#ifdef CONFIG_SMP
+       /* Inter-core lock (in L2 SRAM) */
+       DEFINE(SIZEOF_CORELOCK, sizeof(struct corelock_slot));
+#endif
+
        return 0;
 }
index b66f1d4..763c315 100644 (file)
@@ -68,3 +68,37 @@ EXPORT_SYMBOL(insw_8);
 EXPORT_SYMBOL(outsl);
 EXPORT_SYMBOL(insl);
 EXPORT_SYMBOL(insl_16);
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(__raw_atomic_update_asm);
+EXPORT_SYMBOL(__raw_atomic_clear_asm);
+EXPORT_SYMBOL(__raw_atomic_set_asm);
+EXPORT_SYMBOL(__raw_atomic_xor_asm);
+EXPORT_SYMBOL(__raw_atomic_test_asm);
+EXPORT_SYMBOL(__raw_xchg_1_asm);
+EXPORT_SYMBOL(__raw_xchg_2_asm);
+EXPORT_SYMBOL(__raw_xchg_4_asm);
+EXPORT_SYMBOL(__raw_cmpxchg_1_asm);
+EXPORT_SYMBOL(__raw_cmpxchg_2_asm);
+EXPORT_SYMBOL(__raw_cmpxchg_4_asm);
+EXPORT_SYMBOL(__raw_spin_is_locked_asm);
+EXPORT_SYMBOL(__raw_spin_lock_asm);
+EXPORT_SYMBOL(__raw_spin_trylock_asm);
+EXPORT_SYMBOL(__raw_spin_unlock_asm);
+EXPORT_SYMBOL(__raw_read_lock_asm);
+EXPORT_SYMBOL(__raw_read_trylock_asm);
+EXPORT_SYMBOL(__raw_read_unlock_asm);
+EXPORT_SYMBOL(__raw_write_lock_asm);
+EXPORT_SYMBOL(__raw_write_trylock_asm);
+EXPORT_SYMBOL(__raw_write_unlock_asm);
+EXPORT_SYMBOL(__raw_bit_set_asm);
+EXPORT_SYMBOL(__raw_bit_clear_asm);
+EXPORT_SYMBOL(__raw_bit_toggle_asm);
+EXPORT_SYMBOL(__raw_bit_test_asm);
+EXPORT_SYMBOL(__raw_bit_test_set_asm);
+EXPORT_SYMBOL(__raw_bit_test_clear_asm);
+EXPORT_SYMBOL(__raw_bit_test_toggle_asm);
+EXPORT_SYMBOL(__raw_uncached_fetch_asm);
+EXPORT_SYMBOL(__raw_smp_mark_barrier_asm);
+EXPORT_SYMBOL(__raw_smp_check_barrier_asm);
+#endif
index faea88e..c0c3fe8 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/errno.h>
+#include <asm/blackfin.h>
 #include <asm/asm-offsets.h>
 
 #include <asm/context.S>
index 07402f5..9eebb78 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/irq.h>
 #include <asm/trace.h>
 
-static unsigned long irq_err_count;
+static atomic_t irq_err_count;
 static spinlock_t irq_controller_lock;
 
 /*
@@ -48,7 +48,7 @@ void dummy_mask_unmask_irq(unsigned int irq)
 
 void ack_bad_irq(unsigned int irq)
 {
-       irq_err_count += 1;
+       atomic_inc(&irq_err_count);
        printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
 }
 EXPORT_SYMBOL(ack_bad_irq);
@@ -72,7 +72,7 @@ static struct irq_desc bad_irq_desc = {
 
 int show_interrupts(struct seq_file *p, void *v)
 {
-       int i = *(loff_t *) v;
+       int i = *(loff_t *) v, j;
        struct irqaction *action;
        unsigned long flags;
 
@@ -80,19 +80,20 @@ int show_interrupts(struct seq_file *p, void *v)
                spin_lock_irqsave(&irq_desc[i].lock, flags);
                action = irq_desc[i].action;
                if (!action)
-                       goto unlock;
-
-               seq_printf(p, "%3d: %10u ", i, kstat_irqs(i));
+                       goto skip;
+               seq_printf(p, "%3d: ", i);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               seq_printf(p, " %8s", irq_desc[i].chip->name);
                seq_printf(p, "  %s", action->name);
                for (action = action->next; action; action = action->next)
-                       seq_printf(p, ", %s", action->name);
+                       seq_printf(p, "  %s", action->name);
 
                seq_putc(p, '\n');
-unlock:
+skip:
                spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-       } else if (i == NR_IRQS) {
-               seq_printf(p, "Err: %10lu\n", irq_err_count);
-       }
+       } else if (i == NR_IRQS)
+               seq_printf(p, "Err: %10u\n",  atomic_read(&irq_err_count));
        return 0;
 }
 
@@ -101,7 +102,6 @@ int show_interrupts(struct seq_file *p, void *v)
  * come via this function.  Instead, they should provide their
  * own 'handler'
  */
-
 #ifdef CONFIG_DO_IRQ_L1
 __attribute__((l1_text))
 #endif
index b795a20..ab40221 100644 (file)
@@ -363,12 +363,12 @@ void kgdb_passive_cpu_callback(void *info)
 
 void kgdb_roundup_cpus(unsigned long flags)
 {
-       smp_call_function(kgdb_passive_cpu_callback, NULL, 0, 0);
+       smp_call_function(kgdb_passive_cpu_callback, NULL, 0);
 }
 
 void kgdb_roundup_cpu(int cpu, unsigned long flags)
 {
-       smp_call_function_single(cpu, kgdb_passive_cpu_callback, NULL, 0, 0);
+       smp_call_function_single(cpu, kgdb_passive_cpu_callback, NULL, 0);
 }
 #endif
 
index e1bebc8..2e14cad 100644 (file)
@@ -343,7 +343,13 @@ apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
                pr_debug("location is %x, value is %x type is %d \n",
                         (unsigned int) location32, value,
                         ELF32_R_TYPE(rel[i].r_info));
-
+#ifdef CONFIG_SMP
+               if ((unsigned long)location16 >= COREB_L1_DATA_A_START) {
+                       printk(KERN_ERR "module %s: cannot relocate in L1: %u (SMP kernel)",
+                                      mod->name, ELF32_R_TYPE(rel[i].r_info));
+                       return -ENOEXEC;
+               }
+#endif
                switch (ELF32_R_TYPE(rel[i].r_info)) {
 
                case R_pcrel24:
@@ -436,6 +442,7 @@ module_finalize(const Elf_Ehdr * hdr,
 {
        unsigned int i, strindex = 0, symindex = 0;
        char *secstrings;
+       long err = 0;
 
        secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
@@ -460,8 +467,10 @@ module_finalize(const Elf_Ehdr * hdr,
                    (strcmp(".rela.l1.text", secstrings + sechdrs[i].sh_name) == 0) ||
                    ((strcmp(".rela.text", secstrings + sechdrs[i].sh_name) == 0) &&
                        (hdr->e_flags & (EF_BFIN_CODE_IN_L1|EF_BFIN_CODE_IN_L2))))) {
-                       apply_relocate_add((Elf_Shdr *) sechdrs, strtab,
+                       err = apply_relocate_add((Elf_Shdr *) sechdrs, strtab,
                                           symindex, i, mod);
+                       if (err < 0)
+                               return -ENOEXEC;
                }
        }
        return 0;
index 326e301..4359ea2 100644 (file)
@@ -171,6 +171,13 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
        unsigned long clone_flags;
        unsigned long newsp;
 
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       if (current->rt.nr_cpus_allowed == num_possible_cpus()) {
+               current->cpus_allowed = cpumask_of_cpu(smp_processor_id());
+               current->rt.nr_cpus_allowed = 1;
+       }
+#endif
+
        /* syscall2 puts clone_flags in r0 and usp in r1 */
        clone_flags = regs->r0;
        newsp = regs->r1;
@@ -338,22 +345,22 @@ int _access_ok(unsigned long addr, unsigned long size)
        if (addr >= (unsigned long)__init_begin &&
            addr + size <= (unsigned long)__init_end)
                return 1;
-       if (addr >= L1_SCRATCH_START
-           && addr + size <= L1_SCRATCH_START + L1_SCRATCH_LENGTH)
+       if (addr >= get_l1_scratch_start()
+           && addr + size <= get_l1_scratch_start() + L1_SCRATCH_LENGTH)
                return 1;
 #if L1_CODE_LENGTH != 0
-       if (addr >= L1_CODE_START + (_etext_l1 - _stext_l1)
-           && addr + size <= L1_CODE_START + L1_CODE_LENGTH)
+       if (addr >= get_l1_code_start() + (_etext_l1 - _stext_l1)
+           && addr + size <= get_l1_code_start() + L1_CODE_LENGTH)
                return 1;
 #endif
 #if L1_DATA_A_LENGTH != 0
-       if (addr >= L1_DATA_A_START + (_ebss_l1 - _sdata_l1)
-           && addr + size <= L1_DATA_A_START + L1_DATA_A_LENGTH)
+       if (addr >= get_l1_data_a_start() + (_ebss_l1 - _sdata_l1)
+           && addr + size <= get_l1_data_a_start() + L1_DATA_A_LENGTH)
                return 1;
 #endif
 #if L1_DATA_B_LENGTH != 0
-       if (addr >= L1_DATA_B_START + (_ebss_b_l1 - _sdata_b_l1)
-           && addr + size <= L1_DATA_B_START + L1_DATA_B_LENGTH)
+       if (addr >= get_l1_data_b_start() + (_ebss_b_l1 - _sdata_b_l1)
+           && addr + size <= get_l1_data_b_start() + L1_DATA_B_LENGTH)
                return 1;
 #endif
 #if L2_LENGTH != 0
index 140bf00..4de44f3 100644 (file)
@@ -220,8 +220,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                break;
                        pr_debug("ptrace: user address is valid\n");
 
-                       if (L1_CODE_LENGTH != 0 && addr >= L1_CODE_START
-                           && addr + sizeof(tmp) <= L1_CODE_START + L1_CODE_LENGTH) {
+                       if (L1_CODE_LENGTH != 0 && addr >= get_l1_code_start()
+                           && addr + sizeof(tmp) <= get_l1_code_start() + L1_CODE_LENGTH) {
                                safe_dma_memcpy (&tmp, (const void *)(addr), sizeof(tmp));
                                copied = sizeof(tmp);
 
@@ -300,8 +300,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                break;
                        pr_debug("ptrace: user address is valid\n");
 
-                       if (L1_CODE_LENGTH != 0 && addr >= L1_CODE_START
-                           && addr + sizeof(data) <= L1_CODE_START + L1_CODE_LENGTH) {
+                       if (L1_CODE_LENGTH != 0 && addr >= get_l1_code_start()
+                           && addr + sizeof(data) <= get_l1_code_start() + L1_CODE_LENGTH) {
                                safe_dma_memcpy ((void *)(addr), &data, sizeof(data));
                                copied = sizeof(data);
 
index ae97ca4..eeee8cb 100644 (file)
@@ -21,7 +21,7 @@
  * the core reset.
  */
 __attribute__((l1_text))
-static void bfin_reset(void)
+static void _bfin_reset(void)
 {
        /* Wait for completion of "system" events such as cache line
         * line fills so that we avoid infinite stalls later on as
@@ -66,6 +66,18 @@ static void bfin_reset(void)
        }
 }
 
+static void bfin_reset(void)
+{
+       if (ANOMALY_05000353 || ANOMALY_05000386)
+               _bfin_reset();
+       else
+               /* the bootrom checks to see how it was reset and will
+                * automatically perform a software reset for us when
+                * it starts executing boot
+                */
+               asm("raise 1;");
+}
+
 __attribute__((weak))
 void native_machine_restart(char *cmd)
 {
@@ -75,14 +87,10 @@ void machine_restart(char *cmd)
 {
        native_machine_restart(cmd);
        local_irq_disable();
-       if (ANOMALY_05000353 || ANOMALY_05000386)
-               bfin_reset();
+       if (smp_processor_id())
+               smp_call_function((void *)bfin_reset, 0, 1);
        else
-               /* the bootrom checks to see how it was reset and will
-                * automatically perform a software reset for us when
-                * it starts executing boot
-                */
-               asm("raise 1;");
+               bfin_reset();
 }
 
 __attribute__((weak))
index 71a9a8c..c644d23 100644 (file)
 #include <asm/blackfin.h>
 #include <asm/cplbinit.h>
 #include <asm/div64.h>
+#include <asm/cpu.h>
 #include <asm/fixed_code.h>
 #include <asm/early_printk.h>
 
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
-
 u16 _bfin_swrst;
 EXPORT_SYMBOL(_bfin_swrst);
 
@@ -79,29 +78,76 @@ static struct change_member *change_point[2*BFIN_MEMMAP_MAX] __initdata;
 static struct bfin_memmap_entry *overlap_list[BFIN_MEMMAP_MAX] __initdata;
 static struct bfin_memmap_entry new_map[BFIN_MEMMAP_MAX] __initdata;
 
-void __init bfin_cache_init(void)
-{
+DEFINE_PER_CPU(struct blackfin_cpudata, cpu_data);
+
 #if defined(CONFIG_BFIN_DCACHE) || defined(CONFIG_BFIN_ICACHE)
-       generate_cplb_tables();
+void __init generate_cplb_tables(void)
+{
+       unsigned int cpu;
+
+       /* Generate per-CPU I&D CPLB tables */
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu)
+               generate_cplb_tables_cpu(cpu);
+}
 #endif
 
+void __cpuinit bfin_setup_caches(unsigned int cpu)
+{
 #ifdef CONFIG_BFIN_ICACHE
-       bfin_icache_init();
-       printk(KERN_INFO "Instruction Cache Enabled\n");
+#ifdef CONFIG_MPU
+       bfin_icache_init(icplb_tbl[cpu]);
+#else
+       bfin_icache_init(icplb_tables[cpu]);
+#endif
 #endif
 
 #ifdef CONFIG_BFIN_DCACHE
-       bfin_dcache_init();
-       printk(KERN_INFO "Data Cache Enabled"
+#ifdef CONFIG_MPU
+       bfin_dcache_init(dcplb_tbl[cpu]);
+#else
+       bfin_dcache_init(dcplb_tables[cpu]);
+#endif
+#endif
+
+       /*
+        * In cache coherence emulation mode, we need to have the
+        * D-cache enabled before running any atomic operation which
+        * might invove cache invalidation (i.e. spinlock, rwlock).
+        * So printk's are deferred until then.
+        */
+#ifdef CONFIG_BFIN_ICACHE
+       printk(KERN_INFO "Instruction Cache Enabled for CPU%u\n", cpu);
+#endif
+#ifdef CONFIG_BFIN_DCACHE
+       printk(KERN_INFO "Data Cache Enabled for CPU%u"
 # if defined CONFIG_BFIN_WB
                " (write-back)"
 # elif defined CONFIG_BFIN_WT
                " (write-through)"
 # endif
-               "\n");
+               "\n", cpu);
 #endif
 }
 
+void __cpuinit bfin_setup_cpudata(unsigned int cpu)
+{
+       struct blackfin_cpudata *cpudata = &per_cpu(cpu_data, cpu);
+
+       cpudata->idle = current;
+       cpudata->loops_per_jiffy = loops_per_jiffy;
+       cpudata->cclk = get_cclk();
+       cpudata->imemctl = bfin_read_IMEM_CONTROL();
+       cpudata->dmemctl = bfin_read_DMEM_CONTROL();
+}
+
+void __init bfin_cache_init(void)
+{
+#if defined(CONFIG_BFIN_DCACHE) || defined(CONFIG_BFIN_ICACHE)
+       generate_cplb_tables();
+#endif
+       bfin_setup_caches(0);
+}
+
 void __init bfin_relocate_l1_mem(void)
 {
        unsigned long l1_code_length;
@@ -230,7 +276,7 @@ static int __init sanitize_memmap(struct bfin_memmap_entry *map, int *pnr_map)
        /* record all known change-points (starting and ending addresses),
           omitting those that are for empty memory regions */
        chgidx = 0;
-       for (i = 0; i < old_nr; i++)    {
+       for (i = 0; i < old_nr; i++) {
                if (map[i].size != 0) {
                        change_point[chgidx]->addr = map[i].addr;
                        change_point[chgidx++]->pentry = &map[i];
@@ -238,13 +284,13 @@ static int __init sanitize_memmap(struct bfin_memmap_entry *map, int *pnr_map)
                        change_point[chgidx++]->pentry = &map[i];
                }
        }
-       chg_nr = chgidx;        /* true number of change-points */
+       chg_nr = chgidx;        /* true number of change-points */
 
        /* sort change-point list by memory addresses (low -> high) */
        still_changing = 1;
-       while (still_changing)  {
+       while (still_changing) {
                still_changing = 0;
-               for (i = 1; i < chg_nr; i++)  {
+               for (i = 1; i < chg_nr; i++) {
                        /* if <current_addr> > <last_addr>, swap */
                        /* or, if current=<start_addr> & last=<end_addr>, swap */
                        if ((change_point[i]->addr < change_point[i-1]->addr) ||
@@ -261,10 +307,10 @@ static int __init sanitize_memmap(struct bfin_memmap_entry *map, int *pnr_map)
        }
 
        /* create a new memmap, removing overlaps */
-       overlap_entries = 0;     /* number of entries in the overlap table */
-       new_entry = 0;   /* index for creating new memmap entries */
-       last_type = 0;           /* start with undefined memory type */
-       last_addr = 0;           /* start with 0 as last starting address */
+       overlap_entries = 0;    /* number of entries in the overlap table */
+       new_entry = 0;          /* index for creating new memmap entries */
+       last_type = 0;          /* start with undefined memory type */
+       last_addr = 0;          /* start with 0 as last starting address */
        /* loop through change-points, determining affect on the new memmap */
        for (chgidx = 0; chgidx < chg_nr; chgidx++) {
                /* keep track of all overlapping memmap entries */
@@ -286,14 +332,14 @@ static int __init sanitize_memmap(struct bfin_memmap_entry *map, int *pnr_map)
                        if (overlap_list[i]->type > current_type)
                                current_type = overlap_list[i]->type;
                /* continue building up new memmap based on this information */
-               if (current_type != last_type)  {
+               if (current_type != last_type) {
                        if (last_type != 0) {
                                new_map[new_entry].size =
                                        change_point[chgidx]->addr - last_addr;
                                /* move forward only if the new size was non-zero */
                                if (new_map[new_entry].size != 0)
                                        if (++new_entry >= BFIN_MEMMAP_MAX)
-                                               break;  /* no more space left for new entries */
+                                               break;  /* no more space left for new entries */
                        }
                        if (current_type != 0) {
                                new_map[new_entry].addr = change_point[chgidx]->addr;
@@ -303,9 +349,9 @@ static int __init sanitize_memmap(struct bfin_memmap_entry *map, int *pnr_map)
                        last_type = current_type;
                }
        }
-       new_nr = new_entry;   /* retain count for new entries */
+       new_nr = new_entry;     /* retain count for new entries */
 
-       /* copy new  mapping into original location */
+       /* copy new mapping into original location */
        memcpy(map, new_map, new_nr*sizeof(struct bfin_memmap_entry));
        *pnr_map = new_nr;
 
@@ -361,7 +407,6 @@ static __init int parse_memmap(char *arg)
  *  - "memmap=XXX[KkmM][@][$]XXX[KkmM]" defines a memory region
  *       @ from <start> to <start>+<mem>, type RAM
  *       $ from <start> to <start>+<mem>, type RESERVED
- *
  */
 static __init void parse_cmdline_early(char *cmdline_p)
 {
@@ -383,12 +428,10 @@ static __init void parse_cmdline_early(char *cmdline_p)
                                        if (*to != ' ') {
                                                if (*to == '$'
                                                    || *(to + 1) == '$')
-                                                       reserved_mem_dcache_on =
-                                                           1;
+                                                       reserved_mem_dcache_on = 1;
                                                if (*to == '#'
                                                    || *(to + 1) == '#')
-                                                       reserved_mem_icache_on =
-                                                           1;
+                                                       reserved_mem_icache_on = 1;
                                        }
                                }
                        } else if (!memcmp(to, "earlyprintk=", 12)) {
@@ -417,9 +460,8 @@ static __init void parse_cmdline_early(char *cmdline_p)
  *     [_ramend - DMA_UNCACHED_REGION,
  *             _ramend]:                       uncached DMA region
  *  [_ramend, physical_mem_end]:       memory not managed by kernel
- *
  */
-static __init void  memory_setup(void)
+static __init void memory_setup(void)
 {
 #ifdef CONFIG_MTD_UCLINUX
        unsigned long mtd_phys = 0;
@@ -436,7 +478,7 @@ static __init void  memory_setup(void)
        memory_end = _ramend - DMA_UNCACHED_REGION;
 
 #ifdef CONFIG_MPU
-       /* Round up to multiple of 4MB */
+       /* Round up to multiple of 4MB */
        memory_start = (_ramstart + 0x3fffff) & ~0x3fffff;
 #else
        memory_start = PAGE_ALIGN(_ramstart);
@@ -616,7 +658,7 @@ static __init void setup_bootmem_allocator(void)
        end_pfn = memory_end >> PAGE_SHIFT;
 
        /*
-        * give all the memory to the bootmap allocator,  tell it to put the
+        * give all the memory to the bootmap allocator, tell it to put the
         * boot mem_map at the start of memory.
         */
        bootmap_size = init_bootmem_node(NODE_DATA(0),
@@ -791,7 +833,11 @@ void __init setup_arch(char **cmdline_p)
        bfin_write_SWRST(_bfin_swrst | DOUBLE_FAULT);
 #endif
 
+#ifdef CONFIG_SMP
+       if (_bfin_swrst & SWRST_DBL_FAULT_A) {
+#else
        if (_bfin_swrst & RESET_DOUBLE) {
+#endif
                printk(KERN_EMERG "Recovering from DOUBLE FAULT event\n");
 #ifdef CONFIG_DEBUG_DOUBLEFAULT
                /* We assume the crashing kernel, and the current symbol table match */
@@ -835,7 +881,7 @@ void __init setup_arch(char **cmdline_p)
        printk(KERN_INFO "Blackfin Linux support by http://blackfin.uclinux.org/\n");
 
        printk(KERN_INFO "Processor Speed: %lu MHz core clock and %lu MHz System Clock\n",
-              cclk / 1000000,  sclk / 1000000);
+              cclk / 1000000, sclk / 1000000);
 
        if (ANOMALY_05000273 && (cclk >> 1) <= sclk)
                printk("\n\n\nANOMALY_05000273: CCLK must be >= 2*SCLK !!!\n\n\n");
@@ -867,18 +913,21 @@ void __init setup_arch(char **cmdline_p)
        BUG_ON((char *)&safe_user_instruction - (char *)&fixed_code_start
                != SAFE_USER_INSTRUCTION - FIXED_CODE_START);
 
+#ifdef CONFIG_SMP
+       platform_init_cpus();
+#endif
        init_exception_vectors();
-       bfin_cache_init();
+       bfin_cache_init();      /* Initialize caches for the boot CPU */
 }
 
 static int __init topology_init(void)
 {
-       int cpu;
+       unsigned int cpu;
+       /* Record CPU-private information for the boot processor. */
+       bfin_setup_cpudata(0);
 
        for_each_possible_cpu(cpu) {
-               struct cpu *c = &per_cpu(cpu_devices, cpu);
-
-               register_cpu(c, cpu);
+               register_cpu(&per_cpu(cpu_data, cpu).cpu, cpu);
        }
 
        return 0;
@@ -983,15 +1032,15 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        char *cpu, *mmu, *fpu, *vendor, *cache;
        uint32_t revid;
 
-       u_long cclk = 0, sclk = 0;
+       u_long sclk = 0;
        u_int icache_size = BFIN_ICACHESIZE / 1024, dcache_size = 0, dsup_banks = 0;
+       struct blackfin_cpudata *cpudata = &per_cpu(cpu_data, *(unsigned int *)v);
 
        cpu = CPU;
        mmu = "none";
        fpu = "none";
        revid = bfin_revid();
 
-       cclk = get_cclk();
        sclk = get_sclk();
 
        switch (bfin_read_CHIPID() & CHIPID_MANUFACTURE) {
@@ -1003,10 +1052,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                break;
        }
 
-       seq_printf(m, "processor\t: %d\n"
-               "vendor_id\t: %s\n",
-               *(unsigned int *)v,
-               vendor);
+       seq_printf(m, "processor\t: %d\n" "vendor_id\t: %s\n",
+               *(unsigned int *)v, vendor);
 
        if (CPUID == bfin_cpuid())
                seq_printf(m, "cpu family\t: 0x%04x\n", CPUID);
@@ -1016,7 +1063,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
        seq_printf(m, "model name\t: ADSP-%s %lu(MHz CCLK) %lu(MHz SCLK) (%s)\n"
                "stepping\t: %d\n",
-               cpu, cclk/1000000, sclk/1000000,
+               cpu, cpudata->cclk/1000000, sclk/1000000,
 #ifdef CONFIG_MPU
                "mpu on",
 #else
@@ -1025,16 +1072,16 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                revid);
 
        seq_printf(m, "cpu MHz\t\t: %lu.%03lu/%lu.%03lu\n",
-               cclk/1000000, cclk%1000000,
+               cpudata->cclk/1000000, cpudata->cclk%1000000,
                sclk/1000000, sclk%1000000);
        seq_printf(m, "bogomips\t: %lu.%02lu\n"
                "Calibration\t: %lu loops\n",
-               (loops_per_jiffy * HZ) / 500000,
-               ((loops_per_jiffy * HZ) / 5000) % 100,
-               (loops_per_jiffy * HZ));
+               (cpudata->loops_per_jiffy * HZ) / 500000,
+               ((cpudata->loops_per_jiffy * HZ) / 5000) % 100,
+               (cpudata->loops_per_jiffy * HZ));
 
        /* Check Cache configutation */
-       switch (bfin_read_DMEM_CONTROL() & (1 << DMC0_P | 1 << DMC1_P)) {
+       switch (cpudata->dmemctl & (1 << DMC0_P | 1 << DMC1_P)) {
        case ACACHE_BSRAM:
                cache = "dbank-A/B\t: cache/sram";
                dcache_size = 16;
@@ -1058,10 +1105,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        }
 
        /* Is it turned on? */
-       if ((bfin_read_DMEM_CONTROL() & (ENDCPLB | DMC_ENABLE)) != (ENDCPLB | DMC_ENABLE))
+       if ((cpudata->dmemctl & (ENDCPLB | DMC_ENABLE)) != (ENDCPLB | DMC_ENABLE))
                dcache_size = 0;
 
-       if ((bfin_read_IMEM_CONTROL() & (IMC | ENICPLB)) != (IMC | ENICPLB))
+       if ((cpudata->imemctl & (IMC | ENICPLB)) != (IMC | ENICPLB))
                icache_size = 0;
 
        seq_printf(m, "cache size\t: %d KB(L1 icache) "
@@ -1086,8 +1133,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   "dcache setup\t: %d Super-banks/%d Sub-banks/%d Ways, %d Lines/Way\n",
                   dsup_banks, BFIN_DSUBBANKS, BFIN_DWAYS,
                   BFIN_DLINES);
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       seq_printf(m,
+               "SMP Dcache Flushes\t: %lu\n\n",
+               per_cpu(cpu_data, *(unsigned int *)v).dcache_invld_count);
+#endif
 #ifdef CONFIG_BFIN_ICACHE_LOCK
-       switch ((bfin_read_IMEM_CONTROL() >> 3) & WAYALL_L) {
+       switch ((cpudata->imemctl >> 3) & WAYALL_L) {
        case WAY0_L:
                seq_printf(m, "Way0 Locked-Down\n");
                break;
@@ -1137,6 +1189,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                seq_printf(m, "No Ways are locked\n");
        }
 #endif
+       if (*(unsigned int *)v != NR_CPUS-1)
+               return 0;
+
+#if L2_LENGTH
+       seq_printf(m, "L2 SRAM\t\t: %dKB\n", L2_LENGTH/0x400);
+#endif
        seq_printf(m, "board name\t: %s\n", bfin_board_name);
        seq_printf(m, "board memory\t: %ld kB (0x%p -> 0x%p)\n",
                 physical_mem_end >> 10, (void *)0, (void *)physical_mem_end);
@@ -1144,6 +1202,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                ((int)memory_end - (int)_stext) >> 10,
                _stext,
                (void *)memory_end);
+       seq_printf(m, "\n");
 
        return 0;
 }
index eb23523..06de2ce 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/irq.h>
+#include <linux/delay.h>
 
 #include <asm/blackfin.h>
 #include <asm/time.h>
+#include <asm/gptimers.h>
 
 /* This is an NTP setting */
 #define        TICK_SIZE (tick_nsec / 1000)
@@ -46,11 +48,14 @@ static unsigned long gettimeoffset(void);
 
 static struct irqaction bfin_timer_irq = {
        .name = "BFIN Timer Tick",
+#ifdef CONFIG_IRQ_PER_CPU
+       .flags = IRQF_DISABLED  | IRQF_PERCPU,
+#else
        .flags = IRQF_DISABLED
+#endif
 };
 
-static void
-time_sched_init(irq_handler_t timer_routine)
+void setup_core_timer(void)
 {
        u32 tcount;
 
@@ -71,12 +76,41 @@ time_sched_init(irq_handler_t timer_routine)
        CSYNC();
 
        bfin_write_TCNTL(7);
+}
+
+#ifdef CONFIG_TICK_SOURCE_SYSTMR0
+void setup_system_timer0(void)
+{
+       /* Power down the core timer, just to play safe. */
+       bfin_write_TCNTL(0);
+
+       disable_gptimers(TIMER0bit);
+       set_gptimer_status(0, TIMER_STATUS_TRUN0);
+       while (get_gptimer_status(0) & TIMER_STATUS_TRUN0)
+               udelay(10);
+
+       set_gptimer_config(0, 0x59); /* IRQ enable, periodic, PWM_OUT, SCLKed, OUT PAD disabled */
+       set_gptimer_period(TIMER0_id, get_sclk() / HZ);
+       set_gptimer_pwidth(TIMER0_id, 1);
+       SSYNC();
+       enable_gptimers(TIMER0bit);
+}
+#endif
 
+static void
+time_sched_init(irqreturn_t(*timer_routine) (int, void *))
+{
+#ifdef CONFIG_TICK_SOURCE_SYSTMR0
+       setup_system_timer0();
+#else
+       setup_core_timer();
+#endif
        bfin_timer_irq.handler = (irq_handler_t)timer_routine;
-       /* call setup_irq instead of request_irq because request_irq calls
-        * kmalloc which has not been initialized yet
-        */
+#ifdef CONFIG_TICK_SOURCE_SYSTMR0
+       setup_irq(IRQ_TIMER0, &bfin_timer_irq);
+#else
        setup_irq(IRQ_CORETMR, &bfin_timer_irq);
+#endif
 }
 
 /*
@@ -87,17 +121,23 @@ static unsigned long gettimeoffset(void)
        unsigned long offset;
        unsigned long clocks_per_jiffy;
 
+#ifdef CONFIG_TICK_SOURCE_SYSTMR0
+       clocks_per_jiffy =  bfin_read_TIMER0_PERIOD();
+       offset =  bfin_read_TIMER0_COUNTER() / \
+               (((clocks_per_jiffy + 1) * HZ) / USEC_PER_SEC);
+
+       if ((get_gptimer_status(0) & TIMER_STATUS_TIMIL0) && offset < (100000 / HZ / 2))
+               offset += (USEC_PER_SEC / HZ);
+#else
        clocks_per_jiffy = bfin_read_TPERIOD();
-       offset =
-           (clocks_per_jiffy -
-            bfin_read_TCOUNT()) / (((clocks_per_jiffy + 1) * HZ) /
-                                   USEC_PER_SEC);
+       offset = (clocks_per_jiffy - bfin_read_TCOUNT()) / \
+               (((clocks_per_jiffy + 1) * HZ)  / USEC_PER_SEC);
 
        /* Check if we just wrapped the counters and maybe missed a tick */
        if ((bfin_read_ILAT() & (1 << IRQ_CORETMR))
-           && (offset < (100000 / HZ / 2)))
+               && (offset < (100000 / HZ / 2)))
                offset += (USEC_PER_SEC / HZ);
-
+#endif
        return offset;
 }
 
@@ -120,34 +160,38 @@ irqreturn_t timer_interrupt(int irq, void *dummy)
        static long last_rtc_update;
 
        write_seqlock(&xtime_lock);
-
-       do_timer(1);
-
-       profile_tick(CPU_PROFILING);
-
-       /*
-        * If we have an externally synchronized Linux clock, then update
-        * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-        * called as close as possible to 500 ms before the new second starts.
-        */
-
-       if (ntp_synced() &&
-           xtime.tv_sec > last_rtc_update + 660 &&
-           (xtime.tv_nsec / NSEC_PER_USEC) >=
-           500000 - ((unsigned)TICK_SIZE) / 2
-           && (xtime.tv_nsec / NSEC_PER_USEC) <=
-           500000 + ((unsigned)TICK_SIZE) / 2) {
-               if (set_rtc_mmss(xtime.tv_sec) == 0)
-                       last_rtc_update = xtime.tv_sec;
-               else
-                       /* Do it again in 60s. */
-                       last_rtc_update = xtime.tv_sec - 600;
+#ifdef CONFIG_TICK_SOURCE_SYSTMR0
+       if (get_gptimer_status(0) & TIMER_STATUS_TIMIL0) {
+#endif
+               do_timer(1);
+
+
+               /*
+                * If we have an externally synchronized Linux clock, then update
+                * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+                * called as close as possible to 500 ms before the new second starts.
+                */
+
+               if (ntp_synced() &&
+                   xtime.tv_sec > last_rtc_update + 660 &&
+                   (xtime.tv_nsec / NSEC_PER_USEC) >=
+                   500000 - ((unsigned)TICK_SIZE) / 2
+                   && (xtime.tv_nsec / NSEC_PER_USEC) <=
+                   500000 + ((unsigned)TICK_SIZE) / 2) {
+                       if (set_rtc_mmss(xtime.tv_sec) == 0)
+                               last_rtc_update = xtime.tv_sec;
+                       else
+                               /* Do it again in 60s. */
+                               last_rtc_update = xtime.tv_sec - 600;
+               }
+#ifdef CONFIG_TICK_SOURCE_SYSTMR0
+               set_gptimer_status(0, TIMER_STATUS_TIMIL0);
        }
+#endif
        write_sequnlock(&xtime_lock);
 
-#ifndef CONFIG_SMP
        update_process_times(user_mode(get_irq_regs()));
-#endif
+       profile_tick(CPU_PROFILING);
 
        return IRQ_HANDLED;
 }
index bef025b..af7cc43 100644 (file)
@@ -75,16 +75,6 @@ void __init trap_init(void)
        CSYNC();
 }
 
-/*
- * Used to save the RETX, SEQSTAT, I/D CPLB FAULT ADDR
- * values across the transition from exception to IRQ5.
- * We put these in L1, so they are going to be in a valid
- * location during exception context
- */
-__attribute__((l1_data))
-unsigned long saved_retx, saved_seqstat,
-       saved_icplb_fault_addr, saved_dcplb_fault_addr;
-
 static void decode_address(char *buf, unsigned long address)
 {
 #ifdef CONFIG_DEBUG_VERBOSE
@@ -211,18 +201,18 @@ asmlinkage void double_fault_c(struct pt_regs *fp)
        printk(KERN_EMERG "\n" KERN_EMERG "Double Fault\n");
 #ifdef CONFIG_DEBUG_DOUBLEFAULT_PRINT
        if (((long)fp->seqstat &  SEQSTAT_EXCAUSE) == VEC_UNCOV) {
+               unsigned int cpu = smp_processor_id();
                char buf[150];
-               decode_address(buf, saved_retx);
+               decode_address(buf, cpu_pda[cpu].retx);
                printk(KERN_EMERG "While handling exception (EXCAUSE = 0x%x) at %s:\n",
-                       (int)saved_seqstat & SEQSTAT_EXCAUSE, buf);
-               decode_address(buf, saved_dcplb_fault_addr);
+                       (unsigned int)cpu_pda[cpu].seqstat & SEQSTAT_EXCAUSE, buf);
+               decode_address(buf, cpu_pda[cpu].dcplb_fault_addr);
                printk(KERN_NOTICE "   DCPLB_FAULT_ADDR: %s\n", buf);
-               decode_address(buf, saved_icplb_fault_addr);
+               decode_address(buf, cpu_pda[cpu].icplb_fault_addr);
                printk(KERN_NOTICE "   ICPLB_FAULT_ADDR: %s\n", buf);
 
                decode_address(buf, fp->retx);
-               printk(KERN_NOTICE "The instruction at %s caused a double exception\n",
-                       buf);
+               printk(KERN_NOTICE "The instruction at %s caused a double exception\n", buf);
        } else
 #endif
        {
@@ -240,6 +230,9 @@ asmlinkage void trap_c(struct pt_regs *fp)
 #ifdef CONFIG_DEBUG_BFIN_HWTRACE_ON
        int j;
 #endif
+#ifdef CONFIG_DEBUG_HUNT_FOR_ZERO
+       unsigned int cpu = smp_processor_id();
+#endif
        int sig = 0;
        siginfo_t info;
        unsigned long trapnr = fp->seqstat & SEQSTAT_EXCAUSE;
@@ -417,7 +410,7 @@ asmlinkage void trap_c(struct pt_regs *fp)
                info.si_code = ILL_CPLB_MULHIT;
                sig = SIGSEGV;
 #ifdef CONFIG_DEBUG_HUNT_FOR_ZERO
-               if (saved_dcplb_fault_addr < FIXED_CODE_START)
+               if (cpu_pda[cpu].dcplb_fault_addr < FIXED_CODE_START)
                        verbose_printk(KERN_NOTICE "NULL pointer access\n");
                else
 #endif
@@ -471,7 +464,7 @@ asmlinkage void trap_c(struct pt_regs *fp)
                info.si_code = ILL_CPLB_MULHIT;
                sig = SIGSEGV;
 #ifdef CONFIG_DEBUG_HUNT_FOR_ZERO
-               if (saved_icplb_fault_addr < FIXED_CODE_START)
+               if (cpu_pda[cpu].icplb_fault_addr < FIXED_CODE_START)
                        verbose_printk(KERN_NOTICE "Jump to NULL address\n");
                else
 #endif
@@ -960,6 +953,7 @@ void dump_bfin_process(struct pt_regs *fp)
                else
                        verbose_printk(KERN_NOTICE "COMM= invalid\n");
 
+               printk(KERN_NOTICE "CPU = %d\n", current_thread_info()->cpu);
                if (!((unsigned long)current->mm & 0x3) && (unsigned long)current->mm >= FIXED_CODE_START)
                        verbose_printk(KERN_NOTICE  "TEXT = 0x%p-0x%p        DATA = 0x%p-0x%p\n"
                                KERN_NOTICE " BSS = 0x%p-0x%p  USER-STACK = 0x%p\n"
@@ -1053,6 +1047,7 @@ void show_regs(struct pt_regs *fp)
        struct irqaction *action;
        unsigned int i;
        unsigned long flags;
+       unsigned int cpu = smp_processor_id();
 
        verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "SEQUENCER STATUS:\t\t%s\n", print_tainted());
        verbose_printk(KERN_NOTICE " SEQSTAT: %08lx  IPEND: %04lx  SYSCFG: %04lx\n",
@@ -1112,9 +1107,9 @@ unlock:
 
        if (((long)fp->seqstat &  SEQSTAT_EXCAUSE) &&
            (((long)fp->seqstat & SEQSTAT_EXCAUSE) != VEC_HWERR)) {
-               decode_address(buf, saved_dcplb_fault_addr);
+               decode_address(buf, cpu_pda[cpu].dcplb_fault_addr);
                verbose_printk(KERN_NOTICE "DCPLB_FAULT_ADDR: %s\n", buf);
-               decode_address(buf, saved_icplb_fault_addr);
+               decode_address(buf, cpu_pda[cpu].icplb_fault_addr);
                verbose_printk(KERN_NOTICE "ICPLB_FAULT_ADDR: %s\n", buf);
        }
 
@@ -1153,20 +1148,21 @@ unlock:
 asmlinkage int sys_bfin_spinlock(int *spinlock)__attribute__((l1_text));
 #endif
 
-asmlinkage int sys_bfin_spinlock(int *spinlock)
+static DEFINE_SPINLOCK(bfin_spinlock_lock);
+
+asmlinkage int sys_bfin_spinlock(int *p)
 {
-       int ret = 0;
-       int tmp = 0;
+       int ret, tmp = 0;
 
-       local_irq_disable();
-       ret = get_user(tmp, spinlock);
-       if (ret == 0) {
-               if (tmp)
+       spin_lock(&bfin_spinlock_lock); /* This would also hold kernel preemption. */
+       ret = get_user(tmp, p);
+       if (likely(ret == 0)) {
+               if (unlikely(tmp))
                        ret = 1;
-               tmp = 1;
-               put_user(tmp, spinlock);
+               else
+                       put_user(1, p);
        }
-       local_irq_enable();
+       spin_unlock(&bfin_spinlock_lock);
        return ret;
 }
 
index bc240ab..57d306b 100644 (file)
@@ -31,7 +31,8 @@
 #include <linux/bootmem.h>
 #include <linux/uaccess.h>
 #include <asm/bfin-global.h>
-#include <asm/l1layout.h>
+#include <asm/pda.h>
+#include <asm/cplbinit.h>
 #include "blackfin_sram.h"
 
 /*
@@ -53,6 +54,11 @@ static unsigned long empty_bad_page;
 
 unsigned long empty_zero_page;
 
+extern unsigned long exception_stack[NR_CPUS][1024];
+
+struct blackfin_pda cpu_pda[NR_CPUS];
+EXPORT_SYMBOL(cpu_pda);
+
 /*
  * paging_init() continues the virtual memory environment setup which
  * was begun by the code in arch/head.S.
@@ -98,6 +104,42 @@ void __init paging_init(void)
        }
 }
 
+asmlinkage void init_pda(void)
+{
+       unsigned int cpu = raw_smp_processor_id();
+
+       /* Reset this CPU's PDA, then fill in the fields that refer to
+          other parts of the memory. The content of such memory is
+          still undefined at the time of the call, we are only
+          setting up valid pointers to it. */
+       memset(&cpu_pda[cpu], 0, sizeof(cpu_pda[cpu]));
+
+       cpu_pda[cpu].ex_stack = exception_stack[cpu + 1];
+
+#ifndef CONFIG_MPU
+       cpu_pda[cpu].ipdt = ipdt_tables[cpu];
+       cpu_pda[cpu].dpdt = dpdt_tables[cpu];
+#ifdef CONFIG_CPLB_INFO
+       cpu_pda[cpu].ipdt_swapcount = ipdt_swapcount_tables[cpu];
+       cpu_pda[cpu].dpdt_swapcount = dpdt_swapcount_tables[cpu];
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+       /* cpu_pda[] has NR_CPUS entries: only link PDA 0 and PDA 1
+          on SMP, a one-entry array must not be indexed with 1. */
+       cpu_pda[0].next = &cpu_pda[1];
+       cpu_pda[1].next = &cpu_pda[0];
+       cpu_pda[cpu].imask = 0x1f;
+#endif
+}
+
+void __cpuinit reserve_pda(void)
+{
+       unsigned int cpu = smp_processor_id();  /* report only, allocates nothing */
+       printk(KERN_INFO "PDA for CPU%u reserved at %p\n", cpu, &cpu_pda[cpu]);
+}
+
 void __init mem_init(void)
 {
        unsigned int codek = 0, datak = 0, initk = 0;
@@ -141,21 +183,13 @@ void __init mem_init(void)
 
 static int __init sram_init(void)
 {
-       unsigned long tmp;
-
        /* Initialize the blackfin L1 Memory. */
        bfin_sram_init();
 
-       /* Allocate this once; never free it.  We assume this gives us a
-          pointer to the start of L1 scratchpad memory; panic if it
-          doesn't.  */
-       tmp = (unsigned long)l1sram_alloc(sizeof(struct l1_scratch_task_info));
-       if (tmp != (unsigned long)L1_SCRATCH_TASK_INFO) {
-               printk(KERN_EMERG "mem_init(): Did not get the right address from l1sram_alloc: %08lx != %08lx\n",
-                       tmp, (unsigned long)L1_SCRATCH_TASK_INFO);
-               panic("No L1, time to give up\n");
-       }
-
+       /* Reserve the PDA space for the boot CPU right after we
+        * initialized the scratch memory allocator.
+        */
+       reserve_pda();
        return 0;
 }
 pure_initcall(sram_init);
index cc6f336..8f82b4c 100644 (file)
 #include <asm/blackfin.h>
 #include "blackfin_sram.h"
 
-static spinlock_t l1sram_lock, l1_data_sram_lock, l1_inst_sram_lock;
-static spinlock_t l2_sram_lock;
+static DEFINE_PER_CPU(spinlock_t, l1sram_lock) ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(spinlock_t, l1_data_sram_lock) ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(spinlock_t, l1_inst_sram_lock) ____cacheline_aligned_in_smp;
+static spinlock_t l2_sram_lock ____cacheline_aligned_in_smp;
 
 /* the data structure for L1 scratchpad and DATA SRAM */
 struct sram_piece {
@@ -52,18 +54,22 @@ struct sram_piece {
        struct sram_piece *next;
 };
 
-static struct sram_piece free_l1_ssram_head, used_l1_ssram_head;
+static DEFINE_PER_CPU(struct sram_piece, free_l1_ssram_head);
+static DEFINE_PER_CPU(struct sram_piece, used_l1_ssram_head);
 
 #if L1_DATA_A_LENGTH != 0
-static struct sram_piece free_l1_data_A_sram_head, used_l1_data_A_sram_head;
+static DEFINE_PER_CPU(struct sram_piece, free_l1_data_A_sram_head);
+static DEFINE_PER_CPU(struct sram_piece, used_l1_data_A_sram_head);
 #endif
 
 #if L1_DATA_B_LENGTH != 0
-static struct sram_piece free_l1_data_B_sram_head, used_l1_data_B_sram_head;
+static DEFINE_PER_CPU(struct sram_piece, free_l1_data_B_sram_head);
+static DEFINE_PER_CPU(struct sram_piece, used_l1_data_B_sram_head);
 #endif
 
 #if L1_CODE_LENGTH != 0
-static struct sram_piece free_l1_inst_sram_head, used_l1_inst_sram_head;
+static DEFINE_PER_CPU(struct sram_piece, free_l1_inst_sram_head);
+static DEFINE_PER_CPU(struct sram_piece, used_l1_inst_sram_head);
 #endif
 
 #if L2_LENGTH != 0
@@ -75,102 +81,115 @@ static struct kmem_cache *sram_piece_cache;
 /* L1 Scratchpad SRAM initialization function */
 static void __init l1sram_init(void)
 {
-       free_l1_ssram_head.next =
-               kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
-       if (!free_l1_ssram_head.next) {
-               printk(KERN_INFO "Failed to initialize Scratchpad data SRAM\n");
-               return;
+       unsigned int cpu;
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
+               per_cpu(free_l1_ssram_head, cpu).next =
+                       kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
+               if (!per_cpu(free_l1_ssram_head, cpu).next) {
+                       printk(KERN_INFO "Fail to initialize Scratchpad data SRAM.\n");
+                       return;
+               }
+
+               per_cpu(free_l1_ssram_head, cpu).next->paddr = (void *)get_l1_scratch_start_cpu(cpu);
+               per_cpu(free_l1_ssram_head, cpu).next->size = L1_SCRATCH_LENGTH;
+               per_cpu(free_l1_ssram_head, cpu).next->pid = 0;
+               per_cpu(free_l1_ssram_head, cpu).next->next = NULL;
+
+               per_cpu(used_l1_ssram_head, cpu).next = NULL;
+
+               /* mutex initialize */
+               spin_lock_init(&per_cpu(l1sram_lock, cpu));
+               printk(KERN_INFO "Blackfin Scratchpad data SRAM: %d KB\n",
+                       L1_SCRATCH_LENGTH >> 10);
        }
-
-       free_l1_ssram_head.next->paddr = (void *)L1_SCRATCH_START;
-       free_l1_ssram_head.next->size = L1_SCRATCH_LENGTH;
-       free_l1_ssram_head.next->pid = 0;
-       free_l1_ssram_head.next->next = NULL;
-
-       used_l1_ssram_head.next = NULL;
-
-       /* mutex initialize */
-       spin_lock_init(&l1sram_lock);
-
-       printk(KERN_INFO "Blackfin Scratchpad data SRAM: %d KB\n",
-              L1_SCRATCH_LENGTH >> 10);
 }
 
 static void __init l1_data_sram_init(void)
 {
+       unsigned int cpu;
 #if L1_DATA_A_LENGTH != 0
-       free_l1_data_A_sram_head.next =
-               kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
-       if (!free_l1_data_A_sram_head.next) {
-               printk(KERN_INFO "Failed to initialize L1 Data A SRAM\n");
-               return;
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
+               per_cpu(free_l1_data_A_sram_head, cpu).next =
+                       kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
+               if (!per_cpu(free_l1_data_A_sram_head, cpu).next) {
+                       printk(KERN_INFO "Fail to initialize L1 Data A SRAM.\n");
+                       return;
+               }
+
+               per_cpu(free_l1_data_A_sram_head, cpu).next->paddr =
+                       (void *)get_l1_data_a_start_cpu(cpu) + (_ebss_l1 - _sdata_l1);
+               per_cpu(free_l1_data_A_sram_head, cpu).next->size =
+                       L1_DATA_A_LENGTH - (_ebss_l1 - _sdata_l1);
+               per_cpu(free_l1_data_A_sram_head, cpu).next->pid = 0;
+               per_cpu(free_l1_data_A_sram_head, cpu).next->next = NULL;
+
+               per_cpu(used_l1_data_A_sram_head, cpu).next = NULL;
+
+               printk(KERN_INFO "Blackfin L1 Data A SRAM: %d KB (%d KB free)\n",
+                       L1_DATA_A_LENGTH >> 10,
+                       per_cpu(free_l1_data_A_sram_head, cpu).next->size >> 10);
        }
-
-       free_l1_data_A_sram_head.next->paddr =
-               (void *)L1_DATA_A_START + (_ebss_l1 - _sdata_l1);
-       free_l1_data_A_sram_head.next->size =
-               L1_DATA_A_LENGTH - (_ebss_l1 - _sdata_l1);
-       free_l1_data_A_sram_head.next->pid = 0;
-       free_l1_data_A_sram_head.next->next = NULL;
-
-       used_l1_data_A_sram_head.next = NULL;
-
-       printk(KERN_INFO "Blackfin L1 Data A SRAM: %d KB (%d KB free)\n",
-               L1_DATA_A_LENGTH >> 10,
-               free_l1_data_A_sram_head.next->size >> 10);
 #endif
 #if L1_DATA_B_LENGTH != 0
-       free_l1_data_B_sram_head.next =
-               kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
-       if (!free_l1_data_B_sram_head.next) {
-               printk(KERN_INFO "Failed to initialize L1 Data B SRAM\n");
-               return;
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
+               per_cpu(free_l1_data_B_sram_head, cpu).next =
+                       kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
+               if (!per_cpu(free_l1_data_B_sram_head, cpu).next) {
+                       printk(KERN_INFO "Fail to initialize L1 Data B SRAM.\n");
+                       return;
+               }
+
+               per_cpu(free_l1_data_B_sram_head, cpu).next->paddr =
+                       (void *)get_l1_data_b_start_cpu(cpu) + (_ebss_b_l1 - _sdata_b_l1);
+               per_cpu(free_l1_data_B_sram_head, cpu).next->size =
+                       L1_DATA_B_LENGTH - (_ebss_b_l1 - _sdata_b_l1);
+               per_cpu(free_l1_data_B_sram_head, cpu).next->pid = 0;
+               per_cpu(free_l1_data_B_sram_head, cpu).next->next = NULL;
+
+               per_cpu(used_l1_data_B_sram_head, cpu).next = NULL;
+
+               printk(KERN_INFO "Blackfin L1 Data B SRAM: %d KB (%d KB free)\n",
+                       L1_DATA_B_LENGTH >> 10,
+                       per_cpu(free_l1_data_B_sram_head, cpu).next->size >> 10);
+               /* mutex initialize */
        }
-
-       free_l1_data_B_sram_head.next->paddr =
-               (void *)L1_DATA_B_START + (_ebss_b_l1 - _sdata_b_l1);
-       free_l1_data_B_sram_head.next->size =
-               L1_DATA_B_LENGTH - (_ebss_b_l1 - _sdata_b_l1);
-       free_l1_data_B_sram_head.next->pid = 0;
-       free_l1_data_B_sram_head.next->next = NULL;
-
-       used_l1_data_B_sram_head.next = NULL;
-
-       printk(KERN_INFO "Blackfin L1 Data B SRAM: %d KB (%d KB free)\n",
-               L1_DATA_B_LENGTH >> 10,
-               free_l1_data_B_sram_head.next->size >> 10);
 #endif
 
-       /* mutex initialize */
-       spin_lock_init(&l1_data_sram_lock);
+#if L1_DATA_A_LENGTH != 0 || L1_DATA_B_LENGTH != 0
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu)
+               spin_lock_init(&per_cpu(l1_data_sram_lock, cpu));
+#endif
 }
 
 static void __init l1_inst_sram_init(void)
 {
 #if L1_CODE_LENGTH != 0
-       free_l1_inst_sram_head.next =
-               kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
-       if (!free_l1_inst_sram_head.next) {
-               printk(KERN_INFO "Failed to initialize L1 Instruction SRAM\n");
-               return;
+       unsigned int cpu;
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
+               per_cpu(free_l1_inst_sram_head, cpu).next =
+                       kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
+               if (!per_cpu(free_l1_inst_sram_head, cpu).next) {
+                       printk(KERN_INFO "Failed to initialize L1 Instruction SRAM\n");
+                       return;
+               }
+
+               per_cpu(free_l1_inst_sram_head, cpu).next->paddr =
+                       (void *)get_l1_code_start_cpu(cpu) + (_etext_l1 - _stext_l1);
+               per_cpu(free_l1_inst_sram_head, cpu).next->size =
+                       L1_CODE_LENGTH - (_etext_l1 - _stext_l1);
+               per_cpu(free_l1_inst_sram_head, cpu).next->pid = 0;
+               per_cpu(free_l1_inst_sram_head, cpu).next->next = NULL;
+
+               per_cpu(used_l1_inst_sram_head, cpu).next = NULL;
+
+               printk(KERN_INFO "Blackfin L1 Instruction SRAM: %d KB (%d KB free)\n",
+                       L1_CODE_LENGTH >> 10,
+                       per_cpu(free_l1_inst_sram_head, cpu).next->size >> 10);
+
+               /* mutex initialize */
+               spin_lock_init(&per_cpu(l1_inst_sram_lock, cpu));
        }
-
-       free_l1_inst_sram_head.next->paddr =
-               (void *)L1_CODE_START + (_etext_l1 - _stext_l1);
-       free_l1_inst_sram_head.next->size =
-               L1_CODE_LENGTH - (_etext_l1 - _stext_l1);
-       free_l1_inst_sram_head.next->pid = 0;
-       free_l1_inst_sram_head.next->next = NULL;
-
-       used_l1_inst_sram_head.next = NULL;
-
-       printk(KERN_INFO "Blackfin L1 Instruction SRAM: %d KB (%d KB free)\n",
-               L1_CODE_LENGTH >> 10,
-               free_l1_inst_sram_head.next->size >> 10);
 #endif
-
-       /* mutex initialize */
-       spin_lock_init(&l1_inst_sram_lock);
 }
 
 static void __init l2_sram_init(void)
@@ -179,7 +198,7 @@ static void __init l2_sram_init(void)
        free_l2_sram_head.next =
                kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
        if (!free_l2_sram_head.next) {
-               printk(KERN_INFO "Failed to initialize L2 SRAM\n");
+               printk(KERN_INFO "Fail to initialize L2 SRAM.\n");
                return;
        }
 
@@ -200,6 +219,7 @@ static void __init l2_sram_init(void)
        /* mutex initialize */
        spin_lock_init(&l2_sram_lock);
 }
+
 void __init bfin_sram_init(void)
 {
        sram_piece_cache = kmem_cache_create("sram_piece_cache",
@@ -353,20 +373,20 @@ int sram_free(const void *addr)
 {
 
 #if L1_CODE_LENGTH != 0
-       if (addr >= (void *)L1_CODE_START
-                && addr < (void *)(L1_CODE_START + L1_CODE_LENGTH))
+       if (addr >= (void *)get_l1_code_start()
+                && addr < (void *)(get_l1_code_start() + L1_CODE_LENGTH))
                return l1_inst_sram_free(addr);
        else
 #endif
 #if L1_DATA_A_LENGTH != 0
-       if (addr >= (void *)L1_DATA_A_START
-                && addr < (void *)(L1_DATA_A_START + L1_DATA_A_LENGTH))
+       if (addr >= (void *)get_l1_data_a_start()
+                && addr < (void *)(get_l1_data_a_start() + L1_DATA_A_LENGTH))
                return l1_data_A_sram_free(addr);
        else
 #endif
 #if L1_DATA_B_LENGTH != 0
-       if (addr >= (void *)L1_DATA_B_START
-                && addr < (void *)(L1_DATA_B_START + L1_DATA_B_LENGTH))
+       if (addr >= (void *)get_l1_data_b_start()
+                && addr < (void *)(get_l1_data_b_start() + L1_DATA_B_LENGTH))
                return l1_data_B_sram_free(addr);
        else
 #endif
@@ -384,17 +404,20 @@ void *l1_data_A_sram_alloc(size_t size)
 {
        unsigned long flags;
        void *addr = NULL;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1_data_sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1_data_sram_lock, cpu), flags);
 
 #if L1_DATA_A_LENGTH != 0
-       addr = _sram_alloc(size, &free_l1_data_A_sram_head,
-                       &used_l1_data_A_sram_head);
+       addr = _sram_alloc(size, &per_cpu(free_l1_data_A_sram_head, cpu),
+                       &per_cpu(used_l1_data_A_sram_head, cpu));
 #endif
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1_data_sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1_data_sram_lock, cpu), flags);
+       put_cpu();
 
        pr_debug("Allocated address in l1_data_A_sram_alloc is 0x%lx+0x%lx\n",
                 (long unsigned int)addr, size);
@@ -407,19 +430,22 @@ int l1_data_A_sram_free(const void *addr)
 {
        unsigned long flags;
        int ret;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1_data_sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1_data_sram_lock, cpu), flags);
 
 #if L1_DATA_A_LENGTH != 0
-       ret = _sram_free(addr, &free_l1_data_A_sram_head,
-                       &used_l1_data_A_sram_head);
+       ret = _sram_free(addr, &per_cpu(free_l1_data_A_sram_head, cpu),
+                       &per_cpu(used_l1_data_A_sram_head, cpu));
 #else
        ret = -1;
 #endif
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1_data_sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1_data_sram_lock, cpu), flags);
+       put_cpu();
 
        return ret;
 }
@@ -430,15 +456,18 @@ void *l1_data_B_sram_alloc(size_t size)
 #if L1_DATA_B_LENGTH != 0
        unsigned long flags;
        void *addr;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1_data_sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1_data_sram_lock, cpu), flags);
 
-       addr = _sram_alloc(size, &free_l1_data_B_sram_head,
-                       &used_l1_data_B_sram_head);
+       addr = _sram_alloc(size, &per_cpu(free_l1_data_B_sram_head, cpu),
+                       &per_cpu(used_l1_data_B_sram_head, cpu));
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1_data_sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1_data_sram_lock, cpu), flags);
+       put_cpu();
 
        pr_debug("Allocated address in l1_data_B_sram_alloc is 0x%lx+0x%lx\n",
                 (long unsigned int)addr, size);
@@ -455,15 +484,18 @@ int l1_data_B_sram_free(const void *addr)
 #if L1_DATA_B_LENGTH != 0
        unsigned long flags;
        int ret;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1_data_sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1_data_sram_lock, cpu), flags);
 
-       ret = _sram_free(addr, &free_l1_data_B_sram_head,
-                       &used_l1_data_B_sram_head);
+       ret = _sram_free(addr, &per_cpu(free_l1_data_B_sram_head, cpu),
+                       &per_cpu(used_l1_data_B_sram_head, cpu));
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1_data_sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1_data_sram_lock, cpu), flags);
+       put_cpu();
 
        return ret;
 #else
@@ -509,15 +541,18 @@ void *l1_inst_sram_alloc(size_t size)
 #if L1_CODE_LENGTH != 0
        unsigned long flags;
        void *addr;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1_inst_sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1_inst_sram_lock, cpu), flags);
 
-       addr = _sram_alloc(size, &free_l1_inst_sram_head,
-                       &used_l1_inst_sram_head);
+       addr = _sram_alloc(size, &per_cpu(free_l1_inst_sram_head, cpu),
+                       &per_cpu(used_l1_inst_sram_head, cpu));
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1_inst_sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1_inst_sram_lock, cpu), flags);
+       put_cpu();
 
        pr_debug("Allocated address in l1_inst_sram_alloc is 0x%lx+0x%lx\n",
                 (long unsigned int)addr, size);
@@ -534,15 +569,18 @@ int l1_inst_sram_free(const void *addr)
 #if L1_CODE_LENGTH != 0
        unsigned long flags;
        int ret;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1_inst_sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1_inst_sram_lock, cpu), flags);
 
-       ret = _sram_free(addr, &free_l1_inst_sram_head,
-                       &used_l1_inst_sram_head);
+       ret = _sram_free(addr, &per_cpu(free_l1_inst_sram_head, cpu),
+                       &per_cpu(used_l1_inst_sram_head, cpu));
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1_inst_sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1_inst_sram_lock, cpu), flags);
+       put_cpu();
 
        return ret;
 #else
@@ -556,15 +594,18 @@ void *l1sram_alloc(size_t size)
 {
        unsigned long flags;
        void *addr;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1sram_lock, cpu), flags);
 
-       addr = _sram_alloc(size, &free_l1_ssram_head,
-                       &used_l1_ssram_head);
+       addr = _sram_alloc(size, &per_cpu(free_l1_ssram_head, cpu),
+                       &per_cpu(used_l1_ssram_head, cpu));
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1sram_lock, cpu), flags);
+       put_cpu();
 
        return addr;
 }
@@ -574,15 +615,18 @@ void *l1sram_alloc_max(size_t *psize)
 {
        unsigned long flags;
        void *addr;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1sram_lock, cpu), flags);
 
-       addr = _sram_alloc_max(&free_l1_ssram_head,
-                       &used_l1_ssram_head, psize);
+       addr = _sram_alloc_max(&per_cpu(free_l1_ssram_head, cpu),
+                       &per_cpu(used_l1_ssram_head, cpu), psize);
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1sram_lock, cpu), flags);
+       put_cpu();
 
        return addr;
 }
@@ -592,15 +636,18 @@ int l1sram_free(const void *addr)
 {
        unsigned long flags;
        int ret;
+       unsigned int cpu;
 
+       cpu = get_cpu();
        /* add mutex operation */
-       spin_lock_irqsave(&l1sram_lock, flags);
+       spin_lock_irqsave(&per_cpu(l1sram_lock, cpu), flags);
 
-       ret = _sram_free(addr, &free_l1_ssram_head,
-                       &used_l1_ssram_head);
+       ret = _sram_free(addr, &per_cpu(free_l1_ssram_head, cpu),
+                       &per_cpu(used_l1_ssram_head, cpu));
 
        /* add mutex operation */
-       spin_unlock_irqrestore(&l1sram_lock, flags);
+       spin_unlock_irqrestore(&per_cpu(l1sram_lock, cpu), flags);
+       put_cpu();
 
        return ret;
 }
@@ -761,33 +808,36 @@ static int sram_proc_read(char *buf, char **start, off_t offset, int count,
                int *eof, void *data)
 {
        int len = 0;
+       unsigned int cpu;
 
-       if (_sram_proc_read(buf, &len, count, "Scratchpad",
-                       &free_l1_ssram_head, &used_l1_ssram_head))
-               goto not_done;
+       for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
+               if (_sram_proc_read(buf, &len, count, "Scratchpad",
+                       &per_cpu(free_l1_ssram_head, cpu), &per_cpu(used_l1_ssram_head, cpu)))
+                       goto not_done;
 #if L1_DATA_A_LENGTH != 0
-       if (_sram_proc_read(buf, &len, count, "L1 Data A",
-                       &free_l1_data_A_sram_head,
-                       &used_l1_data_A_sram_head))
-               goto not_done;
+               if (_sram_proc_read(buf, &len, count, "L1 Data A",
+                       &per_cpu(free_l1_data_A_sram_head, cpu),
+                       &per_cpu(used_l1_data_A_sram_head, cpu)))
+                       goto not_done;
 #endif
 #if L1_DATA_B_LENGTH != 0
-       if (_sram_proc_read(buf, &len, count, "L1 Data B",
-                       &free_l1_data_B_sram_head,
-                       &used_l1_data_B_sram_head))
-               goto not_done;
+               if (_sram_proc_read(buf, &len, count, "L1 Data B",
+                       &per_cpu(free_l1_data_B_sram_head, cpu),
+                       &per_cpu(used_l1_data_B_sram_head, cpu)))
+                       goto not_done;
 #endif
 #if L1_CODE_LENGTH != 0
-       if (_sram_proc_read(buf, &len, count, "L1 Instruction",
-                       &free_l1_inst_sram_head, &used_l1_inst_sram_head))
-               goto not_done;
+               if (_sram_proc_read(buf, &len, count, "L1 Instruction",
+                       &per_cpu(free_l1_inst_sram_head, cpu),
+                       &per_cpu(used_l1_inst_sram_head, cpu)))
+                       goto not_done;
 #endif
+       }
 #if L2_LENGTH != 0
-       if (_sram_proc_read(buf, &len, count, "L2",
-                       &free_l2_sram_head, &used_l2_sram_head))
+       if (_sram_proc_read(buf, &len, count, "L2", &free_l2_sram_head,
+               &used_l2_sram_head))
                goto not_done;
 #endif
-
        *eof = 1;
  not_done:
        return len;