memstick: add support for 8-bit parallel mode
[safe/jmp/linux-2.6] / include / asm-sparc64 / cpudata.h
index 16d6289..5424214 100644 (file)
@@ -6,6 +6,9 @@
 #ifndef _SPARC64_CPUDATA_H
 #define _SPARC64_CPUDATA_H
 
+#include <asm/hypervisor.h>
+#include <asm/asi.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/percpu.h>
 typedef struct {
        /* Dcache line 1 */
        unsigned int    __softirq_pending; /* must be 1st, see rtrap.S */
-       unsigned int    multiplier;
-       unsigned int    counter;
-       unsigned int    idle_volume;
+       unsigned int    __pad0;
        unsigned long   clock_tick;     /* %tick's per second */
-       unsigned long   udelay_val;
+       unsigned long   __pad;
+       unsigned int    __pad1;
+       unsigned int    __pad2;
 
        /* Dcache line 2, rarely used */
        unsigned int    dcache_size;
@@ -27,8 +30,8 @@ typedef struct {
        unsigned int    icache_line_size;
        unsigned int    ecache_size;
        unsigned int    ecache_line_size;
-       unsigned int    __pad3;
-       unsigned int    __pad4;
+       int             core_id;
+       int             proc_id;
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
@@ -50,71 +53,186 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
  */
 struct thread_info;
 struct trap_per_cpu {
-/* D-cache line 1 */
+/* D-cache line 1: Basic thread information, cpu and device mondo queues */
        struct thread_info      *thread;
        unsigned long           pgd_paddr;
-       unsigned long           __pad1[2];
-
-/* D-cache line 2 */
-       unsigned long           __pad2[4];
+       unsigned long           cpu_mondo_pa;
+       unsigned long           dev_mondo_pa;
+
+/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */
+       unsigned long           resum_mondo_pa;
+       unsigned long           resum_kernel_buf_pa;
+       unsigned long           nonresum_mondo_pa;
+       unsigned long           nonresum_kernel_buf_pa;
+
+/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
+       struct hv_fault_status  fault_info;
+
+/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list.  */
+       unsigned long           cpu_mondo_block_pa;
+       unsigned long           cpu_list_pa;
+       unsigned long           tsb_huge;
+       unsigned long           tsb_huge_temp;
+
+/* Dcache line 8: IRQ work list, and keep trap_block a power-of-2 in size.  */
+       unsigned long           irq_worklist_pa;
+       unsigned int            cpu_mondo_qmask;
+       unsigned int            dev_mondo_qmask;
+       unsigned int            resum_qmask;
+       unsigned int            nonresum_qmask;
+       void                    *hdesc;
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
-extern void init_cur_cpu_trap(void);
-extern void per_cpu_patch(void);
+extern void init_cur_cpu_trap(struct thread_info *);
 extern void setup_tba(void);
+extern int ncpus_probed;
+
+extern unsigned long real_hard_smp_processor_id(void);
+
+struct cpuid_patch_entry {     /* field layout matches one record emitted into .cpuid_patch by __GET_CPUID */
+       unsigned int    addr;                   /* location of the default sequence (the ".word 661b") */
+       unsigned int    cheetah_safari[4];      /* alternative sequence using ASI_SAFARI_CONFIG */
+       unsigned int    cheetah_jbus[4];        /* alternative sequence using ASI_JBUS_CONFIG */
+       unsigned int    starfire[4];            /* alternative sequence loading via ASI_PHYS_BYPASS_EC_E */
+       unsigned int    sun4v[4];               /* alternative sequence reading SCRATCHPAD_CPUID */
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+
+struct sun4v_1insn_patch_entry {
+       unsigned int    addr;   /* NOTE(review): presumably the patch-site location, as in cpuid_patch_entry; confirm */
+       unsigned int    insn;   /* NOTE(review): presumably the single instruction word for that site; confirm */
+};
+extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
+       __sun4v_1insn_patch_end;
+
+struct sun4v_2insn_patch_entry {
+       unsigned int    addr;           /* NOTE(review): presumably the patch-site location, as in cpuid_patch_entry; confirm */
+       unsigned int    insns[2];       /* NOTE(review): presumably the two instruction words for that site; confirm */
+};
+extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
+       __sun4v_2insn_patch_end;
 
 #endif /* !(__ASSEMBLY__) */
 
-#define TRAP_PER_CPU_THREAD    0x00
-#define TRAP_PER_CPU_PGD_PADDR 0x08
-
-#define TRAP_BLOCK_SZ_SHIFT    6
-
-/* Clobbers %g1, loads %g6 with local processor's cpuid */
-#define __GET_CPUID                    \
-       ba,pt   %xcc, __get_cpu_id;     \
-        rd     %pc, %g1;
-
-/* Clobbers %g1, current address space PGD phys address into %g7.  */
-#define TRAP_LOAD_PGD_PHYS                     \
-       __GET_CPUID                             \
-       sllx    %g6, TRAP_BLOCK_SZ_SHIFT, %g6;  \
-       sethi   %hi(trap_block), %g7;           \
-       or      %g7, %lo(trap_block), %g7;      \
-       add     %g7, %g6, %g7;                  \
-       ldx     [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
-
-/* Clobbers %g1, loads local processor's IRQ work area into %g6.  */
-#define TRAP_LOAD_IRQ_WORK                     \
-       __GET_CPUID                             \
-       sethi   %hi(__irq_work), %g1;           \
-       sllx    %g6, 6, %g6;                    \
-       or      %g1, %lo(__irq_work), %g1;      \
-       add     %g1, %g6, %g6;
-
-/* Clobbers %g1, loads %g6 with current thread info pointer.  */
-#define TRAP_LOAD_THREAD_REG                   \
-       __GET_CPUID                             \
-       sllx    %g6, TRAP_BLOCK_SZ_SHIFT, %g6;  \
-       sethi   %hi(trap_block), %g1;           \
-       or      %g1, %lo(trap_block), %g1;      \
-       ldx     [%g1 + %g6], %g6;
-
-/* Given the current thread info pointer in %g6, load the per-cpu
- * area base of the current processor into %g5.  REG1 and REG2 are
+#define TRAP_PER_CPU_THREAD            0x00
+#define TRAP_PER_CPU_PGD_PADDR         0x08
+#define TRAP_PER_CPU_CPU_MONDO_PA      0x10
+#define TRAP_PER_CPU_DEV_MONDO_PA      0x18
+#define TRAP_PER_CPU_RESUM_MONDO_PA    0x20
+#define TRAP_PER_CPU_RESUM_KBUF_PA     0x28
+#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30
+#define TRAP_PER_CPU_NONRESUM_KBUF_PA  0x38
+#define TRAP_PER_CPU_FAULT_INFO                0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA        0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA       0xc8
+#define TRAP_PER_CPU_TSB_HUGE          0xd0
+#define TRAP_PER_CPU_TSB_HUGE_TEMP     0xd8
+#define TRAP_PER_CPU_IRQ_WORKLIST_PA   0xe0
+#define TRAP_PER_CPU_CPU_MONDO_QMASK   0xe8
+#define TRAP_PER_CPU_DEV_MONDO_QMASK   0xec
+#define TRAP_PER_CPU_RESUM_QMASK       0xf0
+#define TRAP_PER_CPU_NONRESUM_QMASK    0xf4
+
+#define TRAP_BLOCK_SZ_SHIFT            8
+
+#include <asm/scratchpad.h>
+
+#define __GET_CPUID(REG)                               \
+       /* Spitfire implementation (default). */        \
+661:   ldxa            [%g0] ASI_UPA_CONFIG, REG;      \
+       srlx            REG, 17, REG;                   \
+        and            REG, 0x1f, REG;                 \
+       nop;                                            \
+       .section        .cpuid_patch, "ax";             \
+       /* Instruction location. */                     \
+       .word           661b;                           \
+       /* Cheetah Safari implementation. */            \
+       ldxa            [%g0] ASI_SAFARI_CONFIG, REG;   \
+       srlx            REG, 17, REG;                   \
+       and             REG, 0x3ff, REG;                \
+       nop;                                            \
+       /* Cheetah JBUS implementation. */              \
+       ldxa            [%g0] ASI_JBUS_CONFIG, REG;     \
+       srlx            REG, 17, REG;                   \
+       and             REG, 0x1f, REG;                 \
+       nop;                                            \
+       /* Starfire implementation. */                  \
+       sethi           %hi(0x1fff40000d0 >> 9), REG;   \
+       sllx            REG, 9, REG;                    \
+       or              REG, 0xd0, REG;                 \
+       lduwa           [REG] ASI_PHYS_BYPASS_EC_E, REG;\
+       /* sun4v implementation. */                     \
+       mov             SCRATCHPAD_CPUID, REG;          \
+       ldxa            [REG] ASI_SCRATCHPAD, REG;      \
+       nop;                                            \
+       nop;                                            \
+       .previous;
+
+#ifdef CONFIG_SMP
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)                \
+       __GET_CPUID(TMP)                        \
+       sethi   %hi(trap_block), DEST;          \
+       sllx    TMP, TRAP_BLOCK_SZ_SHIFT, TMP;  \
+       or      DEST, %lo(trap_block), DEST;    \
+       add     DEST, TMP, DEST;                \
+
+/* Clobbers TMP, loads current address space PGD phys address into DEST.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)          \
+       TRAP_LOAD_TRAP_BLOCK(DEST, TMP)         \
+       ldx     [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads the address of the local processor's irq_worklist_pa slot into DEST.  */
+#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)       \
+       TRAP_LOAD_TRAP_BLOCK(DEST, TMP)         \
+       add     DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
+
+/* Clobbers TMP, loads DEST with current thread info pointer.  */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)                \
+       TRAP_LOAD_TRAP_BLOCK(DEST, TMP)         \
+       ldx     [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST.  REG1, REG2, and REG3 are
  * clobbered.
+ *
+ * You absolutely cannot use DEST as a temporary in this code.  The
+ * reason is that traps can happen during execution, and return from
+ * trap will load the fully resolved DEST per-cpu base.  This can corrupt
+ * the calculations done by the macro mid-stream.
  */
-#ifdef CONFIG_SMP
-#define LOAD_PER_CPU_BASE(REG1, REG2)                  \
-       ldub    [%g6 + TI_CPU], REG1;                   \
-       sethi   %hi(__per_cpu_shift), %g5;              \
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
+       lduh    [THR + TI_CPU], REG1;                   \
+       sethi   %hi(__per_cpu_shift), REG3;             \
        sethi   %hi(__per_cpu_base), REG2;              \
-       ldx     [%g5 + %lo(__per_cpu_shift)], %g5;      \
+       ldx     [REG3 + %lo(__per_cpu_shift)], REG3;    \
        ldx     [REG2 + %lo(__per_cpu_base)], REG2;     \
-       sllx    REG1, %g5, %g5;                         \
-       add     %g5, REG2, %g5;
+       sllx    REG1, REG3, REG3;                       \
+       add     REG3, REG2, DEST;
+
 #else
-#define LOAD_PER_CPU_BASE(REG1, REG2)
-#endif
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)                \
+       sethi   %hi(trap_block), DEST;          \
+       or      DEST, %lo(trap_block), DEST;    \
+
+/* Uniprocessor versions: the cpuid is known to be zero, so TMP is unused.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)          \
+       TRAP_LOAD_TRAP_BLOCK(DEST, TMP)         \
+       ldx     [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* TMP is unused; loads the address of the local processor's irq_worklist_pa slot into DEST.  */
+#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)       \
+       TRAP_LOAD_TRAP_BLOCK(DEST, TMP)         \
+       add     DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
+
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)                \
+       TRAP_LOAD_TRAP_BLOCK(DEST, TMP)         \
+       ldx     [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* No per-cpu areas on uniprocessor, so no need to load DEST.  */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
+
+#endif /* !(CONFIG_SMP) */
 
 #endif /* _SPARC64_CPUDATA_H */