[POWERPC] powerpc: Instrument Hypervisor Calls
[safe/jmp/linux-2.6] / arch / powerpc / kernel / head_64.S
index 16ab40d..3065b47 100644 (file)
@@ -1,6 +1,4 @@
 /*
- *  arch/ppc64/kernel/head.S
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *
  *  2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/threads.h>
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
-#include <asm/systemcfg.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/bug.h>
@@ -88,34 +84,6 @@ END_FTR_SECTION(0, 1)
        /* Catch branch to 0 in real mode */
        trap
 
-#ifdef CONFIG_PPC_ISERIES
-       /*
-        * At offset 0x20, there is a pointer to iSeries LPAR data.
-        * This is required by the hypervisor
-        */
-       . = 0x20
-       .llong hvReleaseData-KERNELBASE
-
-       /*
-        * At offset 0x28 and 0x30 are offsets to the mschunks_map
-        * array (used by the iSeries LPAR debugger to do translation
-        * between physical addresses and absolute addresses) and
-        * to the pidhash table (also used by the debugger)
-        */
-       .llong mschunks_map-KERNELBASE
-       .llong 0        /* pidhash-KERNELBASE SFRXXX */
-
-       /* Offset 0x38 - Pointer to start of embedded System.map */
-       .globl  embedded_sysmap_start
-embedded_sysmap_start:
-       .llong  0
-       /* Offset 0x40 - Pointer to end of embedded System.map */
-       .globl  embedded_sysmap_end
-embedded_sysmap_end:
-       .llong  0
-
-#endif /* CONFIG_PPC_ISERIES */
-
        /* Secondary processors spin on this value until it goes to 1. */
        .globl  __secondary_hold_spinloop
 __secondary_hold_spinloop:
@@ -127,6 +95,15 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
        .llong  0x0
 
+#ifdef CONFIG_PPC_ISERIES
+       /*
+        * At offset 0x20, there is a pointer to iSeries LPAR data.
+        * This is required by the hypervisor
+        */
+       . = 0x20
+       .llong hvReleaseData-KERNELBASE
+#endif /* CONFIG_PPC_ISERIES */
+
        . = 0x60
 /*
  * The following code is used on pSeries to hold secondary processors
@@ -140,7 +117,7 @@ _GLOBAL(__secondary_hold)
        ori     r24,r24,MSR_RI
        mtmsrd  r24                     /* RI on */
 
-       /* Grab our linux cpu number */
+       /* Grab our physical cpu number */
        mr      r24,r3
 
        /* Tell the master cpu we're here */
@@ -154,16 +131,14 @@ _GLOBAL(__secondary_hold)
        cmpdi   0,r4,1
        bne     100b
 
-#ifdef CONFIG_HMT
-       b       .hmt_init
-#else
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+       LOAD_REG_IMMEDIATE(r4, .generic_secondary_smp_init)
+       mtctr   r4
        mr      r3,r24
-       b       .pSeries_secondary_smp_init
+       bctr
 #else
        BUG_OPCODE
 #endif
-#endif
 
 /* This value is used to mark exception frames on the stack. */
        .section ".toc","aw"
@@ -201,6 +176,52 @@ exception_marker:
 #define EX_R3          64
 #define EX_LR          72
 
+/*
+ * We're short on space and time in the exception prolog, so we can't
+ * use the normal SET_REG_IMMEDIATE macro. Normally we just need the
+ * low halfword of the address, but for Kdump we need the whole low
+ * word.
+ */
+#ifdef CONFIG_CRASH_DUMP
+#define LOAD_HANDLER(reg, label)                                       \
+       oris    reg,reg,(label)@h;      /* virt addr of handler ... */  \
+       ori     reg,reg,(label)@l;      /* .. and the rest */
+#else
+#define LOAD_HANDLER(reg, label)                                       \
+       ori     reg,reg,(label)@l;      /* virt addr of handler ... */
+#endif
+
+/*
+ * Equal to EXCEPTION_PROLOG_PSERIES, except that it forces 64bit mode.
+ * The firmware calls the registered system_reset_fwnmi and
+ * machine_check_fwnmi handlers in 32bit mode if the cpu happens to run
+ * a 32bit application at the time of the event.
+ * This firmware bug is present on POWER4 and JS20.
+ */
+#define EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(area, label)              \
+       mfspr   r13,SPRN_SPRG3;         /* get paca address into r13 */ \
+       std     r9,area+EX_R9(r13);     /* save r9 - r12 */             \
+       std     r10,area+EX_R10(r13);                                   \
+       std     r11,area+EX_R11(r13);                                   \
+       std     r12,area+EX_R12(r13);                                   \
+       mfspr   r9,SPRN_SPRG1;                                          \
+       std     r9,area+EX_R13(r13);                                    \
+       mfcr    r9;                                                     \
+       clrrdi  r12,r13,32;             /* get high part of &label */   \
+       mfmsr   r10;                                                    \
+       /* force 64bit mode */                                          \
+       li      r11,5;                  /* MSR_SF_LG|MSR_ISF_LG */      \
+       rldimi  r10,r11,61,0;           /* insert into top 3 bits */    \
+       /* done 64bit mode */                                           \
+       mfspr   r11,SPRN_SRR0;          /* save SRR0 */                 \
+       LOAD_HANDLER(r12,label)                                         \
+       ori     r10,r10,MSR_IR|MSR_DR|MSR_RI;                           \
+       mtspr   SPRN_SRR0,r12;                                          \
+       mfspr   r12,SPRN_SRR1;          /* and SRR1 */                  \
+       mtspr   SPRN_SRR1,r10;                                          \
+       rfid;                                                           \
+       b       .       /* prevent speculative execution */
+
 #define EXCEPTION_PROLOG_PSERIES(area, label)                          \
        mfspr   r13,SPRN_SPRG3;         /* get paca address into r13 */ \
        std     r9,area+EX_R9(r13);     /* save r9 - r12 */             \
@@ -213,7 +234,7 @@ exception_marker:
        clrrdi  r12,r13,32;             /* get high part of &label */   \
        mfmsr   r10;                                                    \
        mfspr   r11,SPRN_SRR0;          /* save SRR0 */                 \
-       ori     r12,r12,(label)@l;      /* virt addr of handler */      \
+       LOAD_HANDLER(r12,label)                                         \
        ori     r10,r10,MSR_IR|MSR_DR|MSR_RI;                           \
        mtspr   SPRN_SRR0,r12;                                          \
        mfspr   r12,SPRN_SRR1;          /* and SRR1 */                  \
@@ -237,8 +258,9 @@ exception_marker:
 
 #define EXCEPTION_PROLOG_ISERIES_2                                     \
        mfmsr   r10;                                                    \
-       ld      r11,PACALPPACA+LPPACASRR0(r13);                         \
-       ld      r12,PACALPPACA+LPPACASRR1(r13);                         \
+       ld      r12,PACALPPACAPTR(r13);                                 \
+       ld      r11,LPPACASRR0(r12);                                    \
+       ld      r12,LPPACASRR1(r12);                                    \
        ori     r10,r10,MSR_RI;                                         \
        mtmsrd  r10,1
 
@@ -266,6 +288,7 @@ exception_marker:
        std     r10,0(r1);              /* make stack chain pointer     */ \
        std     r0,GPR0(r1);            /* save r0 in stackframe        */ \
        std     r10,GPR1(r1);           /* save r1 in stackframe        */ \
+       ACCOUNT_CPU_USER_ENTRY(r9, r10);                                   \
        std     r2,GPR2(r1);            /* save r2 in stackframe        */ \
        SAVE_4GPRS(3, r1);              /* save r3 - r6 in stackframe   */ \
        SAVE_2GPRS(7, r1);              /* save r7, r8 in stackframe    */ \
@@ -302,15 +325,28 @@ exception_marker:
 label##_pSeries:                                       \
        HMT_MEDIUM;                                     \
        mtspr   SPRN_SPRG1,r13;         /* save r13 */  \
-       RUNLATCH_ON(r13);                               \
        EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
 
+#define HSTD_EXCEPTION_PSERIES(n, label)               \
+       . = n;                                          \
+       .globl label##_pSeries;                         \
+label##_pSeries:                                       \
+       HMT_MEDIUM;                                     \
+       mtspr   SPRN_SPRG1,r20;         /* save r20 */  \
+       mfspr   r20,SPRN_HSRR0;         /* copy HSRR0 to SRR0 */ \
+       mtspr   SPRN_SRR0,r20;                          \
+       mfspr   r20,SPRN_HSRR1;         /* copy HSRR0 to SRR0 */ \
+       mtspr   SPRN_SRR1,r20;                          \
+       mfspr   r20,SPRN_SPRG1;         /* restore r20 */ \
+       mtspr   SPRN_SPRG1,r13;         /* save r13 */  \
+       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+
+
 #define STD_EXCEPTION_ISERIES(n, label, area)          \
        .globl label##_iSeries;                         \
 label##_iSeries:                                       \
        HMT_MEDIUM;                                     \
        mtspr   SPRN_SPRG1,r13;         /* save r13 */  \
-       RUNLATCH_ON(r13);                               \
        EXCEPTION_PROLOG_ISERIES_1(area);               \
        EXCEPTION_PROLOG_ISERIES_2;                     \
        b       label##_common
@@ -320,7 +356,6 @@ label##_iSeries:                                    \
 label##_iSeries:                                                       \
        HMT_MEDIUM;                                                     \
        mtspr   SPRN_SPRG1,r13;         /* save r13 */                  \
-       RUNLATCH_ON(r13);                                               \
        EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN);                         \
        lbz     r10,PACAPROCENABLED(r13);                               \
        cmpwi   0,r10,0;                                                \
@@ -367,17 +402,54 @@ label##_common:                                           \
        bl      hdlr;                                   \
        b       .ret_from_except
 
+/*
+ * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
+ * in the idle task and therefore need the special idle handling.
+ */
+#define STD_EXCEPTION_COMMON_IDLE(trap, label, hdlr)   \
+       .align  7;                                      \
+       .globl label##_common;                          \
+label##_common:                                                \
+       EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);      \
+       FINISH_NAP;                                     \
+       DISABLE_INTS;                                   \
+       bl      .save_nvgprs;                           \
+       addi    r3,r1,STACK_FRAME_OVERHEAD;             \
+       bl      hdlr;                                   \
+       b       .ret_from_except
+
 #define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr)   \
        .align  7;                                      \
        .globl label##_common;                          \
 label##_common:                                                \
        EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);      \
+       FINISH_NAP;                                     \
        DISABLE_INTS;                                   \
+       bl      .ppc64_runlatch_on;                     \
        addi    r3,r1,STACK_FRAME_OVERHEAD;             \
        bl      hdlr;                                   \
        b       .ret_from_except_lite
 
 /*
+ * When the idle code in power4_idle puts the CPU into NAP mode,
+ * it has to do so in a loop, and relies on the external interrupt
+ * and decrementer interrupt entry code to get it out of the loop.
+ * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
+ * to signal that it is in the loop and needs help to get out.
+ */
+#ifdef CONFIG_PPC_970_NAP
+#define FINISH_NAP                             \
+BEGIN_FTR_SECTION                              \
+       clrrdi  r11,r1,THREAD_SHIFT;            \
+       ld      r9,TI_LOCAL_FLAGS(r11);         \
+       andi.   r10,r9,_TLF_NAPPING;            \
+       bnel    power4_fixup_nap;               \
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+#else
+#define FINISH_NAP
+#endif
+
+/*
  * Start of pSeries system interrupt routines
  */
        . = 0x100
@@ -390,7 +462,6 @@ __start_interrupts:
 _machine_check_pSeries:
        HMT_MEDIUM
        mtspr   SPRN_SPRG1,r13          /* save r13 */
-       RUNLATCH_ON(r13)
        EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
 
        . = 0x300
@@ -417,7 +488,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 data_access_slb_pSeries:
        HMT_MEDIUM
        mtspr   SPRN_SPRG1,r13
-       RUNLATCH_ON(r13)
        mfspr   r13,SPRN_SPRG3          /* get paca address into r13 */
        std     r3,PACA_EXSLB+EX_R3(r13)
        mfspr   r3,SPRN_DAR
@@ -443,7 +513,6 @@ data_access_slb_pSeries:
 instruction_access_slb_pSeries:
        HMT_MEDIUM
        mtspr   SPRN_SPRG1,r13
-       RUNLATCH_ON(r13)
        mfspr   r13,SPRN_SPRG3          /* get paca address into r13 */
        std     r3,PACA_EXSLB+EX_R3(r13)
        mfspr   r3,SPRN_SRR0            /* SRR0 is faulting address */
@@ -474,7 +543,6 @@ instruction_access_slb_pSeries:
        .globl  system_call_pSeries
 system_call_pSeries:
        HMT_MEDIUM
-       RUNLATCH_ON(r9)
        mr      r9,r13
        mfmsr   r10
        mfspr   r13,SPRN_SPRG3
@@ -502,8 +570,17 @@ system_call_pSeries:
 
        STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)
 
+#ifdef CONFIG_CBE_RAS
+       HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
+#endif /* CONFIG_CBE_RAS */
        STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
+#ifdef CONFIG_CBE_RAS
+       HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
+#endif /* CONFIG_CBE_RAS */
        STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
+#ifdef CONFIG_CBE_RAS
+       HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
+#endif /* CONFIG_CBE_RAS */
 
        . = 0x3000
 
@@ -554,18 +631,18 @@ slb_miss_user_pseries:
  * Vectors for the FWNMI option.  Share common code.
  */
        .globl system_reset_fwnmi
+      .align 7
 system_reset_fwnmi:
        HMT_MEDIUM
        mtspr   SPRN_SPRG1,r13          /* save r13 */
-       RUNLATCH_ON(r13)
-       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
+       EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(PACA_EXGEN, system_reset_common)
 
        .globl machine_check_fwnmi
+      .align 7
 machine_check_fwnmi:
        HMT_MEDIUM
        mtspr   SPRN_SPRG1,r13          /* save r13 */
-       RUNLATCH_ON(r13)
-       EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+       EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(PACA_EXMC, machine_check_common)
 
 #ifdef CONFIG_PPC_ISERIES
 /***  ISeries-LPAR interrupt handlers ***/
@@ -615,7 +692,8 @@ data_access_slb_iSeries:
        std     r12,PACA_EXSLB+EX_R12(r13)
        mfspr   r10,SPRN_SPRG1
        std     r10,PACA_EXSLB+EX_R13(r13)
-       ld      r12,PACALPPACA+LPPACASRR1(r13);
+       ld      r12,PACALPPACAPTR(r13)
+       ld      r12,LPPACASRR1(r12)
        b       .slb_miss_realmode
 
        STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
@@ -625,7 +703,8 @@ instruction_access_slb_iSeries:
        mtspr   SPRN_SPRG1,r13          /* save r13 */
        mfspr   r13,SPRN_SPRG3          /* get paca address into r13 */
        std     r3,PACA_EXSLB+EX_R3(r13)
-       ld      r3,PACALPPACA+LPPACASRR0(r13)   /* get SRR0 value */
+       ld      r3,PACALPPACAPTR(r13)
+       ld      r3,LPPACASRR0(r3)       /* get SRR0 value */
        std     r9,PACA_EXSLB+EX_R9(r13)
        mfcr    r9
 #ifdef __DISABLED__
@@ -637,7 +716,8 @@ instruction_access_slb_iSeries:
        std     r12,PACA_EXSLB+EX_R12(r13)
        mfspr   r10,SPRN_SPRG1
        std     r10,PACA_EXSLB+EX_R13(r13)
-       ld      r12,PACALPPACA+LPPACASRR1(r13);
+       ld      r12,PACALPPACAPTR(r13)
+       ld      r12,LPPACASRR1(r12)
        b       .slb_miss_realmode
 
 #ifdef __DISABLED__
@@ -694,7 +774,7 @@ system_reset_iSeries:
        lbz     r23,PACAPROCSTART(r13)  /* Test if this processor
                                         * should start */
        sync
-       LOADADDR(r3,current_set)
+       LOAD_REG_IMMEDIATE(r3,current_set)
        sldi    r28,r24,3               /* get current_set[cpu#] */
        ldx     r3,r3,r28
        addi    r1,r3,THREAD_SIZE
@@ -725,17 +805,21 @@ iSeries_secondary_smp_loop:
 
        .globl decrementer_iSeries_masked
 decrementer_iSeries_masked:
+       /* We may not have a valid TOC pointer in here. */
        li      r11,1
-       stb     r11,PACALPPACA+LPPACADECRINT(r13)
-       lwz     r12,PACADEFAULTDECR(r13)
+       ld      r12,PACALPPACAPTR(r13)
+       stb     r11,LPPACADECRINT(r12)
+       LOAD_REG_IMMEDIATE(r12, tb_ticks_per_jiffy)
+       lwz     r12,0(r12)
        mtspr   SPRN_DEC,r12
        /* fall through */
 
        .globl hardware_interrupt_iSeries_masked
 hardware_interrupt_iSeries_masked:
        mtcrf   0x80,r9         /* Restore regs */
-       ld      r11,PACALPPACA+LPPACASRR0(r13)
-       ld      r12,PACALPPACA+LPPACASRR1(r13)
+       ld      r12,PACALPPACAPTR(r13)
+       ld      r11,LPPACASRR0(r12)
+       ld      r12,LPPACASRR1(r12)
        mtspr   SPRN_SRR0,r11
        mtspr   SPRN_SRR1,r12
        ld      r9,PACA_EXGEN+EX_R9(r13)
@@ -759,6 +843,7 @@ hardware_interrupt_iSeries_masked:
        .globl machine_check_common
 machine_check_common:
        EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+       FINISH_NAP
        DISABLE_INTS
        bl      .save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
@@ -770,13 +855,18 @@ machine_check_common:
        STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
        STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
        STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
-       STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception)
+       STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
        STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
 #ifdef CONFIG_ALTIVEC
        STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
 #else
        STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
 #endif
+#ifdef CONFIG_CBE_RAS
+       STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
+       STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
+       STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
+#endif /* CONFIG_CBE_RAS */
 
 /*
  * Here we have detected that the kernel stack pointer is bad.
@@ -832,6 +922,14 @@ fast_exception_return:
        ld      r11,_NIP(r1)
        andi.   r3,r12,MSR_RI           /* check if RI is set */
        beq-    unrecov_fer
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+       andi.   r3,r12,MSR_PR
+       beq     2f
+       ACCOUNT_CPU_USER_EXIT(r3, r4)
+2:
+#endif
+
        ld      r3,_CCR(r1)
        ld      r4,_LINK(r1)
        ld      r5,_CTR(r1)
@@ -868,7 +966,6 @@ unrecov_fer:
        .align  7
        .globl data_access_common
 data_access_common:
-       RUNLATCH_ON(r10)                /* It wont fit in the 0x300 handler */
        mfspr   r10,SPRN_DAR
        std     r10,PACA_EXGEN+EX_DAR(r13)
        mfspr   r10,SPRN_DSISR
@@ -974,7 +1071,8 @@ _GLOBAL(slb_miss_realmode)
        ld      r3,PACA_EXSLB+EX_R3(r13)
        lwz     r9,PACA_EXSLB+EX_CCR(r13)       /* get saved CR */
 #ifdef CONFIG_PPC_ISERIES
-       ld      r11,PACALPPACA+LPPACASRR0(r13)  /* get SRR0 value */
+       ld      r11,PACALPPACAPTR(r13)
+       ld      r11,LPPACASRR0(r11)             /* get SRR0 value */
 #endif /* CONFIG_PPC_ISERIES */
 
        mtlr    r10
@@ -1013,12 +1111,23 @@ unrecov_slb:
        .globl hardware_interrupt_entry
 hardware_interrupt_common:
        EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
+       FINISH_NAP
 hardware_interrupt_entry:
        DISABLE_INTS
+       bl      .ppc64_runlatch_on
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      .do_IRQ
        b       .ret_from_except_lite
 
+#ifdef CONFIG_PPC_970_NAP
+power4_fixup_nap:
+       andc    r9,r9,r10
+       std     r9,TI_LOCAL_FLAGS(r11)
+       ld      r10,_LINK(r1)           /* make idle task do the */
+       std     r10,_NIP(r1)            /* equivalent of a blr */
+       blr
+#endif
+
        .align  7
        .globl alignment_common
 alignment_common:
@@ -1346,7 +1455,7 @@ _GLOBAL(do_stab_bolted)
  * fixed address (the linker can't compute (u64)&initial_stab >>
  * PAGE_SHIFT).
  */
-       . = STAB0_PHYS_ADDR     /* 0x6000 */
+       . = STAB0_OFFSET        /* 0x6000 */
        .globl initial_stab
 initial_stab:
        .space  4096
@@ -1375,24 +1484,22 @@ fwnmi_data_area:
         . = 0x8000
 
 /*
- * On pSeries, secondary processors spin in the following code.
+ * On pSeries and most other platforms, secondary processors spin
+ * in the following code.
  * At entry, r3 = this processor's number (physical cpu id)
  */
-_GLOBAL(pSeries_secondary_smp_init)
+_GLOBAL(generic_secondary_smp_init)
        mr      r24,r3
        
        /* turn on 64-bit mode */
        bl      .enable_64b_mode
        isync
 
-       /* Copy some CPU settings from CPU 0 */
-       bl      .__restore_cpu_setup
-
        /* Set up a paca value for this processor. Since we have the
         * physical cpu id in r24, we need to search the pacas to find
         * which logical id maps to our physical one.
         */
-       LOADADDR(r13, paca)             /* Get base vaddr of paca array  */
+       LOAD_REG_IMMEDIATE(r13, paca)   /* Get base vaddr of paca array  */
        li      r5,0                    /* logical cpu id                */
 1:     lhz     r6,PACAHWCPUID(r13)     /* Load HW procid from paca      */
        cmpw    r6,r24                  /* Compare to our id             */
@@ -1413,21 +1520,34 @@ _GLOBAL(pSeries_secondary_smp_init)
                                        /* start.                        */
        sync
 
-       /* Create a temp kernel stack for use before relocation is on.  */
+#ifndef CONFIG_SMP
+       b       3b                      /* Never go on non-SMP           */
+#else
+       cmpwi   0,r23,0
+       beq     3b                      /* Loop until told to go         */
+
+       /* See if we need to call a cpu state restore handler */
+       LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
+       ld      r23,0(r23)
+       ld      r23,CPU_SPEC_RESTORE(r23)
+       cmpdi   0,r23,0
+       beq     4f
+       ld      r23,0(r23)
+       mtctr   r23
+       bctrl
+
+4:     /* Create a temp kernel stack for use before relocation is on.  */
        ld      r1,PACAEMERGSP(r13)
        subi    r1,r1,STACK_FRAME_OVERHEAD
 
-       cmpwi   0,r23,0
-#ifdef CONFIG_SMP
-       bne     .__secondary_start
+       b       .__secondary_start
 #endif
-       b       3b                      /* Loop until told to go         */
 
 #ifdef CONFIG_PPC_ISERIES
 _STATIC(__start_initialization_iSeries)
        /* Clear out the BSS */
-       LOADADDR(r11,__bss_stop)
-       LOADADDR(r8,__bss_start)
+       LOAD_REG_IMMEDIATE(r11,__bss_stop)
+       LOAD_REG_IMMEDIATE(r8,__bss_start)
        sub     r11,r11,r8              /* bss size                     */
        addi    r11,r11,7               /* round up to an even double word */
        rldicl. r11,r11,61,3            /* shift right by 3             */
@@ -1438,17 +1558,17 @@ _STATIC(__start_initialization_iSeries)
 3:     stdu    r0,8(r8)
        bdnz    3b
 4:
-       LOADADDR(r1,init_thread_union)
+       LOAD_REG_IMMEDIATE(r1,init_thread_union)
        addi    r1,r1,THREAD_SIZE
        li      r0,0
        stdu    r0,-STACK_FRAME_OVERHEAD(r1)
 
-       LOADADDR(r3,cpu_specs)
-       LOADADDR(r4,cur_cpu_spec)
+       LOAD_REG_IMMEDIATE(r3,cpu_specs)
+       LOAD_REG_IMMEDIATE(r4,cur_cpu_spec)
        li      r5,0
        bl      .identify_cpu
 
-       LOADADDR(r2,__toc_start)
+       LOAD_REG_IMMEDIATE(r2,__toc_start)
        addi    r2,r2,0x4000
        addi    r2,r2,0x4000
 
@@ -1486,11 +1606,13 @@ _STATIC(__mmu_off)
  *
  */
 _GLOBAL(__start_initialization_multiplatform)
+#ifdef CONFIG_PPC_MULTIPLATFORM
        /*
         * Are we booted from a PROM Of-type client-interface ?
         */
        cmpldi  cr0,r5,0
        bne     .__boot_from_prom               /* yes -> prom */
+#endif
 
        /* Save parameters */
        mr      r31,r3
@@ -1500,17 +1622,24 @@ _GLOBAL(__start_initialization_multiplatform)
        bl      .enable_64b_mode
 
        /* Setup some critical 970 SPRs before switching MMU off */
-       bl      .__970_cpu_preinit
-
-       /* cpu # */
-       li      r24,0
+       mfspr   r0,SPRN_PVR
+       srwi    r0,r0,16
+       cmpwi   r0,0x39         /* 970 */
+       beq     1f
+       cmpwi   r0,0x3c         /* 970FX */
+       beq     1f
+       cmpwi   r0,0x44         /* 970MP */
+       bne     2f
+1:     bl      .__cpu_preinit_ppc970
+2:
 
        /* Switch off MMU if not already */
-       LOADADDR(r4, .__after_prom_start - KERNELBASE)
+       LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
        add     r4,r4,r30
        bl      .__mmu_off
        b       .__after_prom_start
 
+#ifdef CONFIG_PPC_MULTIPLATFORM
 _STATIC(__boot_from_prom)
        /* Save parameters */
        mr      r31,r3
@@ -1519,13 +1648,20 @@ _STATIC(__boot_from_prom)
        mr      r28,r6
        mr      r27,r7
 
+       /*
+        * Align the stack to 16-byte boundary
+        * Depending on the size and layout of the ELF sections in the initial
+        * boot binary, the stack pointer will be unalignet on PowerMac
+        */
+       rldicr  r1,r1,0,59
+
        /* Make sure we are running in 64 bits mode */
        bl      .enable_64b_mode
 
        /* put a relocation offset into r3 */
        bl      .reloc_offset
 
-       LOADADDR(r2,__toc_start)
+       LOAD_REG_IMMEDIATE(r2,__toc_start)
        addi    r2,r2,0x4000
        addi    r2,r2,0x4000
 
@@ -1543,6 +1679,7 @@ _STATIC(__boot_from_prom)
        bl      .prom_init
        /* We never return */
        trap
+#endif
 
 /*
  * At this point, r3 contains the physical address we are running at,
@@ -1551,7 +1688,7 @@ _STATIC(__boot_from_prom)
 _STATIC(__after_prom_start)
 
 /*
- * We need to run with __start at physical address 0.
+ * We need to run with __start at physical address PHYSICAL_START.
  * This will leave some code in the first 256B of
  * real memory, which are reserved for software use.
  * The remainder of the first page is loaded with the fixed
@@ -1564,9 +1701,9 @@ _STATIC(__after_prom_start)
  */
        bl      .reloc_offset
        mr      r26,r3
-       SET_REG_TO_CONST(r27,KERNELBASE)
+       LOAD_REG_IMMEDIATE(r27, KERNELBASE)
 
-       li      r3,0                    /* target addr */
+       LOAD_REG_IMMEDIATE(r3, PHYSICAL_START)  /* target addr */
 
        // XXX FIXME: Use phys returned by OF (r30)
        add     r4,r27,r26              /* source addr                   */
@@ -1574,7 +1711,10 @@ _STATIC(__after_prom_start)
                                        /*   i.e. where we are running   */
                                        /*      the source addr          */
 
-       LOADADDR(r5,copy_to_here)       /* # bytes of memory to copy     */
+       cmpdi   r4,0                    /* In some cases the loader may  */
+       beq     .start_here_multiplatform /* have already put us at zero */
+                                       /* so we can skip the copy.      */
+       LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */
        sub     r5,r5,r27
 
        li      r6,0x100                /* Start offset, the first 0x100 */
@@ -1584,11 +1724,11 @@ _STATIC(__after_prom_start)
                                        /* this includes the code being  */
                                        /* executed here.                */
 
-       LOADADDR(r0, 4f)                /* Jump to the copy of this code */
+       LOAD_REG_IMMEDIATE(r0, 4f)      /* Jump to the copy of this code */
        mtctr   r0                      /* that we just made/relocated   */
        bctr
 
-4:     LOADADDR(r5,klimit)
+4:     LOAD_REG_IMMEDIATE(r5,klimit)
        add     r5,r5,r26
        ld      r5,0(r5)                /* get the value of klimit */
        sub     r5,r5,r27
@@ -1608,7 +1748,7 @@ _STATIC(__after_prom_start)
 _GLOBAL(copy_and_flush)
        addi    r5,r5,-8
        addi    r6,r6,-8
-4:     li      r0,16                   /* Use the least common         */
+4:     li      r0,8                    /* Use the smallest common      */
                                        /* denominator cache line       */
                                        /* size.  This results in       */
                                        /* extra cache line flushes     */
@@ -1662,7 +1802,7 @@ _GLOBAL(pmac_secondary_start)
        isync
 
        /* Copy some CPU settings from CPU 0 */
-       bl      .__restore_cpu_setup
+       bl      .__restore_cpu_ppc970
 
        /* pSeries do that early though I don't think we really need it */
        mfmsr   r3
@@ -1670,7 +1810,7 @@ _GLOBAL(pmac_secondary_start)
        mtmsrd  r3                      /* RI on */
 
        /* Set up a paca value for this processor. */
-       LOADADDR(r4, paca)               /* Get base vaddr of paca array        */
+       LOAD_REG_IMMEDIATE(r4, paca)    /* Get base vaddr of paca array */
        mulli   r13,r24,PACA_SIZE        /* Calculate vaddr of right paca */
        add     r13,r13,r4              /* for this processor.          */
        mtspr   SPRN_SPRG3,r13           /* Save vaddr of paca in SPRG3 */
@@ -1697,70 +1837,29 @@ _GLOBAL(pmac_secondary_start)
  *   SPRG3 = paca virtual address
  */
 _GLOBAL(__secondary_start)
+       /* Set thread priority to MEDIUM */
+       HMT_MEDIUM
 
-       HMT_MEDIUM                      /* Set thread priority to MEDIUM */
-
+       /* Load TOC */
        ld      r2,PACATOC(r13)
-       li      r6,0
-       stb     r6,PACAPROCENABLED(r13)
-
-#ifndef CONFIG_PPC_ISERIES
-       /* Initialize the page table pointer register. */
-       LOADADDR(r6,_SDR1)
-       ld      r6,0(r6)                /* get the value of _SDR1        */
-       mtspr   SPRN_SDR1,r6                    /* set the htab location         */
-#endif
-       /* Initialize the first segment table (or SLB) entry             */
-       ld      r3,PACASTABVIRT(r13)    /* get addr of segment table     */
-BEGIN_FTR_SECTION
-       bl      .stab_initialize
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-       bl      .slb_initialize
+
+       /* Do early setup for that CPU (stab, slb, hash table pointer) */
+       bl      .early_setup_secondary
 
        /* Initialize the kernel stack.  Just a repeat for iSeries.      */
-       LOADADDR(r3,current_set)
+       LOAD_REG_ADDR(r3, current_set)
        sldi    r28,r24,3               /* get current_set[cpu#]         */
        ldx     r1,r3,r28
        addi    r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
        std     r1,PACAKSAVE(r13)
 
-       ld      r3,PACASTABREAL(r13)    /* get raddr of segment table    */
-       ori     r4,r3,1                 /* turn on valid bit             */
-
-#ifdef CONFIG_PPC_ISERIES
-       li      r0,-1                   /* hypervisor call */
-       li      r3,1
-       sldi    r3,r3,63                /* 0x8000000000000000 */
-       ori     r3,r3,4                 /* 0x8000000000000004 */
-       sc                              /* HvCall_setASR */
-#else
-       /* set the ASR */
-       ld      r3,systemcfg@got(r2)    /* r3 = ptr to systemcfg         */
-       ld      r3,0(r3)
-       lwz     r3,PLATFORM(r3)         /* r3 = platform flags           */
-       andi.   r3,r3,PLATFORM_LPAR     /* Test if bit 0 is set (LPAR bit) */
-       beq     98f                     /* branch if result is 0  */
-       mfspr   r3,SPRN_PVR
-       srwi    r3,r3,16
-       cmpwi   r3,0x37                 /* SStar  */
-       beq     97f
-       cmpwi   r3,0x36                 /* IStar  */
-       beq     97f
-       cmpwi   r3,0x34                 /* Pulsar */
-       bne     98f
-97:    li      r3,H_SET_ASR            /* hcall = H_SET_ASR */
-       HVSC                            /* Invoking hcall */
-       b       99f
-98:                                    /* !(rpa hypervisor) || !(star)  */
-       mtasr   r4                      /* set the stab location         */
-99:
-#endif
+       /* Clear backchain so we get nice backtraces */
        li      r7,0
        mtlr    r7
 
        /* enable MMU and jump to start_secondary */
-       LOADADDR(r3,.start_secondary_prolog)
-       SET_REG_TO_CONST(r4, MSR_KERNEL)
+       LOAD_REG_ADDR(r3, .start_secondary_prolog)
+       LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
 #ifdef DO_SOFT_DISABLE
        ori     r4,r4,MSR_EE
 #endif
@@ -1777,6 +1876,7 @@ _GLOBAL(start_secondary_prolog)
        li      r3,0
        std     r3,0(r1)                /* Zero the stack frame pointer */
        bl      .start_secondary
+       b       .
 #endif
 
 /*
@@ -1808,8 +1908,8 @@ _STATIC(start_here_multiplatform)
         * be detached from the kernel completely. Besides, we need
         * to clear it now for kexec-style entry.
         */
-       LOADADDR(r11,__bss_stop)
-       LOADADDR(r8,__bss_start)
+       LOAD_REG_IMMEDIATE(r11,__bss_stop)
+       LOAD_REG_IMMEDIATE(r8,__bss_start)
        sub     r11,r11,r8              /* bss size                     */
        addi    r11,r11,7               /* round up to an even double word */
        rldicl. r11,r11,61,3            /* shift right by 3             */
@@ -1825,29 +1925,13 @@ _STATIC(start_here_multiplatform)
        ori     r6,r6,MSR_RI
        mtmsrd  r6                      /* RI on */
 
-#ifdef CONFIG_HMT
-       /* Start up the second thread on cpu 0 */
-       mfspr   r3,SPRN_PVR
-       srwi    r3,r3,16
-       cmpwi   r3,0x34                 /* Pulsar  */
-       beq     90f
-       cmpwi   r3,0x36                 /* Icestar */
-       beq     90f
-       cmpwi   r3,0x37                 /* SStar   */
-       beq     90f
-       b       91f                     /* HMT not supported */
-90:    li      r3,0
-       bl      .hmt_start_secondary
-91:
-#endif
-
        /* The following gets the stack and TOC set up with the regs */
        /* pointing to the real addr of the kernel stack.  This is   */
        /* all done to support the C function call below which sets  */
        /* up the htab.  This is done because we have relocated the  */
        /* kernel but are still running in real mode. */
 
-       LOADADDR(r3,init_thread_union)
+       LOAD_REG_IMMEDIATE(r3,init_thread_union)
        add     r3,r3,r26
 
        /* set up a stack pointer (physical address) */
@@ -1856,39 +1940,18 @@ _STATIC(start_here_multiplatform)
        stdu    r0,-STACK_FRAME_OVERHEAD(r1)
 
        /* set up the TOC (physical address) */
-       LOADADDR(r2,__toc_start)
+       LOAD_REG_IMMEDIATE(r2,__toc_start)
        addi    r2,r2,0x4000
        addi    r2,r2,0x4000
        add     r2,r2,r26
 
-       LOADADDR(r3,cpu_specs)
+       LOAD_REG_IMMEDIATE(r3, cpu_specs)
        add     r3,r3,r26
-       LOADADDR(r4,cur_cpu_spec)
+       LOAD_REG_IMMEDIATE(r4,cur_cpu_spec)
        add     r4,r4,r26
        mr      r5,r26
        bl      .identify_cpu
 
-       /* Save some low level config HIDs of CPU0 to be copied to
-        * other CPUs later on, or used for suspend/resume
-        */
-       bl      .__save_cpu_setup
-       sync
-
-       /* Setup a valid physical PACA pointer in SPRG3 for early_setup
-        * note that boot_cpuid can always be 0 nowadays since there is
-        * nowhere it can be initialized differently before we reach this
-        * code
-        */
-       LOADADDR(r27, boot_cpuid)
-       add     r27,r27,r26
-       lwz     r27,0(r27)
-
-       LOADADDR(r24, paca)             /* Get base vaddr of paca array  */
-       mulli   r13,r27,PACA_SIZE       /* Calculate vaddr of right paca */
-       add     r13,r13,r24             /* for this processor.           */
-       add     r13,r13,r26             /* convert to physical addr      */
-       mtspr   SPRN_SPRG3,r13          /* PPPBBB: Temp... -Peter */
-       
        /* Do very early kernel initializations, including initial hash table,
         * stab and slb setup before we turn on relocation.     */
 
@@ -1896,42 +1959,8 @@ _STATIC(start_here_multiplatform)
        mr      r3,r31
        bl      .early_setup
 
-       /* set the ASR */
-       ld      r3,PACASTABREAL(r13)
-       ori     r4,r3,1                 /* turn on valid bit             */
-       ld      r3,systemcfg@got(r2)    /* r3 = ptr to systemcfg */
-       ld      r3,0(r3)
-       lwz     r3,PLATFORM(r3)         /* r3 = platform flags */
-       andi.   r3,r3,PLATFORM_LPAR     /* Test if bit 0 is set (LPAR bit) */
-       beq     98f                     /* branch if result is 0  */
-       mfspr   r3,SPRN_PVR
-       srwi    r3,r3,16
-       cmpwi   r3,0x37                 /* SStar */
-       beq     97f
-       cmpwi   r3,0x36                 /* IStar  */
-       beq     97f
-       cmpwi   r3,0x34                 /* Pulsar */
-       bne     98f
-97:    li      r3,H_SET_ASR            /* hcall = H_SET_ASR */
-       HVSC                            /* Invoking hcall */
-       b       99f
-98:                                    /* !(rpa hypervisor) || !(star) */
-       mtasr   r4                      /* set the stab location        */
-99:
-       /* Set SDR1 (hash table pointer) */
-       ld      r3,systemcfg@got(r2)    /* r3 = ptr to systemcfg */
-       ld      r3,0(r3)
-       lwz     r3,PLATFORM(r3)         /* r3 = platform flags */
-       /* Test if bit 0 is set (LPAR bit) */
-       andi.   r3,r3,PLATFORM_LPAR
-       bne     98f                     /* branch if result is !0  */
-       LOADADDR(r6,_SDR1)              /* Only if NOT LPAR */
-       add     r6,r6,r26
-       ld      r6,0(r6)                /* get the value of _SDR1 */
-       mtspr   SPRN_SDR1,r6                    /* set the htab location  */
-98: 
-       LOADADDR(r3,.start_here_common)
-       SET_REG_TO_CONST(r4, MSR_KERNEL)
+       LOAD_REG_IMMEDIATE(r3, .start_here_common)
+       LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
        mtspr   SPRN_SRR0,r3
        mtspr   SPRN_SRR1,r4
        rfid
@@ -1945,7 +1974,7 @@ _STATIC(start_here_common)
        /* The following code sets up the SP and TOC now that we are */
        /* running with translation enabled. */
 
-       LOADADDR(r3,init_thread_union)
+       LOAD_REG_IMMEDIATE(r3,init_thread_union)
 
        /* set up the stack */
        addi    r1,r3,THREAD_SIZE
@@ -1958,16 +1987,8 @@ _STATIC(start_here_common)
        li      r3,0
        bl      .do_cpu_ftr_fixups
 
-       LOADADDR(r26, boot_cpuid)
-       lwz     r26,0(r26)
-
-       LOADADDR(r24, paca)             /* Get base vaddr of paca array  */
-       mulli   r13,r26,PACA_SIZE       /* Calculate vaddr of right paca */
-       add     r13,r13,r24             /* for this processor.           */
-       mtspr   SPRN_SPRG3,r13
-
        /* ptr to current */
-       LOADADDR(r4,init_task)
+       LOAD_REG_IMMEDIATE(r4, init_task)
        std     r4,PACACURRENT(r13)
 
        /* Load the TOC */
@@ -1988,77 +2009,8 @@ _STATIC(start_here_common)
 
        bl .start_kernel
 
-_GLOBAL(hmt_init)
-#ifdef CONFIG_HMT
-       LOADADDR(r5, hmt_thread_data)
-       mfspr   r7,SPRN_PVR
-       srwi    r7,r7,16
-       cmpwi   r7,0x34                 /* Pulsar  */
-       beq     90f
-       cmpwi   r7,0x36                 /* Icestar */
-       beq     91f
-       cmpwi   r7,0x37                 /* SStar   */
-       beq     91f
-       b       101f
-90:    mfspr   r6,SPRN_PIR
-       andi.   r6,r6,0x1f
-       b       92f
-91:    mfspr   r6,SPRN_PIR
-       andi.   r6,r6,0x3ff
-92:    sldi    r4,r24,3
-       stwx    r6,r5,r4
-       bl      .hmt_start_secondary
-       b       101f
-
-__hmt_secondary_hold:
-       LOADADDR(r5, hmt_thread_data)
-       clrldi  r5,r5,4
-       li      r7,0
-       mfspr   r6,SPRN_PIR
-       mfspr   r8,SPRN_PVR
-       srwi    r8,r8,16
-       cmpwi   r8,0x34
-       bne     93f
-       andi.   r6,r6,0x1f
-       b       103f
-93:    andi.   r6,r6,0x3f
-
-103:   lwzx    r8,r5,r7
-       cmpw    r8,r6
-       beq     104f
-       addi    r7,r7,8
-       b       103b
-
-104:   addi    r7,r7,4
-       lwzx    r9,r5,r7
-       mr      r24,r9
-101:
-#endif
-       mr      r3,r24
-       b       .pSeries_secondary_smp_init
-
-#ifdef CONFIG_HMT
-_GLOBAL(hmt_start_secondary)
-       LOADADDR(r4,__hmt_secondary_hold)
-       clrldi  r4,r4,4
-       mtspr   SPRN_NIADORM, r4
-       mfspr   r4, SPRN_MSRDORM
-       li      r5, -65
-       and     r4, r4, r5
-       mtspr   SPRN_MSRDORM, r4
-       lis     r4,0xffef
-       ori     r4,r4,0x7403
-       mtspr   SPRN_TSC, r4
-       li      r4,0x1f4
-       mtspr   SPRN_TST, r4
-       mfspr   r4, SPRN_HID0
-       ori     r4, r4, 0x1
-       mtspr   SPRN_HID0, r4
-       mfspr   r4, SPRN_CTRLF
-       oris    r4, r4, 0x40
-       mtspr   SPRN_CTRLT, r4
-       blr
-#endif
+       /* Not reached */
+       BUG_OPCODE
 
 /*
  * We put a few things here that have to be page-aligned.