ARM: Fix decompressor's kernel size estimation for ROM=y
authorRussell King <rmk+kernel@arm.linux.org.uk>
Thu, 25 Feb 2010 23:56:38 +0000 (23:56 +0000)
committerRussell King <rmk+kernel@arm.linux.org.uk>
Fri, 26 Feb 2010 00:10:47 +0000 (00:10 +0000)
Commit 2552fc2 changed the way the decompressor decides if it is safe
to decompress the kernel directly to its final location.  Unfortunately,
it took the top of the compressed data as being the stack pointer,
which it is for ROM=n cases.  However, for ROM=y, the stack pointer
is not relevant, and results in the wrong answer.

Fix this by explicitly storing the end of the biggybacked data in the
decompressor, and use that to calculate the compressed image size.

CC: <stable@kernel.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
arch/arm/boot/compressed/head.S
arch/arm/boot/compressed/vmlinux.lds.in

index 4fddc50..6b84a04 100644 (file)
@@ -170,8 +170,8 @@ not_angel:
 
                .text
                adr     r0, LC0
- ARM(          ldmia   r0, {r1, r2, r3, r4, r5, r6, ip, sp}    )
- THUMB(                ldmia   r0, {r1, r2, r3, r4, r5, r6, ip}        )
+ ARM(          ldmia   r0, {r1, r2, r3, r4, r5, r6, r11, ip, sp})
+ THUMB(                ldmia   r0, {r1, r2, r3, r4, r5, r6, r11, ip}   )
  THUMB(                ldr     sp, [r0, #28]                           )
                subs    r0, r0, r1              @ calculate the delta offset
 
@@ -182,12 +182,13 @@ not_angel:
                /*
                 * We're running at a different address.  We need to fix
                 * up various pointers:
-                *   r5 - zImage base address
-                *   r6 - GOT start
+                *   r5 - zImage base address (_start)
+                *   r6 - size of decompressed image
+                *   r11 - GOT start
                 *   ip - GOT end
                 */
                add     r5, r5, r0
-               add     r6, r6, r0
+               add     r11, r11, r0
                add     ip, ip, r0
 
 #ifndef CONFIG_ZBOOT_ROM
@@ -205,10 +206,10 @@ not_angel:
                /*
                 * Relocate all entries in the GOT table.
                 */
-1:             ldr     r1, [r6, #0]            @ relocate entries in the GOT
+1:             ldr     r1, [r11, #0]           @ relocate entries in the GOT
                add     r1, r1, r0              @ table.  This fixes up the
-               str     r1, [r6], #4            @ C references.
-               cmp     r6, ip
+               str     r1, [r11], #4           @ C references.
+               cmp     r11, ip
                blo     1b
 #else
 
@@ -216,12 +217,12 @@ not_angel:
                 * Relocate entries in the GOT table.  We only relocate
                 * the entries that are outside the (relocated) BSS region.
                 */
-1:             ldr     r1, [r6, #0]            @ relocate entries in the GOT
+1:             ldr     r1, [r11, #0]           @ relocate entries in the GOT
                cmp     r1, r2                  @ entry < bss_start ||
                cmphs   r3, r1                  @ _end < entry
                addlo   r1, r1, r0              @ table.  This fixes up the
-               str     r1, [r6], #4            @ C references.
-               cmp     r6, ip
+               str     r1, [r11], #4           @ C references.
+               cmp     r11, ip
                blo     1b
 #endif
 
@@ -247,6 +248,7 @@ not_relocated:      mov     r0, #0
  * Check to see if we will overwrite ourselves.
  *   r4 = final kernel address
  *   r5 = start of this image
+ *   r6 = size of decompressed image
  *   r2 = end of malloc space (and therefore this image)
  * We basically want:
  *   r4 >= r2 -> OK
@@ -254,8 +256,7 @@ not_relocated:      mov     r0, #0
  */
                cmp     r4, r2
                bhs     wont_overwrite
-               sub     r3, sp, r5              @ > compressed kernel size
-               add     r0, r4, r3, lsl #2      @ allow for 4x expansion
+               add     r0, r4, r6
                cmp     r0, r5
                bls     wont_overwrite
 
@@ -271,7 +272,6 @@ not_relocated:      mov     r0, #0
  * r1-r3  = unused
  * r4     = kernel execution address
  * r5     = decompressed kernel start
- * r6     = processor ID
  * r7     = architecture ID
  * r8     = atags pointer
  * r9-r12,r14 = corrupted
@@ -312,7 +312,8 @@ LC0:                .word   LC0                     @ r1
                .word   _end                    @ r3
                .word   zreladdr                @ r4
                .word   _start                  @ r5
-               .word   _got_start              @ r6
+               .word   _image_size             @ r6
+               .word   _got_start              @ r11
                .word   _got_end                @ ip
                .word   user_stack+4096         @ sp
 LC1:           .word   reloc_end - reloc_start
@@ -336,7 +337,6 @@ params:             ldr     r0, =params_phys
  *
  * On entry,
  *  r4 = kernel execution address
- *  r6 = processor ID
  *  r7 = architecture number
  *  r8 = atags pointer
  *  r9 = run-time address of "start"  (???)
@@ -542,7 +542,6 @@ __common_mmu_cache_on:
  * r1-r3  = unused
  * r4     = kernel execution address
  * r5     = decompressed kernel start
- * r6     = processor ID
  * r7     = architecture ID
  * r8     = atags pointer
  * r9-r12,r14 = corrupted
@@ -581,19 +580,19 @@ call_kernel:      bl      cache_clean_flush
  *  r1  = corrupted
  *  r2  = corrupted
  *  r3  = block offset
- *  r6  = corrupted
+ *  r9  = corrupted
  *  r12 = corrupted
  */
 
 call_cache_fn: adr     r12, proc_types
 #ifdef CONFIG_CPU_CP15
-               mrc     p15, 0, r6, c0, c0      @ get processor ID
+               mrc     p15, 0, r9, c0, c0      @ get processor ID
 #else
-               ldr     r6, =CONFIG_PROCESSOR_ID
+               ldr     r9, =CONFIG_PROCESSOR_ID
 #endif
 1:             ldr     r1, [r12, #0]           @ get value
                ldr     r2, [r12, #4]           @ get mask
-               eor     r1, r1, r6              @ (real ^ match)
+               eor     r1, r1, r9              @ (real ^ match)
                tst     r1, r2                  @       & mask
  ARM(          addeq   pc, r12, r3             ) @ call cache function
  THUMB(                addeq   r12, r3                 )
@@ -778,8 +777,7 @@ proc_types:
  * Turn off the Cache and MMU.  ARMv3 does not support
  * reading the control register, but ARMv4 does.
  *
- * On entry,  r6 = processor ID
- * On exit,   r0, r1, r2, r3, r12 corrupted
+ * On exit, r0, r1, r2, r3, r9, r12 corrupted
  * This routine must preserve: r4, r6, r7
  */
                .align  5
@@ -852,10 +850,8 @@ __armv3_mmu_cache_off:
 /*
  * Clean and flush the cache to maintain consistency.
  *
- * On entry,
- *  r6 = processor ID
  * On exit,
- *  r1, r2, r3, r11, r12 corrupted
+ *  r1, r2, r3, r9, r11, r12 corrupted
  * This routine must preserve:
  *  r0, r4, r5, r6, r7
  */
@@ -967,7 +963,7 @@ __armv4_mmu_cache_flush:
                mov     r2, #64*1024            @ default: 32K dcache size (*2)
                mov     r11, #32                @ default: 32 byte line size
                mrc     p15, 0, r3, c0, c0, 1   @ read cache type
-               teq     r3, r6                  @ cache ID register present?
+               teq     r3, r9                  @ cache ID register present?
                beq     no_cache_id
                mov     r1, r3, lsr #18
                and     r1, r1, #7
index a5924b9..cbed030 100644 (file)
@@ -36,6 +36,9 @@ SECTIONS
 
   _etext = .;
 
+  /* Assume size of decompressed image is 4x the compressed image */
+  _image_size = (_etext - _text) * 4;
+
   _got_start = .;
   .got                 : { *(.got) }
   _got_end = .;