[ARM] 5480/1: U300-v5 integrate into the ARM architecture
[safe/jmp/linux-2.6] arch/arm/mm/mmu.c
index 445bc3b..39fca4e 100644
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 
+#include <asm/cputype.h>
 #include <asm/mach-types.h>
+#include <asm/sections.h>
+#include <asm/cachetype.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/highmem.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-extern void _stext, _etext, __data_start, _end;
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
  */
 struct page *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 
 /*
  * The pmd table for the upper-most set of pages.
@@ -49,8 +51,10 @@ pmd_t *top_pmd;
 
 static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
+pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
 
+EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
 
 struct cachepolicy {
@@ -65,32 +69,32 @@ static struct cachepolicy cache_policies[] __initdata = {
                .policy         = "uncached",
                .cr_mask        = CR_W|CR_C,
                .pmd            = PMD_SECT_UNCACHED,
-               .pte            = 0,
+               .pte            = L_PTE_MT_UNCACHED,
        }, {
                .policy         = "buffered",
                .cr_mask        = CR_C,
                .pmd            = PMD_SECT_BUFFERED,
-               .pte            = PTE_BUFFERABLE,
+               .pte            = L_PTE_MT_BUFFERABLE,
        }, {
                .policy         = "writethrough",
                .cr_mask        = 0,
                .pmd            = PMD_SECT_WT,
-               .pte            = PTE_CACHEABLE,
+               .pte            = L_PTE_MT_WRITETHROUGH,
        }, {
                .policy         = "writeback",
                .cr_mask        = 0,
                .pmd            = PMD_SECT_WB,
-               .pte            = PTE_BUFFERABLE|PTE_CACHEABLE,
+               .pte            = L_PTE_MT_WRITEBACK,
        }, {
                .policy         = "writealloc",
                .cr_mask        = 0,
                .pmd            = PMD_SECT_WBWA,
-               .pte            = PTE_BUFFERABLE|PTE_CACHEABLE,
+               .pte            = L_PTE_MT_WRITEALLOC,
        }
 };
 
 /*
- * These are useful for identifing cache coherency
+ * These are useful for identifying cache coherency
  * problems by allowing the cache or the cache and
  * writebuffer to be turned off.  (Note: the write
  * buffer should not be on and the cache off).
@@ -112,6 +116,10 @@ static void __init early_cachepolicy(char **p)
        }
        if (i == ARRAY_SIZE(cache_policies))
                printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
+       if (cpu_architecture() >= CPU_ARCH_ARMv6) {
+               printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
+               cachepolicy = CPOLICY_WRITEBACK;
+       }
        flush_cache_all();
        set_cr(cr_alignment);
 }
@@ -154,28 +162,67 @@ static int __init noalign_setup(char *__unused)
 }
 __setup("noalign", noalign_setup);
 
-struct mem_types {
-       unsigned int    prot_pte;
-       unsigned int    prot_l1;
-       unsigned int    prot_sect;
-       unsigned int    domain;
-};
+#ifndef CONFIG_SMP
+void adjust_cr(unsigned long mask, unsigned long set)
+{
+       unsigned long flags;
 
-static struct mem_types mem_types[] __initdata = {
-       [MT_DEVICE] = {
-               .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-                               L_PTE_WRITE,
-               .prot_l1   = PMD_TYPE_TABLE,
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_UNCACHED |
-                               PMD_SECT_AP_WRITE,
-               .domain    = DOMAIN_IO,
+       mask &= ~CR_A;
+
+       set &= mask;
+
+       local_irq_save(flags);
+
+       cr_no_alignment = (cr_no_alignment & ~mask) | set;
+       cr_alignment = (cr_alignment & ~mask) | set;
+
+       set_cr((get_cr() & ~mask) | set);
+
+       local_irq_restore(flags);
+}
+#endif
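
The UP-only adjust_cr() keeps the cached cr_alignment/cr_no_alignment copies and the live CP15 control register in step under a single mask. A standalone model of its masking rules; the CR_* bit positions follow the ARM control register layout, the rest is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Sketch: userspace model of adjust_cr().  CR_A is always stripped
 * from the mask, so the alignment-fault bit can never be flipped
 * through this helper.
 */
#define CR_A (1u << 1)	/* alignment fault enable */
#define CR_C (1u << 2)	/* dcache enable */
#define CR_W (1u << 3)	/* write buffer enable */

static uint32_t model_adjust_cr(uint32_t cr, uint32_t mask, uint32_t set)
{
	mask &= ~CR_A;
	set &= mask;
	return (cr & ~mask) | set;
}

int main(void)
{
	uint32_t cr = CR_C;			/* cache on, write buffer off */
	cr = model_adjust_cr(cr, CR_W, CR_W);	/* enable the write buffer */
	cr = model_adjust_cr(cr, CR_A, CR_A);	/* no effect: CR_A is masked out */
	printf("cr = %#x\n", (unsigned)cr);	/* 0xc = CR_C|CR_W */
	return 0;
}
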
+
+#define PROT_PTE_DEVICE                L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
+#define PROT_SECT_DEVICE       PMD_TYPE_SECT|PMD_SECT_AP_WRITE
+
+static struct mem_type mem_types[] = {
+       [MT_DEVICE] = {           /* Strongly ordered / ARMv6 shared device */
+               .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
+                                 L_PTE_SHARED,
+               .prot_l1        = PMD_TYPE_TABLE,
+               .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_S,
+               .domain         = DOMAIN_IO,
+       },
+       [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
+               .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
+               .prot_l1        = PMD_TYPE_TABLE,
+               .prot_sect      = PROT_SECT_DEVICE,
+               .domain         = DOMAIN_IO,
+       },
+       [MT_DEVICE_CACHED] = {    /* ioremap_cached */
+               .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
+               .prot_l1        = PMD_TYPE_TABLE,
+               .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_WB,
+               .domain         = DOMAIN_IO,
+       },
+       [MT_DEVICE_WC] = {      /* ioremap_wc */
+               .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
+               .prot_l1        = PMD_TYPE_TABLE,
+               .prot_sect      = PROT_SECT_DEVICE,
+               .domain         = DOMAIN_IO,
+       },
+       [MT_UNCACHED] = {
+               .prot_pte       = PROT_PTE_DEVICE,
+               .prot_l1        = PMD_TYPE_TABLE,
+               .prot_sect      = PMD_TYPE_SECT | PMD_SECT_XN,
+               .domain         = DOMAIN_IO,
        },
        [MT_CACHECLEAN] = {
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4,
+               .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
                .domain    = DOMAIN_KERNEL,
        },
        [MT_MINICLEAN] = {
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_MINICACHE,
+               .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
                .domain    = DOMAIN_KERNEL,
        },
        [MT_LOW_VECTORS] = {
@@ -191,30 +238,24 @@ static struct mem_types mem_types[] __initdata = {
                .domain    = DOMAIN_USER,
        },
        [MT_MEMORY] = {
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_AP_WRITE,
+               .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
                .domain    = DOMAIN_KERNEL,
        },
        [MT_ROM] = {
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4,
+               .prot_sect = PMD_TYPE_SECT,
                .domain    = DOMAIN_KERNEL,
        },
-       [MT_IXP2000_DEVICE] = { /* IXP2400 requires XCB=101 for on-chip I/O */
-               .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
-                               L_PTE_WRITE,
-               .prot_l1   = PMD_TYPE_TABLE,
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_UNCACHED |
-                               PMD_SECT_AP_WRITE | PMD_SECT_BUFFERABLE |
-                               PMD_SECT_TEX(1),
-               .domain    = DOMAIN_IO,
+       [MT_MEMORY_NONCACHED] = {
+               .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+               .domain    = DOMAIN_KERNEL,
        },
-       [MT_NONSHARED_DEVICE] = {
-               .prot_l1   = PMD_TYPE_TABLE,
-               .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_NONSHARED_DEV |
-                               PMD_SECT_AP_WRITE,
-               .domain    = DOMAIN_IO,
-       }
 };
 
+const struct mem_type *get_mem_type(unsigned int type)
+{
+       return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
+}
+
 /*
  * Adjust the PMD section entries according to the CPU in use.
  */
@@ -222,65 +263,140 @@ static void __init build_mem_type_table(void)
 {
        struct cachepolicy *cp;
        unsigned int cr = get_cr();
-       unsigned int user_pgprot, kern_pgprot;
+       unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
        int cpu_arch = cpu_architecture();
        int i;
 
+       if (cpu_arch < CPU_ARCH_ARMv6) {
 #if defined(CONFIG_CPU_DCACHE_DISABLE)
-       if (cachepolicy > CPOLICY_BUFFERED)
-               cachepolicy = CPOLICY_BUFFERED;
+               if (cachepolicy > CPOLICY_BUFFERED)
+                       cachepolicy = CPOLICY_BUFFERED;
 #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
-       if (cachepolicy > CPOLICY_WRITETHROUGH)
-               cachepolicy = CPOLICY_WRITETHROUGH;
+               if (cachepolicy > CPOLICY_WRITETHROUGH)
+                       cachepolicy = CPOLICY_WRITETHROUGH;
 #endif
+       }
        if (cpu_arch < CPU_ARCH_ARMv5) {
                if (cachepolicy >= CPOLICY_WRITEALLOC)
                        cachepolicy = CPOLICY_WRITEBACK;
                ecc_mask = 0;
        }
+#ifdef CONFIG_SMP
+       cachepolicy = CPOLICY_WRITEALLOC;
+#endif
 
        /*
-        * Xscale must not have PMD bit 4 set for section mappings.
+        * Strip out features not present on earlier architectures.
+        * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
+        * without extended page tables don't have the 'Shared' bit.
         */
-       if (cpu_is_xscale())
+       if (cpu_arch < CPU_ARCH_ARMv5)
                for (i = 0; i < ARRAY_SIZE(mem_types); i++)
-                       mem_types[i].prot_sect &= ~PMD_BIT4;
+                       mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
+       if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
+               for (i = 0; i < ARRAY_SIZE(mem_types); i++)
+                       mem_types[i].prot_sect &= ~PMD_SECT_S;
 
        /*
-        * ARMv5 and lower, excluding Xscale, bit 4 must be set for
-        * page tables.
+        * On ARMv5 and lower, bit 4 must be set for page tables (was: cache
+        * "update-able on write" bit on ARM610).  However, Xscale and
+        * Xscale3 require this bit to be cleared.
         */
-       if (cpu_arch < CPU_ARCH_ARMv6 && !cpu_is_xscale())
-               for (i = 0; i < ARRAY_SIZE(mem_types); i++)
+       if (cpu_is_xscale() || cpu_is_xsc3()) {
+               for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
+                       mem_types[i].prot_sect &= ~PMD_BIT4;
+                       mem_types[i].prot_l1 &= ~PMD_BIT4;
+               }
+       } else if (cpu_arch < CPU_ARCH_ARMv6) {
+               for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
                        if (mem_types[i].prot_l1)
                                mem_types[i].prot_l1 |= PMD_BIT4;
+                       if (mem_types[i].prot_sect)
+                               mem_types[i].prot_sect |= PMD_BIT4;
+               }
+       }
+
+       /*
+        * Mark the device areas according to the CPU/architecture.
+        */
+       if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
+               if (!cpu_is_xsc3()) {
+                       /*
+                        * Mark device regions on ARMv6+ as execute-never
+                        * to prevent speculative instruction fetches.
+                        */
+                       mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
+                       mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
+                       mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
+                       mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
+               }
+               if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+                       /*
+                        * For ARMv7 with TEX remapping,
+                        * - shared device is SXCB=1100
+                        * - nonshared device is SXCB=0100
+                        * - write combine device mem is SXCB=0001
+                        * (Uncached Normal memory)
+                        */
+                       mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
+                       mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
+                       mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
+               } else if (cpu_is_xsc3()) {
+                       /*
+                        * For Xscale3,
+                        * - shared device is TEXCB=00101
+                        * - nonshared device is TEXCB=01000
+                        * - write combine device mem is TEXCB=00100
+                        * (Inner/Outer Uncacheable in xsc3 parlance)
+                        */
+                       mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
+                       mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
+                       mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
+               } else {
+                       /*
+                        * For ARMv6 and ARMv7 without TEX remapping,
+                        * - shared device is TEXCB=00001
+                        * - nonshared device is TEXCB=01000
+                        * - write combine device mem is TEXCB=00100
+                        * (Uncached Normal in ARMv6 parlance).
+                        */
+                       mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
+                       mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
+                       mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
+               }
+       } else {
+               /*
+                * On others, write combining is "Uncached/Buffered"
+                */
+               mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
+       }
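
The TEX/C/B encodings named in the comments land in prot_sect as OR-ed PMD_SECT_* flags. A standalone sketch of how MT_DEVICE's final section descriptor composes on a non-TEX-remapping ARMv6; the bit positions follow the ARM short-descriptor format, but this is a model, not the kernel's headers:

#include <stdio.h>

/* Sketch: MT_DEVICE section bits on non-TRE ARMv6 (TEXCB=00001,
 * i.e. shared device).
 */
#define PMD_TYPE_SECT		(2 << 0)
#define PMD_SECT_BUFFERABLE	(1 << 2)
#define PMD_SECT_XN		(1 << 4)
#define PMD_SECT_AP_WRITE	(1 << 10)
#define PMD_SECT_S		(1 << 16)

int main(void)
{
	unsigned int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE	/* PROT_SECT_DEVICE */
			  | PMD_SECT_S				/* shared */
			  | PMD_SECT_XN				/* no speculative ifetch */
			  | PMD_SECT_BUFFERABLE;		/* TEXCB=00001 */
	printf("MT_DEVICE prot_sect = %#x\n", prot);	/* 0x10416 */
	return 0;
}
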
 
+       /*
+        * Now deal with the memory-type mappings
+        */
        cp = &cache_policies[cachepolicy];
-       kern_pgprot = user_pgprot = cp->pte;
+       vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
+
+#ifndef CONFIG_SMP
+       /*
+        * Only use write-through for non-SMP systems
+        */
+       if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
+               vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
+#endif
 
        /*
         * Enable CPU-specific coherency if supported.
         * (Only available on XSC3 at the moment.)
         */
-       if (arch_is_coherent()) {
-               if (cpu_is_xsc3()) {
-                       mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
-                       mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
-               }
-       }
+       if (arch_is_coherent() && cpu_is_xsc3())
+               mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
 
        /*
         * ARMv6 and above have extended page tables.
         */
        if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
                /*
-                * bit 4 becomes XN which we must clear for the
-                * kernel memory mapping.
-                */
-               mem_types[MT_MEMORY].prot_sect &= ~PMD_SECT_XN;
-               mem_types[MT_ROM].prot_sect &= ~PMD_SECT_XN;
-
-               /*
                 * Mark cache clean areas and XIP ROM read only
                 * from SVC mode and no access from userspace.
                 */
@@ -288,49 +404,45 @@ static void __init build_mem_type_table(void)
                mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 
-               /*
-                * Mark the device area as "shared device"
-                */
-               mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE;
-               mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
-
-               /*
-                * User pages need to be mapped with the ASID
-                * (iow, non-global)
-                */
-               user_pgprot |= L_PTE_ASID;
-
 #ifdef CONFIG_SMP
                /*
                 * Mark memory with the "shared" attribute for SMP systems
                 */
                user_pgprot |= L_PTE_SHARED;
                kern_pgprot |= L_PTE_SHARED;
+               vecs_pgprot |= L_PTE_SHARED;
                mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+               mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 #endif
        }
 
+       /*
+        * Non-cacheable Normal - intended for memory areas that must
+        * not cause dirty cache line writebacks when used
+        */
+       if (cpu_arch >= CPU_ARCH_ARMv6) {
+               if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+                       /* Non-cacheable Normal is XCB = 001 */
+                       mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+                               PMD_SECT_BUFFERED;
+               } else {
+                       /* For both ARMv6 and non-TEX-remapping ARMv7 */
+                       mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+                               PMD_SECT_TEX(1);
+               }
+       } else {
+               mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+       }
+
        for (i = 0; i < 16; i++) {
                unsigned long v = pgprot_val(protection_map[i]);
-               v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
-               protection_map[i] = __pgprot(v);
+               protection_map[i] = __pgprot(v | user_pgprot);
        }
 
-       mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
-       mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
-
-       if (cpu_arch >= CPU_ARCH_ARMv5) {
-#ifndef CONFIG_SMP
-               /*
-                * Only use write-through for non-SMP systems
-                */
-               mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
-               mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
-#endif
-       } else {
-               mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
-       }
+       mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
+       mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
 
+       pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
        pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
                                 L_PTE_DIRTY | L_PTE_WRITE |
                                 L_PTE_EXEC | kern_pgprot);
@@ -351,64 +463,125 @@ static void __init build_mem_type_table(void)
        }
        printk("Memory policy: ECC %sabled, Data cache %s\n",
                ecc_mask ? "en" : "dis", cp->policy);
+
+       for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
+               struct mem_type *t = &mem_types[i];
+               if (t->prot_l1)
+                       t->prot_l1 |= PMD_DOMAIN(t->domain);
+               if (t->prot_sect)
+                       t->prot_sect |= PMD_DOMAIN(t->domain);
+       }
 }
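
The new closing loop folds each type's domain number into its L1 descriptors once, instead of at create_mapping() time as the old code did. PMD_DOMAIN(x) is the short-descriptor domain field at bits [8:5], so with the conventional DOMAIN_IO = 2 it contributes 0x40. A minimal sketch under those assumptions:

#include <stdio.h>

/* Sketch of the domain fold, assuming the conventional numbering
 * (DOMAIN_KERNEL = 0, DOMAIN_USER = 1, DOMAIN_IO = 2).
 */
#define PMD_DOMAIN(x)	((x) << 5)
#define DOMAIN_IO	2

int main(void)
{
	unsigned int prot_sect = 0x10416;	/* MT_DEVICE value from the sketch above */
	prot_sect |= PMD_DOMAIN(DOMAIN_IO);	/* adds 0x40 */
	printf("prot_sect with domain = %#x\n", prot_sect);	/* 0x10456 */
	return 0;
}
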
 
 #define vectors_base() (vectors_high() ? 0xffff0000 : 0)
 
-/*
- * Create a SECTION PGD between VIRT and PHYS in domain
- * DOMAIN with protection PROT.  This operates on half-
- * pgdir entry increments.
- */
-static inline void
-alloc_init_section(unsigned long virt, unsigned long phys, int prot)
+static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
+                                 unsigned long end, unsigned long pfn,
+                                 const struct mem_type *type)
 {
-       pmd_t *pmdp = pmd_off_k(virt);
+       pte_t *pte;
 
-       if (virt & (1 << 20))
-               pmdp++;
+       if (pmd_none(*pmd)) {
+               pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
+               __pmd_populate(pmd, __pa(pte) | type->prot_l1);
+       }
 
-       *pmdp = __pmd(phys | prot);
-       flush_pmd_entry(pmdp);
+       pte = pte_offset_kernel(pmd, addr);
+       do {
+               set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
+               pfn++;
+       } while (pte++, addr += PAGE_SIZE, addr != end);
 }
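
alloc_init_pte() still allocates 2 * PTRS_PER_PTE entries per PMD because classic ARM keeps two views of every L2 table: the hardware tables the MMU walks, and a shadow "Linux" table holding software state (young, dirty). pte_offset_kernel() returns pointers into the Linux half, and set_pte_ext() mirrors each store into the hardware half. A sketch of the layout of this era, assuming 4K pages, PTRS_PER_PTE = 512 and 4-byte entries:

#include <stdio.h>

/* Sketch of what the 2 * PTRS_PER_PTE * sizeof(pte_t) allocation
 * pays for (assumed classic ARM layout of this era):
 *
 *   base + 0x000  hardware table, even 1MB section (256 entries)
 *   base + 0x400  hardware table, odd  1MB section (256 entries)
 *   base + 0x800  Linux table, 512 entries carrying software bits
 *
 * The PMD is populated with the physical address of base, so the
 * MMU only ever sees the first two kilobytes.
 */
#define PTRS_PER_PTE 512

int main(void)
{
	printf("allocation: %zu bytes\n",
	       2 * PTRS_PER_PTE * sizeof(unsigned int));	/* 4096, one page */
	return 0;
}
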
 
-/*
- * Create a SUPER SECTION PGD between VIRT and PHYS with protection PROT
- */
-static inline void
-alloc_init_supersection(unsigned long virt, unsigned long phys, int prot)
+static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
+                                     unsigned long end, unsigned long phys,
+                                     const struct mem_type *type)
 {
-       int i;
+       pmd_t *pmd = pmd_offset(pgd, addr);
+
+       /*
+        * Try a section mapping - end, addr and phys must all be aligned
+        * to a section boundary.  Note that PMDs refer to the individual
+        * L1 entries, whereas PGDs refer to a group of L1 entries making
+        * up one logical pointer to an L2 table.
+        */
+       if (((addr | end | phys) & ~SECTION_MASK) == 0) {
+               pmd_t *p = pmd;
+
+               if (addr & SECTION_SIZE)
+                       pmd++;
 
-       for (i = 0; i < 16; i += 1) {
-               alloc_init_section(virt, phys, prot | PMD_SECT_SUPER);
+               do {
+                       *pmd = __pmd(phys | type->prot_sect);
+                       phys += SECTION_SIZE;
+               } while (pmd++, addr += SECTION_SIZE, addr != end);
 
-               virt += (PGDIR_SIZE / 2);
+               flush_pmd_entry(p);
+       } else {
+               /*
+                * No need to loop; PTEs aren't interested in the
+                * individual L1 entries.
+                */
+               alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
        }
 }
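
The test ((addr | end | phys) & ~SECTION_MASK) == 0 merges all three values with OR, so a single mask check catches a misalignment in any of them; only fully section-aligned ranges take the section path. A standalone sketch with the ARM section size of 1MB:

#include <stdio.h>

/* Sketch of the combined alignment test: OR the values, then look
 * for residue below the 1MB section size.
 */
#define SECTION_SHIFT	20
#define SECTION_SIZE	(1UL << SECTION_SHIFT)
#define SECTION_MASK	(~(SECTION_SIZE - 1))

static int can_use_sections(unsigned long addr, unsigned long end,
			    unsigned long phys)
{
	return ((addr | end | phys) & ~SECTION_MASK) == 0;
}

int main(void)
{
	printf("%d\n", can_use_sections(0xc0000000UL, 0xc0200000UL, 0x20000000UL)); /* 1 */
	printf("%d\n", can_use_sections(0xc0000000UL, 0xc0201000UL, 0x20000000UL)); /* 0 */
	return 0;
}
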
 
-/*
- * Add a PAGE mapping between VIRT and PHYS in domain
- * DOMAIN with protection PROT.  Note that due to the
- * way we map the PTEs, we must allocate two PTE_SIZE'd
- * blocks - one for the Linux pte table, and one for
- * the hardware pte table.
- */
-static inline void
-alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot)
+static void __init create_36bit_mapping(struct map_desc *md,
+                                       const struct mem_type *type)
 {
-       pmd_t *pmdp = pmd_off_k(virt);
-       pte_t *ptep;
+       unsigned long phys, addr, length, end;
+       pgd_t *pgd;
+
+       addr = md->virtual;
+       phys = (unsigned long)__pfn_to_phys(md->pfn);
+       length = PAGE_ALIGN(md->length);
+
+       if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
+               printk(KERN_ERR "MM: CPU does not support supersection "
+                      "mapping for 0x%08llx at 0x%08lx\n",
+                      __pfn_to_phys((u64)md->pfn), addr);
+               return;
+       }
 
-       if (pmd_none(*pmdp)) {
-               ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
-                                              sizeof(pte_t));
+       /* N.B. ARMv6 supersections are only defined to work with domain 0.
+        *      Since domain assignments can in fact be arbitrary, the
+        *      'domain == 0' check below is required to ensure that ARMv6
+        *      supersections are only allocated for domain 0 regardless
+        *      of the actual domain assignments in use.
+        */
+       if (type->domain) {
+               printk(KERN_ERR "MM: invalid domain in supersection "
+                      "mapping for 0x%08llx at 0x%08lx\n",
+                      __pfn_to_phys((u64)md->pfn), addr);
+               return;
+       }
 
-               __pmd_populate(pmdp, __pa(ptep) | prot_l1);
+       if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
+               printk(KERN_ERR "MM: cannot create mapping for "
+                      "0x%08llx at 0x%08lx invalid alignment\n",
+                      __pfn_to_phys((u64)md->pfn), addr);
+               return;
        }
-       ptep = pte_offset_kernel(pmdp, virt);
 
-       set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
+       /*
+        * Shift bits [35:32] of address into bits [23:20] of PMD
+        * (See ARMv6 spec).
+        */
+       phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
+
+       pgd = pgd_offset_k(addr);
+       end = addr + length;
+       do {
+               pmd_t *pmd = pmd_offset(pgd, addr);
+               int i;
+
+               for (i = 0; i < 16; i++)
+                       *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
+
+               addr += SUPERSECTION_SIZE;
+               phys += SUPERSECTION_SIZE;
+               pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
+       } while (addr != end);
 }
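
The pfn arithmetic implements the ARMv6 supersection extended-address format: bits [35:32] of the physical address are carried in bits [23:20] of the descriptor, which is also why each 16MB supersection is written as 16 identical L1 entries. A runnable sketch of the encoding for a frame above 4GB (the pfn is illustrative):

#include <stdint.h>
#include <stdio.h>

/* Sketch: fold a 36-bit physical address into a supersection
 * descriptor base (PAGE_SHIFT = 12).
 */
#define PAGE_SHIFT 12

int main(void)
{
	uint64_t pfn = 0x140000;	/* phys 0x1_4000_0000, above 4GB */
	uint32_t phys = (uint32_t)(pfn << PAGE_SHIFT);	  /* low 32 bits */
	phys |= ((pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20; /* [35:32] -> [23:20] */
	printf("descriptor base = %#x\n", (unsigned)phys); /* 0x40100000 */
	return 0;
}
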
 
 /*
@@ -420,10 +593,9 @@ alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pg
  */
 void __init create_mapping(struct map_desc *md)
 {
-       unsigned long virt, length;
-       int prot_sect, prot_l1, domain;
-       pgprot_t prot_pte;
-       unsigned long off = (u32)__pfn_to_phys(md->pfn);
+       unsigned long phys, addr, length, end;
+       const struct mem_type *type;
+       pgd_t *pgd;
 
        if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
                printk(KERN_WARNING "BUG: not creating mapping for "
@@ -439,105 +611,37 @@ void __init create_mapping(struct map_desc *md)
                       __pfn_to_phys((u64)md->pfn), md->virtual);
        }
 
-       domain    = mem_types[md->type].domain;
-       prot_pte  = __pgprot(mem_types[md->type].prot_pte);
-       prot_l1   = mem_types[md->type].prot_l1 | PMD_DOMAIN(domain);
-       prot_sect = mem_types[md->type].prot_sect | PMD_DOMAIN(domain);
+       type = &mem_types[md->type];
 
        /*
         * Catch 36-bit addresses
         */
-       if(md->pfn >= 0x100000) {
-               if(domain) {
-                       printk(KERN_ERR "MM: invalid domain in supersection "
-                               "mapping for 0x%08llx at 0x%08lx\n",
-                               __pfn_to_phys((u64)md->pfn), md->virtual);
-                       return;
-               }
-               if((md->virtual | md->length | __pfn_to_phys(md->pfn))
-                       & ~SUPERSECTION_MASK) {
-                       printk(KERN_ERR "MM: cannot create mapping for "
-                               "0x%08llx at 0x%08lx invalid alignment\n",
-                               __pfn_to_phys((u64)md->pfn), md->virtual);
-                       return;
-               }
-
-               /*
-                * Shift bits [35:32] of address into bits [23:20] of PMD
-                * (See ARMv6 spec).
-                */
-               off |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
+       if (md->pfn >= 0x100000) {
+               create_36bit_mapping(md, type);
+               return;
        }
 
-       virt   = md->virtual;
-       off   -= virt;
-       length = md->length;
+       addr = md->virtual & PAGE_MASK;
+       phys = (unsigned long)__pfn_to_phys(md->pfn);
+       length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
 
-       if (mem_types[md->type].prot_l1 == 0 &&
-           (virt & 0xfffff || (virt + off) & 0xfffff || (virt + length) & 0xfffff)) {
+       if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
                printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
                       "be mapped using pages, ignoring.\n",
-                      __pfn_to_phys(md->pfn), md->virtual);
+                      __pfn_to_phys(md->pfn), addr);
                return;
        }
 
-       while ((virt & 0xfffff || (virt + off) & 0xfffff) && length >= PAGE_SIZE) {
-               alloc_init_page(virt, virt + off, prot_l1, prot_pte);
-
-               virt   += PAGE_SIZE;
-               length -= PAGE_SIZE;
-       }
-
-       /* N.B. ARMv6 supersections are only defined to work with domain 0.
-        *      Since domain assignments can in fact be arbitrary, the
-        *      'domain == 0' check below is required to ensure that ARMv6
-        *      supersections are only allocated for domain 0 regardless
-        *      of the actual domain assignments in use.
-        */
-       if ((cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())
-               && domain == 0) {
-               /*
-                * Align to supersection boundary if !high pages.
-                * High pages have already been checked for proper
-                * alignment above and they will fail the SUPERSECTION_MASK
-                * check because of the way the address is encoded into
-                * offset.
-                */
-               if (md->pfn <= 0x100000) {
-                       while ((virt & ~SUPERSECTION_MASK ||
-                               (virt + off) & ~SUPERSECTION_MASK) &&
-                               length >= (PGDIR_SIZE / 2)) {
-                               alloc_init_section(virt, virt + off, prot_sect);
-
-                               virt   += (PGDIR_SIZE / 2);
-                               length -= (PGDIR_SIZE / 2);
-                       }
-               }
-
-               while (length >= SUPERSECTION_SIZE) {
-                       alloc_init_supersection(virt, virt + off, prot_sect);
-
-                       virt   += SUPERSECTION_SIZE;
-                       length -= SUPERSECTION_SIZE;
-               }
-       }
-
-       /*
-        * A section mapping covers half a "pgdir" entry.
-        */
-       while (length >= (PGDIR_SIZE / 2)) {
-               alloc_init_section(virt, virt + off, prot_sect);
-
-               virt   += (PGDIR_SIZE / 2);
-               length -= (PGDIR_SIZE / 2);
-       }
+       pgd = pgd_offset_k(addr);
+       end = addr + length;
+       do {
+               unsigned long next = pgd_addr_end(addr, end);
 
-       while (length >= PAGE_SIZE) {
-               alloc_init_page(virt, virt + off, prot_l1, prot_pte);
+               alloc_init_section(pgd, addr, next, phys, type);
 
-               virt   += PAGE_SIZE;
-               length -= PAGE_SIZE;
-       }
+               phys += next - addr;
+               addr = next;
+       } while (pgd++, addr != end);
 }
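
create_mapping() now walks PGD slots with pgd_addr_end() and is fed struct map_desc entries, normally through iotable_init() below. A hedged sketch of the convention as a machine's ->map_io() hook would use it; the array name and the addresses are invented for illustration, the fields are the real struct map_desc ones:

/* Sketch: a static I/O map as a machine file would declare it. */
static struct map_desc example_io_desc[] __initdata = {
	{
		.virtual	= 0xff000000,			/* mapping destination */
		.pfn		= __phys_to_pfn(0x80000000),	/* device base (invented) */
		.length		= 0x00100000,			/* 1MB: section-mapped */
		.type		= MT_DEVICE,
	},
};

/* from the machine's ->map_io() hook:
 *	iotable_init(example_io_desc, ARRAY_SIZE(example_io_desc));
 */
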
 
 /*
@@ -551,19 +655,113 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
                create_mapping(io_desc + i);
 }
 
-static inline void prepare_page_table(struct meminfo *mi)
+static unsigned long __initdata vmalloc_reserve = SZ_128M;
+
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
+ * bytes. This can be used to increase (or decrease) the vmalloc
+ * area - the default is 128m.
+ */
+static void __init early_vmalloc(char **arg)
+{
+       vmalloc_reserve = memparse(*arg, arg);
+
+       if (vmalloc_reserve < SZ_16M) {
+               vmalloc_reserve = SZ_16M;
+               printk(KERN_WARNING
+                       "vmalloc area too small, limiting to %luMB\n",
+                       vmalloc_reserve >> 20);
+       }
+
+       if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
+               vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
+               printk(KERN_WARNING
+                       "vmalloc area is too big, limiting to %luMB\n",
+                       vmalloc_reserve >> 20);
+       }
+}
+__early_param("vmalloc=", early_vmalloc);
+
+#define VMALLOC_MIN    (void *)(VMALLOC_END - vmalloc_reserve)
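
early_vmalloc() clamps the request to at least 16MB and at most VMALLOC_END - (PAGE_OFFSET + 32MB), and VMALLOC_MIN then sets the lowmem ceiling used by sanity_check_meminfo() below. A standalone sketch of the arithmetic, assuming the common PAGE_OFFSET = 0xc0000000 and VMALLOC_END = 0xf8000000:

#include <stdio.h>

/* Sketch of the vmalloc= clamping under assumed constants. */
#define SZ_16M		0x01000000UL
#define SZ_32M		0x02000000UL
#define PAGE_OFFSET	0xc0000000UL
#define VMALLOC_END	0xf8000000UL

static unsigned long clamp_vmalloc(unsigned long reserve)
{
	if (reserve < SZ_16M)
		reserve = SZ_16M;
	if (reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M))
		reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
	return reserve;
}

int main(void)
{
	/* vmalloc=256M: VMALLOC_MIN becomes 0xe8000000, i.e. 640MB of lowmem */
	unsigned long reserve = clamp_vmalloc(256UL << 20);
	printf("VMALLOC_MIN = %#lx\n", VMALLOC_END - reserve);
	return 0;
}
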
+
+static void __init sanity_check_meminfo(void)
+{
+       int i, j;
+
+       for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
+               struct membank *bank = &meminfo.bank[j];
+               *bank = meminfo.bank[i];
+
+#ifdef CONFIG_HIGHMEM
+               /*
+                * Split those memory banks which are partially overlapping
+                * the vmalloc area, greatly simplifying things later.
+                */
+               if (__va(bank->start) < VMALLOC_MIN &&
+                   bank->size > VMALLOC_MIN - __va(bank->start)) {
+                       if (meminfo.nr_banks >= NR_BANKS) {
+                               printk(KERN_CRIT "NR_BANKS too low, "
+                                                "ignoring high memory\n");
+                       } else if (cache_is_vipt_aliasing()) {
+                               printk(KERN_CRIT "HIGHMEM is not yet supported "
+                                                "with VIPT aliasing cache, "
+                                                "ignoring high memory\n");
+                       } else {
+                               memmove(bank + 1, bank,
+                                       (meminfo.nr_banks - i) * sizeof(*bank));
+                               meminfo.nr_banks++;
+                               i++;
+                               bank[1].size -= VMALLOC_MIN - __va(bank->start);
+                               bank[1].start = __pa(VMALLOC_MIN - 1) + 1;
+                               j++;
+                       }
+                       bank->size = VMALLOC_MIN - __va(bank->start);
+               }
+#else
+               /*
+                * Check whether this memory bank would entirely overlap
+                * the vmalloc area.
+                */
+               if (__va(bank->start) >= VMALLOC_MIN ||
+                   __va(bank->start) < (void *)PAGE_OFFSET) {
+                       printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
+                              "(vmalloc region overlap).\n",
+                              bank->start, bank->start + bank->size - 1);
+                       continue;
+               }
+
+               /*
+                * Check whether this memory bank would partially overlap
+                * the vmalloc area.
+                */
+               if (__va(bank->start + bank->size) > VMALLOC_MIN ||
+                   __va(bank->start + bank->size) < __va(bank->start)) {
+                       unsigned long newsize = VMALLOC_MIN - __va(bank->start);
+                       printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
+                              "to -%.8lx (vmalloc region overlap).\n",
+                              bank->start, bank->start + bank->size - 1,
+                              bank->start + newsize - 1);
+                       bank->size = newsize;
+               }
+#endif
+               j++;
+       }
+       meminfo.nr_banks = j;
+}
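
Under CONFIG_HIGHMEM, a bank straddling VMALLOC_MIN is split in place: the low part keeps the original start and is truncated at the vmalloc boundary, and a new bank covering the remainder is inserted after it. A worked numeric sketch, assuming PHYS_OFFSET = 0, PAGE_OFFSET = 0xc0000000 and VMALLOC_MIN = 0xf0000000 (a 128MB reserve below VMALLOC_END = 0xf8000000):

#include <stdio.h>

/* Worked sketch of the CONFIG_HIGHMEM bank split under the assumed
 * constants above.
 */
#define PAGE_OFFSET	0xc0000000UL
#define VMALLOC_MIN	0xf0000000UL
#define va(p)		((p) + PAGE_OFFSET)	/* flat phys->virt model */

int main(void)
{
	unsigned long start = 0x00000000UL, size = 0x40000000UL;  /* 1GB bank */

	if (va(start) < VMALLOC_MIN && size > VMALLOC_MIN - va(start)) {
		unsigned long lowsize = VMALLOC_MIN - va(start);
		printf("low bank:  start %#010lx size %#lx\n", start, lowsize);
		printf("high bank: start %#010lx size %#lx\n",
		       start + lowsize, size - lowsize);
	}
	return 0;	/* prints a 768MB low bank and a 256MB high bank */
}
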
+
+static inline void prepare_page_table(void)
 {
        unsigned long addr;
 
        /*
         * Clear out all the mappings below the kernel image.
         */
-       for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE)
+       for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
 
 #ifdef CONFIG_XIP_KERNEL
        /* The XIP kernel is mapped in the module area -- skip over it */
-       addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
+       addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
 #endif
        for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
@@ -572,7 +770,7 @@ static inline void prepare_page_table(struct meminfo *mi)
         * Clear out all the kernel space mappings, except for the first
         * memory bank, up to the end of the vmalloc region.
         */
-       for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
+       for (addr = __phys_to_virt(bank_phys_end(&meminfo.bank[0]));
             addr < VMALLOC_END; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
 }
@@ -589,9 +787,11 @@ void __init reserve_node_zero(pg_data_t *pgdat)
         * Note that this can only be in node 0.
         */
 #ifdef CONFIG_XIP_KERNEL
-       reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
+       reserve_bootmem_node(pgdat, __pa(_data), _end - _data,
+                       BOOTMEM_DEFAULT);
 #else
-       reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+       reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext,
+                       BOOTMEM_DEFAULT);
 #endif
 
        /*
@@ -599,7 +799,7 @@ void __init reserve_node_zero(pg_data_t *pgdat)
         * and can only be in node 0.
         */
        reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
-                            PTRS_PER_PGD * sizeof(pgd_t));
+                            PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
 
        /*
         * Hmm... This should go elsewhere, but we really really need to
@@ -619,6 +819,40 @@ void __init reserve_node_zero(pg_data_t *pgdat)
        if (machine_is_p720t())
                res_size = 0x00014000;
 
+       /* H1940 and RX3715 need to reserve this for suspend */
+
+       if (machine_is_h1940() || machine_is_rx3715()) {
+               reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
+                               BOOTMEM_DEFAULT);
+               reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
+                               BOOTMEM_DEFAULT);
+       }
+
+       if (machine_is_palmld() || machine_is_palmtx()) {
+               reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+               reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+       }
+
+       if (machine_is_palmt5())
+               reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+
+       /*
+        * U300 - This platform family can share physical memory
+        * between two ARM CPUs, one running Linux and the other
+        * running another OS.
+        */
+       if (machine_is_u300()) {
+#ifdef CONFIG_MACH_U300_SINGLE_RAM
+#if ((CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1) == 1) &&   \
+       CONFIG_MACH_U300_2MB_ALIGNMENT_FIX
+               res_size = 0x00100000;
+#endif
+#endif
+       }
+
 #ifdef CONFIG_SA1111
        /*
         * Because of the SA1111 DMA bug, we want to preserve our
@@ -627,7 +861,8 @@ void __init reserve_node_zero(pg_data_t *pgdat)
        res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
 #endif
        if (res_size)
-               reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
+               reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
+                               BOOTMEM_DEFAULT);
 }
 
 /*
@@ -647,7 +882,6 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
         * Allocate the vector page early.
         */
        vectors = alloc_bootmem_low_pages(PAGE_SIZE);
-       BUG_ON(!vectors);
 
        for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
@@ -658,8 +892,8 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
         */
 #ifdef CONFIG_XIP_KERNEL
        map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
-       map.virtual = MODULE_START;
-       map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
+       map.virtual = MODULES_VADDR;
+       map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
        map.type = MT_ROM;
        create_mapping(&map);
 #endif
@@ -715,26 +949,39 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
        flush_cache_all();
 }
 
+static void __init kmap_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+       pmd_t *pmd = pmd_off_k(PKMAP_BASE);
+       pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
+       BUG_ON(!pmd_none(*pmd) || !pte);
+       __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
+       pkmap_page_table = pte + PTRS_PER_PTE;
+#endif
+}
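
kmap_init() reuses the double-table allocation described at alloc_init_pte() above: pkmap_page_table is pointed PTRS_PER_PTE entries past the base, i.e. at the Linux half of the pair, which is the view the kmap code reads and writes. A sketch of that offset, assuming 4-byte PTEs:

#include <stdio.h>

/* Sketch: pte + PTRS_PER_PTE advances 512 four-byte entries,
 * landing 2048 bytes in, on the Linux half of the table pair.
 */
#define PTRS_PER_PTE 512

int main(void)
{
	printf("pkmap_page_table offset = %zu bytes\n",
	       PTRS_PER_PTE * sizeof(unsigned int));	/* 2048 */
	return 0;
}
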
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
  */
-void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
+void __init paging_init(struct machine_desc *mdesc)
 {
        void *zero_page;
 
        build_mem_type_table();
-       prepare_page_table(mi);
-       bootmem_init(mi);
+       sanity_check_meminfo();
+       prepare_page_table();
+       bootmem_init();
        devicemaps_init(mdesc);
+       kmap_init();
 
        top_pmd = pmd_off_k(0xffff0000);
 
        /*
-        * allocate the zero page.  Note that we count on this going ok.
+        * allocate the zero page.  Note that this always succeeds and
+        * returns a zeroed result.
         */
        zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
-       memzero(zero_page, PAGE_SIZE);
        empty_zero_page = virt_to_page(zero_page);
        flush_dcache_page(empty_zero_page);
 }