Merge branch 'linus' into cont_syslog
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index f24803c..2858941 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
 #include <linux/bootmem.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
+#include <linux/sort.h>
 
 #include <asm/cputype.h>
 #include <asm/mach-types.h>
+#include <asm/sections.h>
+#include <asm/cachetype.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
+#include <asm/smp_plat.h>
 #include <asm/tlb.h>
+#include <asm/highmem.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -96,59 +101,66 @@ static struct cachepolicy cache_policies[] __initdata = {
  * writebuffer to be turned off.  (Note: the write
  * buffer should not be on and the cache off).
  */
-static void __init early_cachepolicy(char **p)
+static int __init early_cachepolicy(char *p)
 {
        int i;
 
        for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
                int len = strlen(cache_policies[i].policy);
 
-               if (memcmp(*p, cache_policies[i].policy, len) == 0) {
+               if (memcmp(p, cache_policies[i].policy, len) == 0) {
                        cachepolicy = i;
                        cr_alignment &= ~cache_policies[i].cr_mask;
                        cr_no_alignment &= ~cache_policies[i].cr_mask;
-                       *p += len;
                        break;
                }
        }
        if (i == ARRAY_SIZE(cache_policies))
                printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
+       /*
+        * This restriction is partly to do with the way we boot; it is
+        * unpredictable to have memory mapped using two different sets of
+        * memory attributes (shared, type, and cache attribs).  We cannot
+        * change these attributes once the initial assembly has set up the
+        * page tables.
+        */
        if (cpu_architecture() >= CPU_ARCH_ARMv6) {
                printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
                cachepolicy = CPOLICY_WRITEBACK;
        }
        flush_cache_all();
        set_cr(cr_alignment);
+       return 0;
 }
-__early_param("cachepolicy=", early_cachepolicy);
+early_param("cachepolicy", early_cachepolicy);
 
-static void __init early_nocache(char **__unused)
+static int __init early_nocache(char *__unused)
 {
        char *p = "buffered";
        printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
-       early_cachepolicy(&p);
+       early_cachepolicy(p);
+       return 0;
 }
-__early_param("nocache", early_nocache);
+early_param("nocache", early_nocache);
 
-static void __init early_nowrite(char **__unused)
+static int __init early_nowrite(char *__unused)
 {
        char *p = "uncached";
        printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
-       early_cachepolicy(&p);
+       early_cachepolicy(p);
+       return 0;
 }
-__early_param("nowb", early_nowrite);
+early_param("nowb", early_nowrite);
 
-static void __init early_ecc(char **p)
+static int __init early_ecc(char *p)
 {
-       if (memcmp(*p, "on", 2) == 0) {
+       if (memcmp(p, "on", 2) == 0)
                ecc_mask = PMD_PROTECTION;
-               *p += 2;
-       } else if (memcmp(*p, "off", 3) == 0) {
+       else if (memcmp(p, "off", 3) == 0)
                ecc_mask = 0;
-               *p += 3;
-       }
+       return 0;
 }
-__early_param("ecc=", early_ecc);
+early_param("ecc", early_ecc);
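
The handlers above now follow the generic early_param() convention: each
receives only the value string (the text after '='), no longer advances the
caller's pointer itself, and returns 0 on success.  A minimal sketch of the
pattern, using a hypothetical "demo_mem" option:

#include <linux/init.h>
#include <linux/kernel.h>

static unsigned long demo_mem_size __initdata;

/* Called with e.g. "16M" when the command line contains demo_mem=16M. */
static int __init early_demo_mem(char *p)
{
	demo_mem_size = memparse(p, NULL);
	return 0;
}
early_param("demo_mem", early_demo_mem);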
 
 static int __init noalign_setup(char *__unused)
 {
@@ -242,12 +254,17 @@ static struct mem_type mem_types[] = {
                .prot_sect = PMD_TYPE_SECT,
                .domain    = DOMAIN_KERNEL,
        },
+       [MT_MEMORY_NONCACHED] = {
+               .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+               .domain    = DOMAIN_KERNEL,
+       },
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
 {
        return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
 }
+EXPORT_SYMBOL(get_mem_type);
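
The EXPORT_SYMBOL above lets modular code query the attribute set the kernel
chose for a mapping type instead of hard-coding PMD/PTE bits.  A hedged
sketch of a caller (the helper name is hypothetical, and it assumes the
struct mem_type definition from arch/arm/mm/mm.h is visible):

#include <asm/mach/map.h>

/* Return the PTE protection bits chosen for MT_DEVICE on this CPU,
 * or 0 if the type is unknown. */
static unsigned long demo_device_prot_pte(void)
{
	const struct mem_type *t = get_mem_type(MT_DEVICE);

	return t ? t->prot_pte : 0;
}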
 
 /*
  * Adjust the PMD section entries according to the CPU in use.
@@ -404,10 +421,33 @@ static void __init build_mem_type_table(void)
                user_pgprot |= L_PTE_SHARED;
                kern_pgprot |= L_PTE_SHARED;
                vecs_pgprot |= L_PTE_SHARED;
+               mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
+               mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
+               mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
+               mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
                mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+               mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 #endif
        }
 
+       /*
+        * Non-cacheable Normal - intended for memory areas that must
+        * not cause dirty cache line writebacks when used.
+        */
+       if (cpu_arch >= CPU_ARCH_ARMv6) {
+               if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+                       /* Non-cacheable Normal is XCB = 001 */
+                       mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+                               PMD_SECT_BUFFERED;
+               } else {
+                       /* For both ARMv6 and non-TEX-remapping ARMv7 */
+                       mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+                               PMD_SECT_TEX(1);
+               }
+       } else {
+               mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+       }
+
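
For reference, the three section encodings chosen above, written as
illustrative macros (not something this file defines; bit meanings per the
ARM ARM):

/* ARMv7 with TEX remap (SCTLR.TRE=1): XCB = 001 indexes a PRRR/NMRR
 * entry that the boot code configures as Normal, non-cacheable. */
#define DEMO_NORMAL_NC_TEXREMAP	(PMD_TYPE_SECT | PMD_SECT_BUFFERED)

/* ARMv6, or ARMv7 without TEX remap: TEX=1 C=0 B=0 directly encodes
 * Normal memory, inner and outer non-cacheable. */
#define DEMO_NORMAL_NC_CLASSIC	(PMD_TYPE_SECT | PMD_SECT_TEX(1))

/* Pre-ARMv6: no Normal/Device distinction; C=0 B=1 ("bufferable") is
 * the closest uncached-but-bufferable section type. */
#define DEMO_NC_LEGACY		(PMD_TYPE_SECT | PMD_SECT_BUFFERABLE)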
        for (i = 0; i < 16; i++) {
                unsigned long v = pgprot_val(protection_map[i]);
                protection_map[i] = __pgprot(v | user_pgprot);
@@ -418,8 +458,7 @@ static void __init build_mem_type_table(void)
 
        pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
        pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
-                                L_PTE_DIRTY | L_PTE_WRITE |
-                                L_PTE_EXEC | kern_pgprot);
+                                L_PTE_DIRTY | L_PTE_WRITE | kern_pgprot);
 
        mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
        mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
@@ -565,7 +604,7 @@ static void __init create_36bit_mapping(struct map_desc *md,
  * offsets, and we take full advantage of sections and
  * supersections.
  */
-void __init create_mapping(struct map_desc *md)
+static void __init create_mapping(struct map_desc *md)
 {
        unsigned long phys, addr, length, end;
        const struct mem_type *type;
@@ -636,9 +675,9 @@ static unsigned long __initdata vmalloc_reserve = SZ_128M;
  * bytes. This can be used to increase (or decrease) the vmalloc
  * area - the default is 128m.
  */
-static void __init early_vmalloc(char **arg)
+static int __init early_vmalloc(char *arg)
 {
-       vmalloc_reserve = memparse(*arg, arg);
+       vmalloc_reserve = memparse(arg, NULL);
 
        if (vmalloc_reserve < SZ_16M) {
                vmalloc_reserve = SZ_16M;
@@ -646,61 +685,122 @@ static void __init early_vmalloc(char **arg)
                        "vmalloc area too small, limiting to %luMB\n",
                        vmalloc_reserve >> 20);
        }
+
+       if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
+               vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
+               printk(KERN_WARNING
+                       "vmalloc area is too big, limiting to %luMB\n",
+                       vmalloc_reserve >> 20);
+       }
+       return 0;
 }
-__early_param("vmalloc=", early_vmalloc);
+early_param("vmalloc", early_vmalloc);
 
 #define VMALLOC_MIN    (void *)(VMALLOC_END - vmalloc_reserve)
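
A worked example of the new upper clamp, assuming the common layout of
VMALLOC_END = 0xff000000 and PAGE_OFFSET = 0xc0000000 (both values are
platform-dependent):

/*
 * Keep at least 32MB of lowmem mapped below the vmalloc area:
 *   VMALLOC_END - (PAGE_OFFSET + SZ_32M)
 *   = 0xff000000 - (0xc0000000 + 0x02000000)
 *   = 0x3d000000 = 976MB
 * so "vmalloc=2G" would be clamped to 976MB here, while the earlier
 * check raises anything below SZ_16M back up to 16MB.
 */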
 
-static int __init check_membank_valid(struct membank *mb)
+static void __init sanity_check_meminfo(void)
 {
-       /*
-        * Check whether this memory region has non-zero size or
-        * invalid node number.
-        */
-       if (mb->size == 0 || mb->node >= MAX_NUMNODES)
-               return 0;
+       int i, j, highmem = 0;
 
-       /*
-        * Check whether this memory region would entirely overlap
-        * the vmalloc area.
-        */
-       if (phys_to_virt(mb->start) >= VMALLOC_MIN) {
-               printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
-                       "(vmalloc region overlap).\n",
-                       mb->start, mb->start + mb->size - 1);
-               return 0;
-       }
+       for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
+               struct membank *bank = &meminfo.bank[j];
+               *bank = meminfo.bank[i];
 
-       /*
-        * Check whether this memory region would partially overlap
-        * the vmalloc area.
-        */
-       if (phys_to_virt(mb->start + mb->size) < phys_to_virt(mb->start) ||
-           phys_to_virt(mb->start + mb->size) > VMALLOC_MIN) {
-               unsigned long newsize = VMALLOC_MIN - phys_to_virt(mb->start);
+#ifdef CONFIG_HIGHMEM
+               if (__va(bank->start) > VMALLOC_MIN ||
+                   __va(bank->start) < (void *)PAGE_OFFSET)
+                       highmem = 1;
 
-               printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
-                       "to -%.8lx (vmalloc region overlap).\n",
-                       mb->start, mb->start + mb->size - 1,
-                       mb->start + newsize - 1);
-               mb->size = newsize;
-       }
+               bank->highmem = highmem;
 
-       return 1;
-}
+               /*
+                * Split those memory banks which partially overlap the
+                * vmalloc area, greatly simplifying things later.
+                */
+               if (__va(bank->start) < VMALLOC_MIN &&
+                   bank->size > VMALLOC_MIN - __va(bank->start)) {
+                       if (meminfo.nr_banks >= NR_BANKS) {
+                               printk(KERN_CRIT "NR_BANKS too low, "
+                                                "ignoring high memory\n");
+                       } else {
+                               memmove(bank + 1, bank,
+                                       (meminfo.nr_banks - i) * sizeof(*bank));
+                               meminfo.nr_banks++;
+                               i++;
+                               bank[1].size -= VMALLOC_MIN - __va(bank->start);
+                               bank[1].start = __pa(VMALLOC_MIN - 1) + 1;
+                               bank[1].highmem = highmem = 1;
+                               j++;
+                       }
+                       bank->size = VMALLOC_MIN - __va(bank->start);
+               }
+#else
+               bank->highmem = highmem;
 
-static void __init sanity_check_meminfo(struct meminfo *mi)
-{
-       int i, j;
+               /*
+                * Check whether this memory bank would entirely overlap
+                * the vmalloc area.
+                */
+               if (__va(bank->start) >= VMALLOC_MIN ||
+                   __va(bank->start) < (void *)PAGE_OFFSET) {
+                       printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
+                              "(vmalloc region overlap).\n",
+                              bank->start, bank->start + bank->size - 1);
+                       continue;
+               }
 
-       for (i = 0, j = 0; i < mi->nr_banks; i++) {
-               if (check_membank_valid(&mi->bank[i]))
-                       mi->bank[j++] = mi->bank[i];
+               /*
+                * Check whether this memory bank would partially overlap
+                * the vmalloc area.
+                */
+               if (__va(bank->start + bank->size) > VMALLOC_MIN ||
+                   __va(bank->start + bank->size) < __va(bank->start)) {
+                       unsigned long newsize = VMALLOC_MIN - __va(bank->start);
+                       printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
+                              "to -%.8lx (vmalloc region overlap).\n",
+                              bank->start, bank->start + bank->size - 1,
+                              bank->start + newsize - 1);
+                       bank->size = newsize;
+               }
+#endif
+               j++;
+       }
+#ifdef CONFIG_HIGHMEM
+       if (highmem) {
+               const char *reason = NULL;
+
+               if (cache_is_vipt_aliasing()) {
+                       /*
+                        * Interactions between kmap and other mappings
+                        * make highmem support with aliasing VIPT caches
+                        * rather difficult.
+                        */
+                       reason = "with VIPT aliasing cache";
+#ifdef CONFIG_SMP
+               } else if (tlb_ops_need_broadcast()) {
+                       /*
+                        * kmap_high needs to occasionally flush TLB entries,
+                        * however, if the TLB entries need to be broadcast
+                        * we may deadlock:
+                        *  kmap_high(irqs off)->flush_all_zero_pkmaps->
+                        *  flush_tlb_kernel_range->smp_call_function_many
+                        *   (must not be called with irqs off)
+                        */
+                       reason = "without hardware TLB ops broadcasting";
+#endif
+               }
+               if (reason) {
+                       printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
+                               reason);
+                       while (j > 0 && meminfo.bank[j - 1].highmem)
+                               j--;
+               }
        }
-       mi->nr_banks = j;
+#endif
+       meminfo.nr_banks = j;
 }
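
The CONFIG_HIGHMEM branch above cuts any bank straddling VMALLOC_MIN in two,
so later code only ever sees banks that are wholly lowmem or wholly highmem.
A standalone sketch of that splitting arithmetic (plain C; the 768MB lowmem
limit stands in for VMALLOC_MIN - PAGE_OFFSET and is purely illustrative):

#include <stdio.h>

struct demo_bank { unsigned long start, size; int highmem; };

int main(void)
{
	unsigned long limit = 768UL << 20;		/* hypothetical */
	struct demo_bank b = { 0, 1024UL << 20, 0 }, high = { 0, 0, 0 };

	if (b.start < limit && b.size > limit - b.start) {
		high.start   = limit;
		high.size    = b.size - (limit - b.start);
		high.highmem = 1;
		b.size       = limit - b.start;
	}
	printf("low: %luMB high: %luMB\n", b.size >> 20, high.size >> 20);
	return 0;
}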
 
-static inline void prepare_page_table(struct meminfo *mi)
+static inline void prepare_page_table(void)
 {
        unsigned long addr;
 
@@ -712,7 +812,7 @@ static inline void prepare_page_table(struct meminfo *mi)
 
 #ifdef CONFIG_XIP_KERNEL
        /* The XIP kernel is mapped in the module area -- skip over it */
-       addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
+       addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
 #endif
        for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
@@ -721,7 +821,7 @@ static inline void prepare_page_table(struct meminfo *mi)
         * Clear out all the kernel space mappings, except for the first
         * memory bank, up to the end of the vmalloc region.
         */
-       for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
+       for (addr = __phys_to_virt(bank_phys_end(&meminfo.bank[0]));
             addr < VMALLOC_END; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
 }
@@ -738,10 +838,10 @@ void __init reserve_node_zero(pg_data_t *pgdat)
         * Note that this can only be in node 0.
         */
 #ifdef CONFIG_XIP_KERNEL
-       reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
+       reserve_bootmem_node(pgdat, __pa(_data), _end - _data,
                        BOOTMEM_DEFAULT);
 #else
-       reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
+       reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext,
                        BOOTMEM_DEFAULT);
 #endif
 
@@ -770,15 +870,48 @@ void __init reserve_node_zero(pg_data_t *pgdat)
        if (machine_is_p720t())
                res_size = 0x00014000;
 
-       /* H1940 and RX3715 need to reserve this for suspend */
+       /* H1940, RX3715 and RX1950 need to reserve this for suspend */
 
-       if (machine_is_h1940() || machine_is_rx3715()) {
+       if (machine_is_h1940() || machine_is_rx3715()
+               || machine_is_rx1950()) {
                reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
                                BOOTMEM_DEFAULT);
                reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
                                BOOTMEM_DEFAULT);
        }
 
+       if (machine_is_palmld() || machine_is_palmtx()) {
+               reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+               reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+       }
+
+       if (machine_is_treo680() || machine_is_centro()) {
+               reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+               reserve_bootmem_node(pgdat, 0xa2000000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+       }
+
+       if (machine_is_palmt5())
+               reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
+                               BOOTMEM_EXCLUSIVE);
+
+       /*
+        * U300 - This platform family can share physical memory
+        * between two ARM CPUs, one running Linux and the other
+        * running another OS.
+        */
+       if (machine_is_u300()) {
+#ifdef CONFIG_MACH_U300_SINGLE_RAM
+#if ((CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1) == 1) &&   \
+       CONFIG_MACH_U300_2MB_ALIGNMENT_FIX
+               res_size = 0x00100000;
+#endif
+#endif
+       }
+
 #ifdef CONFIG_SA1111
        /*
         * Because of the SA1111 DMA bug, we want to preserve our
@@ -808,7 +941,6 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
         * Allocate the vector page early.
         */
        vectors = alloc_bootmem_low_pages(PAGE_SIZE);
-       BUG_ON(!vectors);
 
        for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
@@ -820,7 +952,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 #ifdef CONFIG_XIP_KERNEL
        map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
        map.virtual = MODULES_VADDR;
-       map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
+       map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
        map.type = MT_ROM;
        create_mapping(&map);
 #endif
@@ -876,29 +1008,77 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
        flush_cache_all();
 }
 
+static void __init kmap_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+       pmd_t *pmd = pmd_off_k(PKMAP_BASE);
+       pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
+       BUG_ON(!pmd_none(*pmd) || !pte);
+       __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
+       pkmap_page_table = pte + PTRS_PER_PTE;
+#endif
+}
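
A note on the sizing in kmap_init(): on classic ARM MMU kernels of this era
a PTE page is split in half, which the allocation above relies on (hedged
summary; see the layout comment in arch/arm/include/asm/pgtable.h):

/*
 * One page = 2 * PTRS_PER_PTE * sizeof(pte_t).  The first half holds
 * the hardware L2 tables that the PMD entry points at; the second half
 * holds the kernel's shadow "Linux" PTEs.  pkmap_page_table therefore
 * points PTRS_PER_PTE entries in, at the Linux view of the table.
 */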
+
+static inline void map_memory_bank(struct membank *bank)
+{
+       struct map_desc map;
+
+       map.pfn = bank_pfn_start(bank);
+       map.virtual = __phys_to_virt(bank_phys_start(bank));
+       map.length = bank_phys_size(bank);
+       map.type = MT_MEMORY;
+
+       create_mapping(&map);
+}
+
+static void __init map_lowmem(void)
+{
+       struct meminfo *mi = &meminfo;
+       int i;
+
+       /* Map all the lowmem memory banks. */
+       for (i = 0; i < mi->nr_banks; i++) {
+               struct membank *bank = &mi->bank[i];
+
+               if (!bank->highmem)
+                       map_memory_bank(bank);
+       }
+}
+
+static int __init meminfo_cmp(const void *_a, const void *_b)
+{
+       const struct membank *a = _a, *b = _b;
+       long cmp = bank_pfn_start(a) - bank_pfn_start(b);
+       return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
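
meminfo_cmp() reduces the pfn difference to -1/0/1, the canonical comparator
contract for the kernel's sort().  A quick standalone check, with qsort()
standing in for sort() and hypothetical pfn values:

#include <stdio.h>
#include <stdlib.h>

struct demo_bank { unsigned long pfn_start; };

static int demo_cmp(const void *_a, const void *_b)
{
	const struct demo_bank *a = _a, *b = _b;
	long cmp = a->pfn_start - b->pfn_start;

	return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
}

int main(void)
{
	struct demo_bank banks[] = { { 0x80000 }, { 0x20000 }, { 0x60000 } };

	qsort(banks, 3, sizeof(banks[0]), demo_cmp);
	/* prints: 20000 60000 80000 */
	printf("%lx %lx %lx\n", banks[0].pfn_start,
	       banks[1].pfn_start, banks[2].pfn_start);
	return 0;
}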
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
  */
-void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
+void __init paging_init(struct machine_desc *mdesc)
 {
        void *zero_page;
 
+       sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
+
        build_mem_type_table();
-       sanity_check_meminfo(mi);
-       prepare_page_table(mi);
-       bootmem_init(mi);
+       sanity_check_meminfo();
+       prepare_page_table();
+       map_lowmem();
+       bootmem_init();
        devicemaps_init(mdesc);
+       kmap_init();
 
        top_pmd = pmd_off_k(0xffff0000);
 
        /*
-        * allocate the zero page.  Note that we count on this going ok.
+        * allocate the zero page.  Note that this always succeeds and
+        * returns a zeroed result.
         */
        zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
-       memset(zero_page, 0, PAGE_SIZE);
        empty_zero_page = virt_to_page(zero_page);
-       flush_dcache_page(empty_zero_page);
+       __flush_dcache_page(NULL, empty_zero_page);
 }
 
 /*
@@ -912,10 +1092,12 @@ void setup_mm_for_reboot(char mode)
        pgd_t *pgd;
        int i;
 
-       if (current->mm && current->mm->pgd)
-               pgd = current->mm->pgd;
-       else
-               pgd = init_mm.pgd;
+       /*
+        * We need access to user-mode page tables here. For kernel threads
+        * we don't have any user-mode mappings so we use the context that we
+        * "borrowed".
+        */
+       pgd = current->active_mm->pgd;
 
        base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
        if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
@@ -930,4 +1112,6 @@ void setup_mm_for_reboot(char mode)
                pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
                flush_pmd_entry(pmd);
        }
+
+       local_flush_tlb_all();
 }
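
To make the flat mapping loop above concrete, assuming the classic
PGDIR_SHIFT = 21 and that pmdval is built as (i << PGDIR_SHIFT) | base_pmdval
in the full function: each pgd slot covers 2MB and is filled with two 1MB
section descriptors, e.g. for i = 5:

/*
 * pmdval = (5 << 21) | base_pmdval          -> 0x00a00000 | flags
 * pmd[0] maps virtual 0x00a00000 1:1 to physical 0x00a00000
 * pmd[1] = pmdval + (1 << 20)               -> 0x00b00000 | flags
 * i.e. an identity section mapping of the 2MB at 0x00a00000, so the
 * reboot code keeps executing after the MMU state changes.
 */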