X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Farm%2Fmm%2Fmmu.c;h=4810a4c9ffce1ace6f0872949514ad3844d1c76c;hb=d73cd42893f4cdc06e6829fea2347bb92cb789d1;hp=b7f194af20b490b03c85805ad9731fda33c6745c;hpb=bbf6f2809dbadc2bacfd73a052d8b0893dbf1762;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b7f194a..4810a4c 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -15,10 +15,13 @@ #include #include +#include #include +#include #include #include #include +#include #include #include @@ -27,14 +30,12 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -extern void _stext, _etext, __data_start, _end; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; - /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. */ struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); /* * The pmd table for the upper-most set of pages. @@ -49,8 +50,10 @@ pmd_t *top_pmd; static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; static unsigned int ecc_mask __initdata = 0; +pgprot_t pgprot_user; pgprot_t pgprot_kernel; +EXPORT_SYMBOL(pgprot_user); EXPORT_SYMBOL(pgprot_kernel); struct cachepolicy { @@ -65,32 +68,32 @@ static struct cachepolicy cache_policies[] __initdata = { .policy = "uncached", .cr_mask = CR_W|CR_C, .pmd = PMD_SECT_UNCACHED, - .pte = 0, + .pte = L_PTE_MT_UNCACHED, }, { .policy = "buffered", .cr_mask = CR_C, .pmd = PMD_SECT_BUFFERED, - .pte = PTE_BUFFERABLE, + .pte = L_PTE_MT_BUFFERABLE, }, { .policy = "writethrough", .cr_mask = 0, .pmd = PMD_SECT_WT, - .pte = PTE_CACHEABLE, + .pte = L_PTE_MT_WRITETHROUGH, }, { .policy = "writeback", .cr_mask = 0, .pmd = PMD_SECT_WB, - .pte = PTE_BUFFERABLE|PTE_CACHEABLE, + .pte = L_PTE_MT_WRITEBACK, }, { .policy = "writealloc", .cr_mask = 0, .pmd = PMD_SECT_WBWA, - .pte = PTE_BUFFERABLE|PTE_CACHEABLE, + .pte = L_PTE_MT_WRITEALLOC, } }; /* - * These are useful for identifing cache coherency + * These are useful for identifying cache coherency * problems by allowing the cache or the cache and * writebuffer to be turned off. (Note: the write * buffer should not be on and the cache off). @@ -112,6 +115,10 @@ static void __init early_cachepolicy(char **p) } if (i == ARRAY_SIZE(cache_policies)) printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); + if (cpu_architecture() >= CPU_ARCH_ARMv6) { + printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n"); + cachepolicy = CPOLICY_WRITEBACK; + } flush_cache_all(); set_cr(cr_alignment); } @@ -154,28 +161,67 @@ static int __init noalign_setup(char *__unused) } __setup("noalign", noalign_setup); -struct mem_types { - unsigned int prot_pte; - unsigned int prot_l1; - unsigned int prot_sect; - unsigned int domain; -}; +#ifndef CONFIG_SMP +void adjust_cr(unsigned long mask, unsigned long set) +{ + unsigned long flags; -static struct mem_types mem_types[] __initdata = { - [MT_DEVICE] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE, - .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_UNCACHED | - PMD_SECT_AP_WRITE, - .domain = DOMAIN_IO, + mask &= ~CR_A; + + set &= mask; + + local_irq_save(flags); + + cr_no_alignment = (cr_no_alignment & ~mask) | set; + cr_alignment = (cr_alignment & ~mask) | set; + + set_cr((get_cr() & ~mask) | set); + + local_irq_restore(flags); +} +#endif + +#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE +#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE + +static struct mem_type mem_types[] = { + [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | + L_PTE_SHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_CACHED] = { /* ioremap_cached */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_WC] = { /* ioremap_wc */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_UNCACHED] = { + .prot_pte = PROT_PTE_DEVICE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_IO, }, [MT_CACHECLEAN] = { - .prot_sect = PMD_TYPE_SECT | PMD_BIT4, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, [MT_MINICLEAN] = { - .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_MINICACHE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE, .domain = DOMAIN_KERNEL, }, [MT_LOW_VECTORS] = { @@ -191,30 +237,20 @@ static struct mem_types mem_types[] __initdata = { .domain = DOMAIN_USER, }, [MT_MEMORY] = { - .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_AP_WRITE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, [MT_ROM] = { - .prot_sect = PMD_TYPE_SECT | PMD_BIT4, + .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, }, - [MT_IXP2000_DEVICE] = { /* IXP2400 requires XCB=101 for on-chip I/O */ - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE, - .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_UNCACHED | - PMD_SECT_AP_WRITE | PMD_SECT_BUFFERABLE | - PMD_SECT_TEX(1), - .domain = DOMAIN_IO, - }, - [MT_NONSHARED_DEVICE] = { - .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PMD_TYPE_SECT | PMD_BIT4 | PMD_SECT_NONSHARED_DEV | - PMD_SECT_AP_WRITE, - .domain = DOMAIN_IO, - } }; +const struct mem_type *get_mem_type(unsigned int type) +{ + return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL; +} + /* * Adjust the PMD section entries according to the CPU in use. */ @@ -222,65 +258,140 @@ static void __init build_mem_type_table(void) { struct cachepolicy *cp; unsigned int cr = get_cr(); - unsigned int user_pgprot, kern_pgprot; + unsigned int user_pgprot, kern_pgprot, vecs_pgprot; int cpu_arch = cpu_architecture(); int i; + if (cpu_arch < CPU_ARCH_ARMv6) { #if defined(CONFIG_CPU_DCACHE_DISABLE) - if (cachepolicy > CPOLICY_BUFFERED) - cachepolicy = CPOLICY_BUFFERED; + if (cachepolicy > CPOLICY_BUFFERED) + cachepolicy = CPOLICY_BUFFERED; #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) - if (cachepolicy > CPOLICY_WRITETHROUGH) - cachepolicy = CPOLICY_WRITETHROUGH; + if (cachepolicy > CPOLICY_WRITETHROUGH) + cachepolicy = CPOLICY_WRITETHROUGH; #endif + } if (cpu_arch < CPU_ARCH_ARMv5) { if (cachepolicy >= CPOLICY_WRITEALLOC) cachepolicy = CPOLICY_WRITEBACK; ecc_mask = 0; } +#ifdef CONFIG_SMP + cachepolicy = CPOLICY_WRITEALLOC; +#endif /* - * Xscale must not have PMD bit 4 set for section mappings. + * Strip out features not present on earlier architectures. + * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those + * without extended page tables don't have the 'Shared' bit. */ - if (cpu_is_xscale()) + if (cpu_arch < CPU_ARCH_ARMv5) for (i = 0; i < ARRAY_SIZE(mem_types); i++) - mem_types[i].prot_sect &= ~PMD_BIT4; + mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); + if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_S; /* - * ARMv5 and lower, excluding Xscale, bit 4 must be set for - * page tables. + * ARMv5 and lower, bit 4 must be set for page tables (was: cache + * "update-able on write" bit on ARM610). However, Xscale and + * Xscale3 require this bit to be cleared. */ - if (cpu_arch < CPU_ARCH_ARMv6 && !cpu_is_xscale()) - for (i = 0; i < ARRAY_SIZE(mem_types); i++) + if (cpu_is_xscale() || cpu_is_xsc3()) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_sect &= ~PMD_BIT4; + mem_types[i].prot_l1 &= ~PMD_BIT4; + } + } else if (cpu_arch < CPU_ARCH_ARMv6) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { if (mem_types[i].prot_l1) mem_types[i].prot_l1 |= PMD_BIT4; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_BIT4; + } + } + + /* + * Mark the device areas according to the CPU/architecture. + */ + if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { + if (!cpu_is_xsc3()) { + /* + * Mark device regions on ARMv6+ as execute-never + * to prevent speculative instruction fetches. + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + } + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* + * For ARMv7 with TEX remapping, + * - shared device is SXCB=1100 + * - nonshared device is SXCB=0100 + * - write combine device mem is SXCB=0001 + * (Uncached Normal memory) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } else if (cpu_is_xsc3()) { + /* + * For Xscale3, + * - shared device is TEXCB=00101 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Inner/Outer Uncacheable in xsc3 parlance) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + /* + * For ARMv6 and ARMv7 without TEX remapping, + * - shared device is TEXCB=00001 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Uncached Normal in ARMv6 parlance). + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } + } else { + /* + * On others, write combining is "Uncached/Buffered" + */ + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + /* + * Now deal with the memory-type mappings + */ cp = &cache_policies[cachepolicy]; - kern_pgprot = user_pgprot = cp->pte; + vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; + +#ifndef CONFIG_SMP + /* + * Only use write-through for non-SMP systems + */ + if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) + vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; +#endif /* * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) */ - if (arch_is_coherent()) { - if (cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - } - } + if (arch_is_coherent() && cpu_is_xsc3()) + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; /* * ARMv6 and above have extended page tables. */ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { /* - * bit 4 becomes XN which we must clear for the - * kernel memory mapping. - */ - mem_types[MT_MEMORY].prot_sect &= ~PMD_SECT_XN; - mem_types[MT_ROM].prot_sect &= ~PMD_SECT_XN; - - /* * Mark cache clean areas and XIP ROM read only * from SVC mode and no access from userspace. */ @@ -288,49 +399,26 @@ static void __init build_mem_type_table(void) mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; - /* - * Mark the device area as "shared device" - */ - mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE; - mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; - - /* - * User pages need to be mapped with the ASID - * (iow, non-global) - */ - user_pgprot |= L_PTE_ASID; - #ifdef CONFIG_SMP /* * Mark memory with the "shared" attribute for SMP systems */ user_pgprot |= L_PTE_SHARED; kern_pgprot |= L_PTE_SHARED; + vecs_pgprot |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; #endif } for (i = 0; i < 16; i++) { unsigned long v = pgprot_val(protection_map[i]); - v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot; - protection_map[i] = __pgprot(v); + protection_map[i] = __pgprot(v | user_pgprot); } - mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot; - mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot; - - if (cpu_arch >= CPU_ARCH_ARMv5) { -#ifndef CONFIG_SMP - /* - * Only use write-through for non-SMP systems - */ - mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE; - mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE; -#endif - } else { - mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1); - } + mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; + mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; + pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | L_PTE_EXEC | kern_pgprot); @@ -351,64 +439,125 @@ static void __init build_mem_type_table(void) } printk("Memory policy: ECC %sabled, Data cache %s\n", ecc_mask ? "en" : "dis", cp->policy); + + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + struct mem_type *t = &mem_types[i]; + if (t->prot_l1) + t->prot_l1 |= PMD_DOMAIN(t->domain); + if (t->prot_sect) + t->prot_sect |= PMD_DOMAIN(t->domain); + } } #define vectors_base() (vectors_high() ? 0xffff0000 : 0) -/* - * Create a SECTION PGD between VIRT and PHYS in domain - * DOMAIN with protection PROT. This operates on half- - * pgdir entry increments. - */ -static inline void -alloc_init_section(unsigned long virt, unsigned long phys, int prot) +static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long pfn, + const struct mem_type *type) { - pmd_t *pmdp = pmd_off_k(virt); + pte_t *pte; - if (virt & (1 << 20)) - pmdp++; + if (pmd_none(*pmd)) { + pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t)); + __pmd_populate(pmd, __pa(pte) | type->prot_l1); + } - *pmdp = __pmd(phys | prot); - flush_pmd_entry(pmdp); + pte = pte_offset_kernel(pmd, addr); + do { + set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); } -/* - * Create a SUPER SECTION PGD between VIRT and PHYS with protection PROT - */ -static inline void -alloc_init_supersection(unsigned long virt, unsigned long phys, int prot) +static void __init alloc_init_section(pgd_t *pgd, unsigned long addr, + unsigned long end, unsigned long phys, + const struct mem_type *type) { - int i; + pmd_t *pmd = pmd_offset(pgd, addr); + + /* + * Try a section mapping - end, addr and phys must all be aligned + * to a section boundary. Note that PMDs refer to the individual + * L1 entries, whereas PGDs refer to a group of L1 entries making + * up one logical pointer to an L2 table. + */ + if (((addr | end | phys) & ~SECTION_MASK) == 0) { + pmd_t *p = pmd; + + if (addr & SECTION_SIZE) + pmd++; - for (i = 0; i < 16; i += 1) { - alloc_init_section(virt, phys, prot | PMD_SECT_SUPER); + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); - virt += (PGDIR_SIZE / 2); + flush_pmd_entry(p); + } else { + /* + * No need to loop; pte's aren't interested in the + * individual L1 entries. + */ + alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); } } -/* - * Add a PAGE mapping between VIRT and PHYS in domain - * DOMAIN with protection PROT. Note that due to the - * way we map the PTEs, we must allocate two PTE_SIZE'd - * blocks - one for the Linux pte table, and one for - * the hardware pte table. - */ -static inline void -alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot) +static void __init create_36bit_mapping(struct map_desc *md, + const struct mem_type *type) { - pmd_t *pmdp = pmd_off_k(virt); - pte_t *ptep; + unsigned long phys, addr, length, end; + pgd_t *pgd; + + addr = md->virtual; + phys = (unsigned long)__pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length); - if (pmd_none(*pmdp)) { - ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * - sizeof(pte_t)); + if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) { + printk(KERN_ERR "MM: CPU does not support supersection " + "mapping for 0x%08llx at 0x%08lx\n", + __pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* N.B. ARMv6 supersections are only defined to work with domain 0. + * Since domain assignments can in fact be arbitrary, the + * 'domain == 0' check below is required to insure that ARMv6 + * supersections are only allocated for domain 0 regardless + * of the actual domain assignments in use. + */ + if (type->domain) { + printk(KERN_ERR "MM: invalid domain in supersection " + "mapping for 0x%08llx at 0x%08lx\n", + __pfn_to_phys((u64)md->pfn), addr); + return; + } - __pmd_populate(pmdp, __pa(ptep) | prot_l1); + if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) { + printk(KERN_ERR "MM: cannot create mapping for " + "0x%08llx at 0x%08lx invalid alignment\n", + __pfn_to_phys((u64)md->pfn), addr); + return; } - ptep = pte_offset_kernel(pmdp, virt); - set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); + /* + * Shift bits [35:32] of address into bits [23:20] of PMD + * (See ARMv6 spec). + */ + phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); + + pgd = pgd_offset_k(addr); + end = addr + length; + do { + pmd_t *pmd = pmd_offset(pgd, addr); + int i; + + for (i = 0; i < 16; i++) + *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER); + + addr += SUPERSECTION_SIZE; + phys += SUPERSECTION_SIZE; + pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT; + } while (addr != end); } /* @@ -420,10 +569,9 @@ alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pg */ void __init create_mapping(struct map_desc *md) { - unsigned long virt, length; - int prot_sect, prot_l1, domain; - pgprot_t prot_pte; - unsigned long off = (u32)__pfn_to_phys(md->pfn); + unsigned long phys, addr, length, end; + const struct mem_type *type; + pgd_t *pgd; if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { printk(KERN_WARNING "BUG: not creating mapping for " @@ -439,105 +587,37 @@ void __init create_mapping(struct map_desc *md) __pfn_to_phys((u64)md->pfn), md->virtual); } - domain = mem_types[md->type].domain; - prot_pte = __pgprot(mem_types[md->type].prot_pte); - prot_l1 = mem_types[md->type].prot_l1 | PMD_DOMAIN(domain); - prot_sect = mem_types[md->type].prot_sect | PMD_DOMAIN(domain); + type = &mem_types[md->type]; /* * Catch 36-bit addresses */ - if(md->pfn >= 0x100000) { - if(domain) { - printk(KERN_ERR "MM: invalid domain in supersection " - "mapping for 0x%08llx at 0x%08lx\n", - __pfn_to_phys((u64)md->pfn), md->virtual); - return; - } - if((md->virtual | md->length | __pfn_to_phys(md->pfn)) - & ~SUPERSECTION_MASK) { - printk(KERN_ERR "MM: cannot create mapping for " - "0x%08llx at 0x%08lx invalid alignment\n", - __pfn_to_phys((u64)md->pfn), md->virtual); - return; - } - - /* - * Shift bits [35:32] of address into bits [23:20] of PMD - * (See ARMv6 spec). - */ - off |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); + if (md->pfn >= 0x100000) { + create_36bit_mapping(md, type); + return; } - virt = md->virtual; - off -= virt; - length = md->length; + addr = md->virtual & PAGE_MASK; + phys = (unsigned long)__pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); - if (mem_types[md->type].prot_l1 == 0 && - (virt & 0xfffff || (virt + off) & 0xfffff || (virt + length) & 0xfffff)) { + if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not " "be mapped using pages, ignoring.\n", - __pfn_to_phys(md->pfn), md->virtual); + __pfn_to_phys(md->pfn), addr); return; } - while ((virt & 0xfffff || (virt + off) & 0xfffff) && length >= PAGE_SIZE) { - alloc_init_page(virt, virt + off, prot_l1, prot_pte); - - virt += PAGE_SIZE; - length -= PAGE_SIZE; - } - - /* N.B. ARMv6 supersections are only defined to work with domain 0. - * Since domain assignments can in fact be arbitrary, the - * 'domain == 0' check below is required to insure that ARMv6 - * supersections are only allocated for domain 0 regardless - * of the actual domain assignments in use. - */ - if ((cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3()) - && domain == 0) { - /* - * Align to supersection boundary if !high pages. - * High pages have already been checked for proper - * alignment above and they will fail the SUPSERSECTION_MASK - * check because of the way the address is encoded into - * offset. - */ - if (md->pfn <= 0x100000) { - while ((virt & ~SUPERSECTION_MASK || - (virt + off) & ~SUPERSECTION_MASK) && - length >= (PGDIR_SIZE / 2)) { - alloc_init_section(virt, virt + off, prot_sect); - - virt += (PGDIR_SIZE / 2); - length -= (PGDIR_SIZE / 2); - } - } - - while (length >= SUPERSECTION_SIZE) { - alloc_init_supersection(virt, virt + off, prot_sect); - - virt += SUPERSECTION_SIZE; - length -= SUPERSECTION_SIZE; - } - } - - /* - * A section mapping covers half a "pgdir" entry. - */ - while (length >= (PGDIR_SIZE / 2)) { - alloc_init_section(virt, virt + off, prot_sect); + pgd = pgd_offset_k(addr); + end = addr + length; + do { + unsigned long next = pgd_addr_end(addr, end); - virt += (PGDIR_SIZE / 2); - length -= (PGDIR_SIZE / 2); - } - - while (length >= PAGE_SIZE) { - alloc_init_page(virt, virt + off, prot_l1, prot_pte); + alloc_init_section(pgd, addr, next, phys, type); - virt += PAGE_SIZE; - length -= PAGE_SIZE; - } + phys += next - addr; + addr = next; + } while (pgd++, addr != end); } /* @@ -551,19 +631,109 @@ void __init iotable_init(struct map_desc *io_desc, int nr) create_mapping(io_desc + i); } -static inline void prepare_page_table(struct meminfo *mi) +static unsigned long __initdata vmalloc_reserve = SZ_128M; + +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the vmalloc + * area - the default is 128m. + */ +static void __init early_vmalloc(char **arg) +{ + vmalloc_reserve = memparse(*arg, arg); + + if (vmalloc_reserve < SZ_16M) { + vmalloc_reserve = SZ_16M; + printk(KERN_WARNING + "vmalloc area too small, limiting to %luMB\n", + vmalloc_reserve >> 20); + } + + if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) { + vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M); + printk(KERN_WARNING + "vmalloc area is too big, limiting to %luMB\n", + vmalloc_reserve >> 20); + } +} +__early_param("vmalloc=", early_vmalloc); + +#define VMALLOC_MIN (void *)(VMALLOC_END - vmalloc_reserve) + +static void __init sanity_check_meminfo(void) +{ + int i, j; + + for (i = 0, j = 0; i < meminfo.nr_banks; i++) { + struct membank *bank = &meminfo.bank[j]; + *bank = meminfo.bank[i]; + +#ifdef CONFIG_HIGHMEM + /* + * Split those memory banks which are partially overlapping + * the vmalloc area greatly simplifying things later. + */ + if (__va(bank->start) < VMALLOC_MIN && + bank->size > VMALLOC_MIN - __va(bank->start)) { + if (meminfo.nr_banks >= NR_BANKS) { + printk(KERN_CRIT "NR_BANKS too low, " + "ignoring high memory\n"); + } else { + memmove(bank + 1, bank, + (meminfo.nr_banks - i) * sizeof(*bank)); + meminfo.nr_banks++; + i++; + bank[1].size -= VMALLOC_MIN - __va(bank->start); + bank[1].start = __pa(VMALLOC_MIN - 1) + 1; + j++; + } + bank->size = VMALLOC_MIN - __va(bank->start); + } +#else + /* + * Check whether this memory bank would entirely overlap + * the vmalloc area. + */ + if (__va(bank->start) >= VMALLOC_MIN || + __va(bank->start) < PAGE_OFFSET) { + printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx " + "(vmalloc region overlap).\n", + bank->start, bank->start + bank->size - 1); + continue; + } + + /* + * Check whether this memory bank would partially overlap + * the vmalloc area. + */ + if (__va(bank->start + bank->size) > VMALLOC_MIN || + __va(bank->start + bank->size) < __va(bank->start)) { + unsigned long newsize = VMALLOC_MIN - __va(bank->start); + printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx " + "to -%.8lx (vmalloc region overlap).\n", + bank->start, bank->start + bank->size - 1, + bank->start + newsize - 1); + bank->size = newsize; + } +#endif + j++; + } + meminfo.nr_banks = j; +} + +static inline void prepare_page_table(void) { unsigned long addr; /* * Clear out all the mappings below the kernel image. */ - for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE) + for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ - addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK; + addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK; #endif for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); @@ -572,7 +742,7 @@ static inline void prepare_page_table(struct meminfo *mi) * Clear out all the kernel space mappings, except for the first * memory bank, up to the end of the vmalloc region. */ - for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size); + for (addr = __phys_to_virt(bank_phys_end(&meminfo.bank[0])); addr < VMALLOC_END; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); } @@ -589,9 +759,11 @@ void __init reserve_node_zero(pg_data_t *pgdat) * Note that this can only be in node 0. */ #ifdef CONFIG_XIP_KERNEL - reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start); + reserve_bootmem_node(pgdat, __pa(_data), _end - _data, + BOOTMEM_DEFAULT); #else - reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext); + reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext, + BOOTMEM_DEFAULT); #endif /* @@ -599,7 +771,7 @@ void __init reserve_node_zero(pg_data_t *pgdat) * and can only be in node 0. */ reserve_bootmem_node(pgdat, __pa(swapper_pg_dir), - PTRS_PER_PGD * sizeof(pgd_t)); + PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT); /* * Hmm... This should go elsewhere, but we really really need to @@ -622,8 +794,10 @@ void __init reserve_node_zero(pg_data_t *pgdat) /* H1940 and RX3715 need to reserve this for suspend */ if (machine_is_h1940() || machine_is_rx3715()) { - reserve_bootmem_node(pgdat, 0x30003000, 0x1000); - reserve_bootmem_node(pgdat, 0x30081000, 0x1000); + reserve_bootmem_node(pgdat, 0x30003000, 0x1000, + BOOTMEM_DEFAULT); + reserve_bootmem_node(pgdat, 0x30081000, 0x1000, + BOOTMEM_DEFAULT); } #ifdef CONFIG_SA1111 @@ -634,7 +808,8 @@ void __init reserve_node_zero(pg_data_t *pgdat) res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; #endif if (res_size) - reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size); + reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size, + BOOTMEM_DEFAULT); } /* @@ -654,7 +829,6 @@ static void __init devicemaps_init(struct machine_desc *mdesc) * Allocate the vector page early. */ vectors = alloc_bootmem_low_pages(PAGE_SIZE); - BUG_ON(!vectors); for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); @@ -665,8 +839,8 @@ static void __init devicemaps_init(struct machine_desc *mdesc) */ #ifdef CONFIG_XIP_KERNEL map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); - map.virtual = MODULE_START; - map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; + map.virtual = MODULES_VADDR; + map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; map.type = MT_ROM; create_mapping(&map); #endif @@ -722,26 +896,39 @@ static void __init devicemaps_init(struct machine_desc *mdesc) flush_cache_all(); } +static void __init kmap_init(void) +{ +#ifdef CONFIG_HIGHMEM + pmd_t *pmd = pmd_off_k(PKMAP_BASE); + pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t)); + BUG_ON(!pmd_none(*pmd) || !pte); + __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE); + pkmap_page_table = pte + PTRS_PER_PTE; +#endif +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ -void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc) +void __init paging_init(struct machine_desc *mdesc) { void *zero_page; build_mem_type_table(); - prepare_page_table(mi); - bootmem_init(mi); + sanity_check_meminfo(); + prepare_page_table(); + bootmem_init(); devicemaps_init(mdesc); + kmap_init(); top_pmd = pmd_off_k(0xffff0000); /* - * allocate the zero page. Note that we count on this going ok. + * allocate the zero page. Note that this always succeeds and + * returns a zeroed result. */ zero_page = alloc_bootmem_low_pages(PAGE_SIZE); - memzero(zero_page, PAGE_SIZE); empty_zero_page = virt_to_page(zero_page); flush_dcache_page(empty_zero_page); }