X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fmm%2Finit_32.c;h=f59e9b8516378f426f78ccdc901864cbcef9535f;hb=b47e3418c52b26f6143fc696326ae52a21324551;hp=65d55056b6e781c72b1740dad18747682a2985eb;hpb=3de352bbd86f890dd0c5e1c09a6a1b0b29e0f8ce;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 65d5505..f59e9b8 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include +#include #include #include #include @@ -48,8 +50,7 @@ #include #include -unsigned int __VMALLOC_RESERVE = 128 << 20; - +unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -57,6 +58,28 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); + +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + +int after_bootmem; + +int direct_gbpages; + +static __init void *alloc_low_page(void) +{ + unsigned long pfn = table_end++; + void *adr; + + if (pfn >= table_top) + panic("alloc_low_page: ran out of memory"); + + adr = __va(pfn * PAGE_SIZE); + memset(adr, 0, PAGE_SIZE); + return adr; +} + /* * Creates a middle page table and puts a pointer to it in the * given global directory entry. This only returns the gd entry @@ -69,12 +92,16 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - + if (after_bootmem) + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + else + pmd_table = (pmd_t *)alloc_low_page(); paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); pud = pud_offset(pgd, 0); BUG_ON(pmd_table != pmd_offset(pud, 0)); + + return pmd_table; } #endif pud = pud_offset(pgd, 0); @@ -92,13 +119,15 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; + if (after_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC - page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif - if (!page_table) { - page_table = + if (!page_table) + page_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); - } + } else + page_table = (pte_t *)alloc_low_page(); paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); @@ -108,6 +137,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } +static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, + unsigned long vaddr, pte_t *lastpte) +{ +#ifdef CONFIG_HIGHMEM + /* + * Something (early fixmap) may already have put a pte + * page here, which causes the page table allocation + * to become nonlinear. Attempt to fix it, and if it + * is still nonlinear then we have to bug. + */ + int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; + int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; + + if (pmd_idx_kmap_begin != pmd_idx_kmap_end + && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin + && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end + && ((__pa(pte) >> PAGE_SHIFT) < table_start + || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { + pte_t *newpte; + int i; + + BUG_ON(after_bootmem); + newpte = alloc_low_page(); + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(newpte + i, pte[i]); + + paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); + BUG_ON(newpte != pte_offset_kernel(pmd, 0)); + __flush_tlb_all(); + + paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); + pte = newpte; + } + BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) + && vaddr > fix_to_virt(FIX_KMAP_END) + && lastpte && lastpte + PTRS_PER_PTE != pte); +#endif + return pte; +} + /* * This function initializes a certain range of kernel virtual memory * with new bootmem page tables, everywhere page tables are missing in @@ -124,6 +194,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) unsigned long vaddr; pgd_t *pgd; pmd_t *pmd; + pte_t *pte = NULL; vaddr = start; pgd_idx = pgd_index(vaddr); @@ -135,7 +206,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) pmd = pmd + pmd_index(vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - one_page_table_init(pmd); + pte = page_table_kmap_check(one_page_table_init(pmd), + pmd, vaddr, pte); vaddr += PMD_SIZE; } @@ -155,41 +227,72 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) +static void __init kernel_physical_mapping_init(unsigned long start_pfn, + unsigned long end_pfn, + int use_pse) { + pgd_t *pgd_base = swapper_pg_dir; int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; pgd_t *pgd; pmd_t *pmd; pte_t *pte; - unsigned pages_2m = 0, pages_4k = 0; + unsigned pages_2m, pages_4k; + int mapping_iter; - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; + /* + * First iteration will setup identity mapping using large/small pages + * based on use_pse, with other attributes same as set by + * the early code in head_32.S + * + * Second iteration will setup the appropriate attributes (NX, GLOBAL..) + * as desired for the kernel identity mapping. + * + * This two pass mechanism conforms to the TLB app note which says: + * + * "Software should not write to a paging-structure entry in a way + * that would change, for any linear address, both the page size + * and either the page frame or attributes." + */ + mapping_iter = 1; + + if (!cpu_has_pse) + use_pse = 0; +repeat: + pages_2m = pages_4k = 0; + pfn = start_pfn; + pgd_idx = pgd_index((pfn<= max_low_pfn) - continue; - for (pmd_idx = 0; - pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; + if (pfn >= end_pfn) + continue; +#ifdef CONFIG_X86_PAE + pmd_idx = pmd_index((pfn<start_pfn); final_end_pfn = min(end_pfn, data->end_pfn); if (final_start_pfn >= final_end_pfn) - return; + return 0; for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; node_pfn++) { @@ -321,6 +433,8 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, add_one_highpage_init(page, node_pfn); } + return 0; + } void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, @@ -334,26 +448,12 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, work_with_active_regions(nid, add_highpages_work_fn, &data); } -#ifndef CONFIG_NUMA -static void __init set_highmem_pages_init(void) +#else +static inline void permanent_kmaps_init(pgd_t *pgd_base) { - add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); - - totalram_pages += totalhigh_pages; } -#endif /* !CONFIG_NUMA */ - -#else -# define kmap_init() do { } while (0) -# define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ -pteval_t __PAGE_KERNEL = _PAGE_KERNEL; -EXPORT_SYMBOL(__PAGE_KERNEL); - -pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; - void __init native_pagetable_setup_start(pgd_t *base) { unsigned long pfn, va; @@ -409,40 +509,26 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -static void __init pagetable_init(void) +static void __init early_ioremap_page_table_range_init(void) { pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; - paravirt_pagetable_setup_start(pgd_base); - - /* Enable PSE if available */ - if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; - __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; - } - - kernel_physical_mapping_init(pgd_base); - remap_numa_kva(); - /* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): */ - early_ioremap_clear(); vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; page_table_range_init(vaddr, end, pgd_base); early_ioremap_reset(); +} - permanent_kmaps_init(pgd_base); +static void __init pagetable_init(void) +{ + pgd_t *pgd_base = swapper_pg_dir; - paravirt_pagetable_setup_done(pgd_base); + permanent_kmaps_init(pgd_base); } #ifdef CONFIG_ACPI_SLEEP @@ -485,7 +571,7 @@ void zap_low_mappings(void) int nx_enabled; -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); #ifdef CONFIG_X86_PAE @@ -538,27 +624,519 @@ static void __init set_nx(void) } #endif +/* user-defined highmem size */ +static unsigned int highmem_pages = -1; + /* - * paging_init() sets up the page tables - note that the first 8MB are - * already mapped by head.S. - * - * This routines also unmaps the page at virtual kernel address 0, so - * that we can trap those pesky NULL-reference errors in the kernel. + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. */ -void __init paging_init(void) +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; + + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); + +#define MSG_HIGHMEM_TOO_BIG \ + "highmem size (%luMB) is bigger than pages available (%luMB)!\n" + +#define MSG_LOWMEM_TOO_SMALL \ + "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n" +/* + * All of RAM fits into lowmem - but if user wants highmem + * artificially via the highmem=x boot parameter then create + * it: + */ +void __init lowmem_pfn_init(void) +{ + /* max_low_pfn is 0, we already have early_res support */ + max_low_pfn = max_pfn; + + if (highmem_pages == -1) + highmem_pages = 0; +#ifdef CONFIG_HIGHMEM + if (highmem_pages >= max_pfn) { + printk(KERN_ERR MSG_HIGHMEM_TOO_BIG, + pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); + highmem_pages = 0; + } + if (highmem_pages) { + if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { + printk(KERN_ERR MSG_LOWMEM_TOO_SMALL, + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn -= highmem_pages; + } +#else + if (highmem_pages) + printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); +#endif +} + +#define MSG_HIGHMEM_TOO_SMALL \ + "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" + +#define MSG_HIGHMEM_TRIMMED \ + "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n" +/* + * We have more RAM than fits into lowmem - we try to put it into + * highmem, also taking the highmem=x boot parameter into account: + */ +void __init highmem_pfn_init(void) +{ + max_low_pfn = MAXMEM_PFN; + + if (highmem_pages == -1) + highmem_pages = max_pfn - MAXMEM_PFN; + + if (highmem_pages + MAXMEM_PFN < max_pfn) + max_pfn = MAXMEM_PFN + highmem_pages; + + if (highmem_pages + MAXMEM_PFN > max_pfn) { + printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL, + pages_to_mb(max_pfn - MAXMEM_PFN), + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + max_pfn = MAXMEM_PFN; +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_HIGHMEM64G + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); + } +#endif /* !CONFIG_HIGHMEM64G */ +#endif /* !CONFIG_HIGHMEM */ +} + +/* + * Determine low and high memory ranges: + */ +void __init find_low_pfn_range(void) +{ + /* it could update max_pfn */ + + if (max_pfn <= MAXMEM_PFN) + lowmem_pfn_init(); + else + highmem_pfn_init(); +} + +#ifndef CONFIG_NEED_MULTIPLE_NODES +void __init initmem_init(unsigned long start_pfn, + unsigned long end_pfn) +{ +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > max_low_pfn) + highstart_pfn = max_low_pfn; + memory_present(0, 0, highend_pfn); + e820_register_active_regions(0, 0, highend_pfn); + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + memory_present(0, 0, max_low_pfn); + e820_register_active_regions(0, 0, max_low_pfn); + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif +#ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; +#endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); + + setup_bootmem_allocator(); +} +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ + +static void __init zone_sizes_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; +#endif + + free_area_init_nodes(max_zone_pfns); +} + +static unsigned long __init setup_node_bootmem(int nodeid, + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long bootmap) { + unsigned long bootmap_size; + + if (start_pfn > max_low_pfn) + return bootmap; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; + + /* don't touch min_low_pfn */ + bootmap_size = init_bootmem_node(NODE_DATA(nodeid), + bootmap >> PAGE_SHIFT, + start_pfn, end_pfn); + printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", + nodeid, start_pfn<> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + + if (use_gbpages) { + unsigned long extra; + + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +#ifdef CONFIG_X86_32 + extra += PMD_SIZE; +#endif + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + +#ifdef CONFIG_X86_32 + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); +#endif + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. + */ +#ifdef CONFIG_X86_32 + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables >> PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); +} + +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#ifdef CONFIG_X86_32 +#define NR_RANGE_MR 3 +#else /* CONFIG_X86_64 */ +#define NR_RANGE_MR 5 +#endif + +static int save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; +#ifdef CONFIG_X86_32 + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + if (pos == 0) + end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); + else + end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#endif + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } + + /* big page (2M) range */ + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#ifdef CONFIG_X86_32 + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +#endif + + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* try to merge same page size and continuous */ + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { + unsigned long old_start; + if (mr[i].end != mr[i+1].start || + mr[i].page_size_mask != mr[i+1].page_size_mask) + continue; + /* move it */ + old_start = mr[i].start; + memmove(&mr[i], &mr[i+1], + (nr_range - 1 - i) * sizeof(struct map_range)); + mr[i--].start = old_start; + nr_range--; + } + + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %010lx - %010lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1<> PAGE_SHIFT, + mr[i].end >> PAGE_SHIFT, + mr[i].page_size_mask == (1< table_start) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + if (!after_bootmem) + early_memtest(start, end); + + return ret >> PAGE_SHIFT; +} + + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + pagetable_init(); __flush_tlb_all(); kmap_init(); + + /* + * NOTE: at this point the bootmem allocator is fully available. + */ + sparse_init(); + zone_sizes_init(); } /* @@ -595,6 +1173,8 @@ void __init mem_init(void) int codesize, reservedpages, datasize, initsize; int tmp; + pci_iommu_alloc(); + #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif @@ -664,17 +1244,30 @@ void __init mem_init(void) (unsigned long)&_text, (unsigned long)&_etext, ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + /* + * Check boundaries twice: Some fundamental inconsistencies can + * be detected at build time already. + */ +#define __FIXADDR_TOP (-PAGE_SIZE) +#ifdef CONFIG_HIGHMEM + BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE); +#endif +#define high_memory (-128UL << 20) + BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); +#undef high_memory +#undef __FIXADDR_TOP + #ifdef CONFIG_HIGHMEM BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); BUG_ON(VMALLOC_END > PKMAP_BASE); #endif - BUG_ON(VMALLOC_START > VMALLOC_END); + BUG_ON(VMALLOC_START >= VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); - cpa_init(); save_pg_dir(); zap_low_mappings(); } @@ -687,7 +1280,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif @@ -724,6 +1317,8 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; +#ifndef CONFIG_DYNAMIC_FTRACE + /* Dynamic tracing modifies the kernel text section */ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel text: %luk\n", size >> 10); @@ -736,6 +1331,8 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); #endif +#endif /* CONFIG_DYNAMIC_FTRACE */ + start += size; size = (unsigned long)__end_rodata - start; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); @@ -753,52 +1350,6 @@ void mark_rodata_ro(void) } #endif -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ -#ifdef CONFIG_DEBUG_PAGEALLOC - /* - * If debugging page accesses then do not free this memory but - * mark them not present - any buggy init-section access will - * create a kernel page fault: - */ - printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, PAGE_ALIGN(end)); - set_memory_np(begin, (end - begin) >> PAGE_SHIFT); -#else - unsigned long addr; - - /* - * We just marked the kernel text read only above, now that - * we are going to free part of that, we need to make that - * writeable first. - */ - set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); -#endif -} - -void free_initmem(void) -{ - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif - int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) {