/*
- * linux/arch/i386/mm/init.c
*
* Copyright (C) 1995 Linus Torvalds
*
#include <linux/initrd.h>
#include <linux/cpumask.h>
+#include <asm/asm.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
+#include <asm/setup.h>
+#include <asm/cacheflush.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
+unsigned long max_pfn_mapped;
+
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+ paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
}
- paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+ paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
/*
* Map with big pages if possible, otherwise
* create normal page tables:
+ *
+ * Don't use a large page for the first 2/4MB of memory
+ * because there are often fixed size MTRRs in there
+ * and overlapping MTRRs into large pages can cause
+ * slowdowns.
*/
- if (cpu_has_pse) {
+ if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
unsigned int addr2;
pgprot_t prot = PAGE_KERNEL_LARGE;
set_pmd(pmd, pfn_pmd(pfn, prot));
pfn += PTRS_PER_PTE;
+ max_pfn_mapped = pfn;
continue;
}
pte = one_page_table_init(pmd);
set_pte(pte, pfn_pte(pfn, prot));
}
+ max_pfn_mapped = pfn;
}
}
}
return 0;
}
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+ if (pagenr <= 256)
+ return 1;
+ if (!page_is_ram(pagenr))
+ return 1;
+ return 0;
+}
+
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;
pkmap_page_table = pte;
}
-static void __meminit free_new_highpage(struct page *page)
-{
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
-}
-
void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
- free_new_highpage(page);
+ init_page_count(page);
+ __free_page(page);
+ totalhigh_pages++;
} else
SetPageReserved(page);
}
-static int __meminit
-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
- free_new_highpage(page);
- totalram_pages++;
-#ifdef CONFIG_FLATMEM
- max_mapnr = max(pfn, max_mapnr);
-#endif
- num_physpages++;
-
- return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM.
- */
-void __meminit online_page(struct page *page)
-{
- ClearPageReserved(page);
- add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
#ifndef CONFIG_NUMA
static void __init set_highmem_pages_init(int bad_ppro)
{
void __init native_pagetable_setup_start(pgd_t *base)
{
-#ifdef CONFIG_X86_PAE
- int i;
+ unsigned long pfn, va;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
/*
- * Init entries of the first-level page table to the
- * zero page, if they haven't already been set up.
- *
- * In a normal native boot, we'll be running on a
- * pagetable rooted in swapper_pg_dir, but not in PAE
- * mode, so this will end up clobbering the mappings
- * for the lower 24Mbytes of the address space,
- * without affecting the kernel address space.
+ * Remove any mappings which extend past the end of physical
+ * memory from the boot time page table:
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
- set_pgd(&base[i],
- __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
- /* Make sure kernel address space is empty so that a pagetable
- will be allocated for it. */
- memset(&base[USER_PTRS_PER_PGD], 0,
- KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
- paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
-#endif
+ for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+ va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
+ pgd = base + pgd_index(va);
+ if (!pgd_present(*pgd))
+ break;
+
+ pud = pud_offset(pgd, va);
+ pmd = pmd_offset(pud, va);
+ if (!pmd_present(*pmd))
+ break;
+
+ pte = pte_offset_kernel(pmd, va);
+ if (!pte_present(*pte))
+ break;
+
+ pte_clear(NULL, va, pte);
+ }
+ paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
}
void __init native_pagetable_setup_done(pgd_t *base)
{
-#ifdef CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
}
/*
* the boot process.
*
* If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE). The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S. The root of the
+ * pagetable will be swapper_pg_dir.
*
* If we're booting paravirtualized under a hypervisor, then there are
* more options: we may already be running PAE, and the pagetable may
paravirt_pagetable_setup_done(pgd_base);
}
-#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI_SLEEP
/*
- * Swap suspend & friends need this for resume because things like the intel-agp
+ * ACPI suspend needs this for resume, because things like the intel-agp
* driver might have split up a kernel 4MB mapping.
*/
-char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+char swsusp_pg_dir[PAGE_SIZE]
__attribute__ ((aligned(PAGE_SIZE)));
static inline void save_pg_dir(void)
{
memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
-#else
+#else /* !CONFIG_ACPI_SLEEP */
static inline void save_pg_dir(void)
{
}
-#endif
+#endif /* !CONFIG_ACPI_SLEEP */
void zap_low_mappings(void)
{
int i;
- save_pg_dir();
-
/*
* Zap initial low-memory mappings.
*
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+ for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
load_cr3(swapper_pg_dir);
-#ifdef CONFIG_X86_PAE
- /*
- * We will bail out later - printk doesn't work right now so
- * the user would just see a hanging kernel.
- */
- if (cpu_has_pae)
- set_in_cr4(X86_CR4_PAE);
-#endif
__flush_tlb_all();
kmap_init();
/*
* Test if the WP bit works in supervisor mode. It isn't supported on 386's
- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
- * used to involve black magic jumps to work around some nasty CPU bugs,
- * but fortunately the switch to using exceptions got rid of all that.
+ * and also on some strange 486's. All 586+'s are OK. This used to involve
+ * black magic jumps to work around some nasty CPU bugs, but fortunately the
+ * switch to using exceptions got rid of all that.
*/
static void __init test_wp_bit(void)
{
BUG_ON((unsigned long)high_memory > VMALLOC_START);
#endif /* double-sanity-check paranoia */
-#ifdef CONFIG_X86_PAE
- if (!cpu_has_pae)
- panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
- /*
- * Subtle. SMP is doing it's boot stuff late (because it has to
- * fork idle threads) - but it also needs low mappings for the
- * protected-mode entry to work. We zap these entries only after
- * the WP-bit has been tested.
- */
-#ifndef CONFIG_SMP
+ cpa_init();
+ save_pg_dir();
zap_low_mappings();
-#endif
}
#ifdef CONFIG_MEMORY_HOTPLUG
}
#endif
-struct kmem_cache *pmd_cache;
-
-void __init pgtable_cache_init(void)
-{
- if (PTRS_PER_PMD > 1) {
- pmd_cache = kmem_cache_create("pmd",
- PTRS_PER_PMD*sizeof(pmd_t),
- PTRS_PER_PMD*sizeof(pmd_t),
- SLAB_PANIC,
- pmd_ctor);
- }
-}
-
/*
* This function cannot be __init, since exceptions don't work in that
* section. Put this after the callers, so that it cannot be inlined.
"1: movb %1, %0 \n"
" xorl %2, %2 \n"
"2: \n"
- ".section __ex_table, \"a\"\n"
- " .align 4 \n"
- " .long 1b, 2b \n"
- ".previous \n"
+ _ASM_EXTABLE(1b,2b)
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg),
"=r" (flag)
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
-#ifndef CONFIG_KPROBES
-#ifdef CONFIG_HOTPLUG_CPU
- /* It must still be possible to apply SMP alternatives. */
- if (num_possible_cpus() <= 1)
-#endif
- {
- set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
- printk(KERN_INFO "Write protecting the kernel text: %luk\n",
- size >> 10);
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+ printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+ size >> 10);
#ifdef CONFIG_CPA_DEBUG
- printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
- start, start+size);
- set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+ printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
+ start, start+size);
+ set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
- printk(KERN_INFO "Testing CPA: write protecting again\n");
- set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
-#endif
- }
+ printk(KERN_INFO "Testing CPA: write protecting again\n");
+ set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif
start += size;
size = (unsigned long)__end_rodata - start;