From aeed5fce37196e09b4dac3a1c00d8b7122e040ce Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 May 2008 20:49:23 +0100 Subject: [PATCH] x86: fix PAE pmd_bad bootup warning Fix warning from pmd_bad() at bootup on a HIGHMEM64G HIGHPTE x86_32. That came from 9fc34113f6880b215cbea4e7017fc818700384c2 x86: debug pmd_bad(); but we understand now that the typecasting was wrong for PAE in the previous version: pagetable pages above 4GB looked bad and stopped Arjan from booting. And revert that cded932b75ab0a5f9181ee3da34a0a488d1a14fd x86: fix pmd_bad and pud_bad to support huge pages. It was the wrong way round: we shouldn't weaken every pmd_bad and pud_bad check to let huge pages slip through - in part they check that we _don't_ have a huge page where it's not expected. Put the x86 pmd_bad() and pud_bad() definitions back to what they have long been: they can be improved (x86_32 should use PTE_MASK, to stop PAE thinking junk in the upper word is good; and x86_64 should follow x86_32's stricter comparison, to stop thinking any subset of required bits is good); but that should be a later patch. Fix Hans' good observation that follow_page() will never find pmd_huge() because that would have already failed the pmd_bad test: test pmd_huge in between the pmd_none and pmd_bad tests. Tighten x86's pmd_huge() check? No, once it's a hugepage entry, it can get quite far from a good pmd: for example, PROT_NONE leaves it with only ACCESSED of the KERN_PGTABLE bits. However... though follow_page() contains this and another test for huge pages, so it's nice to keep it working on them, where does it actually get called on a huge page? get_user_pages() checks is_vm_hugetlb_page(vma) to to call alternative hugetlb processing, as does unmap_vmas() and others. Signed-off-by: Hugh Dickins Earlier-version-tested-by: Ingo Molnar Cc: Thomas Gleixner Cc: Jeff Chua Cc: Hans Rosenfeld Cc: Arjan van de Ven Signed-off-by: Linus Torvalds --- arch/x86/mm/pgtable_32.c | 7 ------- include/asm-x86/pgtable_32.h | 9 +-------- include/asm-x86/pgtable_64.h | 6 ++---- mm/memory.c | 5 ++++- 4 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9ee007b..369cf06 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve) __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; } - -int pmd_bad(pmd_t pmd) -{ - WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); - - return pmd_bad_v1(pmd); -} diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 577ab79..d7f0403 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -88,14 +88,7 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val((x))) #define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) - -extern int pmd_bad(pmd_t pmd); - -#define pmd_bad_v1(x) \ - (_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER))) -#define pmd_bad_v2(x) \ - (_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER | \ - _PAGE_PSE | _PAGE_NX))) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index a3bbf87..efe83dc 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -158,14 +158,12 @@ static inline unsigned long pgd_bad(pgd_t pgd) static inline unsigned long pud_bad(pud_t pud) { - return pud_val(pud) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } static inline unsigned long pmd_bad(pmd_t pmd) { - return pmd_val(pmd) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } #define pte_none(x) (!pte_val((x))) diff --git a/mm/memory.c b/mm/memory.c index bbab1e3..48c122d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -969,7 +969,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto no_page_table; pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + if (pmd_none(*pmd)) goto no_page_table; if (pmd_huge(*pmd)) { @@ -978,6 +978,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto out; } + if (unlikely(pmd_bad(*pmd))) + goto no_page_table; + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) goto out; -- 1.8.2.3