include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit...
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index b81b41a..28195c3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -6,11 +6,13 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/pfn.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
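
This hunk belongs to the tree-wide include cleanup: once percpu.h stops pulling in slab.h implicitly, every file must include what it actually uses. pageattr.c calls the page allocator directly, which is why <linux/gfp.h> becomes explicit here. A minimal sketch of the dependency this makes visible, illustrative only and not part of the patch:

	#include <linux/gfp.h>		/* alloc_pages(), GFP_KERNEL */

	/* Before the cleanup, code like this compiled only because some
	 * unrelated header chain happened to drag in the allocator
	 * definitions. */
	static struct page *pagetable_page(void)
	{
		return alloc_pages(GFP_KERNEL, 0);	/* order-0 page */
	}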
@@ -142,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
        mb();
 }
+EXPORT_SYMBOL_GPL(clflush_cache_range);
 
 static void __cpa_flush_all(void *arg)
 {
@@ -153,7 +156,7 @@ static void __cpa_flush_all(void *arg)
         */
        __flush_tlb_all();
 
-       if (cache && boot_cpu_data.x86_model >= 4)
+       if (cache && boot_cpu_data.x86 >= 4)
                wbinvd();
 }
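
The fix above is subtle: in struct cpuinfo_x86, ->x86 holds the CPU family while ->x86_model is the model within that family, so x86_model >= 4 was the wrong predicate for "does this CPU have wbinvd". The instruction exists from the i486 (family 4) onwards. An abridged sketch of the relevant fields, not part of the diff:

	struct cpuinfo_x86 {
		__u8	x86;		/* CPU family: 4 = i486, 5 = Pentium, ... */
		__u8	x86_vendor;
		__u8	x86_model;	/* model within the family */
		/* ... */
	};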
 
@@ -208,20 +211,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
                            int in_flags, struct page **pages)
 {
        unsigned int i, level;
+       unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 
        BUG_ON(irqs_disabled());
 
-       on_each_cpu(__cpa_flush_range, NULL, 1);
+       on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
-       if (!cache)
+       if (!cache || do_wbinvd)
                return;
 
-       /* 4M threshold */
-       if (numpages >= 1024) {
-               if (boot_cpu_data.x86_model >= 4)
-                       wbinvd();
-               return;
-       }
        /*
         * We only need to flush on one CPU,
         * clflush is a MESI-coherent instruction that
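
Note the real fix in this hunk: wbinvd, unlike clflush, only acts on the local CPU's caches, so the large-range (>= 4M) flush is now folded into the on_each_cpu() callback via the do_wbinvd argument instead of being executed once on the calling CPU. The trailing comment is cut off by the hunk boundary; in this era of pageattr.c it continues into a per-page clflush loop, roughly as follows (sketched from the surrounding file, not part of this diff):

	for (i = 0; i < numpages; i++) {
		unsigned long addr;
		pte_t *pte;

		if (in_flags & CPA_PAGES_ARRAY)
			addr = (unsigned long)page_address(pages[i]);
		else
			addr = start[i];

		pte = lookup_address(addr, &level);

		/* Only flush present mappings: */
		if (pte && (pte_val(*pte) != 0))
			clflush_cache_range((void *)addr, PAGE_SIZE);
	}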
@@ -281,6 +279,43 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
                   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
                pgprot_val(forbidden) |= _PAGE_RW;
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+       /*
+        * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
+        * kernel text mappings for the large-page-aligned text/rodata sections
+        * will always be read-only. The kernel identity mappings covering the
+        * holes caused by this alignment can be anything the user asks for.
+        *
+        * This will preserve the large page mappings for kernel text/data
+        * at no extra cost.
+        */
+       if (kernel_set_to_readonly &&
+           within(address, (unsigned long)_text,
+                  (unsigned long)__end_rodata_hpage_align)) {
+               unsigned int level;
+
+               /*
+                * Don't enforce the !RW mapping for the kernel text mapping,
+                * if the current mapping is already using small page mapping.
+                * No need to work hard to preserve large page mappings in this
+                * case.
+                *
+                * This also fixes the Linux Xen paravirt guest boot failure
+                * (because of unexpected read-only mappings for kernel identity
+                * mappings). In this paravirt guest case, the kernel text
+                * mapping and the kernel identity mapping share the same
+                * page-table pages. Thus we can't really use different
+                * protections for the kernel text and identity mappings. Also,
+                * these shared mappings are made of small page mappings.
+                * Thus, this "don't enforce !RW mapping for small-page kernel
+                * text mappings" logic will help the Linux Xen paravirt guest
+                * boot as well.
+                */
+               if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
+                       pgprot_val(forbidden) |= _PAGE_RW;
+       }
+#endif
+
        prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
 
        return prot;
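
For reference, within() used above is a plain half-open range check; a sketch from the same file, not part of the diff:

	static inline int within(unsigned long addr, unsigned long start,
				 unsigned long end)
	{
		return addr >= start && addr < end;
	}

lookup_address() returns the pte for a kernel virtual address and reports the mapping level (PG_LEVEL_4K/2M/1G) through its second argument. A level other than PG_LEVEL_4K means the text is still covered by a large page, which is exactly when enforcing !RW is worth the effort of keeping that large mapping intact.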
@@ -475,7 +510,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 
        if (!debug_pagealloc)
                spin_unlock(&cpa_lock);
-       base = alloc_pages(GFP_KERNEL, 0);
+       base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
        if (!debug_pagealloc)
                spin_lock(&cpa_lock);
        if (!base)
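
__GFP_NOTRACK exempts the allocation from kmemcheck tracking. kmemcheck shadows allocations by hiding pages behind non-present PTEs and trapping the resulting faults, so letting it track a page that will itself become a page table would be self-referential. A sketch of the call with that intent spelled out, illustrative only:

	/* Allocate the new PTE page for the split; __GFP_NOTRACK keeps
	 * kmemcheck away from memory that will become a page table. */
	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
	if (!base)
		return -ENOMEM;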
@@ -595,9 +630,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
        unsigned int level;
        pte_t *kpte, old_pte;
 
-       if (cpa->flags & CPA_PAGES_ARRAY)
-               address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
-       else if (cpa->flags & CPA_ARRAY)
+       if (cpa->flags & CPA_PAGES_ARRAY) {
+               struct page *page = cpa->pages[cpa->curpage];
+               if (unlikely(PageHighMem(page)))
+                       return 0;
+               address = (unsigned long)page_address(page);
+       } else if (cpa->flags & CPA_ARRAY)
                address = cpa->vaddr[cpa->curpage];
        else
                address = *cpa->vaddr;
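
The PageHighMem() guard matters on 32-bit: pages above the direct map have no permanent kernel mapping, so page_address() returns NULL for them and CPA would otherwise walk page tables at a bogus address. Since such a page has no kernel linear alias, there is genuinely nothing to change, and returning 0 (success) is correct. The same guard is added to cpa_process_alias() below. An illustration, not from the patch:

	void *va = page_address(page);	/* NULL for an unmapped highmem page */
	if (!va)
		return 0;	/* no linear-map alias: nothing for CPA to do */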
@@ -686,8 +724,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
 static int cpa_process_alias(struct cpa_data *cpa)
 {
        struct cpa_data alias_cpa;
-       int ret = 0;
-       unsigned long temp_cpa_vaddr, vaddr;
+       unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+       unsigned long vaddr;
+       int ret;
 
        if (cpa->pfn >= max_pfn_mapped)
                return 0;
@@ -700,9 +739,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
         * No need to redo, when the primary call touched the direct
         * mapping already:
         */
-       if (cpa->flags & CPA_PAGES_ARRAY)
-               vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
-       else if (cpa->flags & CPA_ARRAY)
+       if (cpa->flags & CPA_PAGES_ARRAY) {
+               struct page *page = cpa->pages[cpa->curpage];
+               if (unlikely(PageHighMem(page)))
+                       return 0;
+               vaddr = (unsigned long)page_address(page);
+       } else if (cpa->flags & CPA_ARRAY)
                vaddr = cpa->vaddr[cpa->curpage];
        else
                vaddr = *cpa->vaddr;
@@ -711,42 +753,37 @@ static int cpa_process_alias(struct cpa_data *cpa)
                    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
 
                alias_cpa = *cpa;
-               temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
-               alias_cpa.vaddr = &temp_cpa_vaddr;
+               alias_cpa.vaddr = &laddr;
                alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-
                ret = __change_page_attr_set_clr(&alias_cpa, 0);
+               if (ret)
+                       return ret;
        }
 
 #ifdef CONFIG_X86_64
-       if (ret)
-               return ret;
-       /*
-        * No need to redo, when the primary call touched the high
-        * mapping already:
-        */
-       if (within(vaddr, (unsigned long) _text, _brk_end))
-               return 0;
-
        /*
-        * If the physical address is inside the kernel map, we need
+        * If the primary call didn't touch the high mapping already
+        * and the physical address is inside the kernel map, we need
         * to touch the high mapped kernel as well:
         */
-       if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
-               return 0;
-
-       alias_cpa = *cpa;
-       temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
-       alias_cpa.vaddr = &temp_cpa_vaddr;
-       alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+       if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+           within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+               unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+                                              __START_KERNEL_map - phys_base;
+               alias_cpa = *cpa;
+               alias_cpa.vaddr = &temp_cpa_vaddr;
+               alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-       /*
-        * The high mapping range is imprecise, so ignore the return value.
-        */
-       __change_page_attr_set_clr(&alias_cpa, 0);
+               /*
+                * The high mapping range is imprecise, so ignore the
+                * return value.
+                */
+               __change_page_attr_set_clr(&alias_cpa, 0);
+       }
 #endif
-       return ret;
+
+       return 0;
 }
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
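
For context, the highmap_*_pfn() helpers bound the pfns covered by the kernel's high mapping at __START_KERNEL_map; they are defined earlier in this file, roughly as below (same era, not part of the diff):

	#ifdef CONFIG_X86_64
	static inline unsigned long highmap_start_pfn(void)
	{
		return __pa(_text) >> PAGE_SHIFT;
	}

	static inline unsigned long highmap_end_pfn(void)
	{
		return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
	}
	#endif

The restructuring also tightens error handling: a failure from the direct-map alias update is now returned immediately, while the high-mapping update's return value is still deliberately ignored because that range is imprecise.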
@@ -806,6 +843,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 {
        struct cpa_data cpa;
        int ret, cache, checkalias;
+       unsigned long baddr = 0;
 
        /*
         * Check, if we are requested to change a not supported
@@ -837,6 +875,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                         */
                        WARN_ON_ONCE(1);
                }
+               /*
+                * Save address for cache flush. *addr is modified in the call
+                * to __change_page_attr_set_clr() below.
+                */
+               baddr = *addr;
        }
 
        /* Must avoid aliasing mappings in the highmem code */
@@ -884,7 +927,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                        cpa_flush_array(addr, numpages, cache,
                                        cpa.flags, pages);
                } else
-                       cpa_flush_range(*addr, numpages, cache);
+                       cpa_flush_range(baddr, numpages, cache);
        } else
                cpa_flush_all(cache);
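
This is the bug the new baddr variable fixes: __change_page_attr_set_clr() advances *addr as it walks the range (stepping over large pages), so by the time the flush runs, *addr no longer points at the start and the clflush loop would miss the pages that were actually modified. A consolidated sketch, illustrative:

	unsigned long baddr = *addr;	/* save before CPA mutates *addr */
	ret = __change_page_attr_set_clr(&cpa, checkalias);
	/* *addr may now point numpages pages further on ... */
	cpa_flush_range(baddr, numpages, cache);	/* flush what changed */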
 
@@ -987,12 +1030,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
 int _set_memory_wc(unsigned long addr, int numpages)
 {
        int ret;
+       unsigned long addr_copy = addr;
+
        ret = change_page_attr_set(&addr, numpages,
                                    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
-
        if (!ret) {
-               ret = change_page_attr_set(&addr, numpages,
-                                   __pgprot(_PAGE_CACHE_WC), 0);
+               ret = change_page_attr_set_clr(&addr_copy, numpages,
+                                              __pgprot(_PAGE_CACHE_WC),
+                                              __pgprot(_PAGE_CACHE_MASK),
+                                              0, 0, NULL);
        }
        return ret;
 }
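
Two fixes in one here: addr_copy preserves the caller's address across the first conversion (change_page_attr_set() modifies *addr, the same issue baddr addresses above), and the WC step now clears _PAGE_CACHE_MASK while setting _PAGE_CACHE_WC, so no stale UC_MINUS bits survive in the PTE. A hypothetical caller, to show the intended flow (vaddr and n are invented for illustration, error handling elided):

	/* set_memory_wc()-style usage: reserve the memtype, then convert. */
	if (!reserve_memtype(__pa(vaddr), __pa(vaddr) + n * PAGE_SIZE,
			     _PAGE_CACHE_WC, NULL))
		_set_memory_wc(vaddr, n);	/* UC_MINUS first, then WC */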
@@ -1060,12 +1106,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
 
 int set_memory_x(unsigned long addr, int numpages)
 {
+       if (!(__supported_pte_mask & _PAGE_NX))
+               return 0;
+
        return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
+       if (!(__supported_pte_mask & _PAGE_NX))
+               return 0;
+
        return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_nx);
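
The early return makes these calls safe no-ops when NX is unavailable: __supported_pte_mask has _PAGE_NX cleared on CPUs without the NX feature, with noexec=off, or on 32-bit non-PAE, and setting that bit there would plant a reserved bit in the PTE rather than a protection. The same mask is applied wherever PTEs are built; roughly, from the headers of this era (sketch, not part of the diff):

	static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
	{
		pgprotval_t protval = pgprot_val(pgprot);

		if (protval & _PAGE_PRESENT)
			protval &= __supported_pte_mask;

		return protval;
	}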
@@ -1109,7 +1161,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
        int free_idx;
 
        for (i = 0; i < addrinarray; i++) {
-               start = (unsigned long)page_address(pages[i]);
+               if (PageHighMem(pages[i]))
+                       continue;
+               start = page_to_pfn(pages[i]) << PAGE_SHIFT;
                end = start + PAGE_SIZE;
                if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
                        goto err_out;
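
The conversion above is more than a highmem guard: reserve_memtype() and free_memtype() track physical address ranges, and the old code handed them a kernel virtual address from page_address(). Computing the address from the pfn fixes both issues at once; the same pattern is applied in the err_out unwind and in set_pages_array_wb() below. Sketch, illustrative:

	/* Memtype bookkeeping is keyed on physical addresses. */
	u64 start = (u64)page_to_pfn(page) << PAGE_SHIFT;	/* physical */
	u64 end   = start + PAGE_SIZE;
	int err   = reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL);
	/* err != 0 means the range overlaps an incompatible memtype. */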
@@ -1122,7 +1176,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
 err_out:
        free_idx = i;
        for (i = 0; i < free_idx; i++) {
-               start = (unsigned long)page_address(pages[i]);
+               if (PageHighMem(pages[i]))
+                       continue;
+               start = page_to_pfn(pages[i]) << PAGE_SHIFT;
                end = start + PAGE_SIZE;
                free_memtype(start, end);
        }
@@ -1151,7 +1207,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
                return retval;
 
        for (i = 0; i < addrinarray; i++) {
-               start = (unsigned long)page_address(pages[i]);
+               if (PageHighMem(pages[i]))
+                       continue;
+               start = page_to_pfn(pages[i]) << PAGE_SHIFT;
                end = start + PAGE_SIZE;
                free_memtype(start, end);
        }