x86, pageattr: introduce APIs to change pageattr of a page array
authorShaohua Li <shaohua.li@intel.com>
Thu, 21 Aug 2008 02:46:06 +0000 (10:46 +0800)
committerIngo Molnar <mingo@elte.hu>
Thu, 21 Aug 2008 11:47:45 +0000 (13:47 +0200)
Add array interface APIs of pageattr. page based cache flush is quite
slow for a lot of pages. If pages are more than 1024 (4M), the patch
will use a wbinvd(). We have a simple test here (run a 3d game - open
arena), nearly all agp memory allocation are small (< 1M), so suppose
this will not impact runtime performance.

Signed-off-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/mm/pageattr.c
include/asm-x86/cacheflush.h

index 5c06469..041e81e 100644 (file)
  * The current flushing context - we pass it instead of 5 arguments:
  */
 struct cpa_data {
-       unsigned long   vaddr;
+       unsigned long   *vaddr;
        pgprot_t        mask_set;
        pgprot_t        mask_clr;
        int             numpages;
-       int             flushtlb;
+       int             flags;
        unsigned long   pfn;
        unsigned        force_split : 1;
+       int             curpage;
 };
 
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+
 #ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
@@ -184,6 +188,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
        }
 }
 
+static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+{
+       unsigned int i, level;
+       unsigned long *addr;
+
+       BUG_ON(irqs_disabled());
+
+       on_each_cpu(__cpa_flush_range, NULL, 1);
+
+       if (!cache)
+               return;
+
+       /* 4M threshold */
+       if (numpages >= 1024) {
+               if (boot_cpu_data.x86_model >= 4)
+                       wbinvd();
+               return;
+       }
+       /*
+        * We only need to flush on one CPU,
+        * clflush is a MESI-coherent instruction that
+        * will cause all other CPUs to flush the same
+        * cachelines:
+        */
+       for (i = 0, addr = start; i < numpages; i++, addr++) {
+               pte_t *pte = lookup_address(*addr, &level);
+
+               /*
+                * Only flush present addresses:
+                */
+               if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+                       clflush_cache_range((void *) *addr, PAGE_SIZE);
+       }
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -392,7 +431,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
                 */
                new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
                __set_pmd_pte(kpte, address, new_pte);
-               cpa->flushtlb = 1;
+               cpa->flags |= CPA_FLUSHTLB;
                do_split = 0;
        }
 
@@ -578,11 +617,16 @@ out_unlock:
 
 static int __change_page_attr(struct cpa_data *cpa, int primary)
 {
-       unsigned long address = cpa->vaddr;
+       unsigned long address;
        int do_split, err;
        unsigned int level;
        pte_t *kpte, old_pte;
 
+       if (cpa->flags & CPA_ARRAY)
+               address = cpa->vaddr[cpa->curpage];
+       else
+               address = *cpa->vaddr;
+
 repeat:
        kpte = lookup_address(address, &level);
        if (!kpte)
@@ -594,8 +638,8 @@ repeat:
                        return 0;
                printk(KERN_WARNING "CPA: called for zero pte. "
                       "vaddr = %lx cpa->vaddr = %lx\n", address,
-                      cpa->vaddr);
                WARN_ON(1);
+                      *cpa->vaddr);
                return -EINVAL;
        }
 
@@ -621,7 +665,7 @@ repeat:
                 */
                if (pte_val(old_pte) != pte_val(new_pte)) {
                        set_pte_atomic(kpte, new_pte);
-                       cpa->flushtlb = 1;
+                       cpa->flags |= CPA_FLUSHTLB;
                }
                cpa->numpages = 1;
                return 0;
@@ -645,7 +689,7 @@ repeat:
         */
        err = split_large_page(kpte, address);
        if (!err) {
-               cpa->flushtlb = 1;
+               cpa->flags |= CPA_FLUSHTLB;
                goto repeat;
        }
 
@@ -658,6 +702,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
        struct cpa_data alias_cpa;
        int ret = 0;
+       unsigned long temp_cpa_vaddr, vaddr;
 
        if (cpa->pfn >= max_pfn_mapped)
                return 0;
@@ -670,16 +715,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
         * No need to redo, when the primary call touched the direct
         * mapping already:
         */
-       if (!(within(cpa->vaddr, PAGE_OFFSET,
+       if (cpa->flags & CPA_ARRAY)
+               vaddr = cpa->vaddr[cpa->curpage];
+       else
+               vaddr = *cpa->vaddr;
+
+       if (!(within(vaddr, PAGE_OFFSET,
                    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
 #ifdef CONFIG_X86_64
-               || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+               || within(vaddr, PAGE_OFFSET + (1UL<<32),
                    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
 #endif
        )) {
 
                alias_cpa = *cpa;
-               alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+               temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+               alias_cpa.vaddr = &temp_cpa_vaddr;
+               alias_cpa.flags &= ~CPA_ARRAY;
+
 
                ret = __change_page_attr_set_clr(&alias_cpa, 0);
        }
@@ -691,7 +744,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
         * No need to redo, when the primary call touched the high
         * mapping already:
         */
-       if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+       if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
                return 0;
 
        /*
@@ -702,8 +755,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
                return 0;
 
        alias_cpa = *cpa;
-       alias_cpa.vaddr =
-               (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+       temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+       alias_cpa.vaddr = &temp_cpa_vaddr;
+       alias_cpa.flags &= ~CPA_ARRAY;
 
        /*
         * The high mapping range is imprecise, so ignore the return value.
@@ -723,6 +777,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 * preservation check.
                 */
                cpa->numpages = numpages;
+               /* for array changes, we can't use large page */
+               if (cpa->flags & CPA_ARRAY)
+                       cpa->numpages = 1;
 
                ret = __change_page_attr(cpa, checkalias);
                if (ret)
@@ -741,7 +798,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 */
                BUG_ON(cpa->numpages > numpages);
                numpages -= cpa->numpages;
-               cpa->vaddr += cpa->numpages * PAGE_SIZE;
+               if (cpa->flags & CPA_ARRAY)
+                       cpa->curpage++;
+               else
+                       *cpa->vaddr += cpa->numpages * PAGE_SIZE;
+
        }
        return 0;
 }
@@ -752,9 +813,9 @@ static inline int cache_attr(pgprot_t attr)
                (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
 }
 
-static int change_page_attr_set_clr(unsigned long addr, int numpages,
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                    pgprot_t mask_set, pgprot_t mask_clr,
-                                   int force_split)
+                                   int force_split, int array)
 {
        struct cpa_data cpa;
        int ret, cache, checkalias;
@@ -769,12 +830,22 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
                return 0;
 
        /* Ensure we are PAGE_SIZE aligned */
-       if (addr & ~PAGE_MASK) {
-               addr &= PAGE_MASK;
-               /*
-                * People should not be passing in unaligned addresses:
-                */
-               WARN_ON_ONCE(1);
+       if (!array) {
+               if (*addr & ~PAGE_MASK) {
+                       *addr &= PAGE_MASK;
+                       /*
+                        * People should not be passing in unaligned addresses:
+                        */
+                       WARN_ON_ONCE(1);
+               }
+       } else {
+               int i;
+               for (i = 0; i < numpages; i++) {
+                       if (addr[i] & ~PAGE_MASK) {
+                               addr[i] &= PAGE_MASK;
+                               WARN_ON_ONCE(1);
+                       }
+               }
        }
 
        /* Must avoid aliasing mappings in the highmem code */
@@ -784,9 +855,13 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
        cpa.numpages = numpages;
        cpa.mask_set = mask_set;
        cpa.mask_clr = mask_clr;
-       cpa.flushtlb = 0;
+       cpa.flags = 0;
+       cpa.curpage = 0;
        cpa.force_split = force_split;
 
+       if (array)
+               cpa.flags |= CPA_ARRAY;
+
        /* No alias checking for _NX bit modifications */
        checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
 
@@ -795,7 +870,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
        /*
         * Check whether we really changed something:
         */
-       if (!cpa.flushtlb)
+       if (!(cpa.flags & CPA_FLUSHTLB))
                goto out;
 
        /*
@@ -810,9 +885,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
         * error case we fall back to cpa_flush_all (which uses
         * wbindv):
         */
-       if (!ret && cpu_has_clflush)
-               cpa_flush_range(addr, numpages, cache);
-       else
+       if (!ret && cpu_has_clflush) {
+               if (cpa.flags & CPA_ARRAY)
+                       cpa_flush_array(addr, numpages, cache);
+               else
+                       cpa_flush_range(*addr, numpages, cache);
+       } else
                cpa_flush_all(cache);
 
 out:
@@ -821,16 +899,18 @@ out:
        return ret;
 }
 
-static inline int change_page_attr_set(unsigned long addr, int numpages,
-                                      pgprot_t mask)
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+                                      pgprot_t mask, int array)
 {
-       return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+       return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+               array);
 }
 
-static inline int change_page_attr_clear(unsigned long addr, int numpages,
-                                        pgprot_t mask)
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+                                        pgprot_t mask, int array)
 {
-       return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+       return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+               array);
 }
 
 int _set_memory_uc(unsigned long addr, int numpages)
@@ -838,8 +918,8 @@ int _set_memory_uc(unsigned long addr, int numpages)
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
         */
-       return change_page_attr_set(addr, numpages,
-                                   __pgprot(_PAGE_CACHE_UC_MINUS));
+       return change_page_attr_set(&addr, numpages,
+                                   __pgprot(_PAGE_CACHE_UC_MINUS), 0);
 }
 
 int set_memory_uc(unsigned long addr, int numpages)
@@ -855,10 +935,31 @@ int set_memory_uc(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL(set_memory_uc);
 
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+       int i;
+       /*
+        * for now UC MINUS. see comments in ioremap_nocache()
+        */
+       for (i = 0; i < addrinarray; i++) {
+               if (reserve_memtype(addr[i], addr[i] + PAGE_SIZE,
+                           _PAGE_CACHE_UC_MINUS, NULL))
+                       goto out;
+       }
+
+       return change_page_attr_set(addr, addrinarray,
+                                   __pgprot(_PAGE_CACHE_UC_MINUS), 1);
+out:
+       while (--i >= 0)
+               free_memtype(addr[i], addr[i] + PAGE_SIZE);
+       return -EINVAL;
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
 int _set_memory_wc(unsigned long addr, int numpages)
 {
-       return change_page_attr_set(addr, numpages,
-                                   __pgprot(_PAGE_CACHE_WC));
+       return change_page_attr_set(&addr, numpages,
+                                   __pgprot(_PAGE_CACHE_WC), 0);
 }
 
 int set_memory_wc(unsigned long addr, int numpages)
@@ -876,8 +977,8 @@ EXPORT_SYMBOL(set_memory_wc);
 
 int _set_memory_wb(unsigned long addr, int numpages)
 {
-       return change_page_attr_clear(addr, numpages,
-                                     __pgprot(_PAGE_CACHE_MASK));
+       return change_page_attr_clear(&addr, numpages,
+                                     __pgprot(_PAGE_CACHE_MASK), 0);
 }
 
 int set_memory_wb(unsigned long addr, int numpages)
@@ -888,37 +989,48 @@ int set_memory_wb(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL(set_memory_wb);
 
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+       int i;
+       for (i = 0; i < addrinarray; i++)
+               free_memtype(addr[i], addr[i] + PAGE_SIZE);
+
+       return change_page_attr_clear(addr, addrinarray,
+                                     __pgprot(_PAGE_CACHE_MASK), 1);
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
 int set_memory_x(unsigned long addr, int numpages)
 {
-       return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+       return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
-       return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+       return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-       return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+       return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-       return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+       return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
 int set_memory_np(unsigned long addr, int numpages)
 {
-       return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+       return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
 }
 
 int set_memory_4k(unsigned long addr, int numpages)
 {
-       return change_page_attr_set_clr(addr, numpages, __pgprot(0),
-                                       __pgprot(0), 1);
+       return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+                                       __pgprot(0), 1, 0);
 }
 
 int set_pages_uc(struct page *page, int numpages)
@@ -971,20 +1083,24 @@ int set_pages_rw(struct page *page, int numpages)
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-       struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+       unsigned long tempaddr = (unsigned long) page_address(page);
+       struct cpa_data cpa = { .vaddr = &tempaddr,
                                .numpages = numpages,
                                .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-                               .mask_clr = __pgprot(0)};
+                               .mask_clr = __pgprot(0),
+                               .flags = 0};
 
        return __change_page_attr_set_clr(&cpa, 1);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
-       struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+       unsigned long tempaddr = (unsigned long) page_address(page);
+       struct cpa_data cpa = { .vaddr = &tempaddr,
                                .numpages = numpages,
                                .mask_set = __pgprot(0),
-                               .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
+                               .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+                               .flags = 0};
 
        return __change_page_attr_set_clr(&cpa, 1);
 }
index f4c0ab5..0a5f718 100644 (file)
@@ -66,6 +66,9 @@ int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 int set_memory_4k(unsigned long addr, int numpages);
 
+int set_memory_array_uc(unsigned long *addr, int addrinarray);
+int set_memory_array_wb(unsigned long *addr, int addrinarray);
+
 /*
  * For legacy compatibility with the old APIs, a few functions
  * are provided that work on a "struct page".