Revert "powerpc: Rework dma-noncoherent to use generic vmalloc layer"
author: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 27 May 2009 03:33:14 +0000 (13:33 +1000)
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 27 May 2009 03:33:14 +0000 (13:33 +1000)
This reverts commit 33f00dcedb0e22cdb156a23632814fc580fcfcf8.

    While it was a good idea to try to use the mm/vmalloc.c allocator instead
    of our own (in fact, ours is itself a dup of an old variant of the vmalloc
    one), unfortunately, the approach is terminally busted since
    dma_alloc_coherent() can be called at interrupt time or in atomic contexts
    and there's little chance we'll make the code in mm/vmalloc.c cope with
    that :-(

    Until we can get the generic code to forbid that idiocy and fix all
    drivers abusing it, we pretty much have no choice but to revert to
    our custom virtual space allocator.

    There's also a problem with SMP safety since freeing such a mapping
    would require an IPI which cannot be done at interrupt time.

    However, right now, I don't think we support any platform that is
    both SMP and has non-coherent DMA (don't laugh, I know such things
    do exist !) so we can sort that out later.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/Kconfig
arch/powerpc/lib/dma-noncoherent.c

index a0d1146..3bb43ad 100644 (file)
@@ -868,6 +868,31 @@ config TASK_SIZE
        default "0x80000000" if PPC_PREP || PPC_8xx
        default "0xc0000000"
 
+config CONSISTENT_START_BOOL
+       bool "Set custom consistent memory pool address"
+       depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+       help
+         This option allows you to set the base virtual address
+         of the consistent memory pool.  This pool of virtual
+         memory is used to make consistent memory allocations.
+
+config CONSISTENT_START
+       hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
+       default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx)
+       default "0xff100000" if NOT_COHERENT_CACHE
+
+config CONSISTENT_SIZE_BOOL
+       bool "Set custom consistent memory pool size"
+       depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+       help
+         This option allows you to set the size of the
+         consistent memory pool.  This pool of virtual memory
+         is used to make consistent memory allocations.
+
+config CONSISTENT_SIZE
+       hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
+       default "0x00200000" if NOT_COHERENT_CACHE
+
 config PIN_TLB
        bool "Pinned Kernel TLBs (860 ONLY)"
        depends on ADVANCED_OPTIONS && 8xx
index 005a28d..b7dc4c1 100644 (file)
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/dma-mapping.h>
-#include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
 
 /*
+ * This address range defaults to a value that is safe for all
+ * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
+ * can be further configured for specific applications under
+ * the "Advanced Setup" menu. -Matt
+ */
+#define CONSISTENT_BASE        (CONFIG_CONSISTENT_START)
+#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE)
+#define CONSISTENT_OFFSET(x)   (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
+
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct vm_region region;
+ *    unsigned long    flags;
+ *    struct page      **pages;
+ *    unsigned int     nr_pages;
+ *    unsigned long    phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call vm_region_alloc with an appropriate
+ * struct vm_region head (eg):
+ *
+ *  struct vm_region vmalloc_head = {
+ *     .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *     .vm_start       = VMALLOC_START,
+ *     .vm_end         = VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling vm_region_alloc().
+ */
+struct ppc_vm_region {
+       struct list_head        vm_list;
+       unsigned long           vm_start;
+       unsigned long           vm_end;
+};
+
+static struct ppc_vm_region consistent_head = {
+       .vm_list        = LIST_HEAD_INIT(consistent_head.vm_list),
+       .vm_start       = CONSISTENT_BASE,
+       .vm_end         = CONSISTENT_END,
+};
+
+static struct ppc_vm_region *
+ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
+{
+       unsigned long addr = head->vm_start, end = head->vm_end - size;
+       unsigned long flags;
+       struct ppc_vm_region *c, *new;
+
+       new = kmalloc(sizeof(struct ppc_vm_region), gfp);
+       if (!new)
+               goto out;
+
+       spin_lock_irqsave(&consistent_lock, flags);
+
+       list_for_each_entry(c, &head->vm_list, vm_list) {
+               if ((addr + size) < addr)
+                       goto nospc;
+               if ((addr + size) <= c->vm_start)
+                       goto found;
+               addr = c->vm_end;
+               if (addr > end)
+                       goto nospc;
+       }
+
+ found:
+       /*
+        * Insert this entry _before_ the one we found.
+        */
+       list_add_tail(&new->vm_list, &c->vm_list);
+       new->vm_start = addr;
+       new->vm_end = addr + size;
+
+       spin_unlock_irqrestore(&consistent_lock, flags);
+       return new;
+
+ nospc:
+       spin_unlock_irqrestore(&consistent_lock, flags);
+       kfree(new);
+ out:
+       return NULL;
+}
+
+static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
+{
+       struct ppc_vm_region *c;
+
+       list_for_each_entry(c, &head->vm_list, vm_list) {
+               if (c->vm_start == addr)
+                       goto out;
+       }
+       c = NULL;
+ out:
+       return c;
+}
+
+/*
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
@@ -41,21 +151,21 @@ void *
 __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 {
        struct page *page;
+       struct ppc_vm_region *c;
        unsigned long order;
-       int i;
-       unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT;
-       unsigned int array_size = nr_pages * sizeof(struct page *);
-       struct page **pages;
-       struct page *end;
        u64 mask = 0x00ffffff, limit; /* ISA default */
-       struct vm_struct *area;
 
-       BUG_ON(!mem_init_done);
+       if (!consistent_pte) {
+               printk(KERN_ERR "%s: not initialised\n", __func__);
+               dump_stack();
+               return NULL;
+       }
+
        size = PAGE_ALIGN(size);
        limit = (mask + 1) & ~mask;
-       if (limit && size >= limit) {
-               printk(KERN_WARNING "coherent allocation too big (requested "
-                               "%#x mask %#Lx)\n", size, mask);
+       if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) {
+               printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
+                      size, mask);
                return NULL;
        }
 
@@ -68,8 +178,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
        if (!page)
                goto no_page;
 
-       end = page + (1 << order);
-
        /*
         * Invalidate any data that might be lurking in the
         * kernel direct-mapped region for device DMA.
@@ -80,59 +188,48 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
                flush_dcache_range(kaddr, kaddr + size);
        }
 
-       split_page(page, order);
-
        /*
-        * Set the "dma handle"
+        * Allocate a virtual address in the consistent mapping region.
         */
-       *handle = page_to_phys(page);
-
-       area = get_vm_area_caller(size, VM_IOREMAP,
-                       __builtin_return_address(1));
-       if (!area)
-               goto out_free_pages;
-
-       if (array_size > PAGE_SIZE) {
-               pages = vmalloc(array_size);
-               area->flags |= VM_VPAGES;
-       } else {
-               pages = kmalloc(array_size, GFP_KERNEL);
-       }
-       if (!pages)
-               goto out_free_area;
+       c = ppc_vm_region_alloc(&consistent_head, size,
+                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+       if (c) {
+               unsigned long vaddr = c->vm_start;
+               pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+               struct page *end = page + (1 << order);
 
-       area->pages = pages;
-       area->nr_pages = nr_pages;
+               split_page(page, order);
 
-       for (i = 0; i < nr_pages; i++)
-               pages[i] = page + i;
+               /*
+                * Set the "dma handle"
+                */
+               *handle = page_to_phys(page);
 
-       if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages))
-               goto out_unmap;
+               do {
+                       BUG_ON(!pte_none(*pte));
 
-       /*
-        * Free the otherwise unused pages.
-        */
-       page += nr_pages;
-       while (page < end) {
-               __free_page(page);
-               page++;
+                       SetPageReserved(page);
+                       set_pte_at(&init_mm, vaddr,
+                                  pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
+                       page++;
+                       pte++;
+                       vaddr += PAGE_SIZE;
+               } while (size -= PAGE_SIZE);
+
+               /*
+                * Free the otherwise unused pages.
+                */
+               while (page < end) {
+                       __free_page(page);
+                       page++;
+               }
+
+               return (void *)c->vm_start;
        }
 
-       return area->addr;
-out_unmap:
-       vunmap(area->addr);
-       if (array_size > PAGE_SIZE)
-               vfree(pages);
-       else
-               kfree(pages);
-       goto out_free_pages;
-out_free_area:
-       free_vm_area(area);
-out_free_pages:
        if (page)
                __free_pages(page, order);
-no_page:
+ no_page:
        return NULL;
 }
 EXPORT_SYMBOL(__dma_alloc_coherent);
@@ -142,12 +239,104 @@ EXPORT_SYMBOL(__dma_alloc_coherent);
  */
 void __dma_free_coherent(size_t size, void *vaddr)
 {
-       vfree(vaddr);
+       struct ppc_vm_region *c;
+       unsigned long flags, addr;
+       pte_t *ptep;
+
+       size = PAGE_ALIGN(size);
+
+       spin_lock_irqsave(&consistent_lock, flags);
+
+       c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
+       if (!c)
+               goto no_area;
+
+       if ((c->vm_end - c->vm_start) != size) {
+               printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+                      __func__, c->vm_end - c->vm_start, size);
+               dump_stack();
+               size = c->vm_end - c->vm_start;
+       }
+
+       ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+       addr = c->vm_start;
+       do {
+               pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+               unsigned long pfn;
+
+               ptep++;
+               addr += PAGE_SIZE;
 
+               if (!pte_none(pte) && pte_present(pte)) {
+                       pfn = pte_pfn(pte);
+
+                       if (pfn_valid(pfn)) {
+                               struct page *page = pfn_to_page(pfn);
+                               ClearPageReserved(page);
+
+                               __free_page(page);
+                               continue;
+                       }
+               }
+
+               printk(KERN_CRIT "%s: bad page in kernel page table\n",
+                      __func__);
+       } while (size -= PAGE_SIZE);
+
+       flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+       list_del(&c->vm_list);
+
+       spin_unlock_irqrestore(&consistent_lock, flags);
+
+       kfree(c);
+       return;
+
+ no_area:
+       spin_unlock_irqrestore(&consistent_lock, flags);
+       printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
+              __func__, vaddr);
+       dump_stack();
 }
 EXPORT_SYMBOL(__dma_free_coherent);
 
 /*
+ * Initialise the consistent memory allocation.
+ */
+static int __init dma_alloc_init(void)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+       int ret = 0;
+
+       do {
+               pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
+               pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE);
+               pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE);
+               if (!pmd) {
+                       printk(KERN_ERR "%s: no pmd tables\n", __func__);
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
+               if (!pte) {
+                       printk(KERN_ERR "%s: no pte tables\n", __func__);
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               consistent_pte = pte;
+       } while (0);
+
+       return ret;
+}
+
+core_initcall(dma_alloc_init);
+
+/*
  * make an area consistent.
  */
 void __dma_sync(void *vaddr, size_t size, int direction)