X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fvmalloc.c;h=fab19876b4d178986979c2b5d2cb353285507b46;hb=f4112de6b679d84bd9b9681c7504be7bdfb7c7d5;hp=ba6b0f5f7fac6dcce7a9e8a7c71f00e0e691d193;hpb=9b46333406b9cb3397ab538485a4d57c316af0ff;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ba6b0f5..fab1987 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include @@ -77,7 +79,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) BUG_ON(addr >= end); pgd = pgd_offset_k(addr); - flush_cache_vunmap(addr, end); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -152,11 +153,12 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, * * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] */ -static int vmap_page_range(unsigned long addr, unsigned long end, - pgprot_t prot, struct page **pages) +static int vmap_page_range_noflush(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) { pgd_t *pgd; unsigned long next; + unsigned long addr = start; int err = 0; int nr = 0; @@ -168,13 +170,22 @@ static int vmap_page_range(unsigned long addr, unsigned long end, if (err) break; } while (pgd++, addr = next, addr != end); - flush_cache_vmap(addr, end); if (unlikely(err)) return err; return nr; } +static int vmap_page_range(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) +{ + int ret; + + ret = vmap_page_range_noflush(start, end, prot, pages); + flush_cache_vmap(start, end); + return ret; +} + static inline int is_vmalloc_or_module_addr(const void *x) { /* @@ -322,17 +333,21 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, unsigned long addr; int purged = 0; + BUG_ON(!size); BUG_ON(size & ~PAGE_MASK); - addr = ALIGN(vstart, align); - va = kmalloc_node(sizeof(struct vmap_area), gfp_mask & GFP_RECLAIM_MASK, node); if (unlikely(!va)) return ERR_PTR(-ENOMEM); retry: + addr = ALIGN(vstart, align); + spin_lock(&vmap_area_lock); + if (addr + size - 1 < addr) + goto overflow; + /* XXX: could have a last_hole cache */ n = vmap_area_root.rb_node; if (n) { @@ -362,8 +377,10 @@ retry: goto found; } - while (addr + size >= first->va_start && addr + size <= vend) { + while (addr + size > first->va_start && addr + size <= vend) { addr = ALIGN(first->va_end + PAGE_SIZE, align); + if (addr + size - 1 < addr) + goto overflow; n = rb_next(&first->rb_node); if (n) @@ -374,6 +391,7 @@ retry: } found: if (addr + size > vend) { +overflow: spin_unlock(&vmap_area_lock); if (!purged) { purge_vmap_area_lazy(); @@ -381,8 +399,9 @@ found: goto retry; } if (printk_ratelimit()) - printk(KERN_WARNING "vmap allocation failed: " - "use vmalloc= to increase size.\n"); + printk(KERN_WARNING + "vmap allocation for size %lu failed: " + "use vmalloc= to increase size.\n", size); return ERR_PTR(-EBUSY); } @@ -432,6 +451,27 @@ static void unmap_vmap_area(struct vmap_area *va) vunmap_page_range(va->va_start, va->va_end); } +static void vmap_debug_free_range(unsigned long start, unsigned long end) +{ + /* + * Unmap page tables and force a TLB flush immediately if + * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free + * bugs similarly to those in linear kernel virtual address + * space after a page has been freed. + * + * All the lazy freeing logic is still retained, in order to + * minimise intrusiveness of this debugging feature. + * + * This is going to be *slow* (linear kernel virtual address + * debugging doesn't do a broadcast TLB flush so it is a lot + * faster). + */ +#ifdef CONFIG_DEBUG_PAGEALLOC + vunmap_page_range(start, end); + flush_tlb_kernel_range(start, end); +#endif +} + /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. @@ -475,6 +515,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, static DEFINE_SPINLOCK(purge_lock); LIST_HEAD(valist); struct vmap_area *va; + struct vmap_area *n_va; int nr = 0; /* @@ -514,7 +555,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, if (nr) { spin_lock(&vmap_area_lock); - list_for_each_entry(va, &valist, purge_list) + list_for_each_entry_safe(va, n_va, &valist, purge_list) __free_vmap_area(va); spin_unlock(&vmap_area_lock); } @@ -522,24 +563,45 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, } /* + * Kick off a purge of the outstanding lazy areas. Don't bother if somebody + * is already purging. + */ +static void try_purge_vmap_area_lazy(void) +{ + unsigned long start = ULONG_MAX, end = 0; + + __purge_vmap_area_lazy(&start, &end, 0, 0); +} + +/* * Kick off a purge of the outstanding lazy areas. */ static void purge_vmap_area_lazy(void) { unsigned long start = ULONG_MAX, end = 0; - __purge_vmap_area_lazy(&start, &end, 0, 0); + __purge_vmap_area_lazy(&start, &end, 1, 0); } /* - * Free and unmap a vmap area + * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been + * called for the correct range previously. */ -static void free_unmap_vmap_area(struct vmap_area *va) +static void free_unmap_vmap_area_noflush(struct vmap_area *va) { va->flags |= VM_LAZY_FREE; atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) - purge_vmap_area_lazy(); + try_purge_vmap_area_lazy(); +} + +/* + * Free and unmap a vmap area + */ +static void free_unmap_vmap_area(struct vmap_area *va) +{ + flush_cache_vunmap(va->va_start, va->va_end); + free_unmap_vmap_area_noflush(va); } static struct vmap_area *find_vmap_area(unsigned long addr) @@ -609,10 +671,7 @@ struct vmap_block { DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); union { - struct { - struct list_head free_list; - struct list_head dirty_list; - }; + struct list_head free_list; struct rcu_head rcu_head; }; }; @@ -679,7 +738,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); INIT_LIST_HEAD(&vb->free_list); - INIT_LIST_HEAD(&vb->dirty_list); vb_idx = addr_to_vb_idx(va->va_start); spin_lock(&vmap_block_tree_lock); @@ -710,12 +768,7 @@ static void free_vmap_block(struct vmap_block *vb) struct vmap_block *tmp; unsigned long vb_idx; - spin_lock(&vb->vbq->lock); - if (!list_empty(&vb->free_list)) - list_del(&vb->free_list); - if (!list_empty(&vb->dirty_list)) - list_del(&vb->dirty_list); - spin_unlock(&vb->vbq->lock); + BUG_ON(!list_empty(&vb->free_list)); vb_idx = addr_to_vb_idx(vb->va->va_start); spin_lock(&vmap_block_tree_lock); @@ -723,7 +776,7 @@ static void free_vmap_block(struct vmap_block *vb) spin_unlock(&vmap_block_tree_lock); BUG_ON(tmp != vb); - free_unmap_vmap_area(vb->va); + free_unmap_vmap_area_noflush(vb->va); call_rcu(&vb->rcu_head, rcu_free_vb); } @@ -785,6 +838,9 @@ static void vb_free(const void *addr, unsigned long size) BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); + + flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); + order = get_order(size); offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); @@ -797,11 +853,7 @@ static void vb_free(const void *addr, unsigned long size) spin_lock(&vb->lock); bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); - if (!vb->dirty) { - spin_lock(&vb->vbq->lock); - list_add(&vb->dirty_list, &vb->vbq->dirty); - spin_unlock(&vb->vbq->lock); - } + vb->dirty += 1UL << order; if (vb->dirty == VMAP_BBMAP_BITS) { BUG_ON(vb->free || !list_empty(&vb->free_list)); @@ -888,6 +940,7 @@ void vm_unmap_ram(const void *mem, unsigned int count) BUG_ON(addr & (PAGE_SIZE-1)); debug_check_no_locks_freed(mem, size); + vmap_debug_free_range(addr, addr+size); if (likely(count <= VMAP_MAX_ALLOC)) vb_free(mem, size); @@ -934,8 +987,36 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro } EXPORT_SYMBOL(vm_map_ram); +/** + * vm_area_register_early - register vmap area early during boot + * @vm: vm_struct to register + * @align: requested alignment + * + * This function is used to register kernel vm area before + * vmalloc_init() is called. @vm->size and @vm->flags should contain + * proper values on entry and other fields should be zero. On return, + * vm->addr contains the allocated address. + * + * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. + */ +void __init vm_area_register_early(struct vm_struct *vm, size_t align) +{ + static size_t vm_init_off __initdata; + unsigned long addr; + + addr = ALIGN(VMALLOC_START + vm_init_off, align); + vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; + + vm->addr = (void *)addr; + + vm->next = vmlist; + vmlist = vm; +} + void __init vmalloc_init(void) { + struct vmap_area *va; + struct vm_struct *tmp; int i; for_each_possible_cpu(i) { @@ -948,12 +1029,74 @@ void __init vmalloc_init(void) vbq->nr_dirty = 0; } + /* Import existing vmlist entries. */ + for (tmp = vmlist; tmp; tmp = tmp->next) { + va = alloc_bootmem(sizeof(struct vmap_area)); + va->flags = tmp->flags | VM_VM_AREA; + va->va_start = (unsigned long)tmp->addr; + va->va_end = va->va_start + tmp->size; + __insert_vmap_area(va); + } vmap_initialized = true; } +/** + * map_kernel_range_noflush - map kernel VM area with the specified pages + * @addr: start of the VM area to map + * @size: size of the VM area to map + * @prot: page protection flags to use + * @pages: pages to map + * + * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size + * specify should have been allocated using get_vm_area() and its + * friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is + * responsible for calling flush_cache_vmap() on to-be-mapped areas + * before calling this function. + * + * RETURNS: + * The number of pages mapped on success, -errno on failure. + */ +int map_kernel_range_noflush(unsigned long addr, unsigned long size, + pgprot_t prot, struct page **pages) +{ + return vmap_page_range_noflush(addr, addr + size, prot, pages); +} + +/** + * unmap_kernel_range_noflush - unmap kernel VM area + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size + * specify should have been allocated using get_vm_area() and its + * friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is + * responsible for calling flush_cache_vunmap() on to-be-mapped areas + * before calling this function and flush_tlb_kernel_range() after. + */ +void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) +{ + vunmap_page_range(addr, addr + size); +} + +/** + * unmap_kernel_range - unmap kernel VM area and flush cache and TLB + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Similar to unmap_kernel_range_noflush() but flushes vcache before + * the unmapping and tlb after. + */ void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; + + flush_cache_vunmap(addr, end); vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } @@ -1048,6 +1191,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, } EXPORT_SYMBOL_GPL(__get_vm_area); +struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end, + void *caller) +{ + return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL, + caller); +} + /** * get_vm_area - reserve a contiguous kernel virtual area * @size: size of the area @@ -1104,6 +1255,8 @@ struct vm_struct *remove_vm_area(const void *addr) if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->private; struct vm_struct *tmp, **p; + + vmap_debug_free_range(va->va_start, va->va_end); free_unmap_vmap_area(va); vm->size -= PAGE_SIZE; @@ -1189,6 +1342,7 @@ EXPORT_SYMBOL(vfree); void vunmap(const void *addr) { BUG_ON(in_interrupt()); + might_sleep(); __vunmap(addr, 0); } EXPORT_SYMBOL(vunmap); @@ -1208,6 +1362,8 @@ void *vmap(struct page **pages, unsigned int count, { struct vm_struct *area; + might_sleep(); + if (count > num_physpages) return NULL; @@ -1351,7 +1507,8 @@ void *vmalloc_user(unsigned long size) struct vm_struct *area; void *ret; - ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL, -1, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; @@ -1396,7 +1553,8 @@ EXPORT_SYMBOL(vmalloc_node); void *vmalloc_exec(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); + return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + -1, __builtin_return_address(0)); } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) @@ -1416,7 +1574,8 @@ void *vmalloc_exec(unsigned long size) */ void *vmalloc_32(unsigned long size) { - return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL); + return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL, + -1, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32); @@ -1432,7 +1591,8 @@ void *vmalloc_32_user(unsigned long size) struct vm_struct *area; void *ret; - ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); + ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, + -1, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; @@ -1694,7 +1854,7 @@ static int s_show(struct seq_file *m, void *p) v->addr, v->addr + v->size, v->size); if (v->caller) { - char buff[2 * KSYM_NAME_LEN]; + char buff[KSYM_SYMBOL_LEN]; seq_putc(m, ' '); sprint_symbol(buff, (unsigned long)v->caller);