X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fslob.c;h=23631e2bb57af43c5cb577a75323cc9768798016;hb=003386fff3e02e51cea882e60f7d28290113964c;hp=06e5e725fab32dacfdc909e51991241f3671235e;hpb=553948491c18413928b85a9025b92af80e7d61d6;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/slob.c b/mm/slob.c index 06e5e72..23631e2 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -3,6 +3,8 @@ * * Matt Mackall 12/30/03 * + * NUMA support by Paul Mundt, 2007. + * * How SLOB works: * * The core of SLOB is a traditional K&R style heap allocator, with @@ -10,15 +12,22 @@ * allocator is as little as 2 bytes, however typically most architectures * will require 4 bytes on 32-bit and 8 bytes on 64-bit. * - * The slob heap is a linked list of pages from __get_free_page, and - * within each page, there is a singly-linked list of free blocks (slob_t). - * The heap is grown on demand and allocation from the heap is currently - * first-fit. + * The slob heap is a set of linked list of pages from alloc_pages(), + * and within each page, there is a singly-linked list of free blocks + * (slob_t). The heap is grown on demand. To reduce fragmentation, + * heap pages are segregated into three lists, with objects less than + * 256 bytes, objects less than 1024 bytes, and all other objects. + * + * Allocation from heap involves first searching for a page with + * sufficient free blocks (using a next-fit-like approach) followed by + * a first-fit scan of the page. Deallocation inserts objects back + * into the free list in address order, so this is effectively an + * address-ordered first fit. * * Above this is an implementation of kmalloc/kfree. Blocks returned * from kmalloc are prepended with a 4-byte header with the kmalloc size. * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls - * __get_free_pages directly, allocating compound pages so the page order + * alloc_pages() directly, allocating compound pages so the page order * does not have to be separately tracked, and also stores the exact * allocation size in page->private so that it can be used to accurately * provide ksize(). These objects are detected in kfree() because slob_page() @@ -29,20 +38,36 @@ * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which * case the low-level allocator will fragment blocks to create the proper * alignment. Again, objects of page-size or greater are allocated by - * calling __get_free_pages. As SLAB objects know their size, no separate + * calling alloc_pages(). As SLAB objects know their size, no separate * size bookkeeping is necessary and there is essentially no allocation * space overhead, and compound pages aren't needed for multi-page * allocations. + * + * NUMA support in SLOB is fairly simplistic, pushing most of the real + * logic down to the page allocator, and simply doing the node accounting + * on the upper levels. In the event that a node id is explicitly + * provided, alloc_pages_exact_node() with the specified node id is used + * instead. The common case (or when the node id isn't explicitly provided) + * will default to the current node, as per numa_node_id(). + * + * Node aware pages are still inserted in to the global freelist, and + * these are scanned for by matching against the node id encoded in the + * page flags. As a result, block allocations that can be satisfied from + * the freelist will only be done so on pages residing on the same node, + * in order to prevent random node placement. */ #include #include #include +#include /* struct reclaim_state */ #include #include #include #include #include +#include +#include #include /* @@ -95,26 +120,35 @@ static inline void free_slob_page(struct slob_page *sp) } /* - * All (partially) free slob pages go on this list. + * All partially free slob pages go on these lists. */ -static LIST_HEAD(free_slob_pages); +#define SLOB_BREAK1 256 +#define SLOB_BREAK2 1024 +static LIST_HEAD(free_slob_small); +static LIST_HEAD(free_slob_medium); +static LIST_HEAD(free_slob_large); /* - * slob_page: True for all slob pages (false for bigblock pages) + * is_slob_page: True for all slob pages (false for bigblock pages) */ -static inline int slob_page(struct slob_page *sp) +static inline int is_slob_page(struct slob_page *sp) { - return test_bit(PG_active, &sp->flags); + return PageSlab((struct page *)sp); } static inline void set_slob_page(struct slob_page *sp) { - __set_bit(PG_active, &sp->flags); + __SetPageSlab((struct page *)sp); } static inline void clear_slob_page(struct slob_page *sp) { - __clear_bit(PG_active, &sp->flags); + __ClearPageSlab((struct page *)sp); +} + +static inline struct slob_page *slob_page(const void *addr) +{ + return (struct slob_page *)virt_to_page(addr); } /* @@ -122,19 +156,19 @@ static inline void clear_slob_page(struct slob_page *sp) */ static inline int slob_page_free(struct slob_page *sp) { - return test_bit(PG_private, &sp->flags); + return PageSlobFree((struct page *)sp); } -static inline void set_slob_page_free(struct slob_page *sp) +static void set_slob_page_free(struct slob_page *sp, struct list_head *list) { - list_add(&sp->list, &free_slob_pages); - __set_bit(PG_private, &sp->flags); + list_add(&sp->list, list); + __SetPageSlobFree((struct page *)sp); } static inline void clear_slob_page_free(struct slob_page *sp) { list_del(&sp->list); - __clear_bit(PG_private, &sp->flags); + __ClearPageSlobFree((struct page *)sp); } #define SLOB_UNIT sizeof(slob_t) @@ -204,12 +238,36 @@ static int slob_last(slob_t *s) return !((unsigned long)slob_next(s) & ~PAGE_MASK); } +static void *slob_new_pages(gfp_t gfp, int order, int node) +{ + void *page; + +#ifdef CONFIG_NUMA + if (node != -1) + page = alloc_pages_exact_node(node, gfp, order); + else +#endif + page = alloc_pages(gfp, order); + + if (!page) + return NULL; + + return page_address(page); +} + +static void slob_free_pages(void *b, int order) +{ + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += 1 << order; + free_pages((unsigned long)b, order); +} + /* * Allocate a slob block within a given slob_page sp. */ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) { - slob_t *prev, *cur, *aligned = 0; + slob_t *prev, *cur, *aligned = NULL; int delta = 0, units = SLOB_UNITS(size); for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) { @@ -258,29 +316,58 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) /* * slob_alloc: entry point into the slob allocator. */ -static void *slob_alloc(size_t size, gfp_t gfp, int align) +static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) { struct slob_page *sp; + struct list_head *prev; + struct list_head *slob_list; slob_t *b = NULL; unsigned long flags; + if (size < SLOB_BREAK1) + slob_list = &free_slob_small; + else if (size < SLOB_BREAK2) + slob_list = &free_slob_medium; + else + slob_list = &free_slob_large; + spin_lock_irqsave(&slob_lock, flags); /* Iterate through each partially free page, try to find room */ - list_for_each_entry(sp, &free_slob_pages, list) { - if (sp->units >= SLOB_UNITS(size)) { - b = slob_page_alloc(sp, size, align); - if (b) - break; - } + list_for_each_entry(sp, slob_list, list) { +#ifdef CONFIG_NUMA + /* + * If there's a node specification, search for a partial + * page with a matching node id in the freelist. + */ + if (node != -1 && page_to_nid(&sp->page) != node) + continue; +#endif + /* Enough room on this page? */ + if (sp->units < SLOB_UNITS(size)) + continue; + + /* Attempt to alloc */ + prev = sp->list.prev; + b = slob_page_alloc(sp, size, align); + if (!b) + continue; + + /* Improve fragment distribution and reduce our average + * search time by starting our next search here. (see + * Knuth vol 1, sec 2.5, pg 449) */ + if (prev != slob_list->prev && + slob_list->next != prev->next) + list_move_tail(slob_list, prev->next); + break; } spin_unlock_irqrestore(&slob_lock, flags); /* Not enough space: must allocate a new page */ if (!b) { - b = (slob_t *)__get_free_page(gfp); + b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node); if (!b) - return 0; - sp = (struct slob_page *)virt_to_page(b); + return NULL; + sp = slob_page(b); set_slob_page(sp); spin_lock_irqsave(&slob_lock, flags); @@ -288,11 +375,13 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align) sp->free = b; INIT_LIST_HEAD(&sp->list); set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); - set_slob_page_free(sp); + set_slob_page_free(sp, slob_list); b = slob_page_alloc(sp, size, align); BUG_ON(!b); spin_unlock_irqrestore(&slob_lock, flags); } + if (unlikely((gfp & __GFP_ZERO) && b)) + memset(b, 0, size); return b; } @@ -306,11 +395,11 @@ static void slob_free(void *block, int size) slobidx_t units; unsigned long flags; - if (!block) + if (unlikely(ZERO_OR_NULL_PTR(block))) return; BUG_ON(!size); - sp = (struct slob_page *)virt_to_page(block); + sp = slob_page(block); units = SLOB_UNITS(size); spin_lock_irqsave(&slob_lock, flags); @@ -319,10 +408,11 @@ static void slob_free(void *block, int size) /* Go directly to page allocator. Do not pass slob allocator */ if (slob_page_free(sp)) clear_slob_page_free(sp); + spin_unlock_irqrestore(&slob_lock, flags); clear_slob_page(sp); free_slob_page(sp); - free_page((unsigned long)b); - goto out; + slob_free_pages(b, 0); + return; } if (!slob_page_free(sp)) { @@ -332,7 +422,7 @@ static void slob_free(void *block, int size) set_slob(b, units, (void *)((unsigned long)(b + SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK)); - set_slob_page_free(sp); + set_slob_page_free(sp, &free_slob_small); goto out; } @@ -343,6 +433,10 @@ static void slob_free(void *block, int size) sp->units += units; if (b < sp->free) { + if (b + units == sp->free) { + units += slob_units(sp->free); + sp->free = slob_next(sp->free); + } set_slob(b, units, sp->free); sp->free = b; } else { @@ -373,89 +467,64 @@ out: * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend. */ -#ifndef ARCH_KMALLOC_MINALIGN -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long) -#endif - -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long) -#endif - - -void *__kmalloc(size_t size, gfp_t gfp) +void *__kmalloc_node(size_t size, gfp_t gfp, int node) { + unsigned int *m; int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + void *ret; + + lockdep_trace_alloc(gfp); if (size < PAGE_SIZE - align) { - unsigned int *m; - m = slob_alloc(size + align, gfp, align); - if (m) - *m = size; - return (void *)m + align; + if (!size) + return ZERO_SIZE_PTR; + + m = slob_alloc(size + align, gfp, align, node); + + if (!m) + return NULL; + *m = size; + ret = (void *)m + align; + + trace_kmalloc_node(_RET_IP_, ret, + size, size + align, gfp, node); } else { - void *ret; + unsigned int order = get_order(size); - ret = (void *) __get_free_pages(gfp | __GFP_COMP, - get_order(size)); + ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node); if (ret) { struct page *page; page = virt_to_page(ret); page->private = size; } - return ret; - } -} -EXPORT_SYMBOL(__kmalloc); -/** - * krealloc - reallocate memory. The contents will remain unchanged. - * - * @p: object to reallocate memory for. - * @new_size: how many bytes of memory are required. - * @flags: the type of memory to allocate. - * - * The contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. If @p is %NULL, krealloc() - * behaves exactly like kmalloc(). If @size is 0 and @p is not a - * %NULL pointer, the object pointed to is freed. - */ -void *krealloc(const void *p, size_t new_size, gfp_t flags) -{ - void *ret; - - if (unlikely(!p)) - return kmalloc_track_caller(new_size, flags); - - if (unlikely(!new_size)) { - kfree(p); - return NULL; + trace_kmalloc_node(_RET_IP_, ret, + size, PAGE_SIZE << order, gfp, node); } - ret = kmalloc_track_caller(new_size, flags); - if (ret) { - memcpy(ret, p, min(new_size, ksize(p))); - kfree(p); - } + kmemleak_alloc(ret, size, 1, gfp); return ret; } -EXPORT_SYMBOL(krealloc); +EXPORT_SYMBOL(__kmalloc_node); void kfree(const void *block) { struct slob_page *sp; - if (!block) + trace_kfree(_RET_IP_, block); + + if (unlikely(ZERO_OR_NULL_PTR(block))) return; + kmemleak_free(block); - sp = (struct slob_page *)virt_to_page(block); - if (slob_page(sp)) { + sp = slob_page(block); + if (is_slob_page(sp)) { int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); unsigned int *m = (unsigned int *)(block - align); slob_free(m, *m + align); } else put_page(&sp->page); } - EXPORT_SYMBOL(kfree); /* can't use ksize for kmem_cache_alloc memory, only kmalloc */ @@ -463,31 +532,34 @@ size_t ksize(const void *block) { struct slob_page *sp; - if (!block) + BUG_ON(!block); + if (unlikely(block == ZERO_SIZE_PTR)) return 0; - sp = (struct slob_page *)virt_to_page(block); - if (slob_page(sp)) - return ((slob_t *)block - 1)->units + SLOB_UNIT; - else + sp = slob_page(block); + if (is_slob_page(sp)) { + int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int *m = (unsigned int *)(block - align); + return SLOB_UNITS(*m) * SLOB_UNIT; + } else return sp->page.private; } +EXPORT_SYMBOL(ksize); struct kmem_cache { unsigned int size, align; unsigned long flags; const char *name; - void (*ctor)(void *, struct kmem_cache *, unsigned long); + void (*ctor)(void *); }; struct kmem_cache *kmem_cache_create(const char *name, size_t size, - size_t align, unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long), - void (*dtor)(void*, struct kmem_cache *, unsigned long)) + size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *c; - c = slob_alloc(sizeof(struct kmem_cache), flags, 0); + c = slob_alloc(sizeof(struct kmem_cache), + GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1); if (c) { c->name = name; @@ -507,48 +579,50 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, } else if (flags & SLAB_PANIC) panic("Cannot create slab cache %s\n", name); + kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL); return c; } EXPORT_SYMBOL(kmem_cache_create); void kmem_cache_destroy(struct kmem_cache *c) { + kmemleak_free(c); + if (c->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); slob_free(c, sizeof(struct kmem_cache)); } EXPORT_SYMBOL(kmem_cache_destroy); -void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags) +void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; - if (c->size < PAGE_SIZE) - b = slob_alloc(c->size, flags, c->align); - else - b = (void *)__get_free_pages(flags, get_order(c->size)); + if (c->size < PAGE_SIZE) { + b = slob_alloc(c->size, flags, c->align, node); + trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, + SLOB_UNITS(c->size) * SLOB_UNIT, + flags, node); + } else { + b = slob_new_pages(flags, get_order(c->size), node); + trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, + PAGE_SIZE << get_order(c->size), + flags, node); + } if (c->ctor) - c->ctor(b, c, 0); + c->ctor(b); + kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); return b; } -EXPORT_SYMBOL(kmem_cache_alloc); - -void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags) -{ - void *ret = kmem_cache_alloc(c, flags); - if (ret) - memset(ret, 0, c->size); - - return ret; -} -EXPORT_SYMBOL(kmem_cache_zalloc); +EXPORT_SYMBOL(kmem_cache_alloc_node); static void __kmem_cache_free(void *b, int size) { if (size < PAGE_SIZE) slob_free(b, size); else - free_pages((unsigned long)b, get_order(size)); + slob_free_pages(b, get_order(size)); } static void kmem_rcu_free(struct rcu_head *head) @@ -561,6 +635,7 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { + kmemleak_free_recursive(b, c->flags); if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); @@ -570,6 +645,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b) } else { __kmem_cache_free(b, c->size); } + + trace_kmem_cache_free(_RET_IP_, b); } EXPORT_SYMBOL(kmem_cache_free); @@ -596,6 +673,19 @@ int kmem_ptr_validate(struct kmem_cache *a, const void *b) return 0; } +static unsigned int slob_ready __read_mostly; + +int slab_is_available(void) +{ + return slob_ready; +} + void __init kmem_cache_init(void) { + slob_ready = 1; +} + +void __init kmem_cache_init_late(void) +{ + /* Nothing to do */ }