X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=mm%2Fslub.c;h=8d71aaf888d770b27ba26df5d4d9592832a87b2d;hp=0343b3b88984c5ce10874e1889e983ce58eadade;hb=e071041be037eca208b62b84469a06bdfc692bea;hpb=dc573f9b20c8710105ac35c08ed0fe1da5160ecd

diff --git a/mm/slub.c b/mm/slub.c
index 0343b3b..8d71aaf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -9,6 +9,7 @@
  */

 #include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
@@ -16,7 +17,8 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
+#include <linux/kmemcheck.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
@@ -139,13 +141,20 @@
 				SLAB_POISON | SLAB_STORE_USER)

 /*
+ * Debugging flags that require metadata to be stored in the slab. These get
+ * disabled when slub_debug=O is used and a cache's min order increases with
+ * metadata.
+ */
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+
+/*
  * Set of flags that will prevent slab merging
  */
 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU)
+		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)

 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
-		SLAB_CACHE_DMA)
+		SLAB_CACHE_DMA | SLAB_NOTRACK)

 #ifndef ARCH_KMALLOC_MINALIGN
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
@@ -323,6 +332,7 @@ static int slub_debug;
 #endif

 static char *slub_debug_slabs;
+static int disable_higher_order_debug;

 /*
  * Object debugging
@@ -375,14 +385,8 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 static void set_track(struct kmem_cache *s, void *object,
 			enum track_item alloc, unsigned long addr)
 {
-	struct track *p;
+	struct track *p = get_track(s, object, alloc);

-	if (s->offset)
-		p = object + s->offset + sizeof(void *);
-	else
-		p = object + s->inuse;
-
-	p += alloc;
 	if (addr) {
 		p->addr = addr;
 		p->cpu = smp_processor_id();
@@ -650,7 +654,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
 	print_section("Padding", end - remainder, remainder);

-	restore_bytes(s, "slab padding", POISON_INUSE, start, end);
+	restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
 	return 0;
 }
@@ -837,6 +841,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
 	return atomic_long_read(&n->nr_slabs);
 }

+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+{
+	return atomic_long_read(&n->nr_slabs);
+}
+
 static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
 {
 	struct kmem_cache_node *n = get_node(s, node);
@@ -975,6 +984,15 @@ static int __init setup_slub_debug(char *str)
 		 */
 		goto check_slabs;

+	if (tolower(*str) == 'o') {
+		/*
+		 * Avoid enabling debugging on caches if its minimum order
+		 * would increase as a result.
+		 */
+		disable_higher_order_debug = 1;
+		goto out;
+	}
+
 	slub_debug = 0;
 	if (*str == '-')
 		/*
@@ -1025,8 +1043,8 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
 	 * Enable debugging if selected on the kernel commandline.
 	 */
 	if (slub_debug && (!slub_debug_slabs ||
-	    strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
-			flags |= slub_debug;
+		!strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+		flags |= slub_debug;

 	return flags;
 }
@@ -1053,8 +1071,12 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
 }
 #define slub_debug 0
+#define disable_higher_order_debug 0
+
 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
 							{ return 0; }
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+							{ return 0; }
 static inline void inc_slabs_node(struct kmem_cache *s, int node,
 							int objects) {}
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
@@ -1069,6 +1091,8 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
 {
 	int order = oo_order(oo);

+	flags |= __GFP_NOTRACK;
+
 	if (node == -1)
 		return alloc_pages(flags, order);
 	else
@@ -1079,11 +1103,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
 	struct kmem_cache_order_objects oo = s->oo;
+	gfp_t alloc_gfp;

 	flags |= s->allocflags;

-	page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
-									oo);
+	/*
+	 * Let the initial higher-order allocation fail under memory pressure
+	 * so we fall-back to the minimum order allocation.
+	 */
+	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
+
+	page = alloc_slab_page(alloc_gfp, node, oo);
 	if (unlikely(!page)) {
 		oo = s->min;
 		/*
@@ -1096,6 +1126,23 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
 	}
+
+	if (kmemcheck_enabled
+		&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
+		int pages = 1 << oo_order(oo);
+
+		kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
+
+		/*
+		 * Objects from caches that have a constructor don't get
+		 * cleared when they're allocated, so we need to do it here.
+		 */
+		if (s->ctor)
+			kmemcheck_mark_uninitialized_pages(page, pages);
+		else
+			kmemcheck_mark_unallocated_pages(page, pages);
+	}
+
 	page->objects = oo_objects(oo);

 	mod_zone_page_state(page_zone(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -1169,6 +1216,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		__ClearPageSlubDebug(page);
 	}

+	kmemcheck_free_shadow(page, compound_order(page));
+
 	mod_zone_page_state(page_zone(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
@@ -1176,6 +1225,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)

 	__ClearPageSlab(page);
 	reset_page_mapcount(page);
+	if (current->reclaim_state)
+		current->reclaim_state->reclaimed_slab += pages;
 	__free_pages(page, order);
 }
@@ -1336,7 +1387,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 		n = get_node(s, zone_to_nid(zone));

 		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
-				n->nr_partial > n->min_partial) {
+				n->nr_partial > s->min_partial) {
 			page = get_partial_node(n);
 			if (page)
 				return page;
@@ -1388,7 +1439,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 		slab_unlock(page);
 	} else {
 		stat(c, DEACTIVATE_EMPTY);
-		if (n->nr_partial < n->min_partial) {
+		if (n->nr_partial < s->min_partial) {
 			/*
 			 * Adding an empty slab to the partial slabs in order
 			 * to avoid page allocator overhead. This slab needs
@@ -1487,6 +1538,69 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
 	return 1;
 }

+static int count_free(struct page *page)
+{
+	return page->objects - page->inuse;
+}
+
+static unsigned long count_partial(struct kmem_cache_node *n,
+					int (*get_count)(struct page *))
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += get_count(page);
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}
+
+static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
+{
+#ifdef CONFIG_SLUB_DEBUG
+	return atomic_long_read(&n->total_objects);
+#else
+	return 0;
+#endif
+}
+
+static noinline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+{
+	int node;
+
+	printk(KERN_WARNING
+		"SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+		nid, gfpflags);
+	printk(KERN_WARNING "  cache: %s, object size: %d, buffer size: %d, "
+		"default order: %d, min order: %d\n", s->name, s->objsize,
+		s->size, oo_order(s->oo), oo_order(s->min));
+
+	if (oo_order(s->min) > get_order(s->objsize))
+		printk(KERN_WARNING "  %s debugging increased min order, use "
+			"slub_debug=O to disable.\n", s->name);
+
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = get_node(s, node);
+		unsigned long nr_slabs;
+		unsigned long nr_objs;
+		unsigned long nr_free;
+
+		if (!n)
+			continue;
+
+		nr_free = count_partial(n, count_free);
+		nr_slabs = node_nr_slabs(n);
+		nr_objs = node_nr_objs(n);
+
+		printk(KERN_WARNING
+			"  node %d: slabs: %ld, objs: %ld, free: %ld\n",
+			node, nr_slabs, nr_objs, nr_free);
+	}
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
@@ -1568,6 +1682,8 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
+	if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+		slab_out_of_memory(s, gfpflags, node);
 	return NULL;
 debug:
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1597,6 +1713,9 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	unsigned long flags;
 	unsigned int objsize;

+	gfpflags &= gfp_allowed_mask;
+
+	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);

 	if (should_failslab(s->objsize, gfpflags))
@@ -1616,9 +1735,12 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	}
 	local_irq_restore(flags);

-	if (unlikely((gfpflags & __GFP_ZERO) && object))
+	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, objsize);

+	kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
+	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
+
 	return object;
 }
@@ -1626,14 +1748,13 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
 	void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);

-	kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
-			     s->objsize, s->size, gfpflags);
+	trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);

 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc);

-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
 {
 	return slab_alloc(s, gfpflags, -1, _RET_IP_);
@@ -1646,15 +1767,15 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);

-	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
-				  s->objsize, s->size, gfpflags, node);
+	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+				    s->objsize, s->size, gfpflags, node);

 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif

-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
 				    gfp_t gfpflags,
 				    int node)
@@ -1749,11 +1870,13 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	struct kmem_cache_cpu *c;
 	unsigned long flags;

+	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
+	kmemcheck_slab_free(s, object, c->objsize);
 	debug_check_no_locks_freed(object, c->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
-		debug_check_no_obj_freed(object, s->objsize);
+		debug_check_no_obj_freed(object, c->objsize);
 	if (likely(page == c->page && c->node >= 0)) {
 		object[c->offset] = c->freelist;
 		c->freelist = object;
@@ -1772,7 +1895,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)

 	slab_free(s, page, x, _RET_IP_);

-	kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
+	trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -1875,6 +1998,7 @@ static inline int calculate_order(int size)
 	int order;
 	int min_objects;
 	int fraction;
+	int max_objects;

 	/*
 	 * Attempt to find best configuration for a slab. This
@@ -1887,6 +2011,9 @@ static inline int calculate_order(int size)
 	min_objects = slub_min_objects;
 	if (!min_objects)
 		min_objects = 4 * (fls(nr_cpu_ids) + 1);
+	max_objects = (PAGE_SIZE << slub_max_order)/size;
+	min_objects = min(min_objects, max_objects);
+
 	while (min_objects > 1) {
 		fraction = 16;
 		while (fraction >= 4) {
@@ -1896,7 +2023,7 @@ static inline int calculate_order(int size)
 				return order;
 			fraction /= 2;
 		}
-		min_objects /= 2;
+		min_objects--;
 	}

 	/*
@@ -1911,7 +2038,7 @@ static inline int calculate_order(int size)
 	 * Doh this slab cannot be placed using slub_max_order.
 	 */
 	order = slab_order(size, 1, MAX_ORDER, 1);
-	if (order <= MAX_ORDER)
+	if (order < MAX_ORDER)
 		return order;
 	return -ENOSYS;
 }
@@ -1959,17 +2086,6 @@ static void
 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 {
 	n->nr_partial = 0;
-
-	/*
-	 * The larger the object size is, the more pages we want on the partial
-	 * list to avoid pounding the page allocator excessively.
-	 */
-	n->min_partial = ilog2(s->size);
-	if (n->min_partial < MIN_PARTIAL)
-		n->min_partial = MIN_PARTIAL;
-	else if (n->min_partial > MAX_PARTIAL)
-		n->min_partial = MAX_PARTIAL;
-
 	spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
 #ifdef CONFIG_SLUB_DEBUG
@@ -1997,8 +2113,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
  */
 #define NR_KMEM_CACHE_CPU 100

-static DEFINE_PER_CPU(struct kmem_cache_cpu,
-				kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
+		      kmem_cache_cpu);

 static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
 static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
@@ -2212,6 +2328,15 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 }
 #endif

+static void set_min_partial(struct kmem_cache *s, unsigned long min)
+{
+	if (min < MIN_PARTIAL)
+		min = MIN_PARTIAL;
+	else if (min > MAX_PARTIAL)
+		min = MAX_PARTIAL;
+	s->min_partial = min;
+}
+
 /*
  * calculate_sizes() determines the order and the distribution of data within
  * a slab object.
@@ -2297,6 +2422,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 * on bootup.
 	 */
 	align = calculate_alignment(flags, align, s->objsize);
+	s->align = align;

 	/*
 	 * SLUB stores one object immediately after another beginning from
@@ -2349,7 +2475,24 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,

 	if (!calculate_sizes(s, -1))
 		goto error;
+	if (disable_higher_order_debug) {
+		/*
+		 * Disable debugging flags that store metadata if the min slab
+		 * order increased.
+		 */
+		if (get_order(s->size) > get_order(s->objsize)) {
+			s->flags &= ~DEBUG_METADATA_FLAGS;
+			s->offset = 0;
+			if (!calculate_sizes(s, -1))
+				goto error;
+		}
+	}

+	/*
+	 * The larger the object size is, the more pages we want on the partial
+	 * list to avoid pounding the page allocator excessively.
+	 */
+	set_min_partial(s, ilog2(s->size));
 	s->refcount = 1;
 #ifdef CONFIG_NUMA
 	s->remote_node_defrag_ratio = 1000;
@@ -2496,6 +2639,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
 				"still has objects.\n", s->name, __func__);
 			dump_stack();
 		}
+		if (s->flags & SLAB_DESTROY_BY_RCU)
+			rcu_barrier();
 		sysfs_slab_remove(s);
 	} else
 		up_write(&slub_lock);
@@ -2506,7 +2651,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  *		Kmalloc subsystem
  *******************************************************************/

-struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);

 static int __init setup_slub_min_order(char *str)
@@ -2521,6 +2666,7 @@ __setup("slub_min_order=", setup_slub_min_order);
 static int __init setup_slub_max_order(char *str)
 {
 	get_option(&str, &slub_max_order);
+	slub_max_order = min(slub_max_order, MAX_ORDER - 1);

 	return 1;
 }
@@ -2552,13 +2698,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 	if (gfp_flags & SLUB_DMA)
 		flags = SLAB_CACHE_DMA;

-	down_write(&slub_lock);
+	/*
+	 * This function is called with IRQs disabled during early-boot on
+	 * single CPU so there's no need to take slub_lock here.
+	 */
 	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
 								flags, NULL))
 		goto panic;

 	list_add(&s->list, &slab_caches);
-	up_write(&slub_lock);
+
 	if (sysfs_slab_add(s))
 		goto panic;
 	return s;
@@ -2568,7 +2717,7 @@ panic:
 }

 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
+static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];

 static void sysfs_add_func(struct work_struct *w)
 {
@@ -2591,6 +2740,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	struct kmem_cache *s;
 	char *text;
 	size_t realsize;
+	unsigned long slabflags;

 	s = kmalloc_caches_dma[index];
 	if (s)
@@ -2612,9 +2762,18 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 			 (unsigned int)realsize);
 	s = kmalloc(kmem_size, flags & ~SLUB_DMA);

+	/*
+	 * Must defer sysfs creation to a workqueue because we don't know
+	 * what context we are called from. Before sysfs comes up, we don't
+	 * need to do anything because our sysfs initcall will start by
+	 * adding all existing slabs to sysfs.
+	 */
+	slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
+	if (slab_state >= SYSFS)
+		slabflags |= __SYSFS_ADD_DEFERRED;
+
 	if (!s || !text || !kmem_cache_open(s, flags, text,
-			realsize, ARCH_KMALLOC_MINALIGN,
-			SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
+			realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
 		kfree(s);
 		kfree(text);
 		goto unlock_out;
@@ -2623,7 +2782,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	list_add(&s->list, &slab_caches);
 	kmalloc_caches_dma[index] = s;

-	schedule_work(&sysfs_add_work);
+	if (slab_state >= SYSFS)
+		schedule_work(&sysfs_add_work);

 unlock_out:
 	up_write(&slub_lock);
@@ -2665,6 +2825,11 @@ static s8 size_index[24] = {
 	2	/* 192 */
 };

+static inline int size_index_elem(size_t bytes)
+{
+	return (bytes - 1) / 8;
+}
+
 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 {
 	int index;
@@ -2673,7 +2838,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 		if (!size)
 			return ZERO_SIZE_PTR;

-		index = size_index[(size - 1) / 8];
+		index = size_index[size_index_elem(size)];
 	} else
 		index = fls(size - 1);
@@ -2690,7 +2855,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	struct kmem_cache *s;
 	void *ret;

-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large(size, flags);

 	s = get_slab(size, flags);
@@ -2700,8 +2865,7 @@ void *__kmalloc(size_t size, gfp_t flags)

 	ret = slab_alloc(s, flags, -1, _RET_IP_);

-	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
-			     size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);

 	return ret;
 }
@@ -2709,13 +2873,16 @@ EXPORT_SYMBOL(__kmalloc);

 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
-	struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
-					get_order(size));
+	struct page *page;
+	void *ptr = NULL;

+	flags |= __GFP_COMP | __GFP_NOTRACK;
+	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
-		return page_address(page);
-	else
-		return NULL;
+		ptr = page_address(page);
+
+	kmemleak_alloc(ptr, size, 1, flags);
+	return ptr;
 }

 #ifdef CONFIG_NUMA
@@ -2724,13 +2891,12 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	struct kmem_cache *s;
 	void *ret;

-	if (unlikely(size > PAGE_SIZE)) {
+	if (unlikely(size > SLUB_MAX_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);

-		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
-					  _RET_IP_, ret,
-					  size, PAGE_SIZE << get_order(size),
-					  flags, node);
+		trace_kmalloc_node(_RET_IP_, ret,
+				   size, PAGE_SIZE << get_order(size),
+				   flags, node);

 		return ret;
 	}
@@ -2742,8 +2908,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)

 	ret = slab_alloc(s, flags, node, _RET_IP_);

-	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
-				  size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);

 	return ret;
 }
@@ -2787,24 +2952,26 @@ size_t ksize(const void *object)
 	 */
 	return s->size;
 }
+EXPORT_SYMBOL(ksize);

 void kfree(const void *x)
 {
 	struct page *page;
 	void *object = (void *)x;

+	trace_kfree(_RET_IP_, x);
+
 	if (unlikely(ZERO_OR_NULL_PTR(x)))
 		return;

 	page = virt_to_head_page(x);
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
+		kmemleak_free(x);
 		put_page(page);
 		return;
 	}
 	slab_free(page->slab, page, object, _RET_IP_);
-
-	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
 }
 EXPORT_SYMBOL(kfree);
@@ -3018,7 +3185,7 @@ void __init kmem_cache_init(void)
 	 * kmem_cache_open for slab_state == DOWN.
 	 */
 	create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
-		sizeof(struct kmem_cache_node), GFP_KERNEL);
+		sizeof(struct kmem_cache_node), GFP_NOWAIT);
 	kmalloc_caches[0].refcount = -1;
 	caches++;
@@ -3029,18 +3196,20 @@ void __init kmem_cache_init(void)
 	slab_state = PARTIAL;

 	/* Caches that are not of the two-to-the-power-of size */
-	if (KMALLOC_MIN_SIZE <= 64) {
+	if (KMALLOC_MIN_SIZE <= 32) {
 		create_kmalloc_cache(&kmalloc_caches[1],
-				"kmalloc-96", 96, GFP_KERNEL);
+				"kmalloc-96", 96, GFP_NOWAIT);
 		caches++;
+	}
+	if (KMALLOC_MIN_SIZE <= 64) {
 		create_kmalloc_cache(&kmalloc_caches[2],
-				"kmalloc-192", 192, GFP_KERNEL);
+				"kmalloc-192", 192, GFP_NOWAIT);
 		caches++;
 	}

-	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
-			"kmalloc", 1 << i, GFP_KERNEL);
+			"kmalloc", 1 << i, GFP_NOWAIT);
 		caches++;
 	}
@@ -3059,25 +3228,36 @@ void __init kmem_cache_init(void)
 	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
 		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

-	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
-		size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
+	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
+		int elem = size_index_elem(i);
+		if (elem >= ARRAY_SIZE(size_index))
+			break;
+		size_index[elem] = KMALLOC_SHIFT_LOW;
+	}

-	if (KMALLOC_MIN_SIZE == 128) {
+	if (KMALLOC_MIN_SIZE == 64) {
+		/*
+		 * The 96 byte size cache is not used if the alignment
+		 * is 64 byte.
+		 */
+		for (i = 64 + 8; i <= 96; i += 8)
+			size_index[size_index_elem(i)] = 7;
+	} else if (KMALLOC_MIN_SIZE == 128) {
 		/*
 		 * The 192 byte sized cache is not used if the alignment
 		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
 		 * instead.
 		 */
 		for (i = 128 + 8; i <= 192; i += 8)
-			size_index[(i - 1) / 8] = 8;
+			size_index[size_index_elem(i)] = 8;
 	}

 	slab_state = UP;

 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
-			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
+			kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);

 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
@@ -3095,6 +3275,10 @@ void __init kmem_cache_init(void)
 		nr_cpu_ids, nr_node_ids);
 }

+void __init kmem_cache_init_late(void)
+{
+}
+
 /*
  * Find a mergeable slab cache
  */
@@ -3161,6 +3345,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 {
 	struct kmem_cache *s;

+	if (WARN_ON(!name))
+		return NULL;
+
 	down_write(&slub_lock);
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
@@ -3276,7 +3463,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	struct kmem_cache *s;
 	void *ret;

-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large(size, gfpflags);

 	s = get_slab(size, gfpflags);
@@ -3287,8 +3474,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)

 	ret = slab_alloc(s, gfpflags, -1, caller);

 	/* Honor the call site pointer we recieved. */
-	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
-			     s->size, gfpflags);
+	trace_kmalloc(caller, ret, size, s->size, gfpflags);

 	return ret;
 }
@@ -3299,7 +3485,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	struct kmem_cache *s;
 	void *ret;

-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large_node(size, gfpflags, node);

 	s = get_slab(size, gfpflags);
@@ -3310,27 +3496,12 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,

 	ret = slab_alloc(s, gfpflags, node, caller);

 	/* Honor the call site pointer we recieved. */
-	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
-				  size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);

 	return ret;
 }

 #ifdef CONFIG_SLUB_DEBUG
-static unsigned long count_partial(struct kmem_cache_node *n,
-					int (*get_count)(struct page *))
-{
-	unsigned long flags;
-	unsigned long x = 0;
-	struct page *page;
-
-	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(page, &n->partial, lru)
-		x += get_count(page);
-	spin_unlock_irqrestore(&n->list_lock, flags);
-	return x;
-}
-
 static int count_inuse(struct page *page)
 {
 	return page->inuse;
@@ -3341,11 +3512,6 @@ static int count_total(struct page *page)
 	return page->objects;
 }

-static int count_free(struct page *page)
-{
-	return page->objects - page->inuse;
-}
-
 static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
 {
@@ -3714,7 +3880,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 				 to_cpumask(l->cpus));
 		}

-		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
+		if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " nodes=");
 			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
@@ -3902,6 +4068,26 @@ static ssize_t order_show(struct kmem_cache *s, char *buf)
 }
 SLAB_ATTR(order);

+static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%lu\n", s->min_partial);
+}
+
+static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
+				 size_t length)
+{
+	unsigned long min;
+	int err;
+
+	err = strict_strtoul(buf, 10, &min);
+	if (err)
+		return err;
+
+	set_min_partial(s, min);
+	return length;
+}
+SLAB_ATTR(min_partial);
+
 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
 {
 	if (s->ctor) {
@@ -4185,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 	return len + sprintf(buf + len, "\n");
 }

+static void clear_stat(struct kmem_cache *s, enum stat_item si)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		get_cpu_slab(s, cpu)->stat[si] = 0;
+}
+
 #define STAT_ATTR(si, text) 					\
 static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
 {								\
 	return show_stat(s, buf, si);				\
 }								\
-SLAB_ATTR_RO(text);						\
+static ssize_t text##_store(struct kmem_cache *s,		\
+				const char *buf, size_t length)	\
+{								\
+	if (buf[0] != '0')					\
+		return -EINVAL;					\
+	clear_stat(s, si);					\
+	return length;						\
+}								\
+SLAB_ATTR(text);						\

 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
@@ -4217,6 +4419,7 @@ static struct attribute *slab_attrs[] = {
 	&object_size_attr.attr,
 	&objs_per_slab_attr.attr,
 	&order_attr.attr,
+	&min_partial_attr.attr,
 	&objects_attr.attr,
 	&objects_partial_attr.attr,
 	&total_objects_attr.attr,
@@ -4368,6 +4571,8 @@ static char *create_unique_id(struct kmem_cache *s)
 		*p++ = 'a';
 	if (s->flags & SLAB_DEBUG_FREE)
 		*p++ = 'F';
+	if (!(s->flags & SLAB_NOTRACK))
+		*p++ = 't';
 	if (p != name + 1)
 		*p++ = '-';
 	p += sprintf(p, "%07d", s->size);
@@ -4410,8 +4615,11 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	}

 	err = sysfs_create_group(&s->kobj, &slab_attr_group);
-	if (err)
+	if (err) {
+		kobject_del(&s->kobj);
+		kobject_put(&s->kobj);
 		return err;
+	}
 	kobject_uevent(&s->kobj, KOBJ_ADD);
 	if (!unmergeable) {
 		/* Setup first alias */
@@ -4593,7 +4801,7 @@ static const struct file_operations proc_slabinfo_operations = {

 static int __init slab_proc_init(void)
 {
-	proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
+	proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
 	return 0;
 }
 module_init(slab_proc_init);
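
One practical effect of the hunks above is a new writable per-cache knob: min_partial_store()/set_min_partial() expose "min_partial" in each cache's SLUB sysfs directory, and the STAT_ATTR change makes the statistics files resettable by writing '0'. The following user-space sketch, which is not part of the patch, shows how the new attribute could be exercised; it assumes a kernel built with SLUB, sysfs mounted at /sys, root privileges, and an existing cache directory such as /sys/kernel/slab/kmalloc-64 (the cache name is only an illustrative assumption). The kernel clamps the written value to the [MIN_PARTIAL, MAX_PARTIAL] range, as set_min_partial() above shows.

/*
 * Minimal sketch (not part of the patch): write and re-read the
 * min_partial attribute introduced by this diff. All paths and the
 * cache name are assumptions for illustration only.
 */
#include <stdio.h>

static const char *attr = "/sys/kernel/slab/kmalloc-64/min_partial";

int main(void)
{
	char buf[32];
	FILE *f;

	/* Ask SLUB to keep at least 8 partial slabs per node for this cache. */
	f = fopen(attr, "w");
	if (!f) {
		perror(attr);
		return 1;
	}
	fputs("8\n", f);
	fclose(f);

	/* Read the value back; the kernel may have clamped it. */
	f = fopen(attr, "r");
	if (!f) {
		perror(attr);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("min_partial is now %s", buf);
	fclose(f);
	return 0;
}

The same sysfs directory holds the per-cache statistics files; after this patch, writing '0' to one of them (for example alloc_fastpath) clears that counter. Relatedly, booting with slub_debug=O (handled in setup_slub_debug() above) asks SLUB not to enable the metadata-heavy debug flags on caches whose minimum order would otherwise grow, which is the situation slab_out_of_memory() warns about.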