X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fslub.c;h=d2a54fe71ea22519feada61f181e874685c0a51b;hb=d79df0b1eda0099a22cbcece01ce5e7d222450de;hp=6392ae5cc6b1e8972d98b059bee576aeb66d366c;hpb=0211a9c8508b2183e0e539509aad60414f1c3813;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/slub.c b/mm/slub.c index 6392ae5..d2a54fe 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -9,6 +9,7 @@ */ #include +#include /* struct reclaim_state */ #include #include #include @@ -16,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -138,13 +141,21 @@ SLAB_POISON | SLAB_STORE_USER) /* + * Debugging flags that require metadata to be stored in the slab. These get + * disabled when slub_debug=O is used and a cache's min order increases with + * metadata. + */ +#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) + +/* * Set of flags that will prevent slab merging */ #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU) + SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_FAILSLAB) #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ - SLAB_CACHE_DMA) + SLAB_CACHE_DMA | SLAB_NOTRACK) #ifndef ARCH_KMALLOC_MINALIGN #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) @@ -207,10 +218,10 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) #endif -static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) +static inline void stat(struct kmem_cache *s, enum stat_item si) { #ifdef CONFIG_SLUB_STATS - c->stat[si]++; + __this_cpu_inc(s->cpu_slab->stat[si]); #endif } @@ -232,15 +243,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } -static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) -{ -#ifdef CONFIG_SMP - return s->cpu_slab[cpu]; -#else - return &s->cpu_slab; -#endif -} - /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) @@ -259,13 +261,6 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; } -/* - * Slow version of get and set free pointer. - * - * This version requires touching the cache lines of kmem_cache which - * we avoid to do in the fast alloc free paths. There we obtain the offset - * from the page struct. - */ static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -322,6 +317,7 @@ static int slub_debug; #endif static char *slub_debug_slabs; +static int disable_higher_order_debug; /* * Object debugging @@ -374,14 +370,8 @@ static struct track *get_track(struct kmem_cache *s, void *object, static void set_track(struct kmem_cache *s, void *object, enum track_item alloc, unsigned long addr) { - struct track *p; - - if (s->offset) - p = object + s->offset + sizeof(void *); - else - p = object + s->inuse; + struct track *p = get_track(s, object, alloc); - p += alloc; if (addr) { p->addr = addr; p->cpu = smp_processor_id(); @@ -649,7 +639,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); print_section("Padding", end - remainder, remainder); - restore_bytes(s, "slab padding", POISON_INUSE, start, end); + restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); return 0; } @@ -836,6 +826,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node) return atomic_long_read(&n->nr_slabs); } +static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) +{ + return atomic_long_read(&n->nr_slabs); +} + static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) { struct kmem_cache_node *n = get_node(s, node); @@ -974,6 +969,15 @@ static int __init setup_slub_debug(char *str) */ goto check_slabs; + if (tolower(*str) == 'o') { + /* + * Avoid enabling debugging on caches if its minimum order + * would increase as a result. + */ + disable_higher_order_debug = 1; + goto out; + } + slub_debug = 0; if (*str == '-') /* @@ -1001,6 +1005,9 @@ static int __init setup_slub_debug(char *str) case 't': slub_debug |= SLAB_TRACE; break; + case 'a': + slub_debug |= SLAB_FAILSLAB; + break; default: printk(KERN_ERR "slub_debug option '%c' " "unknown. skipped\n", *str); @@ -1024,8 +1031,8 @@ static unsigned long kmem_cache_flags(unsigned long objsize, * Enable debugging if selected on the kernel commandline. */ if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) - flags |= slub_debug; + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) + flags |= slub_debug; return flags; } @@ -1052,8 +1059,12 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize, } #define slub_debug 0 +#define disable_higher_order_debug 0 + static inline unsigned long slabs_node(struct kmem_cache *s, int node) { return 0; } +static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) + { return 0; } static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) {} static inline void dec_slabs_node(struct kmem_cache *s, int node, @@ -1068,6 +1079,8 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node, { int order = oo_order(oo); + flags |= __GFP_NOTRACK; + if (node == -1) return alloc_pages(flags, order); else @@ -1078,11 +1091,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct page *page; struct kmem_cache_order_objects oo = s->oo; + gfp_t alloc_gfp; flags |= s->allocflags; - page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, - oo); + /* + * Let the initial higher-order allocation fail under memory pressure + * so we fall-back to the minimum order allocation. + */ + alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; + + page = alloc_slab_page(alloc_gfp, node, oo); if (unlikely(!page)) { oo = s->min; /* @@ -1093,8 +1112,25 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) return NULL; - stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + stat(s, ORDER_FALLBACK); } + + if (kmemcheck_enabled + && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { + int pages = 1 << oo_order(oo); + + kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); + + /* + * Objects from caches that have a constructor don't get + * cleared when they're allocated, so we need to do it here. + */ + if (s->ctor) + kmemcheck_mark_uninitialized_pages(page, pages); + else + kmemcheck_mark_unallocated_pages(page, pages); + } + page->objects = oo_objects(oo); mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? @@ -1168,6 +1204,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlubDebug(page); } + kmemcheck_free_shadow(page, compound_order(page)); + mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, @@ -1175,6 +1213,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlab(page); reset_page_mapcount(page); + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += pages; __free_pages(page, order); } @@ -1335,7 +1375,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) n = get_node(s, zone_to_nid(zone)); if (n && cpuset_zone_allowed_hardwall(zone, flags) && - n->nr_partial > n->min_partial) { + n->nr_partial > s->min_partial) { page = get_partial_node(n); if (page) return page; @@ -1370,24 +1410,23 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); __ClearPageSlubFrozen(page); if (page->inuse) { if (page->freelist) { add_partial(n, page, tail); - stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); + stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); } else { - stat(c, DEACTIVATE_FULL); + stat(s, DEACTIVATE_FULL); if (SLABDEBUG && PageSlubDebug(page) && (s->flags & SLAB_STORE_USER)) add_full(n, page); } slab_unlock(page); } else { - stat(c, DEACTIVATE_EMPTY); - if (n->nr_partial < n->min_partial) { + stat(s, DEACTIVATE_EMPTY); + if (n->nr_partial < s->min_partial) { /* * Adding an empty slab to the partial slabs in order * to avoid page allocator overhead. This slab needs @@ -1402,7 +1441,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) slab_unlock(page); } else { slab_unlock(page); - stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); + stat(s, FREE_SLAB); discard_slab(s, page); } } @@ -1417,7 +1456,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) int tail = 1; if (page->freelist) - stat(c, DEACTIVATE_REMOTE_FREES); + stat(s, DEACTIVATE_REMOTE_FREES); /* * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely @@ -1430,10 +1469,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* Retrieve object from cpu_freelist */ object = c->freelist; - c->freelist = c->freelist[c->offset]; + c->freelist = get_freepointer(s, c->freelist); /* And put onto the regular freelist */ - object[c->offset] = page->freelist; + set_freepointer(s, object, page->freelist); page->freelist = object; page->inuse--; } @@ -1443,7 +1482,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { - stat(c, CPUSLAB_FLUSH); + stat(s, CPUSLAB_FLUSH); slab_lock(c->page); deactivate_slab(s, c); } @@ -1455,7 +1494,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (likely(c && c->page)) flush_slab(s, c); @@ -1486,6 +1525,69 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) return 1; } +static int count_free(struct page *page) +{ + return page->objects - page->inuse; +} + +static unsigned long count_partial(struct kmem_cache_node *n, + int (*get_count)(struct page *)) +{ + unsigned long flags; + unsigned long x = 0; + struct page *page; + + spin_lock_irqsave(&n->list_lock, flags); + list_for_each_entry(page, &n->partial, lru) + x += get_count(page); + spin_unlock_irqrestore(&n->list_lock, flags); + return x; +} + +static inline unsigned long node_nr_objs(struct kmem_cache_node *n) +{ +#ifdef CONFIG_SLUB_DEBUG + return atomic_long_read(&n->total_objects); +#else + return 0; +#endif +} + +static noinline void +slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) +{ + int node; + + printk(KERN_WARNING + "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", + nid, gfpflags); + printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " + "default order: %d, min order: %d\n", s->name, s->objsize, + s->size, oo_order(s->oo), oo_order(s->min)); + + if (oo_order(s->min) > get_order(s->objsize)) + printk(KERN_WARNING " %s debugging increased min order, use " + "slub_debug=O to disable.\n", s->name); + + for_each_online_node(node) { + struct kmem_cache_node *n = get_node(s, node); + unsigned long nr_slabs; + unsigned long nr_objs; + unsigned long nr_free; + + if (!n) + continue; + + nr_free = count_partial(n, count_free); + nr_slabs = node_nr_slabs(n); + nr_objs = node_nr_objs(n); + + printk(KERN_WARNING + " node %d: slabs: %ld, objs: %ld, free: %ld\n", + node, nr_slabs, nr_objs, nr_free); + } +} + /* * Slow path. The lockless freelist is empty or we need to perform * debugging duties. @@ -1520,7 +1622,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (unlikely(!node_match(c, node))) goto another_slab; - stat(c, ALLOC_REFILL); + stat(s, ALLOC_REFILL); load_freelist: object = c->page->freelist; @@ -1529,13 +1631,13 @@ load_freelist: if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) goto debug; - c->freelist = object[c->offset]; + c->freelist = get_freepointer(s, object); c->page->inuse = c->page->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); - stat(c, ALLOC_SLOWPATH); + stat(s, ALLOC_SLOWPATH); return object; another_slab: @@ -1545,7 +1647,7 @@ new_slab: new = get_partial(s, gfpflags, node); if (new) { c->page = new; - stat(c, ALLOC_FROM_PARTIAL); + stat(s, ALLOC_FROM_PARTIAL); goto load_freelist; } @@ -1558,8 +1660,8 @@ new_slab: local_irq_disable(); if (new) { - c = get_cpu_slab(s, smp_processor_id()); - stat(c, ALLOC_SLAB); + c = __this_cpu_ptr(s->cpu_slab); + stat(s, ALLOC_SLAB); if (c->page) flush_slab(s, c); slab_lock(new); @@ -1567,13 +1669,15 @@ new_slab: c->page = new; goto load_freelist; } + if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) + slab_out_of_memory(s, gfpflags, node); return NULL; debug: if (!alloc_debug_processing(s, c->page, object, addr)) goto another_slab; c->page->inuse++; - c->page->freelist = object[c->offset]; + c->page->freelist = get_freepointer(s, object); c->node = -1; goto unlock_out; } @@ -1594,47 +1698,78 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; - unsigned int objsize; + gfpflags &= gfp_allowed_mask; + + lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); - if (should_failslab(s->objsize, gfpflags)) + if (should_failslab(s->objsize, gfpflags, s->flags)) return NULL; local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); - objsize = c->objsize; - if (unlikely(!c->freelist || !node_match(c, node))) + c = __this_cpu_ptr(s->cpu_slab); + object = c->freelist; + if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - object = c->freelist; - c->freelist = object[c->offset]; - stat(c, ALLOC_FASTPATH); + c->freelist = get_freepointer(s, object); + stat(s, ALLOC_FASTPATH); } local_irq_restore(flags); - if (unlikely((gfpflags & __GFP_ZERO) && object)) - memset(object, 0, objsize); + if (unlikely(gfpflags & __GFP_ZERO) && object) + memset(object, 0, s->objsize); + + kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); + kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); return object; } void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { - return slab_alloc(s, gfpflags, -1, _RET_IP_); + void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); + + trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc); +#ifdef CONFIG_TRACING +void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +{ + return slab_alloc(s, gfpflags, -1, _RET_IP_); +} +EXPORT_SYMBOL(kmem_cache_alloc_notrace); +#endif + #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { - return slab_alloc(s, gfpflags, node, _RET_IP_); + void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); + + trace_kmem_cache_alloc_node(_RET_IP_, ret, + s->objsize, s->size, gfpflags, node); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node); #endif +#ifdef CONFIG_TRACING +void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node) +{ + return slab_alloc(s, gfpflags, node, _RET_IP_); +} +EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); +#endif + /* * Slow patch handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. @@ -1644,26 +1779,25 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr, unsigned int offset) + void *x, unsigned long addr) { void *prior; void **object = (void *)x; - struct kmem_cache_cpu *c; - c = get_cpu_slab(s, raw_smp_processor_id()); - stat(c, FREE_SLOWPATH); + stat(s, FREE_SLOWPATH); slab_lock(page); if (unlikely(SLABDEBUG && PageSlubDebug(page))) goto debug; checks_ok: - prior = object[offset] = page->freelist; + prior = page->freelist; + set_freepointer(s, object, prior); page->freelist = object; page->inuse--; if (unlikely(PageSlubFrozen(page))) { - stat(c, FREE_FROZEN); + stat(s, FREE_FROZEN); goto out_unlock; } @@ -1676,7 +1810,7 @@ checks_ok: */ if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); - stat(c, FREE_ADD_PARTIAL); + stat(s, FREE_ADD_PARTIAL); } out_unlock: @@ -1689,10 +1823,10 @@ slab_empty: * Slab still on the partial list. */ remove_partial(s, page); - stat(c, FREE_REMOVE_PARTIAL); + stat(s, FREE_REMOVE_PARTIAL); } slab_unlock(page); - stat(c, FREE_SLAB); + stat(s, FREE_SLAB); discard_slab(s, page); return; @@ -1720,17 +1854,19 @@ static __always_inline void slab_free(struct kmem_cache *s, struct kmem_cache_cpu *c; unsigned long flags; + kmemleak_free_recursive(x, s->flags); local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); - debug_check_no_locks_freed(object, c->objsize); + c = __this_cpu_ptr(s->cpu_slab); + kmemcheck_slab_free(s, object, s->objsize); + debug_check_no_locks_freed(object, s->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(object, s->objsize); if (likely(page == c->page && c->node >= 0)) { - object[c->offset] = c->freelist; + set_freepointer(s, object, c->freelist); c->freelist = object; - stat(c, FREE_FASTPATH); + stat(s, FREE_FASTPATH); } else - __slab_free(s, page, x, addr, c->offset); + __slab_free(s, page, x, addr); local_irq_restore(flags); } @@ -1742,6 +1878,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x) page = virt_to_head_page(x); slab_free(s, page, x, _RET_IP_); + + trace_kmem_cache_free(_RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); @@ -1844,6 +1982,7 @@ static inline int calculate_order(int size) int order; int min_objects; int fraction; + int max_objects; /* * Attempt to find best configuration for a slab. This @@ -1856,6 +1995,9 @@ static inline int calculate_order(int size) min_objects = slub_min_objects; if (!min_objects) min_objects = 4 * (fls(nr_cpu_ids) + 1); + max_objects = (PAGE_SIZE << slub_max_order)/size; + min_objects = min(min_objects, max_objects); + while (min_objects > 1) { fraction = 16; while (fraction >= 4) { @@ -1865,7 +2007,7 @@ static inline int calculate_order(int size) return order; fraction /= 2; } - min_objects /= 2; + min_objects--; } /* @@ -1880,7 +2022,7 @@ static inline int calculate_order(int size) * Doh this slab cannot be placed using slub_max_order. */ order = slab_order(size, 1, MAX_ORDER, 1); - if (order <= MAX_ORDER) + if (order < MAX_ORDER) return order; return -ENOSYS; } @@ -1911,34 +2053,10 @@ static unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } -static void init_kmem_cache_cpu(struct kmem_cache *s, - struct kmem_cache_cpu *c) -{ - c->page = NULL; - c->freelist = NULL; - c->node = 0; - c->offset = s->offset / sizeof(void *); - c->objsize = s->objsize; -#ifdef CONFIG_SLUB_STATS - memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); -#endif -} - static void init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) { n->nr_partial = 0; - - /* - * The larger the object size is, the more pages we want on the partial - * list to avoid pounding the page allocator excessively. - */ - n->min_partial = ilog2(s->size); - if (n->min_partial < MIN_PARTIAL) - n->min_partial = MIN_PARTIAL; - else if (n->min_partial > MAX_PARTIAL) - n->min_partial = MAX_PARTIAL; - spin_lock_init(&n->list_lock); INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG @@ -1948,130 +2066,24 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) #endif } -#ifdef CONFIG_SMP -/* - * Per cpu array for per cpu structures. - * - * The per cpu array places all kmem_cache_cpu structures from one processor - * close together meaning that it becomes possible that multiple per cpu - * structures are contained in one cacheline. This may be particularly - * beneficial for the kmalloc caches. - * - * A desktop system typically has around 60-80 slabs. With 100 here we are - * likely able to get per cpu structures for all caches from the array defined - * here. We must be able to cover all kmalloc caches during bootstrap. - * - * If the per cpu array is exhausted then fall back to kmalloc - * of individual cachelines. No sharing is possible then. - */ -#define NR_KMEM_CACHE_CPU 100 - -static DEFINE_PER_CPU(struct kmem_cache_cpu, - kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; - -static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); -static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); - -static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, - int cpu, gfp_t flags) -{ - struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); - - if (c) - per_cpu(kmem_cache_cpu_free, cpu) = - (void *)c->freelist; - else { - /* Table overflow: So allocate ourselves */ - c = kmalloc_node( - ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), - flags, cpu_to_node(cpu)); - if (!c) - return NULL; - } - - init_kmem_cache_cpu(s, c); - return c; -} - -static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) -{ - if (c < per_cpu(kmem_cache_cpu, cpu) || - c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { - kfree(c); - return; - } - c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); - per_cpu(kmem_cache_cpu_free, cpu) = c; -} - -static void free_kmem_cache_cpus(struct kmem_cache *s) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c) { - s->cpu_slab[cpu] = NULL; - free_kmem_cache_cpu(c, cpu); - } - } -} - -static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c) - continue; - - c = alloc_kmem_cache_cpu(s, cpu, flags); - if (!c) { - free_kmem_cache_cpus(s); - return 0; - } - s->cpu_slab[cpu] = c; - } - return 1; -} - -/* - * Initialize the per cpu array. - */ -static void init_alloc_cpu_cpu(int cpu) -{ - int i; - - if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) - return; - - for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) - free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); - - cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); -} +static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); -static void __init init_alloc_cpu(void) +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { - int cpu; - - for_each_online_cpu(cpu) - init_alloc_cpu_cpu(cpu); - } + if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) + /* + * Boot time creation of the kmalloc array. Use static per cpu data + * since the per cpu allocator is not available yet. + */ + s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); + else + s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); -#else -static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} -static inline void init_alloc_cpu(void) {} + if (!s->cpu_slab) + return 0; -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - init_kmem_cache_cpu(s, &s->cpu_slab); return 1; } -#endif #ifdef CONFIG_NUMA /* @@ -2140,7 +2152,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) int node; int local_node; - if (slab_state >= UP) + if (slab_state >= UP && (s < kmalloc_caches || + s >= kmalloc_caches + KMALLOC_CACHES)) local_node = page_to_nid(virt_to_page(s)); else local_node = 0; @@ -2181,6 +2194,15 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) } #endif +static void set_min_partial(struct kmem_cache *s, unsigned long min) +{ + if (min < MIN_PARTIAL) + min = MIN_PARTIAL; + else if (min > MAX_PARTIAL) + min = MAX_PARTIAL; + s->min_partial = min; +} + /* * calculate_sizes() determines the order and the distribution of data within * a slab object. @@ -2266,6 +2288,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * on bootup. */ align = calculate_alignment(flags, align, s->objsize); + s->align = align; /* * SLUB stores one object immediately after another beginning from @@ -2318,7 +2341,24 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (!calculate_sizes(s, -1)) goto error; + if (disable_higher_order_debug) { + /* + * Disable debugging flags that store metadata if the min slab + * order increased. + */ + if (get_order(s->size) > get_order(s->objsize)) { + s->flags &= ~DEBUG_METADATA_FLAGS; + s->offset = 0; + if (!calculate_sizes(s, -1)) + goto error; + } + } + /* + * The larger the object size is, the more pages we want on the partial + * list to avoid pounding the page allocator excessively. + */ + set_min_partial(s, ilog2(s->size)); s->refcount = 1; #ifdef CONFIG_NUMA s->remote_node_defrag_ratio = 1000; @@ -2328,6 +2368,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) return 1; + free_kmem_cache_nodes(s); error: if (flags & SLAB_PANIC) @@ -2345,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) { struct page *page; + if (!kern_ptr_validate(object, s->size)) + return 0; + page = get_object_page(object); if (!page || s != page->slab) @@ -2435,9 +2479,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) int node; flush_all(s); - + free_percpu(s->cpu_slab); /* Attempt to free all objects */ - free_kmem_cache_cpus(s); for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -2465,6 +2508,8 @@ void kmem_cache_destroy(struct kmem_cache *s) "still has objects.\n", s->name, __func__); dump_stack(); } + if (s->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); sysfs_slab_remove(s); } else up_write(&slub_lock); @@ -2475,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; +struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); static int __init setup_slub_min_order(char *str) @@ -2490,6 +2535,7 @@ __setup("slub_min_order=", setup_slub_min_order); static int __init setup_slub_max_order(char *str) { get_option(&str, &slub_max_order); + slub_max_order = min(slub_max_order, MAX_ORDER - 1); return 1; } @@ -2521,13 +2567,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, if (gfp_flags & SLUB_DMA) flags = SLAB_CACHE_DMA; - down_write(&slub_lock); + /* + * This function is called with IRQs disabled during early-boot on + * single CPU so there's no need to take slub_lock here. + */ if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, flags, NULL)) goto panic; list_add(&s->list, &slab_caches); - up_write(&slub_lock); + if (sysfs_slab_add(s)) goto panic; return s; @@ -2537,7 +2586,7 @@ panic: } #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; +static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; static void sysfs_add_func(struct work_struct *w) { @@ -2560,6 +2609,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) struct kmem_cache *s; char *text; size_t realsize; + unsigned long slabflags; + int i; s = kmalloc_caches_dma[index]; if (s) @@ -2579,12 +2630,28 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) realsize = kmalloc_caches[index].objsize; text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize); - s = kmalloc(kmem_size, flags & ~SLUB_DMA); - if (!s || !text || !kmem_cache_open(s, flags, text, - realsize, ARCH_KMALLOC_MINALIGN, - SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) { - kfree(s); + s = NULL; + for (i = 0; i < KMALLOC_CACHES; i++) + if (!kmalloc_caches[i].size) + break; + + BUG_ON(i >= KMALLOC_CACHES); + s = kmalloc_caches + i; + + /* + * Must defer sysfs creation to a workqueue because we don't know + * what context we are called from. Before sysfs comes up, we don't + * need to do anything because our sysfs initcall will start by + * adding all existing slabs to sysfs. + */ + slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK; + if (slab_state >= SYSFS) + slabflags |= __SYSFS_ADD_DEFERRED; + + if (!text || !kmem_cache_open(s, flags, text, + realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { + s->size = 0; kfree(text); goto unlock_out; } @@ -2592,7 +2659,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) list_add(&s->list, &slab_caches); kmalloc_caches_dma[index] = s; - schedule_work(&sysfs_add_work); + if (slab_state >= SYSFS) + schedule_work(&sysfs_add_work); unlock_out: up_write(&slub_lock); @@ -2634,6 +2702,11 @@ static s8 size_index[24] = { 2 /* 192 */ }; +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + static struct kmem_cache *get_slab(size_t size, gfp_t flags) { int index; @@ -2642,7 +2715,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) if (!size) return ZERO_SIZE_PTR; - index = size_index[(size - 1) / 8]; + index = size_index[size_index_elem(size)]; } else index = fls(size - 1); @@ -2657,8 +2730,9 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; + void *ret; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -2666,35 +2740,54 @@ void *__kmalloc(size_t size, gfp_t flags) if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, -1, _RET_IP_); + ret = slab_alloc(s, flags, -1, _RET_IP_); + + trace_kmalloc(_RET_IP_, ret, size, s->size, flags); + + return ret; } EXPORT_SYMBOL(__kmalloc); static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { - struct page *page = alloc_pages_node(node, flags | __GFP_COMP, - get_order(size)); + struct page *page; + void *ptr = NULL; + flags |= __GFP_COMP | __GFP_NOTRACK; + page = alloc_pages_node(node, flags, get_order(size)); if (page) - return page_address(page); - else - return NULL; + ptr = page_address(page); + + kmemleak_alloc(ptr, size, 1, flags); + return ptr; } #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *s; + void *ret; + + if (unlikely(size > SLUB_MAX_SIZE)) { + ret = kmalloc_large_node(size, flags, node); - if (unlikely(size > PAGE_SIZE)) - return kmalloc_large_node(size, flags, node); + trace_kmalloc_node(_RET_IP_, ret, + size, PAGE_SIZE << get_order(size), + flags, node); + + return ret; + } s = get_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, node, _RET_IP_); + ret = slab_alloc(s, flags, node, _RET_IP_); + + trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); + + return ret; } EXPORT_SYMBOL(__kmalloc_node); #endif @@ -2736,18 +2829,22 @@ size_t ksize(const void *object) */ return s->size; } +EXPORT_SYMBOL(ksize); void kfree(const void *x) { struct page *page; void *object = (void *)x; + trace_kfree(_RET_IP_, x); + if (unlikely(ZERO_OR_NULL_PTR(x))) return; page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); + kmemleak_free(x); put_page(page); return; } @@ -2866,7 +2963,7 @@ static void slab_mem_offline_callback(void *arg) /* * if n->nr_slabs > 0, slabs still exist on the node * that is going down. We were unable to free them, - * and offline_pages() function shoudn't call this + * and offline_pages() function shouldn't call this * callback. So, we must fail. */ BUG_ON(slabs_node(s, offline_node)); @@ -2956,8 +3053,6 @@ void __init kmem_cache_init(void) int i; int caches = 0; - init_alloc_cpu(); - #ifdef CONFIG_NUMA /* * Must first have the slab cache available for the allocations of the @@ -2965,7 +3060,7 @@ void __init kmem_cache_init(void) * kmem_cache_open for slab_state == DOWN. */ create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), GFP_KERNEL); + sizeof(struct kmem_cache_node), GFP_NOWAIT); kmalloc_caches[0].refcount = -1; caches++; @@ -2976,18 +3071,20 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 64) { + if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, GFP_KERNEL); + "kmalloc-96", 96, GFP_NOWAIT); caches++; + } + if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, GFP_KERNEL); + "kmalloc-192", 192, GFP_NOWAIT); caches++; } - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, GFP_KERNEL); + "kmalloc", 1 << i, GFP_NOWAIT); caches++; } @@ -3006,30 +3103,43 @@ void __init kmem_cache_init(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) - size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } - if (KMALLOC_MIN_SIZE == 128) { + if (KMALLOC_MIN_SIZE == 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + } else if (KMALLOC_MIN_SIZE == 128) { /* * The 192 byte sized cache is not used if the alignment * is 128 byte. Redirect kmalloc to use the 256 byte cache * instead. */ for (i = 128 + 8; i <= 192; i += 8) - size_index[(i - 1) / 8] = 8; + size_index[size_index_elem(i)] = 8; } slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) kmalloc_caches[i]. name = - kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); + kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); - kmem_size = offsetof(struct kmem_cache, cpu_slab) + - nr_cpu_ids * sizeof(struct kmem_cache_cpu *); +#endif +#ifdef CONFIG_NUMA + kmem_size = offsetof(struct kmem_cache, node) + + nr_node_ids * sizeof(struct kmem_cache_node *); #else kmem_size = sizeof(struct kmem_cache); #endif @@ -3042,6 +3152,10 @@ void __init kmem_cache_init(void) nr_cpu_ids, nr_node_ids); } +void __init kmem_cache_init_late(void) +{ +} + /* * Find a mergeable slab cache */ @@ -3108,25 +3222,18 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, { struct kmem_cache *s; + if (WARN_ON(!name)) + return NULL; + down_write(&slub_lock); s = find_mergeable(size, align, flags, name, ctor); if (s) { - int cpu; - s->refcount++; /* * Adjust the object sizes so that we clear * the complete object on kzalloc. */ s->objsize = max(s->objsize, (int)size); - - /* - * And then we need to update the object size in the - * per cpu structures - */ - for_each_online_cpu(cpu) - get_cpu_slab(s, cpu)->objsize = s->objsize; - s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); @@ -3180,29 +3287,15 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, unsigned long flags; switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - init_alloc_cpu_cpu(cpu); - down_read(&slub_lock); - list_for_each_entry(s, &slab_caches, list) - s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, - GFP_KERNEL); - up_read(&slub_lock); - break; - case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - local_irq_save(flags); __flush_cpu_slab(s, cpu); local_irq_restore(flags); - free_kmem_cache_cpu(c, cpu); - s->cpu_slab[cpu] = NULL; } up_read(&slub_lock); break; @@ -3221,8 +3314,9 @@ static struct notifier_block __cpuinitdata slab_notifier = { void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) { struct kmem_cache *s; + void *ret; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3230,15 +3324,21 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, gfpflags, -1, caller); + ret = slab_alloc(s, gfpflags, -1, caller); + + /* Honor the call site pointer we recieved. */ + trace_kmalloc(caller, ret, size, s->size, gfpflags); + + return ret; } void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, int node, unsigned long caller) { struct kmem_cache *s; + void *ret; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large_node(size, gfpflags, node); s = get_slab(size, gfpflags); @@ -3246,24 +3346,15 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, gfpflags, node, caller); -} + ret = slab_alloc(s, gfpflags, node, caller); -#ifdef CONFIG_SLUB_DEBUG -static unsigned long count_partial(struct kmem_cache_node *n, - int (*get_count)(struct page *)) -{ - unsigned long flags; - unsigned long x = 0; - struct page *page; + /* Honor the call site pointer we recieved. */ + trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); - spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, lru) - x += get_count(page); - spin_unlock_irqrestore(&n->list_lock, flags); - return x; + return ret; } +#ifdef CONFIG_SLUB_DEBUG static int count_inuse(struct page *page) { return page->inuse; @@ -3274,11 +3365,6 @@ static int count_total(struct page *page) return page->objects; } -static int count_free(struct page *page) -{ - return page->objects - page->inuse; -} - static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { @@ -3647,7 +3733,7 @@ static int list_locations(struct kmem_cache *s, char *buf, to_cpumask(l->cpus)); } - if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && + if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && len < PAGE_SIZE - 60) { len += sprintf(buf + len, " nodes="); len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50, @@ -3695,7 +3781,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, int cpu; for_each_possible_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (!c || c->node < 0) continue; @@ -3835,6 +3921,26 @@ static ssize_t order_show(struct kmem_cache *s, char *buf) } SLAB_ATTR(order); +static ssize_t min_partial_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%lu\n", s->min_partial); +} + +static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + unsigned long min; + int err; + + err = strict_strtoul(buf, 10, &min); + if (err) + return err; + + set_min_partial(s, min); + return length; +} +SLAB_ATTR(min_partial); + static ssize_t ctor_show(struct kmem_cache *s, char *buf) { if (s->ctor) { @@ -3918,6 +4024,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, } SLAB_ATTR(trace); +#ifdef CONFIG_FAILSLAB +static ssize_t failslab_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); +} + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + s->flags &= ~SLAB_FAILSLAB; + if (buf[0] == '1') + s->flags |= SLAB_FAILSLAB; + return length; +} +SLAB_ATTR(failslab); +#endif + static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); @@ -4100,7 +4223,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) return -ENOMEM; for_each_online_cpu(cpu) { - unsigned x = get_cpu_slab(s, cpu)->stat[si]; + unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; data[cpu] = x; sum += x; @@ -4118,12 +4241,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) return len + sprintf(buf + len, "\n"); } +static void clear_stat(struct kmem_cache *s, enum stat_item si) +{ + int cpu; + + for_each_online_cpu(cpu) + per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; +} + #define STAT_ATTR(si, text) \ static ssize_t text##_show(struct kmem_cache *s, char *buf) \ { \ return show_stat(s, buf, si); \ } \ -SLAB_ATTR_RO(text); \ +static ssize_t text##_store(struct kmem_cache *s, \ + const char *buf, size_t length) \ +{ \ + if (buf[0] != '0') \ + return -EINVAL; \ + clear_stat(s, si); \ + return length; \ +} \ +SLAB_ATTR(text); \ STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); @@ -4150,6 +4289,7 @@ static struct attribute *slab_attrs[] = { &object_size_attr.attr, &objs_per_slab_attr.attr, &order_attr.attr, + &min_partial_attr.attr, &objects_attr.attr, &objects_partial_attr.attr, &total_objects_attr.attr, @@ -4197,6 +4337,10 @@ static struct attribute *slab_attrs[] = { &deactivate_remote_frees_attr.attr, &order_fallback_attr.attr, #endif +#ifdef CONFIG_FAILSLAB + &failslab_attr.attr, +#endif + NULL }; @@ -4249,7 +4393,7 @@ static void kmem_cache_release(struct kobject *kobj) kfree(s); } -static struct sysfs_ops slab_sysfs_ops = { +static const struct sysfs_ops slab_sysfs_ops = { .show = slab_attr_show, .store = slab_attr_store, }; @@ -4268,7 +4412,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static struct kset_uevent_ops slab_uevent_ops = { +static const struct kset_uevent_ops slab_uevent_ops = { .filter = uevent_filter, }; @@ -4301,6 +4445,8 @@ static char *create_unique_id(struct kmem_cache *s) *p++ = 'a'; if (s->flags & SLAB_DEBUG_FREE) *p++ = 'F'; + if (!(s->flags & SLAB_NOTRACK)) + *p++ = 't'; if (p != name + 1) *p++ = '-'; p += sprintf(p, "%07d", s->size); @@ -4343,8 +4489,11 @@ static int sysfs_slab_add(struct kmem_cache *s) } err = sysfs_create_group(&s->kobj, &slab_attr_group); - if (err) + if (err) { + kobject_del(&s->kobj); + kobject_put(&s->kobj); return err; + } kobject_uevent(&s->kobj, KOBJ_ADD); if (!unmergeable) { /* Setup first alias */ @@ -4526,7 +4675,7 @@ static const struct file_operations proc_slabinfo_operations = { static int __init slab_proc_init(void) { - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); + proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); return 0; } module_init(slab_proc_init);