X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fslub.c;h=b364844a1068be41e1419fa02746060af67e92bb;hb=8128f55a0bc60cf3779135a1f837c4323e77c582;hp=b2b0c78ae35d04b74c0cea458ac93a78ee859e44;hpb=e03ab9d415c47e1ff485b646f95604d3e3a91708;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/slub.c b/mm/slub.c index b2b0c78..b364844 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -142,10 +141,18 @@ SLAB_POISON | SLAB_STORE_USER) /* + * Debugging flags that require metadata to be stored in the slab. These get + * disabled when slub_debug=O is used and a cache's min order increases with + * metadata. + */ +#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) + +/* * Set of flags that will prevent slab merging */ #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) + SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_FAILSLAB) #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ SLAB_CACHE_DMA | SLAB_NOTRACK) @@ -179,12 +186,6 @@ static enum { SYSFS /* Sysfs up */ } slab_state = DOWN; -/* - * The slab allocator is initialized with interrupts disabled. Therefore, make - * sure early boot allocations don't accidentally enable interrupts. - */ -static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; - /* A list of all slab caches on the system */ static DECLARE_RWSEM(slub_lock); static LIST_HEAD(slab_caches); @@ -217,10 +218,10 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) #endif -static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) +static inline void stat(struct kmem_cache *s, enum stat_item si) { #ifdef CONFIG_SLUB_STATS - c->stat[si]++; + __this_cpu_inc(s->cpu_slab->stat[si]); #endif } @@ -242,15 +243,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } -static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) -{ -#ifdef CONFIG_SMP - return s->cpu_slab[cpu]; -#else - return &s->cpu_slab; -#endif -} - /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) @@ -269,13 +261,6 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; } -/* - * Slow version of get and set free pointer. - * - * This version requires touching the cache lines of kmem_cache which - * we avoid to do in the fast alloc free paths. There we obtain the offset - * from the page struct. - */ static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -332,6 +317,7 @@ static int slub_debug; #endif static char *slub_debug_slabs; +static int disable_higher_order_debug; /* * Object debugging @@ -653,7 +639,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); print_section("Padding", end - remainder, remainder); - restore_bytes(s, "slab padding", POISON_INUSE, start, end); + restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); return 0; } @@ -983,6 +969,15 @@ static int __init setup_slub_debug(char *str) */ goto check_slabs; + if (tolower(*str) == 'o') { + /* + * Avoid enabling debugging on caches if its minimum order + * would increase as a result. + */ + disable_higher_order_debug = 1; + goto out; + } + slub_debug = 0; if (*str == '-') /* @@ -1010,6 +1005,9 @@ static int __init setup_slub_debug(char *str) case 't': slub_debug |= SLAB_TRACE; break; + case 'a': + slub_debug |= SLAB_FAILSLAB; + break; default: printk(KERN_ERR "slub_debug option '%c' " "unknown. skipped\n", *str); @@ -1033,8 +1031,8 @@ static unsigned long kmem_cache_flags(unsigned long objsize, * Enable debugging if selected on the kernel commandline. */ if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) - flags |= slub_debug; + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) + flags |= slub_debug; return flags; } @@ -1061,6 +1059,8 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize, } #define slub_debug 0 +#define disable_higher_order_debug 0 + static inline unsigned long slabs_node(struct kmem_cache *s, int node) { return 0; } static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) @@ -1091,11 +1091,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct page *page; struct kmem_cache_order_objects oo = s->oo; + gfp_t alloc_gfp; flags |= s->allocflags; - page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, - oo); + /* + * Let the initial higher-order allocation fail under memory pressure + * so we fall-back to the minimum order allocation. + */ + alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; + + page = alloc_slab_page(alloc_gfp, node, oo); if (unlikely(!page)) { oo = s->min; /* @@ -1106,12 +1112,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) return NULL; - stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + stat(s, ORDER_FALLBACK); } if (kmemcheck_enabled - && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) - { + && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { int pages = 1 << oo_order(oo); kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); @@ -1405,23 +1410,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); __ClearPageSlubFrozen(page); if (page->inuse) { if (page->freelist) { add_partial(n, page, tail); - stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); + stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); } else { - stat(c, DEACTIVATE_FULL); + stat(s, DEACTIVATE_FULL); if (SLABDEBUG && PageSlubDebug(page) && (s->flags & SLAB_STORE_USER)) add_full(n, page); } slab_unlock(page); } else { - stat(c, DEACTIVATE_EMPTY); + stat(s, DEACTIVATE_EMPTY); if (n->nr_partial < s->min_partial) { /* * Adding an empty slab to the partial slabs in order @@ -1437,7 +1441,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) slab_unlock(page); } else { slab_unlock(page); - stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); + stat(s, FREE_SLAB); discard_slab(s, page); } } @@ -1452,7 +1456,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) int tail = 1; if (page->freelist) - stat(c, DEACTIVATE_REMOTE_FREES); + stat(s, DEACTIVATE_REMOTE_FREES); /* * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely @@ -1465,10 +1469,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* Retrieve object from cpu_freelist */ object = c->freelist; - c->freelist = c->freelist[c->offset]; + c->freelist = get_freepointer(s, c->freelist); /* And put onto the regular freelist */ - object[c->offset] = page->freelist; + set_freepointer(s, object, page->freelist); page->freelist = object; page->inuse--; } @@ -1478,7 +1482,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { - stat(c, CPUSLAB_FLUSH); + stat(s, CPUSLAB_FLUSH); slab_lock(c->page); deactivate_slab(s, c); } @@ -1490,7 +1494,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (likely(c && c->page)) flush_slab(s, c); @@ -1561,6 +1565,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) "default order: %d, min order: %d\n", s->name, s->objsize, s->size, oo_order(s->oo), oo_order(s->min)); + if (oo_order(s->min) > get_order(s->objsize)) + printk(KERN_WARNING " %s debugging increased min order, use " + "slub_debug=O to disable.\n", s->name); + for_each_online_node(node) { struct kmem_cache_node *n = get_node(s, node); unsigned long nr_slabs; @@ -1614,7 +1622,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (unlikely(!node_match(c, node))) goto another_slab; - stat(c, ALLOC_REFILL); + stat(s, ALLOC_REFILL); load_freelist: object = c->page->freelist; @@ -1623,13 +1631,13 @@ load_freelist: if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) goto debug; - c->freelist = object[c->offset]; + c->freelist = get_freepointer(s, object); c->page->inuse = c->page->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); - stat(c, ALLOC_SLOWPATH); + stat(s, ALLOC_SLOWPATH); return object; another_slab: @@ -1639,7 +1647,7 @@ new_slab: new = get_partial(s, gfpflags, node); if (new) { c->page = new; - stat(c, ALLOC_FROM_PARTIAL); + stat(s, ALLOC_FROM_PARTIAL); goto load_freelist; } @@ -1652,8 +1660,8 @@ new_slab: local_irq_disable(); if (new) { - c = get_cpu_slab(s, smp_processor_id()); - stat(c, ALLOC_SLAB); + c = __this_cpu_ptr(s->cpu_slab); + stat(s, ALLOC_SLAB); if (c->page) flush_slab(s, c); slab_lock(new); @@ -1669,7 +1677,7 @@ debug: goto another_slab; c->page->inuse++; - c->page->freelist = object[c->offset]; + c->page->freelist = get_freepointer(s, object); c->node = -1; goto unlock_out; } @@ -1690,35 +1698,33 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; - unsigned int objsize; - gfpflags &= slab_gfp_mask; + gfpflags &= gfp_allowed_mask; lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); - if (should_failslab(s->objsize, gfpflags)) + if (should_failslab(s->objsize, gfpflags, s->flags)) return NULL; local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); - objsize = c->objsize; - if (unlikely(!c->freelist || !node_match(c, node))) + c = __this_cpu_ptr(s->cpu_slab); + object = c->freelist; + if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - object = c->freelist; - c->freelist = object[c->offset]; - stat(c, ALLOC_FASTPATH); + c->freelist = get_freepointer(s, object); + stat(s, ALLOC_FASTPATH); } local_irq_restore(flags); - if (unlikely((gfpflags & __GFP_ZERO) && object)) - memset(object, 0, objsize); + if (unlikely(gfpflags & __GFP_ZERO) && object) + memset(object, 0, s->objsize); - kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); - kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); + kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); + kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); return object; } @@ -1733,7 +1739,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) } EXPORT_SYMBOL(kmem_cache_alloc); -#ifdef CONFIG_KMEMTRACE +#ifdef CONFIG_TRACING void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) { return slab_alloc(s, gfpflags, -1, _RET_IP_); @@ -1754,7 +1760,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) EXPORT_SYMBOL(kmem_cache_alloc_node); #endif -#ifdef CONFIG_KMEMTRACE +#ifdef CONFIG_TRACING void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, gfp_t gfpflags, int node) @@ -1773,26 +1779,25 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr, unsigned int offset) + void *x, unsigned long addr) { void *prior; void **object = (void *)x; - struct kmem_cache_cpu *c; - c = get_cpu_slab(s, raw_smp_processor_id()); - stat(c, FREE_SLOWPATH); + stat(s, FREE_SLOWPATH); slab_lock(page); if (unlikely(SLABDEBUG && PageSlubDebug(page))) goto debug; checks_ok: - prior = object[offset] = page->freelist; + prior = page->freelist; + set_freepointer(s, object, prior); page->freelist = object; page->inuse--; if (unlikely(PageSlubFrozen(page))) { - stat(c, FREE_FROZEN); + stat(s, FREE_FROZEN); goto out_unlock; } @@ -1805,7 +1810,7 @@ checks_ok: */ if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); - stat(c, FREE_ADD_PARTIAL); + stat(s, FREE_ADD_PARTIAL); } out_unlock: @@ -1818,10 +1823,10 @@ slab_empty: * Slab still on the partial list. */ remove_partial(s, page); - stat(c, FREE_REMOVE_PARTIAL); + stat(s, FREE_REMOVE_PARTIAL); } slab_unlock(page); - stat(c, FREE_SLAB); + stat(s, FREE_SLAB); discard_slab(s, page); return; @@ -1851,17 +1856,17 @@ static __always_inline void slab_free(struct kmem_cache *s, kmemleak_free_recursive(x, s->flags); local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); - kmemcheck_slab_free(s, object, c->objsize); - debug_check_no_locks_freed(object, c->objsize); + c = __this_cpu_ptr(s->cpu_slab); + kmemcheck_slab_free(s, object, s->objsize); + debug_check_no_locks_freed(object, s->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(object, c->objsize); + debug_check_no_obj_freed(object, s->objsize); if (likely(page == c->page && c->node >= 0)) { - object[c->offset] = c->freelist; + set_freepointer(s, object, c->freelist); c->freelist = object; - stat(c, FREE_FASTPATH); + stat(s, FREE_FASTPATH); } else - __slab_free(s, page, x, addr, c->offset); + __slab_free(s, page, x, addr); local_irq_restore(flags); } @@ -2002,7 +2007,7 @@ static inline int calculate_order(int size) return order; fraction /= 2; } - min_objects --; + min_objects--; } /* @@ -2048,19 +2053,6 @@ static unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } -static void init_kmem_cache_cpu(struct kmem_cache *s, - struct kmem_cache_cpu *c) -{ - c->page = NULL; - c->freelist = NULL; - c->node = 0; - c->offset = s->offset / sizeof(void *); - c->objsize = s->objsize; -#ifdef CONFIG_SLUB_STATS - memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); -#endif -} - static void init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) { @@ -2074,130 +2066,24 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) #endif } -#ifdef CONFIG_SMP -/* - * Per cpu array for per cpu structures. - * - * The per cpu array places all kmem_cache_cpu structures from one processor - * close together meaning that it becomes possible that multiple per cpu - * structures are contained in one cacheline. This may be particularly - * beneficial for the kmalloc caches. - * - * A desktop system typically has around 60-80 slabs. With 100 here we are - * likely able to get per cpu structures for all caches from the array defined - * here. We must be able to cover all kmalloc caches during bootstrap. - * - * If the per cpu array is exhausted then fall back to kmalloc - * of individual cachelines. No sharing is possible then. - */ -#define NR_KMEM_CACHE_CPU 100 - -static DEFINE_PER_CPU(struct kmem_cache_cpu, - kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; - -static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); -static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); - -static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, - int cpu, gfp_t flags) -{ - struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); - - if (c) - per_cpu(kmem_cache_cpu_free, cpu) = - (void *)c->freelist; - else { - /* Table overflow: So allocate ourselves */ - c = kmalloc_node( - ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), - flags, cpu_to_node(cpu)); - if (!c) - return NULL; - } - - init_kmem_cache_cpu(s, c); - return c; -} - -static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) -{ - if (c < per_cpu(kmem_cache_cpu, cpu) || - c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { - kfree(c); - return; - } - c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); - per_cpu(kmem_cache_cpu_free, cpu) = c; -} - -static void free_kmem_cache_cpus(struct kmem_cache *s) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c) { - s->cpu_slab[cpu] = NULL; - free_kmem_cache_cpu(c, cpu); - } - } -} - -static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c) - continue; - - c = alloc_kmem_cache_cpu(s, cpu, flags); - if (!c) { - free_kmem_cache_cpus(s); - return 0; - } - s->cpu_slab[cpu] = c; - } - return 1; -} - -/* - * Initialize the per cpu array. - */ -static void init_alloc_cpu_cpu(int cpu) -{ - int i; - - if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) - return; - - for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) - free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); +static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); - cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); -} - -static void __init init_alloc_cpu(void) +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { - int cpu; - - for_each_online_cpu(cpu) - init_alloc_cpu_cpu(cpu); - } + if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) + /* + * Boot time creation of the kmalloc array. Use static per cpu data + * since the per cpu allocator is not available yet. + */ + s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); + else + s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); -#else -static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} -static inline void init_alloc_cpu(void) {} + if (!s->cpu_slab) + return 0; -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - init_kmem_cache_cpu(s, &s->cpu_slab); return 1; } -#endif #ifdef CONFIG_NUMA /* @@ -2266,7 +2152,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) int node; int local_node; - if (slab_state >= UP) + if (slab_state >= UP && (s < kmalloc_caches || + s > kmalloc_caches + KMALLOC_CACHES)) local_node = page_to_nid(virt_to_page(s)); else local_node = 0; @@ -2401,6 +2288,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * on bootup. */ align = calculate_alignment(flags, align, s->objsize); + s->align = align; /* * SLUB stores one object immediately after another beginning from @@ -2453,6 +2341,18 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (!calculate_sizes(s, -1)) goto error; + if (disable_higher_order_debug) { + /* + * Disable debugging flags that store metadata if the min slab + * order increased. + */ + if (get_order(s->size) > get_order(s->objsize)) { + s->flags &= ~DEBUG_METADATA_FLAGS; + s->offset = 0; + if (!calculate_sizes(s, -1)) + goto error; + } + } /* * The larger the object size is, the more pages we want on the partial @@ -2468,6 +2368,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) return 1; + free_kmem_cache_nodes(s); error: if (flags & SLAB_PANIC) @@ -2575,9 +2476,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) int node; flush_all(s); - + free_percpu(s->cpu_slab); /* Attempt to free all objects */ - free_kmem_cache_cpus(s); for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -2605,6 +2505,8 @@ void kmem_cache_destroy(struct kmem_cache *s) "still has objects.\n", s->name, __func__); dump_stack(); } + if (s->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); sysfs_slab_remove(s); } else up_write(&slub_lock); @@ -2615,7 +2517,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; +struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); static int __init setup_slub_min_order(char *str) @@ -2704,6 +2606,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) struct kmem_cache *s; char *text; size_t realsize; + unsigned long slabflags; + int i; s = kmalloc_caches_dma[index]; if (s) @@ -2723,13 +2627,28 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) realsize = kmalloc_caches[index].objsize; text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize); - s = kmalloc(kmem_size, flags & ~SLUB_DMA); - if (!s || !text || !kmem_cache_open(s, flags, text, - realsize, ARCH_KMALLOC_MINALIGN, - SLAB_CACHE_DMA|SLAB_NOTRACK|__SYSFS_ADD_DEFERRED, - NULL)) { - kfree(s); + s = NULL; + for (i = 0; i < KMALLOC_CACHES; i++) + if (!kmalloc_caches[i].size) + break; + + BUG_ON(i >= KMALLOC_CACHES); + s = kmalloc_caches + i; + + /* + * Must defer sysfs creation to a workqueue because we don't know + * what context we are called from. Before sysfs comes up, we don't + * need to do anything because our sysfs initcall will start by + * adding all existing slabs to sysfs. + */ + slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK; + if (slab_state >= SYSFS) + slabflags |= __SYSFS_ADD_DEFERRED; + + if (!text || !kmem_cache_open(s, flags, text, + realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { + s->size = 0; kfree(text); goto unlock_out; } @@ -2737,7 +2656,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) list_add(&s->list, &slab_caches); kmalloc_caches_dma[index] = s; - schedule_work(&sysfs_add_work); + if (slab_state >= SYSFS) + schedule_work(&sysfs_add_work); unlock_out: up_write(&slub_lock); @@ -2779,6 +2699,11 @@ static s8 size_index[24] = { 2 /* 192 */ }; +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + static struct kmem_cache *get_slab(size_t size, gfp_t flags) { int index; @@ -2787,7 +2712,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) if (!size) return ZERO_SIZE_PTR; - index = size_index[(size - 1) / 8]; + index = size_index[size_index_elem(size)]; } else index = fls(size - 1); @@ -2823,13 +2748,15 @@ EXPORT_SYMBOL(__kmalloc); static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; + void *ptr = NULL; flags |= __GFP_COMP | __GFP_NOTRACK; page = alloc_pages_node(node, flags, get_order(size)); if (page) - return page_address(page); - else - return NULL; + ptr = page_address(page); + + kmemleak_alloc(ptr, size, 1, flags); + return ptr; } #ifdef CONFIG_NUMA @@ -2914,6 +2841,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); + kmemleak_free(x); put_page(page); return; } @@ -3032,7 +2960,7 @@ static void slab_mem_offline_callback(void *arg) /* * if n->nr_slabs > 0, slabs still exist on the node * that is going down. We were unable to free them, - * and offline_pages() function shoudn't call this + * and offline_pages() function shouldn't call this * callback. So, we must fail. */ BUG_ON(slabs_node(s, offline_node)); @@ -3122,8 +3050,6 @@ void __init kmem_cache_init(void) int i; int caches = 0; - init_alloc_cpu(); - #ifdef CONFIG_NUMA /* * Must first have the slab cache available for the allocations of the @@ -3142,10 +3068,12 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 64) { + if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_NOWAIT); caches++; + } + if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_NOWAIT); caches++; @@ -3172,17 +3100,28 @@ void __init kmem_cache_init(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) - size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } - if (KMALLOC_MIN_SIZE == 128) { + if (KMALLOC_MIN_SIZE == 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + } else if (KMALLOC_MIN_SIZE == 128) { /* * The 192 byte sized cache is not used if the alignment * is 128 byte. Redirect kmalloc to use the 256 byte cache * instead. */ for (i = 128 + 8; i <= 192; i += 8) - size_index[(i - 1) / 8] = 8; + size_index[size_index_elem(i)] = 8; } slab_state = UP; @@ -3194,8 +3133,10 @@ void __init kmem_cache_init(void) #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); - kmem_size = offsetof(struct kmem_cache, cpu_slab) + - nr_cpu_ids * sizeof(struct kmem_cache_cpu *); +#endif +#ifdef CONFIG_NUMA + kmem_size = offsetof(struct kmem_cache, node) + + nr_node_ids * sizeof(struct kmem_cache_node *); #else kmem_size = sizeof(struct kmem_cache); #endif @@ -3210,10 +3151,6 @@ void __init kmem_cache_init(void) void __init kmem_cache_init_late(void) { - /* - * Interrupts are enabled now so all GFP allocations are safe. - */ - slab_gfp_mask = __GFP_BITS_MASK; } /* @@ -3282,25 +3219,18 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, { struct kmem_cache *s; + if (WARN_ON(!name)) + return NULL; + down_write(&slub_lock); s = find_mergeable(size, align, flags, name, ctor); if (s) { - int cpu; - s->refcount++; /* * Adjust the object sizes so that we clear * the complete object on kzalloc. */ s->objsize = max(s->objsize, (int)size); - - /* - * And then we need to update the object size in the - * per cpu structures - */ - for_each_online_cpu(cpu) - get_cpu_slab(s, cpu)->objsize = s->objsize; - s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); @@ -3354,29 +3284,15 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, unsigned long flags; switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - init_alloc_cpu_cpu(cpu); - down_read(&slub_lock); - list_for_each_entry(s, &slab_caches, list) - s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, - GFP_KERNEL); - up_read(&slub_lock); - break; - case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - local_irq_save(flags); __flush_cpu_slab(s, cpu); local_irq_restore(flags); - free_kmem_cache_cpu(c, cpu); - s->cpu_slab[cpu] = NULL; } up_read(&slub_lock); break; @@ -3862,7 +3778,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, int cpu; for_each_possible_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (!c || c->node < 0) continue; @@ -4105,6 +4021,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, } SLAB_ATTR(trace); +#ifdef CONFIG_FAILSLAB +static ssize_t failslab_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); +} + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + s->flags &= ~SLAB_FAILSLAB; + if (buf[0] == '1') + s->flags |= SLAB_FAILSLAB; + return length; +} +SLAB_ATTR(failslab); +#endif + static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); @@ -4287,7 +4220,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) return -ENOMEM; for_each_online_cpu(cpu) { - unsigned x = get_cpu_slab(s, cpu)->stat[si]; + unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; data[cpu] = x; sum += x; @@ -4305,12 +4238,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) return len + sprintf(buf + len, "\n"); } +static void clear_stat(struct kmem_cache *s, enum stat_item si) +{ + int cpu; + + for_each_online_cpu(cpu) + per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; +} + #define STAT_ATTR(si, text) \ static ssize_t text##_show(struct kmem_cache *s, char *buf) \ { \ return show_stat(s, buf, si); \ } \ -SLAB_ATTR_RO(text); \ +static ssize_t text##_store(struct kmem_cache *s, \ + const char *buf, size_t length) \ +{ \ + if (buf[0] != '0') \ + return -EINVAL; \ + clear_stat(s, si); \ + return length; \ +} \ +SLAB_ATTR(text); \ STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); @@ -4385,6 +4334,10 @@ static struct attribute *slab_attrs[] = { &deactivate_remote_frees_attr.attr, &order_fallback_attr.attr, #endif +#ifdef CONFIG_FAILSLAB + &failslab_attr.attr, +#endif + NULL }; @@ -4437,7 +4390,7 @@ static void kmem_cache_release(struct kobject *kobj) kfree(s); } -static struct sysfs_ops slab_sysfs_ops = { +static const struct sysfs_ops slab_sysfs_ops = { .show = slab_attr_show, .store = slab_attr_store, }; @@ -4456,7 +4409,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static struct kset_uevent_ops slab_uevent_ops = { +static const struct kset_uevent_ops slab_uevent_ops = { .filter = uevent_filter, }; @@ -4533,8 +4486,11 @@ static int sysfs_slab_add(struct kmem_cache *s) } err = sysfs_create_group(&s->kobj, &slab_attr_group); - if (err) + if (err) { + kobject_del(&s->kobj); + kobject_put(&s->kobj); return err; + } kobject_uevent(&s->kobj, KOBJ_ADD); if (!unmergeable) { /* Setup first alias */ @@ -4716,7 +4672,7 @@ static const struct file_operations proc_slabinfo_operations = { static int __init slab_proc_init(void) { - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); + proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); return 0; } module_init(slab_proc_init);