X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fslab.c;h=7dfa481c96bade62ae4ba34299dcd4fb8d79cdb3;hb=bf7ec5bb6114b2f086e536e24486fdacd1c0d339;hp=c3feeaab387537ef00aa2085b4f54f6d7e4abca0;hpb=a5c96d8a1c67f31ef48935a78da2d2076513842b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/slab.c b/mm/slab.c index c3feeaa..7dfa481 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -26,7 +26,7 @@ * initialized objects. * * This means, that your constructor is used only for newly allocated - * slabs and you must pass objects with the same intializations to + * slabs and you must pass objects with the same initializations to * kmem_cache_free. * * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, @@ -95,21 +95,26 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include #include +#include #include #include #include #include #include +#include +#include #include #include @@ -139,10 +144,6 @@ #define BYTES_PER_WORD sizeof(void *) #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long)) -#ifndef cache_line_size -#define cache_line_size() L1_CACHE_BYTES -#endif - #ifndef ARCH_KMALLOC_MINALIGN /* * Enforce a minimum alignment for the kmalloc caches. @@ -178,12 +179,14 @@ SLAB_CACHE_DMA | \ SLAB_STORE_USER | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) #else # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) #endif /* @@ -267,11 +270,10 @@ struct array_cache { unsigned int batchcount; unsigned int touched; spinlock_t lock; - void *entry[0]; /* + void *entry[]; /* * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing * the entries. - * [0] is for gcc 2.95. It should really be []. */ }; @@ -305,17 +307,17 @@ struct kmem_list3 { /* * Need this for bootstrapping a per node allocator. */ -#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) +#define NUM_INIT_LISTS (3 * MAX_NUMNODES) struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; #define CACHE_CACHE 0 -#define SIZE_AC 1 -#define SIZE_L3 (1 + MAX_NUMNODES) +#define SIZE_AC MAX_NUMNODES +#define SIZE_L3 (2 * MAX_NUMNODES) static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); -static int enable_cpucache(struct kmem_cache *cachep); +static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); /* @@ -334,7 +336,7 @@ static __always_inline int index_of(const size_t size) return i; \ else \ i++; -#include "linux/kmalloc_sizes.h" +#include #undef CACHE __bad_size(); } else @@ -373,87 +375,6 @@ static void kmem_list3_init(struct kmem_list3 *parent) MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ } while (0) -/* - * struct kmem_cache - * - * manages a cache. - */ - -struct kmem_cache { -/* 1) per-cpu data, touched during every alloc/free */ - struct array_cache *array[NR_CPUS]; -/* 2) Cache tunables. Protected by cache_chain_mutex */ - unsigned int batchcount; - unsigned int limit; - unsigned int shared; - - unsigned int buffer_size; - u32 reciprocal_buffer_size; -/* 3) touched by every alloc & free from the backend */ - - unsigned int flags; /* constant flags */ - unsigned int num; /* # of objs per slab */ - -/* 4) cache_grow/shrink */ - /* order of pgs per slab (2^n) */ - unsigned int gfporder; - - /* force GFP flags, e.g. GFP_DMA */ - gfp_t gfpflags; - - size_t colour; /* cache colouring range */ - unsigned int colour_off; /* colour offset */ - struct kmem_cache *slabp_cache; - unsigned int slab_size; - unsigned int dflags; /* dynamic flags */ - - /* constructor func */ - void (*ctor) (void *, struct kmem_cache *, unsigned long); - -/* 5) cache creation/removal */ - const char *name; - struct list_head next; - -/* 6) statistics */ -#if STATS - unsigned long num_active; - unsigned long num_allocations; - unsigned long high_mark; - unsigned long grown; - unsigned long reaped; - unsigned long errors; - unsigned long max_freeable; - unsigned long node_allocs; - unsigned long node_frees; - unsigned long node_overflow; - atomic_t allochit; - atomic_t allocmiss; - atomic_t freehit; - atomic_t freemiss; -#endif -#if DEBUG - /* - * If debugging is enabled, then the allocator can add additional - * fields and/or padding to every object. buffer_size contains the total - * object size including these internal fields, the following two - * variables contain the offset to the user object and its size. - */ - int obj_offset; - int obj_size; -#endif - /* - * We put nodelists[] at the end of kmem_cache, because we want to size - * this array to nr_node_ids slots instead of MAX_NUMNODES - * (see kmem_cache_init()) - * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache - * is statically defined, so we reserve the max number of nodes. - */ - struct kmem_list3 *nodelists[MAX_NUMNODES]; - /* - * Do not add fields after nodelists[] - */ -}; - #define CFLGS_OFF_SLAB (0x80000000UL) #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) @@ -569,6 +490,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #endif +#ifdef CONFIG_KMEMTRACE +size_t slab_buffer_size(struct kmem_cache *cachep) +{ + return cachep->buffer_size; +} +EXPORT_SYMBOL(slab_buffer_size); +#endif + /* * Do not go above this order unless 0 objects fit into the slab. */ @@ -731,8 +660,7 @@ static inline void init_lock_keys(void) #endif /* - * 1. Guard access to the cache-chain. - * 2. Protect sanity of cpu_online_map against cpu hotplug events + * Guard access to the cache-chain. */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; @@ -745,6 +673,7 @@ static enum { NONE, PARTIAL_AC, PARTIAL_L3, + EARLY, FULL } g_cpucache_up; @@ -753,7 +682,7 @@ static enum { */ int slab_is_available(void) { - return g_cpucache_up == FULL; + return g_cpucache_up >= EARLY; } static DEFINE_PER_CPU(struct delayed_work, reap_work); @@ -864,7 +793,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, *left_over = slab_size - nr_objs*buffer_size - mgmt_size; } -#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) +#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg) static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) @@ -950,12 +879,20 @@ static void __cpuinit start_cpu_timer(int cpu) } static struct array_cache *alloc_arraycache(int node, int entries, - int batchcount) + int batchcount, gfp_t gfp) { int memsize = sizeof(void *) * entries + sizeof(struct array_cache); struct array_cache *nc = NULL; - nc = kmalloc_node(memsize, GFP_KERNEL, node); + nc = kmalloc_node(memsize, gfp, node); + /* + * The array_cache structures contain pointers to free object. + * However, when such objects are allocated or transfered to another + * cache the pointers are not cleared and they could be counted as + * valid references during a kmemleak scan. Therefore, kmemleak must + * not scan such objects. + */ + kmemleak_no_scan(nc); if (nc) { nc->avail = 0; nc->limit = entries; @@ -995,7 +932,7 @@ static int transfer_objects(struct array_cache *to, #define drain_alien_cache(cachep, alien) do { } while (0) #define reap_alien(cachep, l3) do { } while (0) -static inline struct array_cache **alloc_alien_cache(int node, int limit) +static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { return (struct array_cache **)BAD_ALIEN_MAGIC; } @@ -1026,7 +963,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); -static struct array_cache **alloc_alien_cache(int node, int limit) +static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { struct array_cache **ac_ptr; int memsize = sizeof(void *) * nr_node_ids; @@ -1034,16 +971,16 @@ static struct array_cache **alloc_alien_cache(int node, int limit) if (limit > 1) limit = 12; - ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); + ac_ptr = kmalloc_node(memsize, gfp, node); if (ac_ptr) { for_each_node(i) { if (i == node || !node_online(i)) { ac_ptr[i] = NULL; continue; } - ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); + ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); if (!ac_ptr[i]) { - for (i--; i <= 0; i--) + for (i--; i >= 0; i--) kfree(ac_ptr[i]); kfree(ac_ptr); return NULL; @@ -1156,105 +1093,185 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } #endif -static int __cpuinit cpuup_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void __cpuinit cpuup_canceled(long cpu) +{ + struct kmem_cache *cachep; + struct kmem_list3 *l3 = NULL; + int node = cpu_to_node(cpu); + const struct cpumask *mask = cpumask_of_node(node); + + list_for_each_entry(cachep, &cache_chain, next) { + struct array_cache *nc; + struct array_cache *shared; + struct array_cache **alien; + + /* cpu is dead; no one can alloc from it. */ + nc = cachep->array[cpu]; + cachep->array[cpu] = NULL; + l3 = cachep->nodelists[node]; + + if (!l3) + goto free_array_cache; + + spin_lock_irq(&l3->list_lock); + + /* Free limit for this kmem_list3 */ + l3->free_limit -= cachep->batchcount; + if (nc) + free_block(cachep, nc->entry, nc->avail, node); + + if (!cpus_empty(*mask)) { + spin_unlock_irq(&l3->list_lock); + goto free_array_cache; + } + + shared = l3->shared; + if (shared) { + free_block(cachep, shared->entry, + shared->avail, node); + l3->shared = NULL; + } + + alien = l3->alien; + l3->alien = NULL; + + spin_unlock_irq(&l3->list_lock); + + kfree(shared); + if (alien) { + drain_alien_cache(cachep, alien); + free_alien_cache(alien); + } +free_array_cache: + kfree(nc); + } + /* + * In the previous loop, all the objects were freed to + * the respective cache's slabs, now we can go ahead and + * shrink each nodelist to its limit. + */ + list_for_each_entry(cachep, &cache_chain, next) { + l3 = cachep->nodelists[node]; + if (!l3) + continue; + drain_freelist(cachep, l3, l3->free_objects); + } +} + +static int __cpuinit cpuup_prepare(long cpu) { - long cpu = (long)hcpu; struct kmem_cache *cachep; struct kmem_list3 *l3 = NULL; int node = cpu_to_node(cpu); const int memsize = sizeof(struct kmem_list3); - switch (action) { - case CPU_LOCK_ACQUIRE: - mutex_lock(&cache_chain_mutex); - break; - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: + /* + * We need to do this right in the beginning since + * alloc_arraycache's are going to use this list. + * kmalloc_node allows us to add the slab to the right + * kmem_list3 and not this cpu's kmem_list3 + */ + + list_for_each_entry(cachep, &cache_chain, next) { /* - * We need to do this right in the beginning since - * alloc_arraycache's are going to use this list. - * kmalloc_node allows us to add the slab to the right - * kmem_list3 and not this cpu's kmem_list3 + * Set up the size64 kmemlist for cpu before we can + * begin anything. Make sure some other cpu on this + * node has not already allocated this */ + if (!cachep->nodelists[node]) { + l3 = kmalloc_node(memsize, GFP_KERNEL, node); + if (!l3) + goto bad; + kmem_list3_init(l3); + l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - list_for_each_entry(cachep, &cache_chain, next) { /* - * Set up the size64 kmemlist for cpu before we can - * begin anything. Make sure some other cpu on this - * node has not already allocated this + * The l3s don't come and go as CPUs come and + * go. cache_chain_mutex is sufficient + * protection here. */ - if (!cachep->nodelists[node]) { - l3 = kmalloc_node(memsize, GFP_KERNEL, node); - if (!l3) - goto bad; - kmem_list3_init(l3); - l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + - ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - - /* - * The l3s don't come and go as CPUs come and - * go. cache_chain_mutex is sufficient - * protection here. - */ - cachep->nodelists[node] = l3; - } - - spin_lock_irq(&cachep->nodelists[node]->list_lock); - cachep->nodelists[node]->free_limit = - (1 + nr_cpus_node(node)) * - cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + cachep->nodelists[node] = l3; } - /* - * Now we can go ahead with allocating the shared arrays and - * array caches - */ - list_for_each_entry(cachep, &cache_chain, next) { - struct array_cache *nc; - struct array_cache *shared = NULL; - struct array_cache **alien = NULL; - - nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount); - if (!nc) + spin_lock_irq(&cachep->nodelists[node]->list_lock); + cachep->nodelists[node]->free_limit = + (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; + spin_unlock_irq(&cachep->nodelists[node]->list_lock); + } + + /* + * Now we can go ahead with allocating the shared arrays and + * array caches + */ + list_for_each_entry(cachep, &cache_chain, next) { + struct array_cache *nc; + struct array_cache *shared = NULL; + struct array_cache **alien = NULL; + + nc = alloc_arraycache(node, cachep->limit, + cachep->batchcount, GFP_KERNEL); + if (!nc) + goto bad; + if (cachep->shared) { + shared = alloc_arraycache(node, + cachep->shared * cachep->batchcount, + 0xbaadf00d, GFP_KERNEL); + if (!shared) { + kfree(nc); goto bad; - if (cachep->shared) { - shared = alloc_arraycache(node, - cachep->shared * cachep->batchcount, - 0xbaadf00d); - if (!shared) - goto bad; } - if (use_alien_caches) { - alien = alloc_alien_cache(node, cachep->limit); - if (!alien) - goto bad; - } - cachep->array[cpu] = nc; - l3 = cachep->nodelists[node]; - BUG_ON(!l3); - - spin_lock_irq(&l3->list_lock); - if (!l3->shared) { - /* - * We are serialised from CPU_DEAD or - * CPU_UP_CANCELLED by the cpucontrol lock - */ - l3->shared = shared; - shared = NULL; + } + if (use_alien_caches) { + alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL); + if (!alien) { + kfree(shared); + kfree(nc); + goto bad; } + } + cachep->array[cpu] = nc; + l3 = cachep->nodelists[node]; + BUG_ON(!l3); + + spin_lock_irq(&l3->list_lock); + if (!l3->shared) { + /* + * We are serialised from CPU_DEAD or + * CPU_UP_CANCELLED by the cpucontrol lock + */ + l3->shared = shared; + shared = NULL; + } #ifdef CONFIG_NUMA - if (!l3->alien) { - l3->alien = alien; - alien = NULL; - } -#endif - spin_unlock_irq(&l3->list_lock); - kfree(shared); - free_alien_cache(alien); + if (!l3->alien) { + l3->alien = alien; + alien = NULL; } +#endif + spin_unlock_irq(&l3->list_lock); + kfree(shared); + free_alien_cache(alien); + } + return 0; +bad: + cpuup_canceled(cpu); + return -ENOMEM; +} + +static int __cpuinit cpuup_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + mutex_lock(&cache_chain_mutex); + err = cpuup_prepare(cpu); + mutex_unlock(&cache_chain_mutex); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -1287,76 +1304,16 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, * structure is usually allocated from kmem_cache_create() and * gets destroyed at kmem_cache_destroy(). */ - /* fall thru */ + /* fall through */ #endif case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - list_for_each_entry(cachep, &cache_chain, next) { - struct array_cache *nc; - struct array_cache *shared; - struct array_cache **alien; - cpumask_t mask; - - mask = node_to_cpumask(node); - /* cpu is dead; no one can alloc from it. */ - nc = cachep->array[cpu]; - cachep->array[cpu] = NULL; - l3 = cachep->nodelists[node]; - - if (!l3) - goto free_array_cache; - - spin_lock_irq(&l3->list_lock); - - /* Free limit for this kmem_list3 */ - l3->free_limit -= cachep->batchcount; - if (nc) - free_block(cachep, nc->entry, nc->avail, node); - - if (!cpus_empty(mask)) { - spin_unlock_irq(&l3->list_lock); - goto free_array_cache; - } - - shared = l3->shared; - if (shared) { - free_block(cachep, shared->entry, - shared->avail, node); - l3->shared = NULL; - } - - alien = l3->alien; - l3->alien = NULL; - - spin_unlock_irq(&l3->list_lock); - - kfree(shared); - if (alien) { - drain_alien_cache(cachep, alien); - free_alien_cache(alien); - } -free_array_cache: - kfree(nc); - } - /* - * In the previous loop, all the objects were freed to - * the respective cache's slabs, now we can go ahead and - * shrink each nodelist to its limit. - */ - list_for_each_entry(cachep, &cache_chain, next) { - l3 = cachep->nodelists[node]; - if (!l3) - continue; - drain_freelist(cachep, l3, l3->free_objects); - } - break; - case CPU_LOCK_RELEASE: + mutex_lock(&cache_chain_mutex); + cpuup_canceled(cpu); mutex_unlock(&cache_chain_mutex); break; } - return NOTIFY_OK; -bad: - return NOTIFY_BAD; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block __cpuinitdata cpucache_notifier = { @@ -1371,10 +1328,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, { struct kmem_list3 *ptr; - ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); + ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); BUG_ON(!ptr); - local_irq_disable(); memcpy(ptr, list, sizeof(struct kmem_list3)); /* * Do not assume that spinlocks can be initialized via memcpy: @@ -1383,7 +1339,22 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; - local_irq_enable(); +} + +/* + * For setting up all the kmem_list3s for cache whose buffer_size is same as + * size of kmem_list3. + */ +static void __init set_up_list3s(struct kmem_cache *cachep, int index) +{ + int node; + + for_each_online_node(node) { + cachep->nodelists[node] = &initkmem_list3[index + node]; + cachep->nodelists[node]->next_reap = jiffies + + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; + } } /* @@ -1407,12 +1378,13 @@ void __init kmem_cache_init(void) if (i < MAX_NUMNODES) cache_cache.nodelists[i] = NULL; } + set_up_list3s(&cache_cache, CACHE_CACHE); /* * Fragmentation resistance on low memory - only use bigger * page orders on machines with more than 32MB of memory. */ - if (num_physpages > (32 << 20) >> PAGE_SHIFT) + if (totalram_pages > (32 << 20) >> PAGE_SHIFT) slab_break_gfp_order = BREAK_GFP_ORDER_HI; /* Bootstrap is tricky, because several objects are allocated @@ -1442,7 +1414,7 @@ void __init kmem_cache_init(void) list_add(&cache_cache.next, &cache_chain); cache_cache.colour_off = cache_line_size(); cache_cache.array[smp_processor_id()] = &initarray_cache.cache; - cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE]; + cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* * struct kmem_cache size depends on nr_node_ids, which @@ -1484,7 +1456,7 @@ void __init kmem_cache_init(void) sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL, NULL); + NULL); if (INDEX_AC != INDEX_L3) { sizes[INDEX_L3].cs_cachep = @@ -1492,7 +1464,7 @@ void __init kmem_cache_init(void) sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL, NULL); + NULL); } slab_early_init = 0; @@ -1510,7 +1482,7 @@ void __init kmem_cache_init(void) sizes->cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL, NULL); + NULL); } #ifdef CONFIG_ZONE_DMA sizes->cs_dmacachep = kmem_cache_create( @@ -1519,7 +1491,7 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC, - NULL, NULL); + NULL); #endif sizes++; names++; @@ -1528,9 +1500,8 @@ void __init kmem_cache_init(void) { struct array_cache *ptr; - ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - local_irq_disable(); BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); @@ -1540,11 +1511,9 @@ void __init kmem_cache_init(void) spin_lock_init(&ptr->lock); cache_cache.array[smp_processor_id()] = ptr; - local_irq_enable(); - ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - local_irq_disable(); BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), @@ -1556,16 +1525,14 @@ void __init kmem_cache_init(void) malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; - local_irq_enable(); } /* 5) Replace the bootstrap kmem_list3's */ { int nid; - /* Replace the static kmem_list3 structures for the boot cpu */ - init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); - for_each_online_node(nid) { + init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid); + init_list(malloc_sizes[INDEX_AC].cs_cachep, &initkmem_list3[SIZE_AC + nid], nid); @@ -1576,23 +1543,26 @@ void __init kmem_cache_init(void) } } - /* 6) resize the head arrays to their final sizes */ - { - struct kmem_cache *cachep; - mutex_lock(&cache_chain_mutex); - list_for_each_entry(cachep, &cache_chain, next) - if (enable_cpucache(cachep)) - BUG(); - mutex_unlock(&cache_chain_mutex); - } + g_cpucache_up = EARLY; +} - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); +void __init kmem_cache_init_late(void) +{ + struct kmem_cache *cachep; + /* 6) resize the head arrays to their final sizes */ + mutex_lock(&cache_chain_mutex); + list_for_each_entry(cachep, &cache_chain, next) + if (enable_cpucache(cachep, GFP_NOWAIT)) + BUG(); + mutex_unlock(&cache_chain_mutex); /* Done! */ g_cpucache_up = FULL; + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + /* * Register a cpu startup notifier callback that initializes * cpu_cache_get for all new cpus @@ -1640,8 +1610,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) #endif flags |= cachep->gfpflags; + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + flags |= __GFP_RECLAIMABLE; - page = alloc_pages_node(nodeid, flags, cachep->gfporder); + page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); if (!page) return NULL; @@ -1654,6 +1626,16 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) NR_SLAB_UNRECLAIMABLE, nr_pages); for (i = 0; i < nr_pages; i++) __SetPageSlab(page + i); + + if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { + kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); + + if (cachep->ctor) + kmemcheck_mark_uninitialized_pages(page, nr_pages); + else + kmemcheck_mark_unallocated_pages(page, nr_pages); + } + return page_address(page); } @@ -1666,6 +1648,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; + kmemcheck_free_shadow(page, cachep->gfporder); + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) sub_zone_page_state(page_zone(page), NR_SLAB_RECLAIMABLE, nr_freed); @@ -1863,15 +1847,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) #endif #if DEBUG -/** - * slab_destroy_objs - destroy a slab and its objects - * @cachep: cache pointer being destroyed - * @slabp: slab pointer being destroyed - * - * Call the registered destructor for each object in a slab that is being - * destroyed. - */ -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) { int i; for (i = 0; i < cachep->num; i++) { @@ -1900,7 +1876,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) } } #else -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) { } #endif @@ -1918,7 +1894,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) { void *addr = slabp->s_mem - slabp->colouroff; - slab_destroy_objs(cachep, slabp); + slab_destroy_debugcheck(cachep, slabp); if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { struct slab_rcu *slab_rcu; @@ -1933,22 +1909,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) } } -/* - * For setting up all the kmem_list3s for cache whose buffer_size is same as - * size of kmem_list3. - */ -static void __init set_up_list3s(struct kmem_cache *cachep, int index) -{ - int node; - - for_each_online_node(node) { - cachep->nodelists[node] = &initkmem_list3[index + node]; - cachep->nodelists[node]->next_reap = jiffies + - REAPTIMEOUT_LIST3 + - ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - } -} - static void __kmem_cache_destroy(struct kmem_cache *cachep) { int i; @@ -2040,10 +2000,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, return left_over; } -static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) +static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { if (g_cpucache_up == FULL) - return enable_cpucache(cachep); + return enable_cpucache(cachep, gfp); if (g_cpucache_up == NONE) { /* @@ -2065,7 +2025,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) g_cpucache_up = PARTIAL_AC; } else { cachep->array[smp_processor_id()] = - kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + kmalloc(sizeof(struct arraycache_init), gfp); if (g_cpucache_up == PARTIAL_AC) { set_up_list3s(cachep, SIZE_L3); @@ -2075,7 +2035,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) for_each_online_node(node) { cachep->nodelists[node] = kmalloc_node(sizeof(struct kmem_list3), - GFP_KERNEL, node); + gfp, node); BUG_ON(!cachep->nodelists[node]); kmem_list3_init(cachep->nodelists[node]); } @@ -2101,15 +2061,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) * @align: The required alignment for the objects. * @flags: SLAB flags * @ctor: A constructor for the objects. - * @dtor: A destructor for the objects (not implemented anymore). * * Returns a ptr to the cache on success, NULL on failure. * Cannot be called within a int, but can be interrupted. - * The @ctor is run when new pages are allocated by the cache - * and the @dtor is run before the pages are handed back. + * The @ctor is run when new pages are allocated by the cache. * * @name must be valid until the cache is destroyed. This implies that * the module calling this has to destroy the cache before getting unloaded. + * Note that kmem_cache_name() is not guaranteed to return the same pointer, + * therefore applications must manage it themselves. * * The flags are * @@ -2125,28 +2085,30 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) */ struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, - unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long), - void (*dtor)(void*, struct kmem_cache *, unsigned long)) + unsigned long flags, void (*ctor)(void *)) { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; + gfp_t gfp; /* * Sanity checks... these are all serious usage bugs. */ if (!name || in_interrupt() || (size < BYTES_PER_WORD) || - size > KMALLOC_MAX_SIZE || dtor) { - printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, + size > KMALLOC_MAX_SIZE) { + printk(KERN_ERR "%s: Early error in slab %s\n", __func__, name); BUG(); } /* * We use cache_chain_mutex to ensure a consistent view of - * cpu_online_map as well. Please see cpuup_callback + * cpu_online_mask as well. Please see cpuup_callback */ - mutex_lock(&cache_chain_mutex); + if (slab_is_available()) { + get_online_cpus(); + mutex_lock(&cache_chain_mutex); + } list_for_each_entry(pc, &cache_chain, next) { char tmp; @@ -2255,8 +2217,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ align = ralign; + if (slab_is_available()) + gfp = GFP_KERNEL; + else + gfp = GFP_NOWAIT; + /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); + cachep = kmem_cache_zalloc(&cache_cache, gfp); if (!cachep) goto oops; @@ -2330,6 +2297,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* really off slab. No need for manual alignment */ slab_size = cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); + +#ifdef CONFIG_PAGE_POISONING + /* If we're going to use the generic kernel_map_pages() + * poisoning, then it's going to smash the contents of + * the redzone and userword anyhow, so switch them off. + */ + if (size % PAGE_SIZE == 0 && flags & SLAB_POISON) + flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); +#endif } cachep->colour_off = cache_line_size(); @@ -2359,7 +2335,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->ctor = ctor; cachep->name = name; - if (setup_cpu_cache(cachep)) { + if (setup_cpu_cache(cachep, gfp)) { __kmem_cache_destroy(cachep); cachep = NULL; goto oops; @@ -2371,7 +2347,10 @@ oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); - mutex_unlock(&cache_chain_mutex); + if (slab_is_available()) { + mutex_unlock(&cache_chain_mutex); + put_online_cpus(); + } return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2433,7 +2412,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep) struct kmem_list3 *l3; int node; - on_each_cpu(do_drain, cachep, 1, 1); + on_each_cpu(do_drain, cachep, 1); check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; @@ -2523,9 +2502,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep) int ret; BUG_ON(!cachep || in_interrupt()); + get_online_cpus(); mutex_lock(&cache_chain_mutex); ret = __cache_shrink(cachep); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); return ret; } EXPORT_SYMBOL(kmem_cache_shrink); @@ -2551,6 +2532,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) BUG_ON(!cachep || in_interrupt()); /* Find the cache in the chain of caches. */ + get_online_cpus(); mutex_lock(&cache_chain_mutex); /* * the chain is never empty, cache_cache is never destroyed @@ -2560,14 +2542,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep) slab_error(cachep, "Can't free all objects"); list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); return; } if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) - synchronize_rcu(); + rcu_barrier(); __kmem_cache_destroy(cachep); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -2591,7 +2575,15 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, if (OFF_SLAB(cachep)) { /* Slab management obj is off-slab. */ slabp = kmem_cache_alloc_node(cachep->slabp_cache, - local_flags & ~GFP_THISNODE, nodeid); + local_flags, nodeid); + /* + * If the first object in the slab is leaked (it's allocated + * but no one has a reference to it), we want to make sure + * kmemleak does not treat the ->s_mem pointer as a reference + * to the object. Otherwise we will not report the leak. + */ + kmemleak_scan_area(slabp, offsetof(struct slab, list), + sizeof(struct list_head), local_flags); if (!slabp) return NULL; } else { @@ -2602,6 +2594,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, slabp->colouroff = colour_off; slabp->s_mem = objp + colour_off; slabp->nodeid = nodeid; + slabp->free = 0; return slabp; } @@ -2634,8 +2627,7 @@ static void cache_init_objs(struct kmem_cache *cachep, * They must also be threaded. */ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) - cachep->ctor(objp + obj_offset(cachep), cachep, - 0); + cachep->ctor(objp + obj_offset(cachep)); if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) @@ -2651,12 +2643,11 @@ static void cache_init_objs(struct kmem_cache *cachep, cachep->buffer_size / PAGE_SIZE, 0); #else if (cachep->ctor) - cachep->ctor(objp, cachep, 0); + cachep->ctor(objp); #endif slab_bufctl(slabp)[i] = i + 1; } slab_bufctl(slabp)[i - 1] = BUFCTL_END; - slabp->free = 0; } static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) @@ -2746,9 +2737,9 @@ static int cache_grow(struct kmem_cache *cachep, * Be lazy and only check for valid flags here, keeping it out of the * critical path in kmem_cache_alloc(). */ - BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK)); + BUG_ON(flags & GFP_SLAB_BUG_MASK); + local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); - local_flags = (flags & GFP_LEVEL_MASK); /* Take the l3 list lock to change the colour_next on this node */ check_irq_off(); l3 = cachep->nodelists[nodeid]; @@ -2779,17 +2770,16 @@ static int cache_grow(struct kmem_cache *cachep, * 'nodeid'. */ if (!objp) - objp = kmem_getpages(cachep, flags, nodeid); + objp = kmem_getpages(cachep, local_flags, nodeid); if (!objp) goto failed; /* Get slab management. */ slabp = alloc_slabmgmt(cachep, objp, offset, - local_flags & ~GFP_THISNODE, nodeid); + local_flags & ~GFP_CONSTRAINT_MASK, nodeid); if (!slabp) goto opps1; - slabp->nodeid = nodeid; slab_map_pages(cachep, slabp, objp); cache_init_objs(cachep, slabp); @@ -2858,6 +2848,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, unsigned int objnr; struct slab *slabp; + BUG_ON(virt_to_cache(objp) != cachep); + objp -= obj_offset(cachep); kfree_debugcheck(objp); page = virt_to_head_page(objp); @@ -2936,11 +2928,10 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) struct array_cache *ac; int node; - node = numa_node_id(); - +retry: check_irq_off(); + node = numa_node_id(); ac = cpu_cache_get(cachep); -retry: batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { /* @@ -2980,7 +2971,7 @@ retry: * there must be at least one object available for * allocation. */ - BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num); + BUG_ON(slabp->inuse >= cachep->num); while (slabp->inuse < cachep->num && batchcount--) { STATS_INC_ALLOCED(cachep); @@ -3076,7 +3067,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) - cachep->ctor(objp, cachep, 0); + cachep->ctor(objp); #if ARCH_SLAB_MINALIGN if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", @@ -3089,79 +3080,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif -#ifdef CONFIG_FAILSLAB - -static struct failslab_attr { - - struct fault_attr attr; - - u32 ignore_gfp_wait; -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - struct dentry *ignore_gfp_wait_file; -#endif - -} failslab = { - .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = 1, -}; - -static int __init setup_failslab(char *str) -{ - return setup_fault_attr(&failslab.attr, str); -} -__setup("failslab=", setup_failslab); - -static int should_failslab(struct kmem_cache *cachep, gfp_t flags) +static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) { if (cachep == &cache_cache) - return 0; - if (flags & __GFP_NOFAIL) - return 0; - if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT)) - return 0; - - return should_fail(&failslab.attr, obj_size(cachep)); -} - -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - -static int __init failslab_debugfs(void) -{ - mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; - struct dentry *dir; - int err; - - err = init_fault_attr_dentries(&failslab.attr, "failslab"); - if (err) - return err; - dir = failslab.attr.dentries.dir; - - failslab.ignore_gfp_wait_file = - debugfs_create_bool("ignore-gfp-wait", mode, dir, - &failslab.ignore_gfp_wait); - - if (!failslab.ignore_gfp_wait_file) { - err = -ENOMEM; - debugfs_remove(failslab.ignore_gfp_wait_file); - cleanup_fault_attr_dentries(&failslab.attr); - } + return false; - return err; + return should_failslab(obj_size(cachep), flags); } -late_initcall(failslab_debugfs); - -#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ - -#else /* CONFIG_FAILSLAB */ - -static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags) -{ - return 0; -} - -#endif /* CONFIG_FAILSLAB */ - static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; @@ -3178,6 +3104,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) STATS_INC_ALLOCMISS(cachep); objp = cache_alloc_refill(cachep, flags); } + /* + * To avoid a false negative, if an object that is in one of the + * per-CPU caches is leaked, we need to make sure kmemleak doesn't + * treat the array pointers as a reference to the object. + */ + kmemleak_erase(&ac->entry[ac->avail]); return objp; } @@ -3216,30 +3148,34 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) { struct zonelist *zonelist; gfp_t local_flags; - struct zone **z; + struct zoneref *z; + struct zone *zone; + enum zone_type high_zoneidx = gfp_zone(flags); void *obj = NULL; int nid; if (flags & __GFP_THISNODE) return NULL; - zonelist = &NODE_DATA(slab_node(current->mempolicy)) - ->node_zonelists[gfp_zone(flags)]; - local_flags = (flags & GFP_LEVEL_MASK); + zonelist = node_zonelist(slab_node(current->mempolicy), flags); + local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); retry: /* * Look through allowed nodes for objects available * from existing per node queues. */ - for (z = zonelist->zones; *z && !obj; z++) { - nid = zone_to_nid(*z); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + nid = zone_to_nid(zone); - if (cpuset_zone_allowed_hardwall(*z, flags) && + if (cpuset_zone_allowed_hardwall(zone, flags) && cache->nodelists[nid] && - cache->nodelists[nid]->free_objects) + cache->nodelists[nid]->free_objects) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); + if (obj) + break; + } } if (!obj) { @@ -3252,7 +3188,7 @@ retry: if (local_flags & __GFP_WAIT) local_irq_enable(); kmem_flagcheck(cache, flags); - obj = kmem_getpages(cache, flags, -1); + obj = kmem_getpages(cache, local_flags, numa_node_id()); if (local_flags & __GFP_WAIT) local_irq_disable(); if (obj) { @@ -3360,7 +3296,11 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, unsigned long save_flags; void *ptr; - if (should_failslab(cachep, flags)) + flags &= gfp_allowed_mask; + + lockdep_trace_alloc(flags); + + if (slab_should_failslab(cachep, flags)) return NULL; cache_alloc_debugcheck_before(cachep, flags); @@ -3391,6 +3331,11 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, + flags); + + if (likely(ptr)) + kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); if (unlikely((flags & __GFP_ZERO) && ptr)) memset(ptr, 0, obj_size(cachep)); @@ -3436,7 +3381,11 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) unsigned long save_flags; void *objp; - if (should_failslab(cachep, flags)) + flags &= gfp_allowed_mask; + + lockdep_trace_alloc(flags); + + if (slab_should_failslab(cachep, flags)) return NULL; cache_alloc_debugcheck_before(cachep, flags); @@ -3444,8 +3393,13 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) objp = __do_cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); + kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, + flags); prefetchw(objp); + if (likely(objp)) + kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); + if (unlikely((flags & __GFP_ZERO) && objp)) memset(objp, 0, obj_size(cachep)); @@ -3559,9 +3513,19 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); + kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); - if (cache_free_alien(cachep, objp)) + kmemcheck_slab_free(cachep, objp, obj_size(cachep)); + + /* + * Skip calling cache_free_alien() when the platform is not numa. + * This will avoid cache misses that happen while accessing slabp (which + * is per page memory reference) to get nodeid. Instead use a global + * variable to skip the call, which is mostly likely to be present in + * the cache. + */ + if (nr_online_nodes > 1 && cache_free_alien(cachep, objp)) return; if (likely(ac->avail < ac->limit)) { @@ -3585,17 +3549,29 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) */ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { - return __cache_alloc(cachep, flags, __builtin_return_address(0)); + void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); + + trace_kmem_cache_alloc(_RET_IP_, ret, + obj_size(cachep), cachep->buffer_size, flags); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc); +#ifdef CONFIG_KMEMTRACE +void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) +{ + return __cache_alloc(cachep, flags, __builtin_return_address(0)); +} +EXPORT_SYMBOL(kmem_cache_alloc_notrace); +#endif + /** - * kmem_ptr_validate - check if an untrusted pointer might - * be a slab entry. + * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. * @cachep: the cache we're checking against * @ptr: pointer to validate * - * This verifies that the untrusted pointer looks sane: + * This verifies that the untrusted pointer looks sane; * it is _not_ a guarantee that the pointer is actually * part of the slab cache in question, but it at least * validates that the pointer can be dereferenced and @@ -3634,23 +3610,46 @@ out: #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - return __cache_alloc_node(cachep, flags, nodeid, - __builtin_return_address(0)); + void *ret = __cache_alloc_node(cachep, flags, nodeid, + __builtin_return_address(0)); + + trace_kmem_cache_alloc_node(_RET_IP_, ret, + obj_size(cachep), cachep->buffer_size, + flags, nodeid); + + return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node); +#ifdef CONFIG_KMEMTRACE +void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid) +{ + return __cache_alloc_node(cachep, flags, nodeid, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); +#endif + static __always_inline void * __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) { struct kmem_cache *cachep; + void *ret; cachep = kmem_find_general_cachep(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - return kmem_cache_alloc_node(cachep, flags, node); + ret = kmem_cache_alloc_node_notrace(cachep, flags, node); + + trace_kmalloc_node((unsigned long) caller, ret, + size, cachep->buffer_size, flags, node); + + return ret; } -#ifdef CONFIG_DEBUG_SLAB +#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE) void *__kmalloc_node(size_t size, gfp_t flags, int node) { return __do_kmalloc_node(size, flags, node, @@ -3659,9 +3658,9 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(__kmalloc_node); void *__kmalloc_node_track_caller(size_t size, gfp_t flags, - int node, void *caller) + int node, unsigned long caller) { - return __do_kmalloc_node(size, flags, node, caller); + return __do_kmalloc_node(size, flags, node, (void *)caller); } EXPORT_SYMBOL(__kmalloc_node_track_caller); #else @@ -3683,6 +3682,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, void *caller) { struct kmem_cache *cachep; + void *ret; /* If you want to save a few bytes .text space: replace * __ with kmem_. @@ -3692,20 +3692,25 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, cachep = __find_general_cachep(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - return __cache_alloc(cachep, flags, caller); + ret = __cache_alloc(cachep, flags, caller); + + trace_kmalloc((unsigned long) caller, ret, + size, cachep->buffer_size, flags); + + return ret; } -#ifdef CONFIG_DEBUG_SLAB +#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE) void *__kmalloc(size_t size, gfp_t flags) { return __do_kmalloc(size, flags, __builtin_return_address(0)); } EXPORT_SYMBOL(__kmalloc); -void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) +void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) { - return __do_kmalloc(size, flags, caller); + return __do_kmalloc(size, flags, (void *)caller); } EXPORT_SYMBOL(__kmalloc_track_caller); @@ -3729,12 +3734,14 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) { unsigned long flags; - BUG_ON(virt_to_cache(objp) != cachep); - local_irq_save(flags); debug_check_no_locks_freed(objp, obj_size(cachep)); + if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(objp, obj_size(cachep)); __cache_free(cachep, objp); local_irq_restore(flags); + + trace_kmem_cache_free(_RET_IP_, objp); } EXPORT_SYMBOL(kmem_cache_free); @@ -3752,12 +3759,15 @@ void kfree(const void *objp) struct kmem_cache *c; unsigned long flags; + trace_kfree(_RET_IP_, objp); + if (unlikely(ZERO_OR_NULL_PTR(objp))) return; local_irq_save(flags); kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); + debug_check_no_obj_freed(objp, obj_size(c)); __cache_free(c, (void *)objp); local_irq_restore(flags); } @@ -3776,9 +3786,9 @@ const char *kmem_cache_name(struct kmem_cache *cachep) EXPORT_SYMBOL_GPL(kmem_cache_name); /* - * This initializes kmem_list3 or resizes varioius caches for all nodes. + * This initializes kmem_list3 or resizes various caches for all nodes. */ -static int alloc_kmemlist(struct kmem_cache *cachep) +static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { int node; struct kmem_list3 *l3; @@ -3788,7 +3798,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) for_each_online_node(node) { if (use_alien_caches) { - new_alien = alloc_alien_cache(node, cachep->limit); + new_alien = alloc_alien_cache(node, cachep->limit, gfp); if (!new_alien) goto fail; } @@ -3797,7 +3807,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) if (cachep->shared) { new_shared = alloc_arraycache(node, cachep->shared*cachep->batchcount, - 0xbaadf00d); + 0xbaadf00d, gfp); if (!new_shared) { free_alien_cache(new_alien); goto fail; @@ -3826,7 +3836,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); + l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); if (!l3) { free_alien_cache(new_alien); kfree(new_shared); @@ -3882,18 +3892,18 @@ static void do_ccupdate_local(void *info) /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, - int batchcount, int shared) + int batchcount, int shared, gfp_t gfp) { struct ccupdate_struct *new; int i; - new = kzalloc(sizeof(*new), GFP_KERNEL); + new = kzalloc(sizeof(*new), gfp); if (!new) return -ENOMEM; for_each_online_cpu(i) { new->new[i] = alloc_arraycache(cpu_to_node(i), limit, - batchcount); + batchcount, gfp); if (!new->new[i]) { for (i--; i >= 0; i--) kfree(new->new[i]); @@ -3903,7 +3913,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, } new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); + on_each_cpu(do_ccupdate_local, (void *)new, 1); check_irq_on(); cachep->batchcount = batchcount; @@ -3920,11 +3930,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, kfree(ccold); } kfree(new); - return alloc_kmemlist(cachep); + return alloc_kmemlist(cachep, gfp); } /* Called with cache_chain_mutex held always */ -static int enable_cpucache(struct kmem_cache *cachep) +static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) { int err; int limit, shared; @@ -3970,7 +3980,7 @@ static int enable_cpucache(struct kmem_cache *cachep) if (limit > 32) limit = 32; #endif - err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); + err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp); if (err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", cachep->name, -err); @@ -4023,8 +4033,7 @@ static void cache_reap(struct work_struct *w) struct kmem_cache *searchp; struct kmem_list3 *l3; int node = numa_node_id(); - struct delayed_work *work = - container_of(w, struct delayed_work, work); + struct delayed_work *work = to_delayed_work(w); if (!mutex_trylock(&cache_chain_mutex)) /* Give up. Setup the next iteration. */ @@ -4075,7 +4084,7 @@ out: schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); } -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_SLABINFO static void print_slabinfo_header(struct seq_file *m) { @@ -4231,7 +4240,7 @@ static int s_show(struct seq_file *m, void *p) * + further values on SMP and with statistics enabled */ -const struct seq_operations slabinfo_op = { +static const struct seq_operations slabinfo_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -4277,7 +4286,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, res = 0; } else { res = do_tune_cpucache(cachep, limit, - batchcount, shared); + batchcount, shared, + GFP_KERNEL); } break; } @@ -4288,6 +4298,19 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, return res; } +static int slabinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slabinfo_op); +} + +static const struct file_operations proc_slabinfo_operations = { + .open = slabinfo_open, + .read = seq_read, + .write = slabinfo_write, + .llseek = seq_lseek, + .release = seq_release, +}; + #ifdef CONFIG_DEBUG_SLAB_LEAK static void *leaks_start(struct seq_file *m, loff_t *pos) @@ -4416,13 +4439,47 @@ static int leaks_show(struct seq_file *m, void *p) return 0; } -const struct seq_operations slabstats_op = { +static const struct seq_operations slabstats_op = { .start = leaks_start, .next = s_next, .stop = s_stop, .show = leaks_show, }; + +static int slabstats_open(struct inode *inode, struct file *file) +{ + unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); + int ret = -ENOMEM; + if (n) { + ret = seq_open(file, &slabstats_op); + if (!ret) { + struct seq_file *m = file->private_data; + *n = PAGE_SIZE / (2 * sizeof(unsigned long)); + m->private = n; + n = NULL; + } + kfree(n); + } + return ret; +} + +static const struct file_operations proc_slabstats_operations = { + .open = slabstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; +#endif + +static int __init slab_proc_init(void) +{ + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); +#ifdef CONFIG_DEBUG_SLAB_LEAK + proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); #endif + return 0; +} +module_init(slab_proc_init); #endif /** @@ -4439,8 +4496,10 @@ const struct seq_operations slabstats_op = { */ size_t ksize(const void *objp) { - if (unlikely(ZERO_OR_NULL_PTR(objp))) + BUG_ON(!objp); + if (unlikely(objp == ZERO_SIZE_PTR)) return 0; return obj_size(virt_to_cache(objp)); } +EXPORT_SYMBOL(ksize);