X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fslub.c;h=7ab54ecbd3f3a5abe50eba2c0a5dc035bf4d5efc;hb=342a437ef625953e8bdeee3bc1605ccd27a38863;hp=e27472368cc3dcf48202ebca331ca85ed6b7c8a3;hpb=2e67624c22321fa40ad3aa89c307c84bd679d9b2;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/slub.c b/mm/slub.c index e274723..7ab54ec 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ #include #include #include -#include +#include /* * Lock order: @@ -154,6 +155,10 @@ #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif +#define OO_SHIFT 16 +#define OO_MASK ((1 << OO_SHIFT) - 1) +#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ + /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ @@ -291,7 +296,7 @@ static inline struct kmem_cache_order_objects oo_make(int order, unsigned long size) { struct kmem_cache_order_objects x = { - (order << 16) + (PAGE_SIZE << order) / size + (order << OO_SHIFT) + (PAGE_SIZE << order) / size }; return x; @@ -299,12 +304,12 @@ static inline struct kmem_cache_order_objects oo_make(int order, static inline int oo_order(struct kmem_cache_order_objects x) { - return x.x >> 16; + return x.x >> OO_SHIFT; } static inline int oo_objects(struct kmem_cache_order_objects x) { - return x.x & ((1 << 16) - 1); + return x.x & OO_MASK; } #ifdef CONFIG_SLUB_DEBUG @@ -370,14 +375,8 @@ static struct track *get_track(struct kmem_cache *s, void *object, static void set_track(struct kmem_cache *s, void *object, enum track_item alloc, unsigned long addr) { - struct track *p; + struct track *p = get_track(s, object, alloc); - if (s->offset) - p = object + s->offset + sizeof(void *); - else - p = object + s->inuse; - - p += alloc; if (addr) { p->addr = addr; p->cpu = smp_processor_id(); @@ -693,7 +692,7 @@ static int check_object(struct kmem_cache *s, struct page *page, if (!check_valid_pointer(s, page, get_freepointer(s, p))) { object_err(s, page, p, "Freepointer corrupt"); /* - * No choice but to zap it and thus loose the remainder + * No choice but to zap it and thus lose the remainder * of the free objects in this slab. May cause * another error because the object count is now wrong. */ @@ -765,8 +764,8 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) } max_objects = (PAGE_SIZE << compound_order(page)) / s->size; - if (max_objects > 65535) - max_objects = 65535; + if (max_objects > MAX_OBJS_PER_PAGE) + max_objects = MAX_OBJS_PER_PAGE; if (page->objects != max_objects) { slab_err(s, page, "Wrong number of objects. Found %d but " @@ -1331,7 +1330,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) n = get_node(s, zone_to_nid(zone)); if (n && cpuset_zone_allowed_hardwall(zone, flags) && - n->nr_partial > n->min_partial) { + n->nr_partial > s->min_partial) { page = get_partial_node(n); if (page) return page; @@ -1383,7 +1382,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) slab_unlock(page); } else { stat(c, DEACTIVATE_EMPTY); - if (n->nr_partial < n->min_partial) { + if (n->nr_partial < s->min_partial) { /* * Adding an empty slab to the partial slabs in order * to avoid page allocator overhead. This slab needs @@ -1592,6 +1591,12 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, unsigned long flags; unsigned int objsize; + lockdep_trace_alloc(gfpflags); + might_sleep_if(gfpflags & __GFP_WAIT); + + if (should_failslab(s->objsize, gfpflags)) + return NULL; + local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); objsize = c->objsize; @@ -1616,8 +1621,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, - s->objsize, s->size, gfpflags); + trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); return ret; } @@ -1636,8 +1640,8 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, - s->objsize, s->size, gfpflags, node); + trace_kmem_cache_alloc_node(_RET_IP_, ret, + s->objsize, s->size, gfpflags, node); return ret; } @@ -1743,7 +1747,7 @@ static __always_inline void slab_free(struct kmem_cache *s, c = get_cpu_slab(s, smp_processor_id()); debug_check_no_locks_freed(object, c->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(object, s->objsize); + debug_check_no_obj_freed(object, c->objsize); if (likely(page == c->page && c->node >= 0)) { object[c->offset] = c->freelist; c->freelist = object; @@ -1762,11 +1766,11 @@ void kmem_cache_free(struct kmem_cache *s, void *x) slab_free(s, page, x, _RET_IP_); - kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x); + trace_kmem_cache_free(_RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); -/* Figure out on which slab object the object resides */ +/* Figure out on which slab page the object resides */ static struct page *get_object_page(const void *x) { struct page *page = virt_to_head_page(x); @@ -1838,8 +1842,8 @@ static inline int slab_order(int size, int min_objects, int rem; int min_order = slub_min_order; - if ((PAGE_SIZE << min_order) / size > 65535) - return get_order(size * 65535) - 1; + if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) + return get_order(size * MAX_OBJS_PER_PAGE) - 1; for (order = max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT); @@ -1865,6 +1869,7 @@ static inline int calculate_order(int size) int order; int min_objects; int fraction; + int max_objects; /* * Attempt to find best configuration for a slab. This @@ -1877,6 +1882,9 @@ static inline int calculate_order(int size) min_objects = slub_min_objects; if (!min_objects) min_objects = 4 * (fls(nr_cpu_ids) + 1); + max_objects = (PAGE_SIZE << slub_max_order)/size; + min_objects = min(min_objects, max_objects); + while (min_objects > 1) { fraction = 16; while (fraction >= 4) { @@ -1886,7 +1894,7 @@ static inline int calculate_order(int size) return order; fraction /= 2; } - min_objects /= 2; + min_objects --; } /* @@ -1949,17 +1957,6 @@ static void init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) { n->nr_partial = 0; - - /* - * The larger the object size is, the more pages we want on the partial - * list to avoid pounding the page allocator excessively. - */ - n->min_partial = ilog2(s->size); - if (n->min_partial < MIN_PARTIAL) - n->min_partial = MIN_PARTIAL; - else if (n->min_partial > MAX_PARTIAL) - n->min_partial = MAX_PARTIAL; - spin_lock_init(&n->list_lock); INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG @@ -1991,7 +1988,7 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu, kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); -static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE; +static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, int cpu, gfp_t flags) @@ -2017,7 +2014,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) { if (c < per_cpu(kmem_cache_cpu, cpu) || - c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { + c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { kfree(c); return; } @@ -2066,13 +2063,13 @@ static void init_alloc_cpu_cpu(int cpu) { int i; - if (cpu_isset(cpu, kmem_cach_cpu_free_init_once)) + if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) return; for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); - cpu_set(cpu, kmem_cach_cpu_free_init_once); + cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); } static void __init init_alloc_cpu(void) @@ -2104,8 +2101,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) * when allocating for the kmalloc_node_cache. This is used for bootstrapping * memory on a fresh node that has no slab structures yet. */ -static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, - int node) +static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) { struct page *page; struct kmem_cache_node *n; @@ -2143,7 +2139,6 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, local_irq_save(flags); add_partial(n, page, 0); local_irq_restore(flags); - return n; } static void free_kmem_cache_nodes(struct kmem_cache *s) @@ -2175,8 +2170,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) n = &s->local_node; else { if (slab_state == DOWN) { - n = early_kmem_cache_node_alloc(gfpflags, - node); + early_kmem_cache_node_alloc(gfpflags, node); continue; } n = kmem_cache_alloc_node(kmalloc_caches, @@ -2205,6 +2199,15 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) } #endif +static void set_min_partial(struct kmem_cache *s, unsigned long min) +{ + if (min < MIN_PARTIAL) + min = MIN_PARTIAL; + else if (min > MAX_PARTIAL) + min = MAX_PARTIAL; + s->min_partial = min; +} + /* * calculate_sizes() determines the order and the distribution of data within * a slab object. @@ -2278,7 +2281,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * Add some empty padding so that we can catch * overwrites from earlier objects rather than let * tracking information or the free pointer be - * corrupted if an user writes before the start + * corrupted if a user writes before the start * of the object. */ size += sizeof(void *); @@ -2343,6 +2346,11 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (!calculate_sizes(s, -1)) goto error; + /* + * The larger the object size is, the more pages we want on the partial + * list to avoid pounding the page allocator excessively. + */ + set_min_partial(s, ilog2(s->size)); s->refcount = 1; #ifdef CONFIG_NUMA s->remote_node_defrag_ratio = 1000; @@ -2499,7 +2507,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; +struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); static int __init setup_slub_min_order(char *str) @@ -2561,7 +2569,7 @@ panic: } #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; +static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; static void sysfs_add_func(struct work_struct *w) { @@ -2683,7 +2691,7 @@ void *__kmalloc(size_t size, gfp_t flags) struct kmem_cache *s; void *ret; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -2693,8 +2701,7 @@ void *__kmalloc(size_t size, gfp_t flags) ret = slab_alloc(s, flags, -1, _RET_IP_); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret, - size, s->size, flags); + trace_kmalloc(_RET_IP_, ret, size, s->size, flags); return ret; } @@ -2717,13 +2724,12 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) struct kmem_cache *s; void *ret; - if (unlikely(size > PAGE_SIZE)) { + if (unlikely(size > SLUB_MAX_SIZE)) { ret = kmalloc_large_node(size, flags, node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, - _RET_IP_, ret, - size, PAGE_SIZE << get_order(size), - flags, node); + trace_kmalloc_node(_RET_IP_, ret, + size, PAGE_SIZE << get_order(size), + flags, node); return ret; } @@ -2735,8 +2741,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) ret = slab_alloc(s, flags, node, _RET_IP_); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret, - size, s->size, flags, node); + trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); return ret; } @@ -2780,12 +2785,15 @@ size_t ksize(const void *object) */ return s->size; } +EXPORT_SYMBOL(ksize); void kfree(const void *x) { struct page *page; void *object = (void *)x; + trace_kfree(_RET_IP_, x); + if (unlikely(ZERO_OR_NULL_PTR(x))) return; @@ -2796,8 +2804,6 @@ void kfree(const void *x) return; } slab_free(page->slab, page, object, _RET_IP_); - - kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x); } EXPORT_SYMBOL(kfree); @@ -2984,8 +2990,10 @@ static int slab_memory_callback(struct notifier_block *self, case MEM_CANCEL_OFFLINE: break; } - - ret = notifier_from_errno(ret); + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; return ret; } @@ -3029,7 +3037,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], "kmalloc", 1 << i, GFP_KERNEL); caches++; @@ -3066,7 +3074,7 @@ void __init kmem_cache_init(void) slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) kmalloc_caches[i]. name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); @@ -3174,8 +3182,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); - if (sysfs_slab_alias(s, name)) + if (sysfs_slab_alias(s, name)) { + down_write(&slub_lock); + s->refcount--; + up_write(&slub_lock); goto err; + } return s; } @@ -3185,8 +3197,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size, align, flags, ctor)) { list_add(&s->list, &slab_caches); up_write(&slub_lock); - if (sysfs_slab_add(s)) + if (sysfs_slab_add(s)) { + down_write(&slub_lock); + list_del(&s->list); + up_write(&slub_lock); + kfree(s); goto err; + } return s; } kfree(s); @@ -3258,7 +3275,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) struct kmem_cache *s; void *ret; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3269,8 +3286,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) ret = slab_alloc(s, gfpflags, -1, caller); /* Honor the call site pointer we recieved. */ - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size, - s->size, gfpflags); + trace_kmalloc(caller, ret, size, s->size, gfpflags); return ret; } @@ -3281,7 +3297,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, struct kmem_cache *s; void *ret; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(size > SLUB_MAX_SIZE)) return kmalloc_large_node(size, gfpflags, node); s = get_slab(size, gfpflags); @@ -3292,8 +3308,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, ret = slab_alloc(s, gfpflags, node, caller); /* Honor the call site pointer we recieved. */ - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret, - size, s->size, gfpflags, node); + trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); return ret; } @@ -3500,7 +3515,7 @@ struct location { long max_time; long min_pid; long max_pid; - cpumask_t cpus; + DECLARE_BITMAP(cpus, NR_CPUS); nodemask_t nodes; }; @@ -3575,7 +3590,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, if (track->pid > l->max_pid) l->max_pid = track->pid; - cpu_set(track->cpu, l->cpus); + cpumask_set_cpu(track->cpu, + to_cpumask(l->cpus)); } node_set(page_to_nid(virt_to_page(track)), l->nodes); return 1; @@ -3605,8 +3621,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, l->max_time = age; l->min_pid = track->pid; l->max_pid = track->pid; - cpus_clear(l->cpus); - cpu_set(track->cpu, l->cpus); + cpumask_clear(to_cpumask(l->cpus)); + cpumask_set_cpu(track->cpu, to_cpumask(l->cpus)); nodes_clear(l->nodes); node_set(page_to_nid(virt_to_page(track)), l->nodes); return 1; @@ -3662,7 +3678,7 @@ static int list_locations(struct kmem_cache *s, char *buf, for (i = 0; i < t.count; i++) { struct location *l = &t.loc[i]; - if (len > PAGE_SIZE - 100) + if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) break; len += sprintf(buf + len, "%7ld ", l->count); @@ -3687,11 +3703,12 @@ static int list_locations(struct kmem_cache *s, char *buf, len += sprintf(buf + len, " pid=%ld", l->min_pid); - if (num_online_cpus() > 1 && !cpus_empty(l->cpus) && + if (num_online_cpus() > 1 && + !cpumask_empty(to_cpumask(l->cpus)) && len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - l->cpus); + to_cpumask(l->cpus)); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && @@ -3882,6 +3899,26 @@ static ssize_t order_show(struct kmem_cache *s, char *buf) } SLAB_ATTR(order); +static ssize_t min_partial_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%lu\n", s->min_partial); +} + +static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + unsigned long min; + int err; + + err = strict_strtoul(buf, 10, &min); + if (err) + return err; + + set_min_partial(s, min); + return length; +} +SLAB_ATTR(min_partial); + static ssize_t ctor_show(struct kmem_cache *s, char *buf) { if (s->ctor) { @@ -4197,6 +4234,7 @@ static struct attribute *slab_attrs[] = { &object_size_attr.attr, &objs_per_slab_attr.attr, &order_attr.attr, + &min_partial_attr.attr, &objects_attr.attr, &objects_partial_attr.attr, &total_objects_attr.attr, @@ -4410,7 +4448,7 @@ static void sysfs_slab_remove(struct kmem_cache *s) /* * Need to buffer aliases during bootup until sysfs becomes - * available lest we loose that information. + * available lest we lose that information. */ struct saved_alias { struct kmem_cache *s;