X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fslub.c;h=35ab38a94b46279dc018c14841452f533313702b;hb=1a781a777b2f6ac46523fe92396215762ced624d;hp=06533f342be0e4bf86abe1ca3f1b2c5539bead8a;hpb=31d33baf36bda7a2fea800648d87c9fe6155e7ca;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/slub.c b/mm/slub.c index 06533f3..35ab38a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5,7 +5,7 @@ * The allocator synchronizes using per slab locks and only * uses a centralized lock to manage a pool of partial slabs. * - * (C) 2007 SGI, Christoph Lameter + * (C) 2007 SGI, Christoph Lameter */ #include @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include /* * Lock order: @@ -149,25 +151,6 @@ static inline void ClearSlabDebug(struct page *page) /* Enable to test recovery from slab corruption on boot */ #undef SLUB_RESILIENCY_TEST -#if PAGE_SHIFT <= 12 - -/* - * Small page size. Make sure that we do not fragment memory - */ -#define DEFAULT_MAX_ORDER 1 -#define DEFAULT_MIN_OBJECTS 4 - -#else - -/* - * Large page machines are customarily able to handle larger - * page orders. - */ -#define DEFAULT_MAX_ORDER 2 -#define DEFAULT_MIN_OBJECTS 8 - -#endif - /* * Mininum number of partial slabs. These will be left on the partial * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -205,11 +188,6 @@ static inline void ClearSlabDebug(struct page *page) #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ -/* Not all arches define cache_line_size */ -#ifndef cache_line_size -#define cache_line_size() L1_CACHE_BYTES -#endif - static int kmem_size = sizeof(struct kmem_cache); #ifdef CONFIG_SMP @@ -239,7 +217,7 @@ struct track { enum track_item { TRACK_ALLOC, TRACK_FREE }; -#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) +#ifdef CONFIG_SLUB_DEBUG static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); static void sysfs_slab_remove(struct kmem_cache *); @@ -433,7 +411,7 @@ static void set_track(struct kmem_cache *s, void *object, if (addr) { p->addr = addr; p->cpu = smp_processor_id(); - p->pid = current ? current->pid : -1; + p->pid = current->pid; p->when = jiffies; } else memset(p, 0, sizeof(struct track)); @@ -453,9 +431,8 @@ static void print_track(const char *s, struct track *t) if (!t->addr) return; - printk(KERN_ERR "INFO: %s in ", s); - __print_symbol("%s", (unsigned long)t->addr); - printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid); + printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", + s, t->addr, jiffies - t->when, t->cpu, t->pid); } static void print_tracking(struct kmem_cache *s, void *object) @@ -836,7 +813,8 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) return search == NULL; } -static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) +static void trace(struct kmem_cache *s, struct page *page, void *object, + int alloc) { if (s->flags & SLAB_TRACE) { printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", @@ -1289,8 +1267,7 @@ static void add_partial(struct kmem_cache_node *n, spin_unlock(&n->list_lock); } -static void remove_partial(struct kmem_cache *s, - struct page *page) +static void remove_partial(struct kmem_cache *s, struct page *page) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); @@ -1305,7 +1282,8 @@ static void remove_partial(struct kmem_cache *s, * * Must hold list_lock. */ -static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page) +static inline int lock_and_freeze_slab(struct kmem_cache_node *n, + struct page *page) { if (slab_trylock(page)) { list_del(&page->lru); @@ -1349,7 +1327,9 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) { #ifdef CONFIG_NUMA struct zonelist *zonelist; - struct zone **z; + struct zoneref *z; + struct zone *zone; + enum zone_type high_zoneidx = gfp_zone(flags); struct page *page; /* @@ -1374,14 +1354,13 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) get_cycles() % 1024 > s->remote_node_defrag_ratio) return NULL; - zonelist = &NODE_DATA( - slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)]; - for (z = zonelist->zones; *z; z++) { + zonelist = node_zonelist(slab_node(current->mempolicy), flags); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; - n = get_node(s, zone_to_nid(*z)); + n = get_node(s, zone_to_nid(zone)); - if (n && cpuset_zone_allowed_hardwall(*z, flags) && + if (n && cpuset_zone_allowed_hardwall(zone, flags) && n->nr_partial > MIN_PARTIAL) { page = get_partial_node(n); if (page) @@ -1441,8 +1420,8 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) * so that the others get filled first. That way the * size of the partial list stays small. * - * kmem_cache_shrink can reclaim any empty slabs from the - * partial list. + * kmem_cache_shrink can reclaim any empty slabs from + * the partial list. */ add_partial(n, page, 1); slab_unlock(page); @@ -1517,7 +1496,7 @@ static void flush_cpu_slab(void *d) static void flush_all(struct kmem_cache *s) { #ifdef CONFIG_SMP - on_each_cpu(flush_cpu_slab, s, 1, 1); + on_each_cpu(flush_cpu_slab, s, 1); #else unsigned long flags; @@ -1648,9 +1627,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; + unsigned int objsize; local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); + objsize = c->objsize; if (unlikely(!c->freelist || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); @@ -1663,7 +1644,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_restore(flags); if (unlikely((gfpflags & __GFP_ZERO) && object)) - memset(object, 0, c->objsize); + memset(object, 0, objsize); return object; } @@ -1770,6 +1751,8 @@ static __always_inline void slab_free(struct kmem_cache *s, local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); debug_check_no_locks_freed(object, c->objsize); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(object, s->objsize); if (likely(page == c->page && c->node >= 0)) { object[c->offset] = c->freelist; c->freelist = object; @@ -1821,8 +1804,8 @@ static struct page *get_object_page(const void *x) * take the list_lock. */ static int slub_min_order; -static int slub_max_order = DEFAULT_MAX_ORDER; -static int slub_min_objects = DEFAULT_MIN_OBJECTS; +static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static int slub_min_objects; /* * Merge control. If this is set then no merging of slab caches will occur. @@ -1837,7 +1820,7 @@ static int slub_nomerge; * system components. Generally order 0 allocations should be preferred since * order 0 does not cause fragmentation in the page allocator. Larger objects * be problematic to put into order 0 slabs because there may be too much - * unused space left. We go to a higher order if more than 1/8th of the slab + * unused space left. We go to a higher order if more than 1/16th of the slab * would be wasted. * * In order to reach satisfactory performance we must ensure that a minimum @@ -1899,8 +1882,10 @@ static inline int calculate_order(int size) * we reduce the minimum objects required in a slab. */ min_objects = slub_min_objects; + if (!min_objects) + min_objects = 4 * (fls(nr_cpu_ids) + 1); while (min_objects > 1) { - fraction = 8; + fraction = 16; while (fraction >= 4) { order = slab_order(size, min_objects, slub_max_order, fraction); @@ -2742,9 +2727,10 @@ size_t ksize(const void *object) page = virt_to_head_page(object); - if (unlikely(!PageSlab(page))) + if (unlikely(!PageSlab(page))) { + WARN_ON(!PageCompound(page)); return PAGE_SIZE << compound_order(page); - + } s = page->slab; #ifdef CONFIG_SLUB_DEBUG @@ -2780,6 +2766,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { + BUG_ON(!PageCompound(page)); put_page(page); return; } @@ -2926,7 +2913,7 @@ static int slab_mem_going_online_callback(void *arg) return 0; /* - * We are bringing a node online. No memory is availabe yet. We must + * We are bringing a node online. No memory is available yet. We must * allocate a kmem_cache_node structure in order to bring the node * online. */ @@ -2999,7 +2986,7 @@ void __init kmem_cache_init(void) kmalloc_caches[0].refcount = -1; caches++; - hotplug_memory_notifier(slab_memory_callback, 1); + hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); #endif /* Able to allocate the per node structures */ @@ -3010,8 +2997,6 @@ void __init kmem_cache_init(void) create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_KERNEL); caches++; - } - if (KMALLOC_MIN_SIZE <= 128) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_KERNEL); caches++; @@ -3041,6 +3026,16 @@ void __init kmem_cache_init(void) for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + if (KMALLOC_MIN_SIZE == 128) { + /* + * The 192 byte sized cache is not used if the alignment + * is 128 byte. Redirect kmalloc to use the 256 byte cache + * instead. + */ + for (i = 128 + 8; i <= 192; i += 8) + size_index[(i - 1) / 8] = 8; + } + slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ @@ -3263,7 +3258,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return slab_alloc(s, gfpflags, node, caller); } -#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO) +#ifdef CONFIG_SLUB_DEBUG static unsigned long count_partial(struct kmem_cache_node *n, int (*get_count)(struct page *)) { @@ -3292,9 +3287,7 @@ static int count_free(struct page *page) { return page->objects - page->inuse; } -#endif -#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { @@ -3639,12 +3632,10 @@ static int list_locations(struct kmem_cache *s, char *buf, len += sprintf(buf + len, ""); if (l->sum_time != l->min_time) { - unsigned long remainder; - len += sprintf(buf + len, " age=%ld/%ld/%ld", - l->min_time, - div_long_long_rem(l->sum_time, l->count, &remainder), - l->max_time); + l->min_time, + (long)div_u64(l->sum_time, l->count), + l->max_time); } else len += sprintf(buf + len, " age=%ld", l->min_time); @@ -3782,7 +3773,7 @@ static int any_slab_objects(struct kmem_cache *s) if (!n) continue; - if (atomic_read(&n->total_objects)) + if (atomic_long_read(&n->total_objects)) return 1; } return 0; @@ -3831,7 +3822,12 @@ SLAB_ATTR_RO(objs_per_slab); static ssize_t order_store(struct kmem_cache *s, const char *buf, size_t length) { - int order = simple_strtoul(buf, NULL, 10); + unsigned long order; + int err; + + err = strict_strtoul(buf, 10, &order); + if (err) + return err; if (order > slub_max_order || order < slub_min_order) return -EINVAL; @@ -4084,10 +4080,16 @@ static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, const char *buf, size_t length) { - int n = simple_strtoul(buf, NULL, 10); + unsigned long ratio; + int err; + + err = strict_strtoul(buf, 10, &ratio); + if (err) + return err; + + if (ratio < 100) + s->remote_node_defrag_ratio = ratio * 10; - if (n < 100) - s->remote_node_defrag_ratio = n * 10; return length; } SLAB_ATTR(remote_node_defrag_ratio); @@ -4444,8 +4446,8 @@ __initcall(slab_sysfs_init); */ #ifdef CONFIG_SLABINFO -ssize_t slabinfo_write(struct file *file, const char __user * buffer, - size_t count, loff_t *ppos) +ssize_t slabinfo_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { return -EINVAL; }