Merge branch 'generic-ipi' into generic-ipi-for-linus

[safe/jmp/linux-2.6] / mm / slub.c
diff --git a/mm/slub.c b/mm/slub.c

index 06533f3..35ab38a 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5,7 +5,7 @@
   * The allocator synchronizes using per slab locks and only
   * uses a centralized lock to manage a pool of partial slabs.
   *
- * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
+ * (C) 2007 SGI, Christoph Lameter
   */
  
  #include <linux/mm.h>
@@ -19,8 +19,10 @@
  #include <linux/cpuset.h>
  #include <linux/mempolicy.h>
  #include <linux/ctype.h>
+#include <linux/debugobjects.h>
  #include <linux/kallsyms.h>
  #include <linux/memory.h>
+#include <linux/math64.h>
  
  /*
   * Lock order:
@@ -149,25 +151,6 @@ static inline void ClearSlabDebug(struct page *page)
  /* Enable to test recovery from slab corruption on boot */
  #undef SLUB_RESILIENCY_TEST
  
-#if PAGE_SHIFT <= 12
-
-/*
- * Small page size. Make sure that we do not fragment memory
- */
-#define DEFAULT_MAX_ORDER 1
-#define DEFAULT_MIN_OBJECTS 4
-
-#else
-
-/*
- * Large page machines are customarily able to handle larger
- * page orders.
- */
-#define DEFAULT_MAX_ORDER 2
-#define DEFAULT_MIN_OBJECTS 8
-
-#endif
-
  /*
   * Mininum number of partial slabs. These will be left on the partial
   * lists even if they are empty. kmem_cache_shrink may reclaim them.
@@ -205,11 +188,6 @@ static inline void ClearSlabDebug(struct page *page)
  #define __OBJECT_POISON                0x80000000 /* Poison object */
  #define __SYSFS_ADD_DEFERRED   0x40000000 /* Not yet visible via sysfs */
  
-/* Not all arches define cache_line_size */
-#ifndef cache_line_size
-#define cache_line_size()      L1_CACHE_BYTES
-#endif
-
  static int kmem_size = sizeof(struct kmem_cache);
  
  #ifdef CONFIG_SMP
@@ -239,7 +217,7 @@ struct track {
  
  enum track_item { TRACK_ALLOC, TRACK_FREE };
  
-#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
+#ifdef CONFIG_SLUB_DEBUG
  static int sysfs_slab_add(struct kmem_cache *);
  static int sysfs_slab_alias(struct kmem_cache *, const char *);
  static void sysfs_slab_remove(struct kmem_cache *);
@@ -433,7 +411,7 @@ static void set_track(struct kmem_cache *s, void *object,
         if (addr) {
                 p->addr = addr;
                 p->cpu = smp_processor_id();
-               p->pid = current ? current->pid : -1;
+               p->pid = current->pid;
                 p->when = jiffies;
         } else
                 memset(p, 0, sizeof(struct track));
@@ -453,9 +431,8 @@ static void print_track(const char *s, struct track *t)
         if (!t->addr)
                 return;
  
-       printk(KERN_ERR "INFO: %s in ", s);
-       __print_symbol("%s", (unsigned long)t->addr);
-       printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid);
+       printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
+               s, t->addr, jiffies - t->when, t->cpu, t->pid);
  }
  
  static void print_tracking(struct kmem_cache *s, void *object)
@@ -836,7 +813,8 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
         return search == NULL;
  }
  
-static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc)
+static void trace(struct kmem_cache *s, struct page *page, void *object,
+                                                               int alloc)
  {
         if (s->flags & SLAB_TRACE) {
                 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
@@ -1289,8 +1267,7 @@ static void add_partial(struct kmem_cache_node *n,
         spin_unlock(&n->list_lock);
  }
  
-static void remove_partial(struct kmem_cache *s,
-                                               struct page *page)
+static void remove_partial(struct kmem_cache *s, struct page *page)
  {
         struct kmem_cache_node *n = get_node(s, page_to_nid(page));
  
@@ -1305,7 +1282,8 @@ static void remove_partial(struct kmem_cache *s,
   *
   * Must hold list_lock.
   */
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
+static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
+                                                       struct page *page)
  {
         if (slab_trylock(page)) {
                 list_del(&page->lru);
@@ -1349,7 +1327,9 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
  {
  #ifdef CONFIG_NUMA
         struct zonelist *zonelist;
-       struct zone **z;
+       struct zoneref *z;
+       struct zone *zone;
+       enum zone_type high_zoneidx = gfp_zone(flags);
         struct page *page;
  
         /*
@@ -1374,14 +1354,13 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
                         get_cycles() % 1024 > s->remote_node_defrag_ratio)
                 return NULL;
  
-       zonelist = &NODE_DATA(
-               slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)];
-       for (z = zonelist->zones; *z; z++) {
+       zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
                 struct kmem_cache_node *n;
  
-               n = get_node(s, zone_to_nid(*z));
+               n = get_node(s, zone_to_nid(zone));
  
-               if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
+               if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
                                 n->nr_partial > MIN_PARTIAL) {
                         page = get_partial_node(n);
                         if (page)
@@ -1441,8 +1420,8 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
                          * so that the others get filled first. That way the
                          * size of the partial list stays small.
                          *
-                        * kmem_cache_shrink can reclaim any empty slabs from the
-                        * partial list.
+                        * kmem_cache_shrink can reclaim any empty slabs from
+                        * the partial list.
                          */
                         add_partial(n, page, 1);
                         slab_unlock(page);
@@ -1517,7 +1496,7 @@ static void flush_cpu_slab(void *d)
  static void flush_all(struct kmem_cache *s)
  {
  #ifdef CONFIG_SMP
-       on_each_cpu(flush_cpu_slab, s, 1, 1);
+       on_each_cpu(flush_cpu_slab, s, 1);
  #else
         unsigned long flags;
  
@@ -1648,9 +1627,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         void **object;
         struct kmem_cache_cpu *c;
         unsigned long flags;
+       unsigned int objsize;
  
         local_irq_save(flags);
         c = get_cpu_slab(s, smp_processor_id());
+       objsize = c->objsize;
         if (unlikely(!c->freelist || !node_match(c, node)))
  
                 object = __slab_alloc(s, gfpflags, node, addr, c);
@@ -1663,7 +1644,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         local_irq_restore(flags);
  
         if (unlikely((gfpflags & __GFP_ZERO) && object))
-               memset(object, 0, c->objsize);
+               memset(object, 0, objsize);
  
         return object;
  }
@@ -1770,6 +1751,8 @@ static __always_inline void slab_free(struct kmem_cache *s,
         local_irq_save(flags);
         c = get_cpu_slab(s, smp_processor_id());
         debug_check_no_locks_freed(object, c->objsize);
+       if (!(s->flags & SLAB_DEBUG_OBJECTS))
+               debug_check_no_obj_freed(object, s->objsize);
         if (likely(page == c->page && c->node >= 0)) {
                 object[c->offset] = c->freelist;
                 c->freelist = object;
@@ -1821,8 +1804,8 @@ static struct page *get_object_page(const void *x)
   * take the list_lock.
   */
  static int slub_min_order;
-static int slub_max_order = DEFAULT_MAX_ORDER;
-static int slub_min_objects = DEFAULT_MIN_OBJECTS;
+static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
+static int slub_min_objects;
  
  /*
   * Merge control. If this is set then no merging of slab caches will occur.
@@ -1837,7 +1820,7 @@ static int slub_nomerge;
   * system components. Generally order 0 allocations should be preferred since
   * order 0 does not cause fragmentation in the page allocator. Larger objects
   * be problematic to put into order 0 slabs because there may be too much
- * unused space left. We go to a higher order if more than 1/8th of the slab
+ * unused space left. We go to a higher order if more than 1/16th of the slab
   * would be wasted.
   *
   * In order to reach satisfactory performance we must ensure that a minimum
@@ -1899,8 +1882,10 @@ static inline int calculate_order(int size)
          * we reduce the minimum objects required in a slab.
          */
         min_objects = slub_min_objects;
+       if (!min_objects)
+               min_objects = 4 * (fls(nr_cpu_ids) + 1);
         while (min_objects > 1) {
-               fraction = 8;
+               fraction = 16;
                 while (fraction >= 4) {
                         order = slab_order(size, min_objects,
                                                 slub_max_order, fraction);
@@ -2742,9 +2727,10 @@ size_t ksize(const void *object)
  
         page = virt_to_head_page(object);
  
-       if (unlikely(!PageSlab(page)))
+       if (unlikely(!PageSlab(page))) {
+               WARN_ON(!PageCompound(page));
                 return PAGE_SIZE << compound_order(page);
-
+       }
         s = page->slab;
  
  #ifdef CONFIG_SLUB_DEBUG
@@ -2780,6 +2766,7 @@ void kfree(const void *x)
  
         page = virt_to_head_page(x);
         if (unlikely(!PageSlab(page))) {
+               BUG_ON(!PageCompound(page));
                 put_page(page);
                 return;
         }
@@ -2926,7 +2913,7 @@ static int slab_mem_going_online_callback(void *arg)
                 return 0;
  
         /*
-        * We are bringing a node online. No memory is availabe yet. We must
+        * We are bringing a node online. No memory is available yet. We must
          * allocate a kmem_cache_node structure in order to bring the node
          * online.
          */
@@ -2999,7 +2986,7 @@ void __init kmem_cache_init(void)
         kmalloc_caches[0].refcount = -1;
         caches++;
  
-       hotplug_memory_notifier(slab_memory_callback, 1);
+       hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
  #endif
  
         /* Able to allocate the per node structures */
@@ -3010,8 +2997,6 @@ void __init kmem_cache_init(void)
                 create_kmalloc_cache(&kmalloc_caches[1],
                                 "kmalloc-96", 96, GFP_KERNEL);
                 caches++;
-       }
-       if (KMALLOC_MIN_SIZE <= 128) {
                 create_kmalloc_cache(&kmalloc_caches[2],
                                 "kmalloc-192", 192, GFP_KERNEL);
                 caches++;
@@ -3041,6 +3026,16 @@ void __init kmem_cache_init(void)
         for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
                 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
  
+       if (KMALLOC_MIN_SIZE == 128) {
+               /*
+                * The 192 byte sized cache is not used if the alignment
+                * is 128 byte. Redirect kmalloc to use the 256 byte cache
+                * instead.
+                */
+               for (i = 128 + 8; i <= 192; i += 8)
+                       size_index[(i - 1) / 8] = 8;
+       }
+
         slab_state = UP;
  
         /* Provide the correct kmalloc names now that the caches are up */
@@ -3263,7 +3258,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
         return slab_alloc(s, gfpflags, node, caller);
  }
  
-#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+#ifdef CONFIG_SLUB_DEBUG
  static unsigned long count_partial(struct kmem_cache_node *n,
                                         int (*get_count)(struct page *))
  {
@@ -3292,9 +3287,7 @@ static int count_free(struct page *page)
  {
         return page->objects - page->inuse;
  }
-#endif
  
-#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
  static int validate_slab(struct kmem_cache *s, struct page *page,
                                                 unsigned long *map)
  {
@@ -3639,12 +3632,10 @@ static int list_locations(struct kmem_cache *s, char *buf,
                         len += sprintf(buf + len, "<not-available>");
  
                 if (l->sum_time != l->min_time) {
-                       unsigned long remainder;
-
                         len += sprintf(buf + len, " age=%ld/%ld/%ld",
-                       l->min_time,
-                       div_long_long_rem(l->sum_time, l->count, &remainder),
-                       l->max_time);
+                               l->min_time,
+                               (long)div_u64(l->sum_time, l->count),
+                               l->max_time);
                 } else
                         len += sprintf(buf + len, " age=%ld",
                                 l->min_time);
@@ -3782,7 +3773,7 @@ static int any_slab_objects(struct kmem_cache *s)
                 if (!n)
                         continue;
  
-               if (atomic_read(&n->total_objects))
+               if (atomic_long_read(&n->total_objects))
                         return 1;
         }
         return 0;
@@ -3831,7 +3822,12 @@ SLAB_ATTR_RO(objs_per_slab);
  static ssize_t order_store(struct kmem_cache *s,
                                 const char *buf, size_t length)
  {
-       int order = simple_strtoul(buf, NULL, 10);
+       unsigned long order;
+       int err;
+
+       err = strict_strtoul(buf, 10, &order);
+       if (err)
+               return err;
  
         if (order > slub_max_order || order < slub_min_order)
                 return -EINVAL;
@@ -4084,10 +4080,16 @@ static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
  static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
                                 const char *buf, size_t length)
  {
-       int n = simple_strtoul(buf, NULL, 10);
+       unsigned long ratio;
+       int err;
+
+       err = strict_strtoul(buf, 10, &ratio);
+       if (err)
+               return err;
+
+       if (ratio < 100)
+               s->remote_node_defrag_ratio = ratio * 10;
  
-       if (n < 100)
-               s->remote_node_defrag_ratio = n * 10;
         return length;
  }
  SLAB_ATTR(remote_node_defrag_ratio);
@@ -4444,8 +4446,8 @@ __initcall(slab_sysfs_init);
   */
  #ifdef CONFIG_SLABINFO
  
-ssize_t slabinfo_write(struct file *file, const char __user * buffer,
-                       size_t count, loff_t *ppos)
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+                      size_t count, loff_t *ppos)
  {
         return -EINVAL;
  }