nfsd: 4.1 has an rfc number

[safe/jmp/linux-2.6] / mm / slub.c
diff --git a/mm/slub.c b/mm/slub.c

index 4c64493..8d71aaf 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -21,7 +21,6 @@
  #include <linux/kmemcheck.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
-#include <linux/kmemleak.h>
  #include <linux/mempolicy.h>
  #include <linux/ctype.h>
  #include <linux/debugobjects.h>
@@ -142,6 +141,13 @@
                                 SLAB_POISON | SLAB_STORE_USER)
  
  /*
+ * Debugging flags that require metadata to be stored in the slab.  These get
+ * disabled when slub_debug=O is used and a cache's min order increases with
+ * metadata.
+ */
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+
+/*
   * Set of flags that will prevent slab merging
   */
  #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
@@ -179,12 +185,6 @@ static enum {
         SYSFS           /* Sysfs up */
  } slab_state = DOWN;
  
-/*
- * The slab allocator is initialized with interrupts disabled. Therefore, make
- * sure early boot allocations don't accidentally enable interrupts.
- */
-static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
-
  /* A list of all slab caches on the system */
  static DECLARE_RWSEM(slub_lock);
  static LIST_HEAD(slab_caches);
@@ -332,6 +332,7 @@ static int slub_debug;
  #endif
  
  static char *slub_debug_slabs;
+static int disable_higher_order_debug;
  
  /*
   * Object debugging
@@ -653,7 +654,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
         slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
         print_section("Padding", end - remainder, remainder);
  
-       restore_bytes(s, "slab padding", POISON_INUSE, start, end);
+       restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
         return 0;
  }
  
@@ -983,6 +984,15 @@ static int __init setup_slub_debug(char *str)
                  */
                 goto check_slabs;
  
+       if (tolower(*str) == 'o') {
+               /*
+                * Avoid enabling debugging on a cache if its minimum order
+                * would increase as a result.
+                */
+               disable_higher_order_debug = 1;
+               goto out;
+       }
+
         slub_debug = 0;
         if (*str == '-')
                 /*
@@ -1033,8 +1043,8 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
          * Enable debugging if selected on the kernel commandline.
          */
         if (slub_debug && (!slub_debug_slabs ||
-           strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
-                       flags |= slub_debug;
+               !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+               flags |= slub_debug;
  
         return flags;
  }
@@ -1061,6 +1071,8 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
  }
  #define slub_debug 0
  
+#define disable_higher_order_debug 0
+
  static inline unsigned long slabs_node(struct kmem_cache *s, int node)
                                                         { return 0; }
  static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
@@ -1091,11 +1103,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  {
         struct page *page;
         struct kmem_cache_order_objects oo = s->oo;
+       gfp_t alloc_gfp;
  
         flags |= s->allocflags;
  
-       page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
-                                                                       oo);
+       /*
+        * Let the initial higher-order allocation fail under memory pressure
+        * so we fall-back to the minimum order allocation.
+        */
+       alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
+
+       page = alloc_slab_page(alloc_gfp, node, oo);
         if (unlikely(!page)) {
                 oo = s->min;
                 /*
@@ -1110,8 +1128,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
         }
  
         if (kmemcheck_enabled
-               && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS)))
-       {
+               && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
                 int pages = 1 << oo_order(oo);
  
                 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
@@ -1561,6 +1578,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
                 "default order: %d, min order: %d\n", s->name, s->objsize,
                 s->size, oo_order(s->oo), oo_order(s->min));
  
+       if (oo_order(s->min) > get_order(s->objsize))
+               printk(KERN_WARNING "  %s debugging increased min order, use "
+                      "slub_debug=O to disable.\n", s->name);
+
         for_each_online_node(node) {
                 struct kmem_cache_node *n = get_node(s, node);
                 unsigned long nr_slabs;
@@ -1692,7 +1713,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         unsigned long flags;
         unsigned int objsize;
  
-       gfpflags &= slab_gfp_mask;
+       gfpflags &= gfp_allowed_mask;
  
         lockdep_trace_alloc(gfpflags);
         might_sleep_if(gfpflags & __GFP_WAIT);
@@ -1714,7 +1735,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         }
         local_irq_restore(flags);
  
-       if (unlikely((gfpflags & __GFP_ZERO) && object))
+       if (unlikely(gfpflags & __GFP_ZERO) && object)
                 memset(object, 0, objsize);
  
         kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
@@ -1733,7 +1754,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
  }
  EXPORT_SYMBOL(kmem_cache_alloc);
  
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
  void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
  {
         return slab_alloc(s, gfpflags, -1, _RET_IP_);
@@ -1754,7 +1775,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
  EXPORT_SYMBOL(kmem_cache_alloc_node);
  #endif
  
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
  void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
                                     gfp_t gfpflags,
                                     int node)
@@ -2002,7 +2023,7 @@ static inline int calculate_order(int size)
                                 return order;
                         fraction /= 2;
                 }
-               min_objects --;
+               min_objects--;
         }
  
         /*
@@ -2092,8 +2113,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
   */
  #define NR_KMEM_CACHE_CPU 100
  
-static DEFINE_PER_CPU(struct kmem_cache_cpu,
-                               kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
+                     kmem_cache_cpu);
  
  static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
  static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
@@ -2401,6 +2422,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
          * on bootup.
          */
         align = calculate_alignment(flags, align, s->objsize);
+       s->align = align;
  
         /*
          * SLUB stores one object immediately after another beginning from
@@ -2453,6 +2475,18 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
  
         if (!calculate_sizes(s, -1))
                 goto error;
+       if (disable_higher_order_debug) {
+               /*
+                * Disable debugging flags that store metadata if the min slab
+                * order increased.
+                */
+               if (get_order(s->size) > get_order(s->objsize)) {
+                       s->flags &= ~DEBUG_METADATA_FLAGS;
+                       s->offset = 0;
+                       if (!calculate_sizes(s, -1))
+                               goto error;
+               }
+       }
  
         /*
          * The larger the object size is, the more pages we want on the partial
@@ -2605,6 +2639,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
                                 "still has objects.\n", s->name, __func__);
                         dump_stack();
                 }
+               if (s->flags & SLAB_DESTROY_BY_RCU)
+                       rcu_barrier();
                 sysfs_slab_remove(s);
         } else
                 up_write(&slub_lock);
@@ -2789,6 +2825,11 @@ static s8 size_index[24] = {
         2       /* 192 */
  };
  
+static inline int size_index_elem(size_t bytes)
+{
+       return (bytes - 1) / 8;
+}
+
  static struct kmem_cache *get_slab(size_t size, gfp_t flags)
  {
         int index;
@@ -2797,7 +2838,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
                 if (!size)
                         return ZERO_SIZE_PTR;
  
-               index = size_index[(size - 1) / 8];
+               index = size_index[size_index_elem(size)];
         } else
                 index = fls(size - 1);
  
@@ -2833,13 +2874,15 @@ EXPORT_SYMBOL(__kmalloc);
  static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
  {
         struct page *page;
+       void *ptr = NULL;
  
         flags |= __GFP_COMP | __GFP_NOTRACK;
         page = alloc_pages_node(node, flags, get_order(size));
         if (page)
-               return page_address(page);
-       else
-               return NULL;
+               ptr = page_address(page);
+
+       kmemleak_alloc(ptr, size, 1, flags);
+       return ptr;
  }
  
  #ifdef CONFIG_NUMA
@@ -2924,6 +2967,7 @@ void kfree(const void *x)
         page = virt_to_head_page(x);
         if (unlikely(!PageSlab(page))) {
                 BUG_ON(!PageCompound(page));
+               kmemleak_free(x);
                 put_page(page);
                 return;
         }
@@ -3152,10 +3196,12 @@ void __init kmem_cache_init(void)
         slab_state = PARTIAL;
  
         /* Caches that are not of the two-to-the-power-of size */
-       if (KMALLOC_MIN_SIZE <= 64) {
+       if (KMALLOC_MIN_SIZE <= 32) {
                 create_kmalloc_cache(&kmalloc_caches[1],
                                 "kmalloc-96", 96, GFP_NOWAIT);
                 caches++;
+       }
+       if (KMALLOC_MIN_SIZE <= 64) {
                 create_kmalloc_cache(&kmalloc_caches[2],
                                 "kmalloc-192", 192, GFP_NOWAIT);
                 caches++;
@@ -3182,17 +3228,28 @@ void __init kmem_cache_init(void)
         BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
                 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
  
-       for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
-               size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
+       for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
+               int elem = size_index_elem(i);
+               if (elem >= ARRAY_SIZE(size_index))
+                       break;
+               size_index[elem] = KMALLOC_SHIFT_LOW;
+       }
  
-       if (KMALLOC_MIN_SIZE == 128) {
+       if (KMALLOC_MIN_SIZE == 64) {
+               /*
+                * The 96 byte size cache is not used if the alignment
+                * is 64 byte.
+                */
+               for (i = 64 + 8; i <= 96; i += 8)
+                       size_index[size_index_elem(i)] = 7;
+       } else if (KMALLOC_MIN_SIZE == 128) {
                 /*
                  * The 192 byte sized cache is not used if the alignment
                  * is 128 byte. Redirect kmalloc to use the 256 byte cache
                  * instead.
                  */
                 for (i = 128 + 8; i <= 192; i += 8)
-                       size_index[(i - 1) / 8] = 8;
+                       size_index[size_index_elem(i)] = 8;
         }
  
         slab_state = UP;
@@ -3220,10 +3277,6 @@ void __init kmem_cache_init(void)
  
  void __init kmem_cache_init_late(void)
  {
-       /*
-        * Interrupts are enabled now so all GFP allocations are safe.
-        */
-       slab_gfp_mask = __GFP_BITS_MASK;
  }
  
  /*
@@ -3292,6 +3345,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
  {
         struct kmem_cache *s;
  
+       if (WARN_ON(!name))
+               return NULL;
+
         down_write(&slub_lock);
         s = find_mergeable(size, align, flags, name, ctor);
         if (s) {
@@ -4315,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
         return len + sprintf(buf + len, "\n");
  }
  
+static void clear_stat(struct kmem_cache *s, enum stat_item si)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               get_cpu_slab(s, cpu)->stat[si] = 0;
+}
+
  #define STAT_ATTR(si, text)                                    \
  static ssize_t text##_show(struct kmem_cache *s, char *buf)    \
  {                                                              \
         return show_stat(s, buf, si);                           \
  }                                                              \
-SLAB_ATTR_RO(text);                                            \
+static ssize_t text##_store(struct kmem_cache *s,              \
+                               const char *buf, size_t length) \
+{                                                              \
+       if (buf[0] != '0')                                      \
+               return -EINVAL;                                 \
+       clear_stat(s, si);                                      \
+       return length;                                          \
+}                                                              \
+SLAB_ATTR(text);                                               \
  
  STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
  STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
@@ -4543,8 +4615,11 @@ static int sysfs_slab_add(struct kmem_cache *s)
         }
  
         err = sysfs_create_group(&s->kobj, &slab_attr_group);
-       if (err)
+       if (err) {
+               kobject_del(&s->kobj);
+               kobject_put(&s->kobj);
                 return err;
+       }
         kobject_uevent(&s->kobj, KOBJ_ADD);
         if (!unmergeable) {
                 /* Setup first alias */
@@ -4726,7 +4801,7 @@ static const struct file_operations proc_slabinfo_operations = {
  
  static int __init slab_proc_init(void)
  {
-       proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
+       proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
         return 0;
  }
  module_init(slab_proc_init);