*/
#include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
+#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/kmemtrace.h>
+#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
+#include <linux/fault-inject.h>
/*
* Lock order:
SLAB_POISON | SLAB_STORE_USER)
/*
+ * Debugging flags that require metadata to be stored in the slab. These get
+ * disabled when slub_debug=O is used and a cache's min order increases with
+ * metadata.
+ */
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+
+/*
* Set of flags that will prevent slab merging
*/
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
- SLAB_TRACE | SLAB_DESTROY_BY_RCU)
+ SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
- SLAB_CACHE_DMA)
+ SLAB_CACHE_DMA | SLAB_NOTRACK)
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
+#define OO_SHIFT 16
+#define OO_MASK ((1 << OO_SHIFT) - 1)
+#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
+
/* Internal SLUB flags */
#define __OBJECT_POISON 0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
* Tracking user of a slab.
*/
struct track {
- void *addr; /* Called from address */
+ unsigned long addr; /* Called from address */
int cpu; /* Was running on cpu */
int pid; /* Pid context */
unsigned long when; /* When did the operation occur */
unsigned long size)
{
struct kmem_cache_order_objects x = {
- (order << 16) + (PAGE_SIZE << order) / size
+ (order << OO_SHIFT) + (PAGE_SIZE << order) / size
};
return x;
static inline int oo_order(struct kmem_cache_order_objects x)
{
- return x.x >> 16;
+ return x.x >> OO_SHIFT;
}
static inline int oo_objects(struct kmem_cache_order_objects x)
{
- return x.x & ((1 << 16) - 1);
+ return x.x & OO_MASK;
}
#ifdef CONFIG_SLUB_DEBUG
#endif
static char *slub_debug_slabs;
+static int disable_higher_order_debug;
/*
* Object debugging
}
static void set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, void *addr)
+ enum track_item alloc, unsigned long addr)
{
- struct track *p;
+ struct track *p = get_track(s, object, alloc);
- if (s->offset)
- p = object + s->offset + sizeof(void *);
- else
- p = object + s->inuse;
-
- p += alloc;
if (addr) {
p->addr = addr;
p->cpu = smp_processor_id();
if (!(s->flags & SLAB_STORE_USER))
return;
- set_track(s, object, TRACK_FREE, NULL);
- set_track(s, object, TRACK_ALLOC, NULL);
+ set_track(s, object, TRACK_FREE, 0UL);
+ set_track(s, object, TRACK_ALLOC, 0UL);
}
static void print_track(const char *s, struct track *t)
return;
printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
- s, t->addr, jiffies - t->when, t->cpu, t->pid);
+ s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
}
static void print_tracking(struct kmem_cache *s, void *object)
slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
print_section("Padding", end - remainder, remainder);
- restore_bytes(s, "slab padding", POISON_INUSE, start, end);
+ restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
return 0;
}
if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
object_err(s, page, p, "Freepointer corrupt");
/*
- * No choice but to zap it and thus loose the remainder
+ * No choice but to zap it and thus lose the remainder
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
}
max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
- if (max_objects > 65535)
- max_objects = 65535;
+ if (max_objects > MAX_OBJS_PER_PAGE)
+ max_objects = MAX_OBJS_PER_PAGE;
if (page->objects != max_objects) {
slab_err(s, page, "Wrong number of objects. Found %d but "
return atomic_long_read(&n->nr_slabs);
}
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+{
+ return atomic_long_read(&n->nr_slabs);
+}
+
static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
{
struct kmem_cache_node *n = get_node(s, node);
}
static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
- void *object, void *addr)
+ void *object, unsigned long addr)
{
if (!check_slab(s, page))
goto bad;
}
static int free_debug_processing(struct kmem_cache *s, struct page *page,
- void *object, void *addr)
+ void *object, unsigned long addr)
{
if (!check_slab(s, page))
goto fail;
*/
goto check_slabs;
+ if (tolower(*str) == 'o') {
+ /*
+ * Avoid enabling debugging on caches if its minimum order
+ * would increase as a result.
+ */
+ disable_higher_order_debug = 1;
+ goto out;
+ }
+
slub_debug = 0;
if (*str == '-')
/*
* Enable debugging if selected on the kernel commandline.
*/
if (slub_debug && (!slub_debug_slabs ||
- strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
- flags |= slub_debug;
+ !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+ flags |= slub_debug;
return flags;
}
struct page *page, void *object) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, void *addr) { return 0; }
+ struct page *page, void *object, unsigned long addr) { return 0; }
static inline int free_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, void *addr) { return 0; }
+ struct page *page, void *object, unsigned long addr) { return 0; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
{ return 1; }
}
#define slub_debug 0
+#define disable_higher_order_debug 0
+
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{ return 0; }
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+ { return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
{
int order = oo_order(oo);
+ flags |= __GFP_NOTRACK;
+
if (node == -1)
return alloc_pages(flags, order);
else
{
struct page *page;
struct kmem_cache_order_objects oo = s->oo;
+ gfp_t alloc_gfp;
flags |= s->allocflags;
- page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
- oo);
+ /*
+ * Let the initial higher-order allocation fail under memory pressure
+ * so we fall-back to the minimum order allocation.
+ */
+ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
+
+ page = alloc_slab_page(alloc_gfp, node, oo);
if (unlikely(!page)) {
oo = s->min;
/*
stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
}
+
+ if (kmemcheck_enabled
+ && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
+ int pages = 1 << oo_order(oo);
+
+ kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
+
+ /*
+ * Objects from caches that have a constructor don't get
+ * cleared when they're allocated, so we need to do it here.
+ */
+ if (s->ctor)
+ kmemcheck_mark_uninitialized_pages(page, pages);
+ else
+ kmemcheck_mark_unallocated_pages(page, pages);
+ }
+
page->objects = oo_objects(oo);
mod_zone_page_state(page_zone(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
__ClearPageSlubDebug(page);
}
+ kmemcheck_free_shadow(page, compound_order(page));
+
mod_zone_page_state(page_zone(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
__ClearPageSlab(page);
reset_page_mapcount(page);
+ if (current->reclaim_state)
+ current->reclaim_state->reclaimed_slab += pages;
__free_pages(page, order);
}
n = get_node(s, zone_to_nid(zone));
if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
- n->nr_partial > MIN_PARTIAL) {
+ n->nr_partial > s->min_partial) {
page = get_partial_node(n);
if (page)
return page;
slab_unlock(page);
} else {
stat(c, DEACTIVATE_EMPTY);
- if (n->nr_partial < MIN_PARTIAL) {
+ if (n->nr_partial < s->min_partial) {
/*
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
return 1;
}
+static int count_free(struct page *page)
+{
+ return page->objects - page->inuse;
+}
+
+static unsigned long count_partial(struct kmem_cache_node *n,
+ int (*get_count)(struct page *))
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += get_count(page);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
+static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
+{
+#ifdef CONFIG_SLUB_DEBUG
+ return atomic_long_read(&n->total_objects);
+#else
+ return 0;
+#endif
+}
+
+static noinline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+{
+ int node;
+
+ printk(KERN_WARNING
+ "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+ nid, gfpflags);
+ printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
+ "default order: %d, min order: %d\n", s->name, s->objsize,
+ s->size, oo_order(s->oo), oo_order(s->min));
+
+ if (oo_order(s->min) > get_order(s->objsize))
+ printk(KERN_WARNING " %s debugging increased min order, use "
+ "slub_debug=O to disable.\n", s->name);
+
+ for_each_online_node(node) {
+ struct kmem_cache_node *n = get_node(s, node);
+ unsigned long nr_slabs;
+ unsigned long nr_objs;
+ unsigned long nr_free;
+
+ if (!n)
+ continue;
+
+ nr_free = count_partial(n, count_free);
+ nr_slabs = node_nr_slabs(n);
+ nr_objs = node_nr_objs(n);
+
+ printk(KERN_WARNING
+ " node %d: slabs: %ld, objs: %ld, free: %ld\n",
+ node, nr_slabs, nr_objs, nr_free);
+ }
+}
+
/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
* we need to allocate a new slab. This is the slowest path since it involves
* a call to the page allocator and the setup of a new slab.
*/
-static void *__slab_alloc(struct kmem_cache *s,
- gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ unsigned long addr, struct kmem_cache_cpu *c)
{
void **object;
struct page *new;
c->page = new;
goto load_freelist;
}
+ if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+ slab_out_of_memory(s, gfpflags, node);
return NULL;
debug:
if (!alloc_debug_processing(s, c->page, object, addr))
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __always_inline void *slab_alloc(struct kmem_cache *s,
- gfp_t gfpflags, int node, void *addr)
+ gfp_t gfpflags, int node, unsigned long addr)
{
void **object;
struct kmem_cache_cpu *c;
unsigned long flags;
unsigned int objsize;
+ gfpflags &= gfp_allowed_mask;
+
+ lockdep_trace_alloc(gfpflags);
+ might_sleep_if(gfpflags & __GFP_WAIT);
+
+ if (should_failslab(s->objsize, gfpflags))
+ return NULL;
+
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
objsize = c->objsize;
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, objsize);
+ kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
+ kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
+
return object;
}
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
- return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+ void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+
+ trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return slab_alloc(s, gfpflags, -1, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+ void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+ trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ s->objsize, s->size, gfpflags, node);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return slab_alloc(s, gfpflags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
/*
* Slow patch handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
- void *x, void *addr, unsigned int offset)
+ void *x, unsigned long addr, unsigned int offset)
{
void *prior;
void **object = (void *)x;
* with all sorts of special processing.
*/
static __always_inline void slab_free(struct kmem_cache *s,
- struct page *page, void *x, void *addr)
+ struct page *page, void *x, unsigned long addr)
{
void **object = (void *)x;
struct kmem_cache_cpu *c;
unsigned long flags;
+ kmemleak_free_recursive(x, s->flags);
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
+ kmemcheck_slab_free(s, object, c->objsize);
debug_check_no_locks_freed(object, c->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(object, s->objsize);
+ debug_check_no_obj_freed(object, c->objsize);
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
page = virt_to_head_page(x);
- slab_free(s, page, x, __builtin_return_address(0));
+ slab_free(s, page, x, _RET_IP_);
+
+ trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
-/* Figure out on which slab object the object resides */
+/* Figure out on which slab page the object resides */
static struct page *get_object_page(const void *x)
{
struct page *page = virt_to_head_page(x);
int rem;
int min_order = slub_min_order;
- if ((PAGE_SIZE << min_order) / size > 65535)
- return get_order(size * 65535) - 1;
+ if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+ return get_order(size * MAX_OBJS_PER_PAGE) - 1;
for (order = max(min_order,
fls(min_objects * size - 1) - PAGE_SHIFT);
int order;
int min_objects;
int fraction;
+ int max_objects;
/*
* Attempt to find best configuration for a slab. This
min_objects = slub_min_objects;
if (!min_objects)
min_objects = 4 * (fls(nr_cpu_ids) + 1);
+ max_objects = (PAGE_SIZE << slub_max_order)/size;
+ min_objects = min(min_objects, max_objects);
+
while (min_objects > 1) {
fraction = 16;
while (fraction >= 4) {
return order;
fraction /= 2;
}
- min_objects /= 2;
+ min_objects--;
}
/*
* Doh this slab cannot be placed using slub_max_order.
*/
order = slab_order(size, 1, MAX_ORDER, 1);
- if (order <= MAX_ORDER)
+ if (order < MAX_ORDER)
return order;
return -ENOSYS;
}
#endif
}
-static void init_kmem_cache_node(struct kmem_cache_node *n)
+static void
+init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
{
n->nr_partial = 0;
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
+ atomic_long_set(&n->total_objects, 0);
INIT_LIST_HEAD(&n->full);
#endif
}
*/
#define NR_KMEM_CACHE_CPU 100
-static DEFINE_PER_CPU(struct kmem_cache_cpu,
- kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
+ kmem_cache_cpu);
static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
int cpu, gfp_t flags)
static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
{
if (c < per_cpu(kmem_cache_cpu, cpu) ||
- c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+ c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
kfree(c);
return;
}
{
int i;
- if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+ if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
return;
for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
- cpu_set(cpu, kmem_cach_cpu_free_init_once);
+ cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
}
static void __init init_alloc_cpu(void)
* when allocating for the kmalloc_node_cache. This is used for bootstrapping
* memory on a fresh node that has no slab structures yet.
*/
-static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
- int node)
+static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
{
struct page *page;
struct kmem_cache_node *n;
init_object(kmalloc_caches, n, 1);
init_tracking(kmalloc_caches, n);
#endif
- init_kmem_cache_node(n);
+ init_kmem_cache_node(n, kmalloc_caches);
inc_slabs_node(kmalloc_caches, node, page->objects);
/*
local_irq_save(flags);
add_partial(n, page, 0);
local_irq_restore(flags);
- return n;
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
n = &s->local_node;
else {
if (slab_state == DOWN) {
- n = early_kmem_cache_node_alloc(gfpflags,
- node);
+ early_kmem_cache_node_alloc(gfpflags, node);
continue;
}
n = kmem_cache_alloc_node(kmalloc_caches,
}
s->node[node] = n;
- init_kmem_cache_node(n);
+ init_kmem_cache_node(n, s);
}
return 1;
}
static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
{
- init_kmem_cache_node(&s->local_node);
+ init_kmem_cache_node(&s->local_node, s);
return 1;
}
#endif
+static void set_min_partial(struct kmem_cache *s, unsigned long min)
+{
+ if (min < MIN_PARTIAL)
+ min = MIN_PARTIAL;
+ else if (min > MAX_PARTIAL)
+ min = MAX_PARTIAL;
+ s->min_partial = min;
+}
+
/*
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
* Add some empty padding so that we can catch
* overwrites from earlier objects rather than let
* tracking information or the free pointer be
- * corrupted if an user writes before the start
+ * corrupted if a user writes before the start
* of the object.
*/
size += sizeof(void *);
* on bootup.
*/
align = calculate_alignment(flags, align, s->objsize);
+ s->align = align;
/*
* SLUB stores one object immediately after another beginning from
if (!calculate_sizes(s, -1))
goto error;
+ if (disable_higher_order_debug) {
+ /*
+ * Disable debugging flags that store metadata if the min slab
+ * order increased.
+ */
+ if (get_order(s->size) > get_order(s->objsize)) {
+ s->flags &= ~DEBUG_METADATA_FLAGS;
+ s->offset = 0;
+ if (!calculate_sizes(s, -1))
+ goto error;
+ }
+ }
+ /*
+ * The larger the object size is, the more pages we want on the partial
+ * list to avoid pounding the page allocator excessively.
+ */
+ set_min_partial(s, ilog2(s->size));
s->refcount = 1;
#ifdef CONFIG_NUMA
- s->remote_node_defrag_ratio = 100;
+ s->remote_node_defrag_ratio = 1000;
#endif
if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
goto error;
"still has objects.\n", s->name, __func__);
dump_stack();
}
+ if (s->flags & SLAB_DESTROY_BY_RCU)
+ rcu_barrier();
sysfs_slab_remove(s);
} else
up_write(&slub_lock);
* Kmalloc subsystem
*******************************************************************/
-struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);
static int __init setup_slub_min_order(char *str)
static int __init setup_slub_max_order(char *str)
{
get_option(&str, &slub_max_order);
+ slub_max_order = min(slub_max_order, MAX_ORDER - 1);
return 1;
}
if (gfp_flags & SLUB_DMA)
flags = SLAB_CACHE_DMA;
- down_write(&slub_lock);
+ /*
+ * This function is called with IRQs disabled during early-boot on
+ * single CPU so there's no need to take slub_lock here.
+ */
if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
flags, NULL))
goto panic;
list_add(&s->list, &slab_caches);
- up_write(&slub_lock);
+
if (sysfs_slab_add(s))
goto panic;
return s;
}
#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
+static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
static void sysfs_add_func(struct work_struct *w)
{
struct kmem_cache *s;
char *text;
size_t realsize;
+ unsigned long slabflags;
s = kmalloc_caches_dma[index];
if (s)
(unsigned int)realsize);
s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+ /*
+ * Must defer sysfs creation to a workqueue because we don't know
+ * what context we are called from. Before sysfs comes up, we don't
+ * need to do anything because our sysfs initcall will start by
+ * adding all existing slabs to sysfs.
+ */
+ slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
+ if (slab_state >= SYSFS)
+ slabflags |= __SYSFS_ADD_DEFERRED;
+
if (!s || !text || !kmem_cache_open(s, flags, text,
- realsize, ARCH_KMALLOC_MINALIGN,
- SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
+ realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
kfree(s);
kfree(text);
goto unlock_out;
list_add(&s->list, &slab_caches);
kmalloc_caches_dma[index] = s;
- schedule_work(&sysfs_add_work);
+ if (slab_state >= SYSFS)
+ schedule_work(&sysfs_add_work);
unlock_out:
up_write(&slub_lock);
2 /* 192 */
};
+static inline int size_index_elem(size_t bytes)
+{
+ return (bytes - 1) / 8;
+}
+
static struct kmem_cache *get_slab(size_t size, gfp_t flags)
{
int index;
if (!size)
return ZERO_SIZE_PTR;
- index = size_index[(size - 1) / 8];
+ index = size_index[size_index_elem(size)];
} else
index = fls(size - 1);
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
+ void *ret;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large(size, flags);
s = get_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, -1, __builtin_return_address(0));
+ ret = slab_alloc(s, flags, -1, _RET_IP_);
+
+ trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc);
static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
- struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
- get_order(size));
+ struct page *page;
+ void *ptr = NULL;
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_pages_node(node, flags, get_order(size));
if (page)
- return page_address(page);
- else
- return NULL;
+ ptr = page_address(page);
+
+ kmemleak_alloc(ptr, size, 1, flags);
+ return ptr;
}
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
+ void *ret;
+
+ if (unlikely(size > SLUB_MAX_SIZE)) {
+ ret = kmalloc_large_node(size, flags, node);
- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large_node(size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, PAGE_SIZE << get_order(size),
+ flags, node);
+
+ return ret;
+ }
s = get_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, node, __builtin_return_address(0));
+ ret = slab_alloc(s, flags, node, _RET_IP_);
+
+ trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
*/
return s->size;
}
+EXPORT_SYMBOL(ksize);
void kfree(const void *x)
{
struct page *page;
void *object = (void *)x;
+ trace_kfree(_RET_IP_, x);
+
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
+ kmemleak_free(x);
put_page(page);
return;
}
- slab_free(page->slab, page, object, __builtin_return_address(0));
+ slab_free(page->slab, page, object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
ret = -ENOMEM;
goto out;
}
- init_kmem_cache_node(n);
+ init_kmem_cache_node(n, s);
s->node[nid] = n;
}
out:
case MEM_CANCEL_OFFLINE:
break;
}
-
- ret = notifier_from_errno(ret);
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
return ret;
}
* kmem_cache_open for slab_state == DOWN.
*/
create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
- sizeof(struct kmem_cache_node), GFP_KERNEL);
+ sizeof(struct kmem_cache_node), GFP_NOWAIT);
kmalloc_caches[0].refcount = -1;
caches++;
slab_state = PARTIAL;
/* Caches that are not of the two-to-the-power-of size */
- if (KMALLOC_MIN_SIZE <= 64) {
+ if (KMALLOC_MIN_SIZE <= 32) {
create_kmalloc_cache(&kmalloc_caches[1],
- "kmalloc-96", 96, GFP_KERNEL);
+ "kmalloc-96", 96, GFP_NOWAIT);
caches++;
+ }
+ if (KMALLOC_MIN_SIZE <= 64) {
create_kmalloc_cache(&kmalloc_caches[2],
- "kmalloc-192", 192, GFP_KERNEL);
+ "kmalloc-192", 192, GFP_NOWAIT);
caches++;
}
- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
+ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
create_kmalloc_cache(&kmalloc_caches[i],
- "kmalloc", 1 << i, GFP_KERNEL);
+ "kmalloc", 1 << i, GFP_NOWAIT);
caches++;
}
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
- for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
- size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
+ for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
+ int elem = size_index_elem(i);
+ if (elem >= ARRAY_SIZE(size_index))
+ break;
+ size_index[elem] = KMALLOC_SHIFT_LOW;
+ }
- if (KMALLOC_MIN_SIZE == 128) {
+ if (KMALLOC_MIN_SIZE == 64) {
+ /*
+ * The 96 byte size cache is not used if the alignment
+ * is 64 byte.
+ */
+ for (i = 64 + 8; i <= 96; i += 8)
+ size_index[size_index_elem(i)] = 7;
+ } else if (KMALLOC_MIN_SIZE == 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
- size_index[(i - 1) / 8] = 8;
+ size_index[size_index_elem(i)] = 8;
}
slab_state = UP;
/* Provide the correct kmalloc names now that the caches are up */
- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
+ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
kmalloc_caches[i]. name =
- kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
+ kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
nr_cpu_ids, nr_node_ids);
}
+void __init kmem_cache_init_late(void)
+{
+}
+
/*
* Find a mergeable slab cache
*/
{
struct kmem_cache *s;
+ if (WARN_ON(!name))
+ return NULL;
+
down_write(&slub_lock);
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
- if (sysfs_slab_alias(s, name))
+ if (sysfs_slab_alias(s, name)) {
+ down_write(&slub_lock);
+ s->refcount--;
+ up_write(&slub_lock);
goto err;
+ }
return s;
}
size, align, flags, ctor)) {
list_add(&s->list, &slab_caches);
up_write(&slub_lock);
- if (sysfs_slab_add(s))
+ if (sysfs_slab_add(s)) {
+ down_write(&slub_lock);
+ list_del(&s->list);
+ up_write(&slub_lock);
+ kfree(s);
goto err;
+ }
return s;
}
kfree(s);
#endif
-void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large(size, gfpflags);
s = get_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, -1, caller);
+ ret = slab_alloc(s, gfpflags, -1, caller);
+
+ /* Honor the call site pointer we recieved. */
+ trace_kmalloc(caller, ret, size, s->size, gfpflags);
+
+ return ret;
}
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
- int node, void *caller)
+ int node, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large_node(size, gfpflags, node);
s = get_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, node, caller);
-}
+ ret = slab_alloc(s, gfpflags, node, caller);
-#ifdef CONFIG_SLUB_DEBUG
-static unsigned long count_partial(struct kmem_cache_node *n,
- int (*get_count)(struct page *))
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
+ /* Honor the call site pointer we recieved. */
+ trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
+ return ret;
}
+#ifdef CONFIG_SLUB_DEBUG
static int count_inuse(struct page *page)
{
return page->inuse;
return page->objects;
}
-static int count_free(struct page *page)
-{
- return page->objects - page->inuse;
-}
-
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
struct location {
unsigned long count;
- void *addr;
+ unsigned long addr;
long long sum_time;
long min_time;
long max_time;
long min_pid;
long max_pid;
- cpumask_t cpus;
+ DECLARE_BITMAP(cpus, NR_CPUS);
nodemask_t nodes;
};
{
long start, end, pos;
struct location *l;
- void *caddr;
+ unsigned long caddr;
unsigned long age = jiffies - track->when;
start = -1;
if (track->pid > l->max_pid)
l->max_pid = track->pid;
- cpu_set(track->cpu, l->cpus);
+ cpumask_set_cpu(track->cpu,
+ to_cpumask(l->cpus));
}
node_set(page_to_nid(virt_to_page(track)), l->nodes);
return 1;
l->max_time = age;
l->min_pid = track->pid;
l->max_pid = track->pid;
- cpus_clear(l->cpus);
- cpu_set(track->cpu, l->cpus);
+ cpumask_clear(to_cpumask(l->cpus));
+ cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
nodes_clear(l->nodes);
node_set(page_to_nid(virt_to_page(track)), l->nodes);
return 1;
for (i = 0; i < t.count; i++) {
struct location *l = &t.loc[i];
- if (len > PAGE_SIZE - 100)
+ if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
break;
len += sprintf(buf + len, "%7ld ", l->count);
len += sprintf(buf + len, " pid=%ld",
l->min_pid);
- if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
+ if (num_online_cpus() > 1 &&
+ !cpumask_empty(to_cpumask(l->cpus)) &&
len < PAGE_SIZE - 60) {
len += sprintf(buf + len, " cpus=");
len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
- l->cpus);
+ to_cpumask(l->cpus));
}
- if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
+ if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
len < PAGE_SIZE - 60) {
len += sprintf(buf + len, " nodes=");
len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
}
SLAB_ATTR(order);
+static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
+{
+ return sprintf(buf, "%lu\n", s->min_partial);
+}
+
+static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
+ size_t length)
+{
+ unsigned long min;
+ int err;
+
+ err = strict_strtoul(buf, 10, &min);
+ if (err)
+ return err;
+
+ set_min_partial(s, min);
+ return length;
+}
+SLAB_ATTR(min_partial);
+
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
if (s->ctor) {
if (err)
return err;
- if (ratio < 100)
+ if (ratio <= 100)
s->remote_node_defrag_ratio = ratio * 10;
return length;
&object_size_attr.attr,
&objs_per_slab_attr.attr,
&order_attr.attr,
+ &min_partial_attr.attr,
&objects_attr.attr,
&objects_partial_attr.attr,
&total_objects_attr.attr,
*p++ = 'a';
if (s->flags & SLAB_DEBUG_FREE)
*p++ = 'F';
+ if (!(s->flags & SLAB_NOTRACK))
+ *p++ = 't';
if (p != name + 1)
*p++ = '-';
p += sprintf(p, "%07d", s->size);
}
err = sysfs_create_group(&s->kobj, &slab_attr_group);
- if (err)
+ if (err) {
+ kobject_del(&s->kobj);
+ kobject_put(&s->kobj);
return err;
+ }
kobject_uevent(&s->kobj, KOBJ_ADD);
if (!unmergeable) {
/* Setup first alias */
/*
* Need to buffer aliases during bootup until sysfs becomes
- * available lest we loose that information.
+ * available lest we lose that information.
*/
struct saved_alias {
struct kmem_cache *s;
* The /proc/slabinfo ABI
*/
#ifdef CONFIG_SLABINFO
-
-ssize_t slabinfo_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- return -EINVAL;
-}
-
-
static void print_slabinfo_header(struct seq_file *m)
{
seq_puts(m, "slabinfo - version: 2.1\n");
return 0;
}
-const struct seq_operations slabinfo_op = {
+static const struct seq_operations slabinfo_op = {
.start = s_start,
.next = s_next,
.stop = s_stop,
.show = s_show,
};
+static int slabinfo_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &slabinfo_op);
+}
+
+static const struct file_operations proc_slabinfo_operations = {
+ .open = slabinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init slab_proc_init(void)
+{
+ proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
+ return 0;
+}
+module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */