*/
#include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
+#include <linux/kmemleak.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
* Set of flags that will prevent slab merging
*/
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
- SLAB_TRACE | SLAB_DESTROY_BY_RCU)
+ SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
SLAB_CACHE_DMA)
SYSFS /* Sysfs up */
} slab_state = DOWN;
+/*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);
static void set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr)
{
- struct track *p;
-
- if (s->offset)
- p = object + s->offset + sizeof(void *);
- else
- p = object + s->inuse;
+ struct track *p = get_track(s, object, alloc);
- p += alloc;
if (addr) {
p->addr = addr;
p->cpu = smp_processor_id();
__ClearPageSlab(page);
reset_page_mapcount(page);
+ if (current->reclaim_state)
+ current->reclaim_state->reclaimed_slab += pages;
__free_pages(page, order);
}
n = get_node(s, zone_to_nid(zone));
if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
- n->nr_partial > n->min_partial) {
+ n->nr_partial > s->min_partial) {
page = get_partial_node(n);
if (page)
return page;
slab_unlock(page);
} else {
stat(c, DEACTIVATE_EMPTY);
- if (n->nr_partial < n->min_partial) {
+ if (n->nr_partial < s->min_partial) {
/*
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
unsigned long flags;
unsigned int objsize;
+ gfpflags &= slab_gfp_mask;
+
lockdep_trace_alloc(gfpflags);
might_sleep_if(gfpflags & __GFP_WAIT);
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, objsize);
+ kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
return object;
}
{
void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
- kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
- s->objsize, s->size, gfpflags);
+ trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
return ret;
}
{
void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
- s->objsize, s->size, gfpflags, node);
+ trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ s->objsize, s->size, gfpflags, node);
return ret;
}
struct kmem_cache_cpu *c;
unsigned long flags;
+ kmemleak_free_recursive(x, s->flags);
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
debug_check_no_locks_freed(object, c->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(object, s->objsize);
+ debug_check_no_obj_freed(object, c->objsize);
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
slab_free(s, page, x, _RET_IP_);
- kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
+ trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
int order;
int min_objects;
int fraction;
+ int max_objects;
/*
* Attempt to find best configuration for a slab. This
min_objects = slub_min_objects;
if (!min_objects)
min_objects = 4 * (fls(nr_cpu_ids) + 1);
+ max_objects = (PAGE_SIZE << slub_max_order)/size;
+ min_objects = min(min_objects, max_objects);
+
while (min_objects > 1) {
fraction = 16;
while (fraction >= 4) {
return order;
fraction /= 2;
}
- min_objects /= 2;
+ min_objects --;
}
/*
* Doh this slab cannot be placed using slub_max_order.
*/
order = slab_order(size, 1, MAX_ORDER, 1);
- if (order <= MAX_ORDER)
+ if (order < MAX_ORDER)
return order;
return -ENOSYS;
}
init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
{
n->nr_partial = 0;
-
- /*
- * The larger the object size is, the more pages we want on the partial
- * list to avoid pounding the page allocator excessively.
- */
- n->min_partial = ilog2(s->size);
- if (n->min_partial < MIN_PARTIAL)
- n->min_partial = MIN_PARTIAL;
- else if (n->min_partial > MAX_PARTIAL)
- n->min_partial = MAX_PARTIAL;
-
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
}
#endif
+static void set_min_partial(struct kmem_cache *s, unsigned long min)
+{
+ if (min < MIN_PARTIAL)
+ min = MIN_PARTIAL;
+ else if (min > MAX_PARTIAL)
+ min = MAX_PARTIAL;
+ s->min_partial = min;
+}
+
/*
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
if (!calculate_sizes(s, -1))
goto error;
+ /*
+ * The larger the object size is, the more pages we want on the partial
+ * list to avoid pounding the page allocator excessively.
+ */
+ set_min_partial(s, ilog2(s->size));
s->refcount = 1;
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
static int __init setup_slub_max_order(char *str)
{
get_option(&str, &slub_max_order);
+ slub_max_order = min(slub_max_order, MAX_ORDER - 1);
return 1;
}
if (gfp_flags & SLUB_DMA)
flags = SLAB_CACHE_DMA;
- down_write(&slub_lock);
+ /*
+ * This function is called with IRQs disabled during early-boot on
+ * single CPU so there's no need to take slub_lock here.
+ */
if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
flags, NULL))
goto panic;
list_add(&s->list, &slab_caches);
- up_write(&slub_lock);
+
if (sysfs_slab_add(s))
goto panic;
return s;
ret = slab_alloc(s, flags, -1, _RET_IP_);
- kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
- size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
return ret;
}
if (unlikely(size > SLUB_MAX_SIZE)) {
ret = kmalloc_large_node(size, flags, node);
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
- _RET_IP_, ret,
- size, PAGE_SIZE << get_order(size),
- flags, node);
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, PAGE_SIZE << get_order(size),
+ flags, node);
return ret;
}
ret = slab_alloc(s, flags, node, _RET_IP_);
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
- size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
return ret;
}
struct page *page;
void *object = (void *)x;
+ trace_kfree(_RET_IP_, x);
+
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
return;
}
slab_free(page->slab, page, object, _RET_IP_);
-
- kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
}
EXPORT_SYMBOL(kfree);
* kmem_cache_open for slab_state == DOWN.
*/
create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
- sizeof(struct kmem_cache_node), GFP_KERNEL);
+ sizeof(struct kmem_cache_node), GFP_NOWAIT);
kmalloc_caches[0].refcount = -1;
caches++;
/* Caches that are not of the two-to-the-power-of size */
if (KMALLOC_MIN_SIZE <= 64) {
create_kmalloc_cache(&kmalloc_caches[1],
- "kmalloc-96", 96, GFP_KERNEL);
+ "kmalloc-96", 96, GFP_NOWAIT);
caches++;
create_kmalloc_cache(&kmalloc_caches[2],
- "kmalloc-192", 192, GFP_KERNEL);
+ "kmalloc-192", 192, GFP_NOWAIT);
caches++;
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
create_kmalloc_cache(&kmalloc_caches[i],
- "kmalloc", 1 << i, GFP_KERNEL);
+ "kmalloc", 1 << i, GFP_NOWAIT);
caches++;
}
/* Provide the correct kmalloc names now that the caches are up */
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
kmalloc_caches[i]. name =
- kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
+ kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
nr_cpu_ids, nr_node_ids);
}
+void __init kmem_cache_init_late(void)
+{
+ /*
+ * Interrupts are enabled now so all GFP allocations are safe.
+ */
+ slab_gfp_mask = __GFP_BITS_MASK;
+}
+
/*
* Find a mergeable slab cache
*/
ret = slab_alloc(s, gfpflags, -1, caller);
/* Honor the call site pointer we recieved. */
- kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
- s->size, gfpflags);
+ trace_kmalloc(caller, ret, size, s->size, gfpflags);
return ret;
}
ret = slab_alloc(s, gfpflags, node, caller);
/* Honor the call site pointer we recieved. */
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
- size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
return ret;
}
to_cpumask(l->cpus));
}
- if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
+ if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
len < PAGE_SIZE - 60) {
len += sprintf(buf + len, " nodes=");
len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
}
SLAB_ATTR(order);
+static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
+{
+ return sprintf(buf, "%lu\n", s->min_partial);
+}
+
+static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
+ size_t length)
+{
+ unsigned long min;
+ int err;
+
+ err = strict_strtoul(buf, 10, &min);
+ if (err)
+ return err;
+
+ set_min_partial(s, min);
+ return length;
+}
+SLAB_ATTR(min_partial);
+
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
if (s->ctor) {
&object_size_attr.attr,
&objs_per_slab_attr.attr,
&order_attr.attr,
+ &min_partial_attr.attr,
&objects_attr.attr,
&objects_partial_attr.attr,
&total_objects_attr.attr,