#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
+#include <linux/fault-inject.h>
/*
* Lock order:
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
+#define OO_SHIFT 16
+#define OO_MASK ((1 << OO_SHIFT) - 1)
+#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
+
/* Internal SLUB flags */
#define __OBJECT_POISON 0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
unsigned long size)
{
struct kmem_cache_order_objects x = {
- (order << 16) + (PAGE_SIZE << order) / size
+ (order << OO_SHIFT) + (PAGE_SIZE << order) / size
};
return x;
static inline int oo_order(struct kmem_cache_order_objects x)
{
- return x.x >> 16;
+ return x.x >> OO_SHIFT;
}
static inline int oo_objects(struct kmem_cache_order_objects x)
{
- return x.x & ((1 << 16) - 1);
+ return x.x & OO_MASK;
}
#ifdef CONFIG_SLUB_DEBUG
static void set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr)
{
- struct track *p;
+ struct track *p = get_track(s, object, alloc);
- if (s->offset)
- p = object + s->offset + sizeof(void *);
- else
- p = object + s->inuse;
-
- p += alloc;
if (addr) {
p->addr = addr;
p->cpu = smp_processor_id();
if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
object_err(s, page, p, "Freepointer corrupt");
/*
- * No choice but to zap it and thus loose the remainder
+ * No choice but to zap it and thus lose the remainder
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
}
max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
- if (max_objects > 65535)
- max_objects = 65535;
+ if (max_objects > MAX_OBJS_PER_PAGE)
+ max_objects = MAX_OBJS_PER_PAGE;
if (page->objects != max_objects) {
slab_err(s, page, "Wrong number of objects. Found %d but "
n = get_node(s, zone_to_nid(zone));
if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
- n->nr_partial > n->min_partial) {
+ n->nr_partial > s->min_partial) {
page = get_partial_node(n);
if (page)
return page;
slab_unlock(page);
} else {
stat(c, DEACTIVATE_EMPTY);
- if (n->nr_partial < n->min_partial) {
+ if (n->nr_partial < s->min_partial) {
/*
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
unsigned long flags;
unsigned int objsize;
+ lockdep_trace_alloc(gfpflags);
+ might_sleep_if(gfpflags & __GFP_WAIT);
+
+ if (should_failslab(s->objsize, gfpflags))
+ return NULL;
+
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
objsize = c->objsize;
{
void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
- kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
- s->objsize, s->size, gfpflags);
+ trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
return ret;
}
{
void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
- s->objsize, s->size, gfpflags, node);
+ trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ s->objsize, s->size, gfpflags, node);
return ret;
}
c = get_cpu_slab(s, smp_processor_id());
debug_check_no_locks_freed(object, c->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(object, s->objsize);
+ debug_check_no_obj_freed(object, c->objsize);
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
slab_free(s, page, x, _RET_IP_);
- kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
+ trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
-/* Figure out on which slab object the object resides */
+/* Figure out on which slab page the object resides */
static struct page *get_object_page(const void *x)
{
struct page *page = virt_to_head_page(x);
int rem;
int min_order = slub_min_order;
- if ((PAGE_SIZE << min_order) / size > 65535)
- return get_order(size * 65535) - 1;
+ if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+ return get_order(size * MAX_OBJS_PER_PAGE) - 1;
for (order = max(min_order,
fls(min_objects * size - 1) - PAGE_SHIFT);
int order;
int min_objects;
int fraction;
+ int max_objects;
/*
* Attempt to find best configuration for a slab. This
min_objects = slub_min_objects;
if (!min_objects)
min_objects = 4 * (fls(nr_cpu_ids) + 1);
+ max_objects = (PAGE_SIZE << slub_max_order)/size;
+ min_objects = min(min_objects, max_objects);
+
while (min_objects > 1) {
fraction = 16;
while (fraction >= 4) {
return order;
fraction /= 2;
}
- min_objects /= 2;
+ min_objects --;
}
/*
init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
{
n->nr_partial = 0;
-
- /*
- * The larger the object size is, the more pages we want on the partial
- * list to avoid pounding the page allocator excessively.
- */
- n->min_partial = ilog2(s->size);
- if (n->min_partial < MIN_PARTIAL)
- n->min_partial = MIN_PARTIAL;
- else if (n->min_partial > MAX_PARTIAL)
- n->min_partial = MAX_PARTIAL;
-
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
int cpu, gfp_t flags)
static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
{
if (c < per_cpu(kmem_cache_cpu, cpu) ||
- c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+ c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
kfree(c);
return;
}
{
int i;
- if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+ if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
return;
for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
- cpu_set(cpu, kmem_cach_cpu_free_init_once);
+ cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
}
static void __init init_alloc_cpu(void)
* when allocating for the kmalloc_node_cache. This is used for bootstrapping
* memory on a fresh node that has no slab structures yet.
*/
-static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
- int node)
+static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
{
struct page *page;
struct kmem_cache_node *n;
local_irq_save(flags);
add_partial(n, page, 0);
local_irq_restore(flags);
- return n;
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
n = &s->local_node;
else {
if (slab_state == DOWN) {
- n = early_kmem_cache_node_alloc(gfpflags,
- node);
+ early_kmem_cache_node_alloc(gfpflags, node);
continue;
}
n = kmem_cache_alloc_node(kmalloc_caches,
}
#endif
+static void set_min_partial(struct kmem_cache *s, unsigned long min)
+{
+ if (min < MIN_PARTIAL)
+ min = MIN_PARTIAL;
+ else if (min > MAX_PARTIAL)
+ min = MAX_PARTIAL;
+ s->min_partial = min;
+}
+
/*
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
* Add some empty padding so that we can catch
* overwrites from earlier objects rather than let
* tracking information or the free pointer be
- * corrupted if an user writes before the start
+ * corrupted if a user writes before the start
* of the object.
*/
size += sizeof(void *);
if (!calculate_sizes(s, -1))
goto error;
+ /*
+ * The larger the object size is, the more pages we want on the partial
+ * list to avoid pounding the page allocator excessively.
+ */
+ set_min_partial(s, ilog2(s->size));
s->refcount = 1;
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
* Kmalloc subsystem
*******************************************************************/
-struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);
static int __init setup_slub_min_order(char *str)
}
#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
+static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
static void sysfs_add_func(struct work_struct *w)
{
struct kmem_cache *s;
void *ret;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large(size, flags);
s = get_slab(size, flags);
ret = slab_alloc(s, flags, -1, _RET_IP_);
- kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
- size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
return ret;
}
struct kmem_cache *s;
void *ret;
- if (unlikely(size > PAGE_SIZE)) {
+ if (unlikely(size > SLUB_MAX_SIZE)) {
ret = kmalloc_large_node(size, flags, node);
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
- _RET_IP_, ret,
- size, PAGE_SIZE << get_order(size),
- flags, node);
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, PAGE_SIZE << get_order(size),
+ flags, node);
return ret;
}
ret = slab_alloc(s, flags, node, _RET_IP_);
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
- size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
return ret;
}
*/
return s->size;
}
+EXPORT_SYMBOL(ksize);
void kfree(const void *x)
{
struct page *page;
void *object = (void *)x;
+ trace_kfree(_RET_IP_, x);
+
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
return;
}
slab_free(page->slab, page, object, _RET_IP_);
-
- kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
}
EXPORT_SYMBOL(kfree);
caches++;
}
- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
+ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
create_kmalloc_cache(&kmalloc_caches[i],
"kmalloc", 1 << i, GFP_KERNEL);
caches++;
slab_state = UP;
/* Provide the correct kmalloc names now that the caches are up */
- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
+ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
kmalloc_caches[i]. name =
kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
- if (sysfs_slab_alias(s, name))
+ if (sysfs_slab_alias(s, name)) {
+ down_write(&slub_lock);
+ s->refcount--;
+ up_write(&slub_lock);
goto err;
+ }
return s;
}
size, align, flags, ctor)) {
list_add(&s->list, &slab_caches);
up_write(&slub_lock);
- if (sysfs_slab_add(s))
+ if (sysfs_slab_add(s)) {
+ down_write(&slub_lock);
+ list_del(&s->list);
+ up_write(&slub_lock);
+ kfree(s);
goto err;
+ }
return s;
}
kfree(s);
struct kmem_cache *s;
void *ret;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large(size, gfpflags);
s = get_slab(size, gfpflags);
ret = slab_alloc(s, gfpflags, -1, caller);
/* Honor the call site pointer we recieved. */
- kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
- s->size, gfpflags);
+ trace_kmalloc(caller, ret, size, s->size, gfpflags);
return ret;
}
struct kmem_cache *s;
void *ret;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large_node(size, gfpflags, node);
s = get_slab(size, gfpflags);
ret = slab_alloc(s, gfpflags, node, caller);
/* Honor the call site pointer we recieved. */
- kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
- size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
return ret;
}
long max_time;
long min_pid;
long max_pid;
- cpumask_t cpus;
+ DECLARE_BITMAP(cpus, NR_CPUS);
nodemask_t nodes;
};
if (track->pid > l->max_pid)
l->max_pid = track->pid;
- cpu_set(track->cpu, l->cpus);
+ cpumask_set_cpu(track->cpu,
+ to_cpumask(l->cpus));
}
node_set(page_to_nid(virt_to_page(track)), l->nodes);
return 1;
l->max_time = age;
l->min_pid = track->pid;
l->max_pid = track->pid;
- cpus_clear(l->cpus);
- cpu_set(track->cpu, l->cpus);
+ cpumask_clear(to_cpumask(l->cpus));
+ cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
nodes_clear(l->nodes);
node_set(page_to_nid(virt_to_page(track)), l->nodes);
return 1;
len += sprintf(buf + len, " pid=%ld",
l->min_pid);
- if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
+ if (num_online_cpus() > 1 &&
+ !cpumask_empty(to_cpumask(l->cpus)) &&
len < PAGE_SIZE - 60) {
len += sprintf(buf + len, " cpus=");
len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
- l->cpus);
+ to_cpumask(l->cpus));
}
if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
}
SLAB_ATTR(order);
+static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
+{
+ return sprintf(buf, "%lu\n", s->min_partial);
+}
+
+static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
+ size_t length)
+{
+ unsigned long min;
+ int err;
+
+ err = strict_strtoul(buf, 10, &min);
+ if (err)
+ return err;
+
+ set_min_partial(s, min);
+ return length;
+}
+SLAB_ATTR(min_partial);
+
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
if (s->ctor) {
&object_size_attr.attr,
&objs_per_slab_attr.attr,
&order_attr.attr,
+ &min_partial_attr.attr,
&objects_attr.attr,
&objects_partial_attr.attr,
&total_objects_attr.attr,
/*
* Need to buffer aliases during bootup until sysfs becomes
- * available lest we loose that information.
+ * available lest we lose that information.
*/
struct saved_alias {
struct kmem_cache *s;