#include <linux/kobject.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+
#include <asm/atomic.h>
#include <asm/uaccess.h>
#define MEMORY_CLASS_NAME "memory"
static struct sysdev_class memory_sysdev_class = {
- set_kset_name(MEMORY_CLASS_NAME),
+ .name = MEMORY_CLASS_NAME,
};
static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
return MEMORY_CLASS_NAME;
}
-static int memory_uevent(struct kset *kset, struct kobject *kobj, char **envp,
- int num_envp, char *buffer, int buffer_size)
+static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env)
{
int retval = 0;
return retval;
}
-static struct kset_uevent_ops memory_uevent_ops = {
+static const struct kset_uevent_ops memory_uevent_ops = {
.name = memory_uevent_name,
.uevent = memory_uevent,
};
-static struct notifier_block *memory_chain;
+static BLOCKING_NOTIFIER_HEAD(memory_chain);
int register_memory_notifier(struct notifier_block *nb)
{
- return notifier_chain_register(&memory_chain, nb);
+ return blocking_notifier_chain_register(&memory_chain, nb);
}
+EXPORT_SYMBOL(register_memory_notifier);
void unregister_memory_notifier(struct notifier_block *nb)
{
- notifier_chain_unregister(&memory_chain, nb);
+ blocking_notifier_chain_unregister(&memory_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_notifier);
+
+static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
+
+int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&memory_isolate_chain, nb);
}
+EXPORT_SYMBOL(register_memory_isolate_notifier);
+
+void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+ atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_isolate_notifier);
/*
* register_memory - Setup a sysfs device for a memory block
*/
-int register_memory(struct memory_block *memory, struct mem_section *section,
- struct node *root)
+static
+int register_memory(struct memory_block *memory, struct mem_section *section)
{
int error;
memory->sysdev.id = __section_nr(section);
error = sysdev_register(&memory->sysdev);
-
- if (root && !error)
- error = sysfs_create_link(&root->sysdev.kobj,
- &memory->sysdev.kobj,
- kobject_name(&memory->sysdev.kobj));
-
return error;
}
static void
-unregister_memory(struct memory_block *memory, struct mem_section *section,
- struct node *root)
+unregister_memory(struct memory_block *memory, struct mem_section *section)
{
BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
BUG_ON(memory->sysdev.id != __section_nr(section));
+ /* drop the ref. we got in remove_memory_block() */
+ kobject_put(&memory->sysdev.kobj);
sysdev_unregister(&memory->sysdev);
- if (root)
- sysfs_remove_link(&root->sysdev.kobj,
- kobject_name(&memory->sysdev.kobj));
}
/*
* uses.
*/
-static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf)
+static ssize_t show_mem_phys_index(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct memory_block *mem =
container_of(dev, struct memory_block, sysdev);
}
/*
+ * Show whether the section of memory is likely to be hot-removable
+ */
+static ssize_t show_mem_removable(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
+{
+ unsigned long start_pfn;
+ int ret;
+ struct memory_block *mem =
+ container_of(dev, struct memory_block, sysdev);
+
+ start_pfn = section_nr_to_pfn(mem->phys_index);
+ ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+ return sprintf(buf, "%d\n", ret);
+}
+
+/*
* online, offline, going offline, etc.
*/
-static ssize_t show_mem_state(struct sys_device *dev, char *buf)
+static ssize_t show_mem_state(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct memory_block *mem =
container_of(dev, struct memory_block, sysdev);
return len;
}
-static inline int memory_notify(unsigned long val, void *v)
+int memory_notify(unsigned long val, void *v)
+{
+ return blocking_notifier_call_chain(&memory_chain, val, v);
+}
+
+int memory_isolate_notify(unsigned long val, void *v)
{
- return notifier_call_chain(&memory_chain, val, v);
+ return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
}
/*
break;
case MEM_OFFLINE:
mem->state = MEM_GOING_OFFLINE;
- memory_notify(MEM_GOING_OFFLINE, NULL);
start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
ret = remove_memory(start_paddr,
PAGES_PER_SECTION << PAGE_SHIFT);
mem->state = old_state;
break;
}
- memory_notify(MEM_MAPPING_INVALID, NULL);
break;
default:
- printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
- __FUNCTION__, mem, action, action);
- WARN_ON(1);
+ WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
+ __func__, mem, action, action);
ret = -EINVAL;
}
- /*
- * For now, only notify on successful memory operations
- */
- if (!ret)
- memory_notify(action, NULL);
return ret;
}
unsigned long to_state, unsigned long from_state_req)
{
int ret = 0;
- down(&mem->state_sem);
+ mutex_lock(&mem->state_mutex);
if (mem->state != from_state_req) {
ret = -EINVAL;
mem->state = to_state;
out:
- up(&mem->state_sem);
+ mutex_unlock(&mem->state_mutex);
return ret;
}
static ssize_t
-store_mem_state(struct sys_device *dev, const char *buf, size_t count)
+store_mem_state(struct sys_device *dev,
+ struct sysdev_attribute *attr, const char *buf, size_t count)
{
struct memory_block *mem;
unsigned int phys_section_nr;
mem = container_of(dev, struct memory_block, sysdev);
phys_section_nr = mem->phys_index;
- if (!valid_section_nr(phys_section_nr))
+ if (!present_section_nr(phys_section_nr))
goto out;
if (!strncmp(buf, "online", min((int)count, 6)))
* s.t. if I offline all of these sections I can then
* remove the physical device?
*/
-static ssize_t show_phys_device(struct sys_device *dev, char *buf)
+static ssize_t show_phys_device(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct memory_block *mem =
container_of(dev, struct memory_block, sysdev);
static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
+static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
#define mem_create_simple_file(mem, attr_name) \
sysdev_create_file(&mem->sysdev, &attr_##attr_name)
* Block size attribute stuff
*/
static ssize_t
-print_block_size(struct class *class, char *buf)
+print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
}
-static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
+static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
static int block_size_init(void)
{
- sysfs_create_file(&memory_sysdev_class.kset.kobj,
- &class_attr_block_size_bytes.attr);
- return 0;
+ return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &attr_block_size_bytes.attr);
}
/*
*/
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t
-memory_probe_store(struct class *class, const char __user *buf, size_t count)
+memory_probe_store(struct class *class, struct class_attribute *attr,
+ const char *buf, size_t count)
{
u64 phys_addr;
+ int nid;
int ret;
phys_addr = simple_strtoull(buf, NULL, 0);
- ret = add_memory(phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
+ nid = memory_add_physaddr_to_nid(phys_addr);
+ ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
if (ret)
count = ret;
return count;
}
-static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
static int memory_probe_init(void)
{
- sysfs_create_file(&memory_sysdev_class.kset.kobj,
- &class_attr_probe.attr);
+ return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_probe.attr);
+}
+#else
+static inline int memory_probe_init(void)
+{
return 0;
}
+#endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for offlining pages of memory
+ */
+
+/* Soft offline a page */
+static ssize_t
+store_soft_offline_page(struct class *class,
+ struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ u64 pfn;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (strict_strtoull(buf, 0, &pfn) < 0)
+ return -EINVAL;
+ pfn >>= PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -ENXIO;
+ ret = soft_offline_page(pfn_to_page(pfn), 0);
+ return ret == 0 ? count : ret;
+}
+
+/* Forcibly offline a page, including killing processes. */
+static ssize_t
+store_hard_offline_page(struct class *class,
+ struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ u64 pfn;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (strict_strtoull(buf, 0, &pfn) < 0)
+ return -EINVAL;
+ pfn >>= PAGE_SHIFT;
+ ret = __memory_failure(pfn, 0, 0);
+ return ret ? ret : count;
+}
+
+static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
+static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
+
+static __init int memory_fail_init(void)
+{
+ int err;
+
+ err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_soft_offline_page.attr);
+ if (!err)
+ err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_hard_offline_page.attr);
+ return err;
+}
#else
-#define memory_probe_init(...) do {} while (0)
+static inline int memory_fail_init(void)
+{
+ return 0;
+}
#endif
/*
* differentiation between which *physical* devices each
* section belongs to...
*/
+int __weak arch_get_memory_phys_device(unsigned long start_pfn)
+{
+ return 0;
+}
-static int add_memory_block(unsigned long node_id, struct mem_section *section,
- unsigned long state, int phys_device)
+static int add_memory_block(int nid, struct mem_section *section,
+ unsigned long state, enum mem_add_context context)
{
struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ unsigned long start_pfn;
int ret = 0;
if (!mem)
mem->phys_index = __section_nr(section);
mem->state = state;
- init_MUTEX(&mem->state_sem);
- mem->phys_device = phys_device;
+ mutex_init(&mem->state_mutex);
+ start_pfn = section_nr_to_pfn(mem->phys_index);
+ mem->phys_device = arch_get_memory_phys_device(start_pfn);
- ret = register_memory(mem, section, NULL);
+ ret = register_memory(mem, section);
if (!ret)
ret = mem_create_simple_file(mem, phys_index);
if (!ret)
ret = mem_create_simple_file(mem, state);
if (!ret)
ret = mem_create_simple_file(mem, phys_device);
+ if (!ret)
+ ret = mem_create_simple_file(mem, removable);
+ if (!ret) {
+ if (context == HOTPLUG)
+ ret = register_mem_sect_under_node(mem, nid);
+ }
return ret;
}
*
* This could be made generic for all sysdev classes.
*/
-static struct memory_block *find_memory_block(struct mem_section *section)
+struct memory_block *find_memory_block(struct mem_section *section)
{
struct kobject *kobj;
struct sys_device *sysdev;
struct memory_block *mem;
mem = find_memory_block(section);
+ unregister_mem_sect_under_nodes(mem);
mem_remove_simple_file(mem, phys_index);
mem_remove_simple_file(mem, state);
mem_remove_simple_file(mem, phys_device);
- unregister_memory(mem, section, NULL);
+ mem_remove_simple_file(mem, removable);
+ unregister_memory(mem, section);
return 0;
}
* need an interface for the VM to add new memory regions,
* but without onlining it.
*/
-int register_new_memory(struct mem_section *section)
+int register_new_memory(int nid, struct mem_section *section)
{
- return add_memory_block(0, section, MEM_OFFLINE, 0);
+ return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
}
int unregister_memory_section(struct mem_section *section)
{
- if (!valid_section(section))
+ if (!present_section(section))
return -EINVAL;
return remove_memory_block(0, section, 0);
{
unsigned int i;
int ret;
+ int err;
memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
ret = sysdev_class_register(&memory_sysdev_class);
+ if (ret)
+ goto out;
/*
* Create entries for memory sections that were found
* during boot and have been initialized
*/
for (i = 0; i < NR_MEM_SECTIONS; i++) {
- if (!valid_section_nr(i))
+ if (!present_section_nr(i))
continue;
- add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
+ err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
+ BOOT);
+ if (!ret)
+ ret = err;
}
- memory_probe_init();
- block_size_init();
-
+ err = memory_probe_init();
+ if (!ret)
+ ret = err;
+ err = memory_fail_init();
+ if (!ret)
+ ret = err;
+ err = block_size_init();
+ if (!ret)
+ ret = err;
+out:
+ if (ret)
+ printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
return ret;
}