#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+
#include <asm/atomic.h>
#include <asm/uaccess.h>
return retval;
}
-static struct kset_uevent_ops memory_uevent_ops = {
+static const struct kset_uevent_ops memory_uevent_ops = {
.name = memory_uevent_name,
.uevent = memory_uevent,
};
}
EXPORT_SYMBOL(unregister_memory_notifier);
+static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
+
+int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(register_memory_isolate_notifier);
+
+void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+ atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_isolate_notifier);
+
/*
* register_memory - Setup a sysfs device for a memory block
*/
return blocking_notifier_call_chain(&memory_chain, val, v);
}
+int memory_isolate_notify(unsigned long val, void *v)
+{
+ return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
+}
+
/*
* MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
* OK to have direct references to sparsemem variables in here.
* Block size attribute stuff
*/
static ssize_t
-print_block_size(struct class *class, char *buf)
+print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
}
-static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
+static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
static int block_size_init(void)
{
return sysfs_create_file(&memory_sysdev_class.kset.kobj,
- &class_attr_block_size_bytes.attr);
+ &attr_block_size_bytes.attr);
}
/*
*/
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t
-memory_probe_store(struct class *class, const char *buf, size_t count)
+memory_probe_store(struct class *class, struct class_attribute *attr,
+ const char *buf, size_t count)
{
u64 phys_addr;
int nid;
return count;
}
-static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
static int memory_probe_init(void)
{
}
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for offlining pages of memory
+ */
+
+/* Soft offline a page */
+static ssize_t
+store_soft_offline_page(struct class *class,
+ struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ u64 pfn;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (strict_strtoull(buf, 0, &pfn) < 0)
+ return -EINVAL;
+ pfn >>= PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -ENXIO;
+ ret = soft_offline_page(pfn_to_page(pfn), 0);
+ return ret == 0 ? count : ret;
+}
+
+/* Forcibly offline a page, including killing processes. */
+static ssize_t
+store_hard_offline_page(struct class *class,
+ struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ u64 pfn;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (strict_strtoull(buf, 0, &pfn) < 0)
+ return -EINVAL;
+ pfn >>= PAGE_SHIFT;
+ ret = __memory_failure(pfn, 0, 0);
+ return ret ? ret : count;
+}
+
+static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
+static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
+
+static __init int memory_fail_init(void)
+{
+ int err;
+
+ err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_soft_offline_page.attr);
+ if (!err)
+ err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_hard_offline_page.attr);
+ return err;
+}
+#else
+static inline int memory_fail_init(void)
+{
+ return 0;
+}
+#endif
+
/*
* Note that phys_device is optional. It is here to allow for
* differentiation between which *physical* devices each
* section belongs to...
*/
+int __weak arch_get_memory_phys_device(unsigned long start_pfn)
+{
+ return 0;
+}
-static int add_memory_block(unsigned long node_id, struct mem_section *section,
- unsigned long state, int phys_device)
+static int add_memory_block(int nid, struct mem_section *section,
+ unsigned long state, enum mem_add_context context)
{
struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ unsigned long start_pfn;
int ret = 0;
if (!mem)
mem->phys_index = __section_nr(section);
mem->state = state;
mutex_init(&mem->state_mutex);
- mem->phys_device = phys_device;
+ start_pfn = section_nr_to_pfn(mem->phys_index);
+ mem->phys_device = arch_get_memory_phys_device(start_pfn);
ret = register_memory(mem, section);
if (!ret)
ret = mem_create_simple_file(mem, phys_device);
if (!ret)
ret = mem_create_simple_file(mem, removable);
+ if (!ret) {
+ if (context == HOTPLUG)
+ ret = register_mem_sect_under_node(mem, nid);
+ }
return ret;
}
*
* This could be made generic for all sysdev classes.
*/
-static struct memory_block *find_memory_block(struct mem_section *section)
+struct memory_block *find_memory_block(struct mem_section *section)
{
struct kobject *kobj;
struct sys_device *sysdev;
struct memory_block *mem;
mem = find_memory_block(section);
+ unregister_mem_sect_under_nodes(mem);
mem_remove_simple_file(mem, phys_index);
mem_remove_simple_file(mem, state);
mem_remove_simple_file(mem, phys_device);
* need an interface for the VM to add new memory regions,
* but without onlining it.
*/
-int register_new_memory(struct mem_section *section)
+int register_new_memory(int nid, struct mem_section *section)
{
- return add_memory_block(0, section, MEM_OFFLINE, 0);
+ return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
}
int unregister_memory_section(struct mem_section *section)
for (i = 0; i < NR_MEM_SECTIONS; i++) {
if (!present_section_nr(i))
continue;
- err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
+ err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
+ BOOT);
if (!ret)
ret = err;
}
err = memory_probe_init();
if (!ret)
ret = err;
+ err = memory_fail_init();
+ if (!ret)
+ ret = err;
err = block_size_init();
if (!ret)
ret = err;