/*
* Virtual Memory Map support
*
- * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
+ * (C) 2007 sgi. Christoph Lameter.
*
* Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
* virt_to_page, page_address() to be implemented as a base offset
* case the overhead consists of a few additional pages that are
* allocated to create a view of memory for vmemmap.
*
- * Special Kconfig settings:
- *
- * CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP
- *
- * The architecture has its own functions to populate the memory
- * map and provides a vmemmap_populate function.
- *
- * CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD
- *
- * The architecture provides functions to populate the pmd level
- * of the vmemmap mappings. Allowing mappings using large pages
- * where available.
- *
- * If neither are set then PAGE_SIZE mappings are generated which
- * require one PTE/TLB per PAGE_SIZE chunk of the virtual memory map.
+ * The architecture is expected to provide a vmemmap_populate() function
+ * to instantiate the mapping.
*/
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
+#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
* or to back the page tables that are used to create the mapping.
* Uses the main allocators if they are available, else bootmem.
*/
+
+/*
+ * Boot-time allocation helper: forwards the request unchanged to
+ * __alloc_bootmem_node_high() for the pgdat of @node.
+ *
+ * @node:  node whose NODE_DATA() is handed to the bootmem allocator
+ * @size:  number of bytes requested
+ * @align: alignment passed through to the allocator
+ * @goal:  goal address passed through to the allocator
+ *
+ * Returns whatever __alloc_bootmem_node_high() returns.
+ *
+ * NOTE(review): the __init_refok annotation presumably lets __meminit
+ * callers reference this boot-only helper without section-mismatch
+ * warnings - confirm.
+ */
+static void * __init_refok __earlyonly_bootmem_alloc(int node,
+				unsigned long size,
+				unsigned long align,
+				unsigned long goal)
+{
+	void *mem;
+
+	mem = __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
+	return mem;
+}
+
+static void *vmemmap_buf; /* next unused byte of the early-boot vmemmap buffer; NULL while no buffer is active */
+static void *vmemmap_buf_end; /* end of that buffer (start + total size); reset to NULL together with vmemmap_buf */
+
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
/* If the main allocator is up use that, fallback to bootmem. */
if (slab_is_available()) {
- struct page *page = alloc_pages_node(node,
+ struct page *page;
+
+ if (node_state(node, N_HIGH_MEMORY)) /* node is in the N_HIGH_MEMORY state: request zeroed pages on that node */
+ page = alloc_pages_node(node,
GFP_KERNEL | __GFP_ZERO, get_order(size));
+ else /* otherwise request zeroed pages without naming a node */
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(size));
if (page)
return page_address(page);
return NULL;
} else
- return __alloc_bootmem_node(NODE_DATA(node), size, size,
+ return __earlyonly_bootmem_alloc(node, size, size, /* size is passed as both length and alignment */
__pa(MAX_DMA_ADDRESS));
}
-#ifndef CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP
+/*
+ * Allocate @size bytes for the vmemmap, preferring the early-boot buffer.
+ *
+ * While a buffer is active (vmemmap_buf is non-NULL) the cursor is rounded
+ * up to a multiple of @size and, if the chunk still fits before
+ * vmemmap_buf_end, the chunk is carved out and the cursor advanced past it.
+ * When no buffer is active, or the chunk does not fit, the request is
+ * passed on to vmemmap_alloc_block(@size, @node).
+ *
+ * Because the alignment equals @size, callers need to keep the size the
+ * same for every request made during the early stage.
+ */
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
+{
+	void *chunk;
+
+	if (vmemmap_buf) {
+		/* Try to take the chunk from the buffer. */
+		chunk = (void *)ALIGN((unsigned long)vmemmap_buf, size);
+		if (chunk + size <= vmemmap_buf_end) {
+			vmemmap_buf = chunk + size;
+			return chunk;
+		}
+	}
+
+	/* No buffer, or not enough room left in it. */
+	return vmemmap_alloc_block(size, node);
+}
+
void __meminit vmemmap_verify(pte_t *pte, int node,
unsigned long start, unsigned long end)
{
unsigned long pfn = pte_pfn(*pte);
int actual_node = early_pfn_to_nid(pfn);
- if (actual_node != node)
+ if (node_distance(actual_node, node) > LOCAL_DISTANCE) /* warn only when the backing page's node is farther than LOCAL_DISTANCE from the requested node */
printk(KERN_WARNING "[%lx-%lx] potential offnode "
"page_structs\n", start, end - 1);
}
-#ifndef CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD
-static int __meminit vmemmap_populate_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, int node)
+pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
- pte_t *pte;
-
- for (pte = pte_offset_kernel(pmd, addr); addr < end;
- pte++, addr += PAGE_SIZE)
- if (pte_none(*pte)) {
- pte_t entry;
- void *p = vmemmap_alloc_block(PAGE_SIZE, node);
- if (!p)
- return -ENOMEM;
-
- entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
- set_pte(pte, entry);
-
- } else
- vmemmap_verify(pte, node, addr + PAGE_SIZE, end);
-
- return 0;
+ pte_t *pte = pte_offset_kernel(pmd, addr); /* pte slot for addr under the given pmd */
+ if (pte_none(*pte)) { /* slot is empty: back it with one PAGE_SIZE block */
+ pte_t entry;
+ void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); /* tries the early-boot buffer first */
+ if (!p)
+ return NULL; /* allocation failed; nothing was installed */
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, pte, entry);
+ }
+ return pte; /* entry that was already present or has just been installed */
}
-int __meminit vmemmap_populate_pmd(pud_t *pud, unsigned long addr,
- unsigned long end, int node)
+pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
- pmd_t *pmd;
- int error = 0;
- unsigned long next;
-
- for (pmd = pmd_offset(pud, addr); addr < end && !error;
- pmd++, addr = next) {
- if (pmd_none(*pmd)) {
- void *p = vmemmap_alloc_block(PAGE_SIZE, node);
- if (!p)
- return -ENOMEM;
-
- pmd_populate_kernel(&init_mm, pmd, p);
- } else
- vmemmap_verify((pte_t *)pmd, node,
- pmd_addr_end(addr, end), end);
- next = pmd_addr_end(addr, end);
- error = vmemmap_populate_pte(pmd, addr, next, node);
+ pmd_t *pmd = pmd_offset(pud, addr); /* pmd entry for addr under the given pud */
+ if (pmd_none(*pmd)) { /* entry is empty: allocate one PAGE_SIZE block for the next level */
+ void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+ if (!p)
+ return NULL; /* allocation failed; entry left empty */
+ pmd_populate_kernel(&init_mm, pmd, p);
}
- return error;
+ return pmd; /* entry that was already present or has just been populated */
}
-#endif /* CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD */
-static int __meminit vmemmap_populate_pud(pgd_t *pgd, unsigned long addr,
- unsigned long end, int node)
+pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
{
- pud_t *pud;
- int error = 0;
- unsigned long next;
-
- for (pud = pud_offset(pgd, addr); addr < end && !error;
- pud++, addr = next) {
- if (pud_none(*pud)) {
- void *p = vmemmap_alloc_block(PAGE_SIZE, node);
- if (!p)
- return -ENOMEM;
+ pud_t *pud = pud_offset(pgd, addr); /* pud entry for addr under the given pgd */
+ if (pud_none(*pud)) { /* entry is empty: allocate one PAGE_SIZE block for the next level */
+ void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+ if (!p)
+ return NULL; /* allocation failed; entry left empty */
+ pud_populate(&init_mm, pud, p);
+ }
+ return pud; /* entry that was already present or has just been populated */
+}
- pud_populate(&init_mm, pud, p);
- }
- next = pud_addr_end(addr, end);
- error = vmemmap_populate_pmd(pud, addr, next, node);
+pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
+{
+ pgd_t *pgd = pgd_offset_k(addr); /* kernel pgd entry for addr */
+ if (pgd_none(*pgd)) { /* entry is empty: allocate one PAGE_SIZE block for the next level */
+ void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+ if (!p)
+ return NULL; /* allocation failed; entry left empty */
+ pgd_populate(&init_mm, pgd, p);
}
- return error;
+ return pgd; /* entry that was already present or has just been populated */
}
-int __meminit vmemmap_populate(struct page *start_page,
- unsigned long nr, int node)
+int __meminit vmemmap_populate_basepages(struct page *start_page,
+ unsigned long size, int node)
{
- pgd_t *pgd;
unsigned long addr = (unsigned long)start_page;
- unsigned long end = (unsigned long)(start_page + size);
- unsigned long next;
- int error = 0;
-
- printk(KERN_DEBUG "[%lx-%lx] Virtual memory section"
- " (%ld pages) node %d\n", addr, end - 1, nr, node);
-
- for (pgd = pgd_offset_k(addr); addr < end && !error;
- pgd++, addr = next) {
- if (pgd_none(*pgd)) {
- void *p = vmemmap_alloc_block(PAGE_SIZE, node);
- if (!p)
- return -ENOMEM;
+ unsigned long end = (unsigned long)(start_page + size); /* size counts struct page entries, not bytes */
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
- pgd_populate(&init_mm, pgd, p);
- }
- next = pgd_addr_end(addr,end);
- error = vmemmap_populate_pud(pgd, addr, next, node);
+ for (; addr < end; addr += PAGE_SIZE) { /* one PAGE_SIZE step per iteration; each level is populated on demand */
+ pgd = vmemmap_pgd_populate(addr, node);
+ if (!pgd)
+ return -ENOMEM; /* any level failing to allocate aborts the whole walk */
+ pud = vmemmap_pud_populate(pgd, addr, node);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_populate(pud, addr, node);
+ if (!pmd)
+ return -ENOMEM;
+ pte = vmemmap_pte_populate(pmd, addr, node);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); /* runs for every step, whether the pte was new or already present */
}
- return error;
+
+ return 0; /* whole range walked without an allocation failure */
}
-#endif /* !CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP */
-struct page __init *sparse_early_mem_map_populate(unsigned long pnum, int nid)
+struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
{
struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);
int error = vmemmap_populate(map, PAGES_PER_SECTION, nid);
return map;
}
+
+/*
+ * Populate the memory maps of every present section in
+ * [@pnum_begin, @pnum_end) for node @nodeid, storing each result in
+ * @map_map[section number].
+ *
+ * One bootmem buffer sized for @map_count sections (each section's map
+ * rounded up to PMD_SIZE) is requested up front and published through
+ * vmemmap_buf / vmemmap_buf_end so that vmemmap_alloc_block_buf() can
+ * carve from it.  If that request fails the sections are still populated,
+ * just without the buffer.
+ *
+ * A section whose map cannot be populated gets an error message and its
+ * section_mem_map cleared.  Before returning, whatever part of the buffer
+ * was not consumed is given back and the buffer pointers are reset.
+ */
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+					  unsigned long pnum_begin,
+					  unsigned long pnum_end,
+					  unsigned long map_count, int nodeid)
+{
+	unsigned long section_nr;
+	unsigned long map_bytes;
+	unsigned long buf_bytes;
+	void *buf_start;
+
+	/* Bytes of struct page per section, rounded up to PMD_SIZE. */
+	map_bytes = sizeof(struct page) * PAGES_PER_SECTION;
+	map_bytes = ALIGN(map_bytes, PMD_SIZE);
+	buf_bytes = map_bytes * map_count;
+
+	buf_start = __earlyonly_bootmem_alloc(nodeid, buf_bytes, PMD_SIZE,
+					      __pa(MAX_DMA_ADDRESS));
+	if (buf_start) {
+		vmemmap_buf = buf_start;
+		vmemmap_buf_end = buf_start + buf_bytes;
+	}
+
+	for (section_nr = pnum_begin; section_nr < pnum_end; section_nr++) {
+		if (!present_section_nr(section_nr))
+			continue;
+
+		map_map[section_nr] = sparse_mem_map_populate(section_nr,
+							      nodeid);
+		if (!map_map[section_nr]) {
+			struct mem_section *ms = __nr_to_section(section_nr);
+
+			printk(KERN_ERR "%s: sparsemem memory map backing failed "
+				"some memory will not be available.\n", __func__);
+			ms->section_mem_map = 0;
+		}
+	}
+
+	/* Nothing to give back if the up-front buffer was never obtained. */
+	if (!buf_start)
+		return;
+
+#ifdef CONFIG_NO_BOOTMEM
+	/*
+	 * Release the whole buffer, then re-reserve the consumed prefix
+	 * [buf_start, vmemmap_buf) if anything was taken from it.
+	 */
+	free_early(__pa(buf_start), __pa(vmemmap_buf_end));
+	if (buf_start < vmemmap_buf) {
+		char name[15];
+
+		snprintf(name, sizeof(name), "MEMMAP %d", nodeid);
+		reserve_early_without_check(__pa(buf_start),
+					    __pa(vmemmap_buf), name);
+	}
+#else
+	/* Release only the unused tail [vmemmap_buf, vmemmap_buf_end). */
+	free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
+#endif
+	vmemmap_buf = NULL;
+	vmemmap_buf_end = NULL;
+}