x86: zap invalid and unused pmds in early boot
[safe/jmp/linux-2.6] / arch / x86 / mm / numa_64.c
index 382377d..1aecc65 100644 (file)
@@ -31,16 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-u16 x86_cpu_to_node_map_init[NR_CPUS] __initdata = {
+int x86_cpu_to_node_map_init[NR_CPUS] = {
        [0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
 void *x86_cpu_to_node_map_early_ptr;
-EXPORT_SYMBOL(x86_cpu_to_node_map_init);
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
-DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
+DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
 
-u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
        [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
@@ -64,7 +63,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
        unsigned long addr, end;
        int i, res = -1;
 
-       memset(memnodemap, 0xff, memnodemapsize);
+       memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
        for (i = 0; i < numnodes; i++) {
                addr = nodes[i].start;
                end = nodes[i].end;
@@ -73,7 +72,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
                if ((end >> shift) >= memnodemapsize)
                        return 0;
                do {
-                       if (memnodemap[addr >> shift] != 0xff)
+                       if (memnodemap[addr >> shift] != NUMA_NO_NODE)
                                return -1;
                        memnodemap[addr >> shift] = i;
                        addr += (1UL << shift);
@@ -85,25 +84,24 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
 
 static int __init allocate_cachealigned_memnodemap(void)
 {
-       unsigned long pad, pad_addr;
+       unsigned long addr;
 
        memnodemap = memnode.embedded_map;
-       if (memnodemapsize <= 48)
+       if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
                return 0;
 
-       pad = L1_CACHE_BYTES - 1;
-       pad_addr = 0x8000;
-       nodemap_size = pad + memnodemapsize;
-       nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
-                                     nodemap_size);
+       addr = 0x8000;
+       nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
+       nodemap_addr = find_e820_area(addr, end_pfn<<PAGE_SHIFT,
+                                     nodemap_size, L1_CACHE_BYTES);
        if (nodemap_addr == -1UL) {
                printk(KERN_ERR
                       "NUMA: Unable to allocate Memory to Node hash map\n");
                nodemap_addr = nodemap_size = 0;
                return -1;
        }
-       pad_addr = (nodemap_addr + pad) & ~pad;
-       memnodemap = phys_to_virt(pad_addr);
+       memnodemap = phys_to_virt(nodemap_addr);
+       reserve_early(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
 
        printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
               nodemap_addr, nodemap_addr + nodemap_size);
@@ -164,15 +162,16 @@ int early_pfn_to_nid(unsigned long pfn)
 }
 
 static void * __init early_node_mem(int nodeid, unsigned long start,
-                                   unsigned long end, unsigned long size)
+                                   unsigned long end, unsigned long size,
+                                   unsigned long align)
 {
-       unsigned long mem = find_e820_area(start, end, size);
+       unsigned long mem = find_e820_area(start, end, size, align);
        void *ptr;
 
        if (mem != -1L)
                return __va(mem);
-       ptr = __alloc_bootmem_nopanic(size,
-                               SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
+
+       ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
        if (ptr == NULL) {
                printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
                       size, nodeid);
@@ -198,10 +197,13 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
        start_pfn = start >> PAGE_SHIFT;
        end_pfn = end >> PAGE_SHIFT;
 
-       node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size);
+       node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
+                                          SMP_CACHE_BYTES);
        if (node_data[nodeid] == NULL)
                return;
        nodedata_phys = __pa(node_data[nodeid]);
+       printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
+               nodedata_phys + pgdat_size - 1);
 
        memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
        NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -211,8 +213,12 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
        /* Find a place for the bootmem map */
        bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
        bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+       /*
+        * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes
+        * to use that to align to PAGE_SIZE
+        */
        bootmap = early_node_mem(nodeid, bootmap_start, end,
-                                       bootmap_pages<<PAGE_SHIFT);
+                                bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
        if (bootmap == NULL)  {
                if (nodedata_phys < start || nodedata_phys >= end)
                        free_bootmem((unsigned long)node_data[nodeid],
@@ -221,52 +227,27 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
                return;
        }
        bootmap_start = __pa(bootmap);
-       Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
 
        bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
                                         bootmap_start >> PAGE_SHIFT,
                                         start_pfn, end_pfn);
 
+       printk(KERN_INFO "  bootmap [%016lx -  %016lx] pages %lx\n",
+                bootmap_start, bootmap_start + bootmap_size - 1,
+                bootmap_pages);
+
        free_bootmem_with_active_regions(nodeid, end);
 
-       reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+       reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
+                       BOOTMEM_DEFAULT);
        reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-                            bootmap_pages<<PAGE_SHIFT);
+                       bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 #ifdef CONFIG_ACPI_NUMA
        srat_reserve_add_area(nodeid);
 #endif
        node_set_online(nodeid);
 }
 
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-/* Initialize final allocator for a zone */
-static void __init flat_setup_node_zones(int nodeid)
-{
-       unsigned long start_pfn, end_pfn, memmapsize, limit;
-
-       start_pfn = node_start_pfn(nodeid);
-       end_pfn = node_end_pfn(nodeid);
-
-       Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
-               nodeid, start_pfn, end_pfn);
-
-       /*
-        * Try to allocate mem_map at end to not fill up precious <4GB
-        * memory.
-        */
-       memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
-       limit = end_pfn << PAGE_SHIFT;
-
-       NODE_DATA(nodeid)->node_mem_map =
-               __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
-                                    memmapsize, SMP_CACHE_BYTES,
-                                    round_down(limit - memmapsize, PAGE_SIZE),
-                                    limit);
-}
-#else
-#define flat_setup_node_zones(i) do {} while (0)
-#endif
-
 /*
  * There are unfortunately some poorly designed mainboards around that
  * only connect memory to a single CPU. This breaks the 1:1 cpu->node
@@ -280,7 +261,7 @@ void __init numa_init_array(void)
 
        rr = first_node(node_online_map);
        for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_to_node(i) != NUMA_NO_NODE)
+               if (early_cpu_to_node(i) != NUMA_NO_NODE)
                        continue;
                numa_set_node(i, rr);
                rr = next_node(rr, node_online_map);
@@ -557,12 +538,13 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-       set_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
+       set_bit(cpu,
+               (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
-       u16 *cpu_to_node_map = (u16 *)x86_cpu_to_node_map_early_ptr;
+       int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
        cpu_pda(cpu)->nodenumber = node;
 
@@ -588,7 +570,6 @@ unsigned long __init numa_free_all_bootmem(void)
 void __init paging_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES];
-       int i;
 
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
@@ -598,9 +579,6 @@ void __init paging_init(void)
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
 
-       for_each_online_node(i)
-               flat_setup_node_zones(i);
-
        free_area_init_nodes(max_zone_pfns);
 }