x86: cleanup early per cpu variables/accesses v4

[safe/jmp/linux-2.6] / arch / x86 / mm / srat_64.c
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c

index 56089cc..012220e 100644 (file)
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -20,6 +20,7 @@
  #include <asm/proto.h>
  #include <asm/numa.h>
  #include <asm/e820.h>
+#include <asm/genapic.h>
  
  int acpi_numa __initdata;
  
@@ -31,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES];
  static int found_add_area __initdata;
  int hotadd_percent __initdata = 0;
  
+static int num_node_memblks __initdata;
+static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
+static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
+
  /* Too small nodes confuse the VM badly. Usually they result
     from BIOS bugs. */
  #define NODE_MIN_SIZE (4*1024*1024)
@@ -40,17 +45,17 @@ static __init int setup_node(int pxm)
         return acpi_map_pxm_to_node(pxm);
  }
  
-static __init int conflicting_nodes(unsigned long start, unsigned long end)
+static __init int conflicting_memblks(unsigned long start, unsigned long end)
  {
         int i;
-       for_each_node_mask(i, nodes_parsed) {
-               struct bootnode *nd = &nodes[i];
+       for (i = 0; i < num_node_memblks; i++) {
+               struct bootnode *nd = &node_memblk_range[i];
                 if (nd->start == nd->end)
                         continue;
                 if (nd->end > start && nd->start < end)
-                       return i;
+                       return memblk_nodeid[i];
                 if (nd->end == end && nd->start == start)
-                       return i;
+                       return memblk_nodeid[i];
         }
         return -1;
  }
@@ -92,36 +97,9 @@ static __init inline int srat_disabled(void)
         return numa_off || acpi_numa < 0;
  }
  
-/*
- * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
- * up the NUMA heuristics which wants the local node to have a smaller
- * distance than the others.
- * Do some quick checks here and only use the SLIT if it passes.
- */
-static __init int slit_valid(struct acpi_table_slit *slit)
-{
-       int i, j;
-       int d = slit->locality_count;
-       for (i = 0; i < d; i++) {
-               for (j = 0; j < d; j++)  {
-                       u8 val = slit->entry[d*i + j];
-                       if (i == j) {
-                               if (val != LOCAL_DISTANCE)
-                                       return 0;
-                       } else if (val <= LOCAL_DISTANCE)
-                               return 0;
-               }
-       }
-       return 1;
-}
-
  /* Callback for SLIT parsing */
  void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
  {
-       if (!slit_valid(slit)) {
-               printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
-               return;
-       }
         acpi_slit = slit;
  }
  
@@ -130,6 +108,8 @@ void __init
  acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
  {
         int pxm, node;
+       int apic_id;
+
         if (srat_disabled())
                 return;
         if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
@@ -145,82 +125,31 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
                 bad_srat();
                 return;
         }
-       apicid_to_node[pa->apic_id] = node;
+
+       if (is_uv_system())
+               apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
+       else
+               apic_id = pa->apic_id;
+       apicid_to_node[apic_id] = node;
         acpi_numa = 1;
         printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
-              pxm, pa->apic_id, node);
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Protect against too large hotadd areas that would fill up memory.
- */
-static int hotadd_enough_memory(struct bootnode *nd)
-{
-       static unsigned long allocated;
-       static unsigned long last_area_end;
-       unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
-       long mem = pages * sizeof(struct page);
-       unsigned long addr;
-       unsigned long allowed;
-       unsigned long oldpages = pages;
-
-       if (mem < 0)
-               return 0;
-       allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
-       allowed = (allowed / 100) * hotadd_percent;
-       if (allocated + mem > allowed) {
-               unsigned long range;
-               /* Give them at least part of their hotadd memory upto hotadd_percent
-                  It would be better to spread the limit out
-                  over multiple hotplug areas, but that is too complicated
-                  right now */
-               if (allocated >= allowed)
-                       return 0;
-               range = allowed - allocated;
-               pages = (range / PAGE_SIZE);
-               mem = pages * sizeof(struct page);
-               nd->end = nd->start + range;
-       }
-       /* Not completely fool proof, but a good sanity check */
-       addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
-       if (addr == -1UL)
-               return 0;
-       if (pages != oldpages)
-               printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
-                       pages << PAGE_SHIFT);
-       last_area_end = addr + mem;
-       allocated += mem;
-       return 1;
+              pxm, apic_id, node);
  }
  
-static int update_end_of_memory(unsigned long end)
-{
-       found_add_area = 1;
-       if ((end >> PAGE_SHIFT) > end_pfn)
-               end_pfn = end >> PAGE_SHIFT;
-       return 1;
-}
-
-static inline int save_add_info(void)
-{
-       return hotadd_percent > 0;
-}
-#else
-int update_end_of_memory(unsigned long end) {return -1;}
+static int update_end_of_memory(unsigned long end) {return -1;}
  static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  static inline int save_add_info(void) {return 1;}
  #else
  static inline int save_add_info(void) {return 0;}
  #endif
-#endif
  /*
- * Update nodes_add and decide if to include add are in the zone.
+ * Update nodes_add and decide whether to include the add area in the zone.
- * Both SPARSE and RESERVE need nodes_add infomation.
- * This code supports one contigious hot add area per node.
+ * Both SPARSE and RESERVE need nodes_add information.
+ * This code supports one contiguous hot add area per node.
   */
-static int reserve_hotadd(int node, unsigned long start, unsigned long end)
+static int __init
+reserve_hotadd(int node, unsigned long start, unsigned long end)
  {
         unsigned long s_pfn = start >> PAGE_SHIFT;
         unsigned long e_pfn = end >> PAGE_SHIFT;
@@ -306,7 +235,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                 bad_srat();
                 return;
         }
-       i = conflicting_nodes(start, end);
+       i = conflicting_memblks(start, end);
         if (i == node) {
                 printk(KERN_WARNING
                 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
@@ -331,10 +260,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                         nd->end = end;
         }
  
-       printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
-              nd->start, nd->end);
-       e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
-                                               nd->end >> PAGE_SHIFT);
+       printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+              start, end);
+       e820_register_active_regions(node, start >> PAGE_SHIFT,
+                                    end >> PAGE_SHIFT);
         push_node_boundaries(node, nd->start >> PAGE_SHIFT,
                                                 nd->end >> PAGE_SHIFT);
  
@@ -346,6 +275,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                 if ((nd->start | nd->end) == 0)
                         node_clear(node, nodes_parsed);
         }
+
+       node_memblk_range[num_node_memblks].start = start;
+       node_memblk_range[num_node_memblks].end = end;
+       memblk_nodeid[num_node_memblks] = node;
+       num_node_memblks++;
  }
  
  /* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -377,7 +311,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
         return 1;
  }
  
-static void unparse_node(int node)
+static void __init unparse_node(int node)
  {
         int i;
         node_clear(node, nodes_parsed);
@@ -400,7 +334,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
         /* First clean up the node list */
         for (i = 0; i < MAX_NUMNODES; i++) {
                 cutoff_node(i, start, end);
-               if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
+               /*
+                * don't confuse VM with a node that doesn't have the
+                * minimum memory.
+                */
+               if (nodes[i].end &&
+                       (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
                         unparse_node(i);
                         node_set_offline(i);
                 }
@@ -411,7 +350,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
                 return -1;
         }
  
-       memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+       memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
+                                          memblk_nodeid);
         if (memnode_shift < 0) {
                 printk(KERN_ERR
                      "SRAT: No NUMA node hash function found. Contact maintainer\n");
@@ -431,16 +371,24 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
                         setup_node_bootmem(i, nodes[i].start, nodes[i].end);
  
         for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_to_node(i) == NUMA_NO_NODE)
+               int node = early_cpu_to_node(i);
+
+               if (node == NUMA_NO_NODE)
                         continue;
-               if (!node_isset(cpu_to_node(i), node_possible_map))
-                       numa_set_node(i, NUMA_NO_NODE);
+               if (!node_isset(node, node_possible_map))
+                       numa_clear_node(i);
         }
         numa_init_array();
         return 0;
  }
  
  #ifdef CONFIG_NUMA_EMU
+static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
+       [0 ... MAX_NUMNODES-1] = PXM_INVAL
+};
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+       [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
  static int __init find_node_by_addr(unsigned long addr)
  {
         int ret = NUMA_NO_NODE;
@@ -457,7 +405,7 @@ static int __init find_node_by_addr(unsigned long addr)
                         break;
                 }
         }
-       return i;
+       return ret;
  }
  
  /*
@@ -471,12 +419,6 @@ static int __init find_node_by_addr(unsigned long addr)
  void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
  {
         int i, j;
-       int fake_node_to_pxm_map[MAX_NUMNODES] = {
-               [0 ... MAX_NUMNODES-1] = PXM_INVAL
-       };
-       unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] = {
-               [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-       };
  
         printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
                          "topology.\n");
@@ -535,7 +477,8 @@ void __init srat_reserve_add_area(int nodeid)
                 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
                                 "pre-allocated memory.\n", (unsigned long long)total_mb);
                 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-                              nodes_add[nodeid].end - nodes_add[nodeid].start);
+                              nodes_add[nodeid].end - nodes_add[nodeid].start,
+                              BOOTMEM_DEFAULT);
         }
  }