x86: cleanup early per cpu variables/accesses v4
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index f4f7ecf..e8df64f 100644
 #include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/pci.h>
+#include <asm/pci-direct.h>
 #include <linux/efi.h>
 #include <linux/acpi.h>
 #include <linux/kallsyms.h>
 #include <linux/edd.h>
+#include <linux/iscsi_ibft.h>
 #include <linux/mmzone.h>
 #include <linux/kexec.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/sort.h>
 #include <linux/uaccess.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -58,7 +62,6 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
-#include <asm/mach_apic.h>
 #include <asm/numa.h>
 #include <asm/sections.h>
 #include <asm/dmi.h>
 #include <asm/mce.h>
 #include <asm/ds.h>
 #include <asm/topology.h>
+#include <asm/trampoline.h>
+#include <asm/pat.h>
 
+#include <mach_apic.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -114,7 +120,7 @@ extern int root_mountflags;
 
 char __initdata command_line[COMMAND_LINE_SIZE];
 
-struct resource standard_io_resources[] = {
+static struct resource standard_io_resources[] = {
        { .name = "dma1", .start = 0x00, .end = 0x1f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "pic1", .start = 0x20, .end = 0x21,
@@ -123,7 +129,9 @@ struct resource standard_io_resources[] = {
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "timer1", .start = 0x50, .end = 0x53,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-       { .name = "keyboard", .start = 0x60, .end = 0x6f,
+       { .name = "keyboard", .start = 0x60, .end = 0x60,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "keyboard", .start = 0x64, .end = 0x64,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "dma page reg", .start = 0x80, .end = 0x8f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
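The old single "keyboard" entry spanned 0x60-0x6f; the split above reserves only the two ports the i8042 controller actually decodes, the data port at 0x60 and the command/status port at 0x64, leaving the ports in between unclaimed. For illustration only (hypothetical driver name and port, not part of this patch), a platform device sitting in that gap can now reserve its port normally:

        /* hypothetical driver: port 0x62 no longer overlaps a busy
         * "keyboard" resource, so this request can succeed */
        if (!request_region(0x62, 1, "example-dev"))
                return -EBUSY;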
@@ -188,6 +196,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
        e820_register_active_regions(0, start_pfn, end_pfn);
        free_bootmem_with_active_regions(0, end_pfn);
+       early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
        reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
@@ -248,6 +257,7 @@ static void __init reserve_crashkernel(void)
                                (unsigned long)(total_mem >> 20));
                crashk_res.start = crash_base;
                crashk_res.end   = crash_base + crash_size - 1;
+               insert_resource(&iomem_resource, &crashk_res);
        }
 }
 #else
@@ -261,6 +271,40 @@ void __attribute__((weak)) __init memory_setup(void)
        machine_specific_memory_setup();
 }
 
+static void __init parse_setup_data(void)
+{
+       struct setup_data *data;
+       unsigned long pa_data;
+
+       if (boot_params.hdr.version < 0x0209)
+               return;
+       pa_data = boot_params.hdr.setup_data;
+       while (pa_data) {
+               data = early_ioremap(pa_data, PAGE_SIZE);
+               switch (data->type) {
+               default:
+                       break;
+               }
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+               free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+               pa_data = data->next;
+               early_iounmap(data, PAGE_SIZE);
+       }
+}
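parse_setup_data() above walks the boot loader's setup_data chain: each node sits at a physical address, is mapped briefly with early_ioremap(), dispatched on its type, and then followed through its next field until the chain ends at 0. For reference, a minimal sketch of the node layout it relies on, as declared in asm/bootparam.h:

        struct setup_data {
                __u64 next;     /* physical address of the next node, 0 ends the chain */
                __u32 type;     /* payload type, dispatched on by the switch above     */
                __u32 len;      /* length of data[] in bytes                           */
                __u8  data[0];  /* payload                                             */
        };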
+
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __cpuinit fam10h_check_enable_mmcfg(void);
+extern void __init check_enable_amd_mmconf_dmi(void);
+#else
+void __cpuinit fam10h_check_enable_mmcfg(void)
+{
+}
+void __init check_enable_amd_mmconf_dmi(void)
+{
+}
+#endif
+
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
@@ -313,6 +357,8 @@ void __init setup_arch(char **cmdline_p)
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
+       parse_setup_data();
+
        parse_early_param();
 
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
@@ -322,6 +368,11 @@ void __init setup_arch(char **cmdline_p)
 
        finish_e820_parsing();
 
+       /* after parse_early_param, so this can be debugged */
+       insert_resource(&iomem_resource, &code_resource);
+       insert_resource(&iomem_resource, &data_resource);
+       insert_resource(&iomem_resource, &bss_resource);
+
        early_gart_iommu_check();
 
        e820_register_active_regions(0, 0, -1UL);
@@ -341,21 +392,18 @@ void __init setup_arch(char **cmdline_p)
 
        check_efer();
 
-       init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+       max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
        if (efi_enabled)
                efi_init();
 
+       vsmp_init();
+
        dmi_scan_machine();
 
        io_delay_init();
 
-#ifdef CONFIG_SMP
-       /* setup to use the early static init tables during kernel startup */
-       x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-       x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-       x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
+#ifdef CONFIG_KVM_CLOCK
+       kvmclock_init();
 #endif
 
 #ifdef CONFIG_ACPI
@@ -387,7 +435,7 @@ void __init setup_arch(char **cmdline_p)
        contig_initmem_init(0, end_pfn);
 #endif
 
-       early_res_to_bootmem();
+       dma32_reserve_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
        /*
@@ -411,11 +459,14 @@ void __init setup_arch(char **cmdline_p)
                unsigned long end_of_mem    = end_pfn << PAGE_SHIFT;
 
                if (ramdisk_end <= end_of_mem) {
-                       reserve_bootmem_generic(ramdisk_image, ramdisk_size);
+                       /*
+                        * no need to reserve again: already reserved early in
+                        * x86_64_start_kernel, and early_res_to_bootmem
+                        * converted that into a bootmem reservation
+                        */
                        initrd_start = ramdisk_image + PAGE_OFFSET;
                        initrd_end = initrd_start+ramdisk_size;
                } else {
-                       /* Assumes everything on node 0 */
                        free_bootmem(ramdisk_image, ramdisk_size);
                        printk(KERN_ERR "initrd extends beyond end of memory "
                               "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
@@ -425,6 +476,9 @@ void __init setup_arch(char **cmdline_p)
        }
 #endif
        reserve_crashkernel();
+
+       reserve_ibft_region();
+
        paging_init();
        map_vsyscall();
 
@@ -447,10 +501,12 @@ void __init setup_arch(char **cmdline_p)
        init_apic_mappings();
        ioapic_init_mappings();
 
+       kvm_guest_init();
+
        /*
         * We trust e820 completely. No explicit ROM probing in memory.
         */
-       e820_reserve_resources(&code_resource, &data_resource, &bss_resource);
+       e820_reserve_resources();
        e820_mark_nosave_regions();
 
        /* request I/O space for devices used on all i[345]86 PCs */
@@ -467,6 +523,9 @@ void __init setup_arch(char **cmdline_p)
        conswitchp = &dummy_con;
 #endif
 #endif
+
+       /* do this before identify_cpu for boot cpu */
+       check_enable_amd_mmconf_dmi();
 }
 
 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
@@ -552,9 +611,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
        bits = c->x86_coreid_bits;
 
        /* Low order bits define the core id (index of core in socket) */
-       c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
-       /* Convert the APIC ID into the socket ID */
-       c->phys_proc_id = phys_pkg_id(bits);
+       c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
+       /* Convert the initial APIC ID into the socket ID */
+       c->phys_proc_id = c->initial_apicid >> bits;
 
 #ifdef CONFIG_NUMA
        node = c->phys_proc_id;
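With c->x86_coreid_bits giving the number of APIC-ID bits that encode the core index, the rewritten lines decode the initial APIC ID directly instead of calling phys_pkg_id(): the low bits select the core within the package, the remaining high bits select the package. A worked example with assumed values (a hypothetical part with two core-id bits):

        /* initial_apicid = 0x06, bits = 2 (assumed) */
        cpu_core_id  = 0x06 & ((1 << 2) - 1);   /* = 2, core 2 within its package */
        phys_proc_id = 0x06 >> 2;               /* = 1, package (socket) 1        */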
@@ -571,7 +630,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
                   If that doesn't result in a usable node fall back to the
                   path for the previous case.  */
 
-               int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
+               int ht_nodeid = c->initial_apicid;
 
                if (ht_nodeid >= 0 &&
                    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
@@ -677,7 +736,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
        /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
           3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-       clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
+       clear_cpu_cap(c, 0*32+31);
 
        /* On C+ stepping K8 rep microcode works well for copy/memset */
        level = cpuid_eax(1);
@@ -719,8 +778,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
        /* MFENCE stops RDTSC speculation */
        set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 
+       if (c->x86 == 0x10)
+               fam10h_check_enable_mmcfg();
+
        if (amd_apic_timer_broken())
                disable_apic_timer = 1;
+
+       if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
+               unsigned long long tseg;
+
+               /*
+                * Split up direct mapping around the TSEG SMM area.
+                * Don't do it for gbpages because there seems very little
+                * benefit in doing so.
+                */
+               if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
+               (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+                       set_memory_4k((unsigned long)__va(tseg), 1);
+       }
 }
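The new TSEG check compares both sides in 2 MB (PMD-sized) units: tseg >> PMD_SHIFT turns the TSEG base address into a 2 MB-page index, and max_pfn_mapped >> (PMD_SHIFT - PAGE_SHIFT) turns the highest directly mapped pfn into the same unit. On x86-64, PMD_SHIFT is 21 and PAGE_SHIFT is 12, so with assumed example values:

        tseg = 0x7f000000, max_pfn_mapped = 0x80000   (2 GB direct-mapped)

        tseg >> 21                  = 0x3f8   /* TSEG lies in 2 MB page 0x3f8         */
        max_pfn_mapped >> (21 - 12) = 0x400   /* direct map covers 2 MB pages 0-0x3ff */

Since 0x3f8 < 0x400, the 2 MB mapping covering TSEG is split into 4 KB pages.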
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -813,7 +888,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
            (c->x86 == 0x6 && c->x86_model >= 0x0e))
-               set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 }
 
 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
@@ -856,9 +931,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
        if (c->x86 == 15)
                c->x86_cache_alignment = c->x86_clflush_size * 2;
-       if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
-           (c->x86 == 0x6 && c->x86_model >= 0x0e))
-               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
        if (c->x86 == 6)
                set_cpu_cap(c, X86_FEATURE_REP_GOOD);
        set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
@@ -867,6 +939,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
        srat_detect_node();
 }
 
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+       if (c->x86 == 0x6 && c->x86_model >= 0xf)
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
+{
+       /* Cache sizes */
+       unsigned n;
+
+       n = c->extended_cpuid_level;
+       if (n >= 0x80000008) {
+               unsigned eax = cpuid_eax(0x80000008);
+               c->x86_virt_bits = (eax >> 8) & 0xff;
+               c->x86_phys_bits = eax & 0xff;
+       }
+
+       if (c->x86 == 0x6 && c->x86_model >= 0xf) {
+               c->x86_cache_alignment = c->x86_clflush_size * 2;
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+       }
+       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+}
+
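In init_centaur() above, CPUID leaf 0x80000008 reports the physical address width in EAX bits 7:0 and the linear (virtual) address width in bits 15:8; for example, eax == 0x3028 decodes to 40 physical and 48 virtual address bits.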
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
        char *v = c->x86_vendor_id;
@@ -875,6 +973,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
                c->x86_vendor = X86_VENDOR_AMD;
        else if (!strcmp(v, "GenuineIntel"))
                c->x86_vendor = X86_VENDOR_INTEL;
+       else if (!strcmp(v, "CentaurHauls"))
+               c->x86_vendor = X86_VENDOR_CENTAUR;
        else
                c->x86_vendor = X86_VENDOR_UNKNOWN;
 }
@@ -922,15 +1022,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
                        c->x86 += (tfms >> 20) & 0xff;
                if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               if (c->x86_capability[0] & (1<<19))
+               if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
        } else {
                /* Have CPUID level 0 only - unheard of */
                c->x86 = 4;
        }
 
+       c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
 #ifdef CONFIG_SMP
-       c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+       c->phys_proc_id = c->initial_apicid;
 #endif
        /* AMD-defined flags: level 0x80000001 */
        xlvl = cpuid_eax(0x80000000);
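The clflush-size and initial_apicid changes above both decode CPUID leaf 1 EBX: bits 31:24 hold the initial local APIC ID, and bits 15:8 hold the CLFLUSH line size in 8-byte units (hence the * 8). A minimal stand-alone sketch of that decoding:

        unsigned int ebx = cpuid_ebx(1);

        unsigned int initial_apicid = (ebx >> 24) & 0xff;       /* EBX[31:24]                  */
        unsigned int clflush_bytes  = ((ebx >> 8) & 0xff) * 8;  /* EBX[15:8], 8-byte units     */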
@@ -963,8 +1064,12 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
        case X86_VENDOR_INTEL:
                early_init_intel(c);
                break;
+       case X86_VENDOR_CENTAUR:
+               early_init_centaur(c);
+               break;
        }
 
+       validate_pat_support(c);
 }
 
 /*
@@ -999,6 +1104,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
                init_intel(c);
                break;
 
+       case X86_VENDOR_CENTAUR:
+               init_centaur(c);
+               break;
+
        case X86_VENDOR_UNKNOWN:
        default:
                display_cacheinfo(c);
@@ -1028,14 +1137,24 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #endif
        select_idle_routine(c);
 
-       if (c != &boot_cpu_data)
-               mtrr_ap_init();
 #ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
 #endif
 
 }
 
+void __cpuinit identify_boot_cpu(void)
+{
+       identify_cpu(&boot_cpu_data);
+}
+
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+       BUG_ON(c == &boot_cpu_data);
+       identify_cpu(c);
+       mtrr_ap_init();
+}
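Splitting identify_cpu()'s entry points lets mtrr_ap_init() run only on secondary CPUs, where it brings the AP's MTRRs in line with the saved boot-CPU state; the boot CPU's MTRRs are set up separately earlier in boot, and the BUG_ON documents that identify_secondary_cpu() must never be handed boot_cpu_data.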
+
 static __init int setup_noclflush(char *arg)
 {
        setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
@@ -1064,123 +1183,3 @@ static __init int setup_disablecpuid(char *arg)
        return 1;
 }
 __setup("clearcpuid=", setup_disablecpuid);
-
-/*
- *     Get CPU information for use by the procfs.
- */
-
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
-       struct cpuinfo_x86 *c = v;
-       int cpu = 0, i;
-
-#ifdef CONFIG_SMP
-       cpu = c->cpu_index;
-#endif
-
-       seq_printf(m, "processor\t: %u\n"
-                  "vendor_id\t: %s\n"
-                  "cpu family\t: %d\n"
-                  "model\t\t: %d\n"
-                  "model name\t: %s\n",
-                  (unsigned)cpu,
-                  c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
-                  c->x86,
-                  (int)c->x86_model,
-                  c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
-       if (c->x86_mask || c->cpuid_level >= 0)
-               seq_printf(m, "stepping\t: %d\n", c->x86_mask);
-       else
-               seq_printf(m, "stepping\t: unknown\n");
-
-       if (cpu_has(c, X86_FEATURE_TSC)) {
-               unsigned int freq = cpufreq_quick_get((unsigned)cpu);
-
-               if (!freq)
-                       freq = cpu_khz;
-               seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-                          freq / 1000, (freq % 1000));
-       }
-
-       /* Cache size */
-       if (c->x86_cache_size >= 0)
-               seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-
-#ifdef CONFIG_SMP
-       if (smp_num_siblings * c->x86_max_cores > 1) {
-               seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-               seq_printf(m, "siblings\t: %d\n",
-                              cpus_weight(per_cpu(cpu_core_map, cpu)));
-               seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
-               seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
-       }
-#endif
-
-       seq_printf(m,
-                  "fpu\t\t: yes\n"
-                  "fpu_exception\t: yes\n"
-                  "cpuid level\t: %d\n"
-                  "wp\t\t: yes\n"
-                  "flags\t\t:",
-                  c->cpuid_level);
-
-       for (i = 0; i < 32*NCAPINTS; i++)
-               if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
-                       seq_printf(m, " %s", x86_cap_flags[i]);
-
-       seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
-                  c->loops_per_jiffy/(500000/HZ),
-                  (c->loops_per_jiffy/(5000/HZ)) % 100);
-
-       if (c->x86_tlbsize > 0)
-               seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
-       seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
-       seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
-
-       seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
-                  c->x86_phys_bits, c->x86_virt_bits);
-
-       seq_printf(m, "power management:");
-       for (i = 0; i < 32; i++) {
-               if (c->x86_power & (1 << i)) {
-                       if (i < ARRAY_SIZE(x86_power_flags) &&
-                           x86_power_flags[i])
-                               seq_printf(m, "%s%s",
-                                          x86_power_flags[i][0]?" ":"",
-                                          x86_power_flags[i]);
-                       else
-                               seq_printf(m, " [%d]", i);
-               }
-       }
-
-       seq_printf(m, "\n\n");
-
-       return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-       if (*pos == 0)  /* just in case, cpu 0 is not the first */
-               *pos = first_cpu(cpu_online_map);
-       if ((*pos) < NR_CPUS && cpu_online(*pos))
-               return &cpu_data(*pos);
-       return NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       *pos = next_cpu(*pos, cpu_online_map);
-       return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-       .start = c_start,
-       .next = c_next,
-       .stop = c_stop,
-       .show = show_cpuinfo,
-};