x86: cleanup early per cpu variables/accesses v4
[safe/jmp/linux-2.6] / arch / x86 / kernel / setup_64.c
index 63dd39b..e8df64f 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/a.out.h>
 #include <linux/screen_info.h>
 #include <linux/ioport.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/pci.h>
+#include <asm/pci-direct.h>
+#include <linux/efi.h>
 #include <linux/acpi.h>
 #include <linux/kallsyms.h>
 #include <linux/edd.h>
+#include <linux/iscsi_ibft.h>
 #include <linux/mmzone.h>
 #include <linux/kexec.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/sort.h>
+#include <linux/uaccess.h>
+#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <asm/vsyscall.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/msr.h>
 #include <video/edid.h>
 #include <asm/e820.h>
 #include <asm/dma.h>
+#include <asm/gart.h>
 #include <asm/mpspec.h>
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
-#include <asm/mach_apic.h>
 #include <asm/numa.h>
 #include <asm/sections.h>
 #include <asm/dmi.h>
 #include <asm/cacheflush.h>
 #include <asm/mce.h>
+#include <asm/ds.h>
+#include <asm/topology.h>
+#include <asm/trampoline.h>
+#include <asm/pat.h>
+
+#include <mach_apic.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define ARCH_SETUP
+#endif
 
 /*
  * Machine setup..
@@ -68,6 +86,8 @@
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
 EXPORT_SYMBOL(boot_cpu_data);
 
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
 unsigned long mmu_cr4_features;
 
 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
@@ -100,7 +120,7 @@ extern int root_mountflags;
 
 char __initdata command_line[COMMAND_LINE_SIZE];
 
-struct resource standard_io_resources[] = {
+static struct resource standard_io_resources[] = {
        { .name = "dma1", .start = 0x00, .end = 0x1f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "pic1", .start = 0x20, .end = 0x21,
@@ -109,7 +129,9 @@ struct resource standard_io_resources[] = {
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "timer1", .start = 0x50, .end = 0x53,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-       { .name = "keyboard", .start = 0x60, .end = 0x6f,
+       { .name = "keyboard", .start = 0x60, .end = 0x60,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "keyboard", .start = 0x64, .end = 0x64,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "dma page reg", .start = 0x80, .end = 0x8f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
@@ -167,13 +189,15 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long bootmap_size, bootmap;
 
        bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
-       bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
+       bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
+                                PAGE_SIZE);
        if (bootmap == -1L)
                panic("Cannot find bootmem map of size %ld\n", bootmap_size);
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
        e820_register_active_regions(0, start_pfn, end_pfn);
        free_bootmem_with_active_regions(0, end_pfn);
-       reserve_bootmem(bootmap, bootmap_size);
+       early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
+       reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
 
@@ -204,28 +228,36 @@ static inline void copy_edd(void)
 #ifdef CONFIG_KEXEC
 static void __init reserve_crashkernel(void)
 {
-       unsigned long long free_mem;
+       unsigned long long total_mem;
        unsigned long long crash_size, crash_base;
        int ret;
 
-       free_mem =
-               ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+       total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
 
-       ret = parse_crashkernel(boot_command_line, free_mem,
+       ret = parse_crashkernel(boot_command_line, total_mem,
                        &crash_size, &crash_base);
        if (ret == 0 && crash_size) {
-               if (crash_base > 0) {
-                       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-                                       "for crashkernel (System RAM: %ldMB)\n",
-                                       (unsigned long)(crash_size >> 20),
-                                       (unsigned long)(crash_base >> 20),
-                                       (unsigned long)(free_mem >> 20));
-                       crashk_res.start = crash_base;
-                       crashk_res.end   = crash_base + crash_size - 1;
-                       reserve_bootmem(crash_base, crash_size);
-               } else
+               if (crash_base <= 0) {
                        printk(KERN_INFO "crashkernel reservation failed - "
                                        "you have to specify a base address\n");
+                       return;
+               }
+
+               if (reserve_bootmem(crash_base, crash_size,
+                                       BOOTMEM_EXCLUSIVE) < 0) {
+                       printk(KERN_INFO "crashkernel reservation failed - "
+                                       "memory is in use\n");
+                       return;
+               }
+
+               printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                               "for crashkernel (System RAM: %ldMB)\n",
+                               (unsigned long)(crash_size >> 20),
+                               (unsigned long)(crash_base >> 20),
+                               (unsigned long)(total_mem >> 20));
+               crashk_res.start = crash_base;
+               crashk_res.end   = crash_base + crash_size - 1;
+               insert_resource(&iomem_resource, &crashk_res);
        }
 }
 #else
@@ -233,31 +265,51 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
-#define EBDA_ADDR_POINTER 0x40E
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+void __attribute__((weak)) __init memory_setup(void)
+{
+       machine_specific_memory_setup();
+}
 
-unsigned __initdata ebda_addr;
-unsigned __initdata ebda_size;
+static void __init parse_setup_data(void)
+{
+       struct setup_data *data;
+       unsigned long pa_data;
+
+       if (boot_params.hdr.version < 0x0209)
+               return;
+       pa_data = boot_params.hdr.setup_data;
+       while (pa_data) {
+               data = early_ioremap(pa_data, PAGE_SIZE);
+               switch (data->type) {
+               default:
+                       break;
+               }
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+               free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+               pa_data = data->next;
+               early_iounmap(data, PAGE_SIZE);
+       }
+}
 
-static void discover_ebda(void)
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __cpuinit fam10h_check_enable_mmcfg(void);
+extern void __init check_enable_amd_mmconf_dmi(void);
+#else
+void __cpuinit fam10h_check_enable_mmcfg(void)
+{
+}
+void __init check_enable_amd_mmconf_dmi(void)
 {
-       /*
-        * there is a real-mode segmented pointer pointing to the
-        * 4K EBDA area at 0x40E
-        */
-       ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
-       ebda_addr <<= 4;
-
-       ebda_size = *(unsigned short *)__va(ebda_addr);
-
-       /* Round EBDA up to pages */
-       if (ebda_size == 0)
-               ebda_size = 1;
-       ebda_size <<= 10;
-       ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
-       if (ebda_size > 64*1024)
-               ebda_size = 64*1024;
 }
+#endif
 
+/*
+ * setup_arch - architecture-specific boot-time initializations
+ *
+ * Note: On x86_64, fixmaps are ready for use even before this is called.
+ */
 void __init setup_arch(char **cmdline_p)
 {
        unsigned i;
@@ -275,7 +327,15 @@ void __init setup_arch(char **cmdline_p)
        rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
        rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
 #endif
-       setup_memory_region();
+#ifdef CONFIG_EFI
+       if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+                    "EL64", 4))
+               efi_enabled = 1;
+#endif
+
+       ARCH_SETUP
+
+       memory_setup();
        copy_edd();
 
        if (!boot_params.hdr.root_flags)
@@ -297,31 +357,53 @@ void __init setup_arch(char **cmdline_p)
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
+       parse_setup_data();
+
        parse_early_param();
 
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+       if (init_ohci1394_dma_early)
+               init_ohci1394_dma_on_all_controllers();
+#endif
+
        finish_e820_parsing();
 
+       /* after parse_early_param, so could debug it */
+       insert_resource(&iomem_resource, &code_resource);
+       insert_resource(&iomem_resource, &data_resource);
+       insert_resource(&iomem_resource, &bss_resource);
+
+       early_gart_iommu_check();
+
        e820_register_active_regions(0, 0, -1UL);
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
        end_pfn = e820_end_of_ram();
+       /* update e820 for memory not covered by WB MTRRs */
+       mtrr_bp_init();
+       if (mtrr_trim_uncached_memory(end_pfn)) {
+               e820_register_active_regions(0, 0, -1UL);
+               end_pfn = e820_end_of_ram();
+       }
+
        num_physpages = end_pfn;
 
        check_efer();
 
-       discover_ebda();
+       max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
+       if (efi_enabled)
+               efi_init();
 
-       init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+       vsmp_init();
 
        dmi_scan_machine();
 
        io_delay_init();
 
-#ifdef CONFIG_SMP
-       /* setup to use the static apicid table during kernel startup */
-       x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
+#ifdef CONFIG_KVM_CLOCK
+       kvmclock_init();
 #endif
 
 #ifdef CONFIG_ACPI
@@ -353,33 +435,7 @@ void __init setup_arch(char **cmdline_p)
        contig_initmem_init(0, end_pfn);
 #endif
 
-       /* Reserve direct mapping */
-       reserve_bootmem_generic(table_start << PAGE_SHIFT,
-                               (table_end - table_start) << PAGE_SHIFT);
-
-       /* reserve kernel */
-       reserve_bootmem_generic(__pa_symbol(&_text),
-                               __pa_symbol(&_end) - __pa_symbol(&_text));
-
-       /*
-        * reserve physical page 0 - it's a special BIOS page on many boxes,
-        * enabling clean reboots, SMP operation, laptop functions.
-        */
-       reserve_bootmem_generic(0, PAGE_SIZE);
-
-       /* reserve ebda region */
-       if (ebda_addr)
-               reserve_bootmem_generic(ebda_addr, ebda_size);
-#ifdef CONFIG_NUMA
-       /* reserve nodemap region */
-       if (nodemap_addr)
-               reserve_bootmem_generic(nodemap_addr, nodemap_size);
-#endif
-
-#ifdef CONFIG_SMP
-       /* Reserve SMP trampoline */
-       reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
-#endif
+       dma32_reserve_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
        /*
@@ -387,6 +443,10 @@ void __init setup_arch(char **cmdline_p)
         */
        acpi_reserve_bootmem();
 #endif
+
+       if (efi_enabled)
+               efi_reserve_bootmem();
+
        /*
        * Find and reserve possible boot-time SMP configuration:
        */
@@ -399,10 +459,15 @@ void __init setup_arch(char **cmdline_p)
                unsigned long end_of_mem    = end_pfn << PAGE_SHIFT;
 
                if (ramdisk_end <= end_of_mem) {
-                       reserve_bootmem_generic(ramdisk_image, ramdisk_size);
+                       /*
+                        * don't need to reserve again, already reserved early
+                        * in x86_64_start_kernel, and early_res_to_bootmem
+                        * convert that to reserved in bootmem
+                        */
                        initrd_start = ramdisk_image + PAGE_OFFSET;
                        initrd_end = initrd_start+ramdisk_size;
                } else {
+                       free_bootmem(ramdisk_image, ramdisk_size);
                        printk(KERN_ERR "initrd extends beyond end of memory "
                               "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
                               ramdisk_end, end_of_mem);
@@ -411,16 +476,14 @@ void __init setup_arch(char **cmdline_p)
        }
 #endif
        reserve_crashkernel();
+
+       reserve_ibft_region();
+
        paging_init();
+       map_vsyscall();
 
        early_quirks();
 
-       /*
-        * set this early, so we dont allocate cpu0
-        * if MADT list doesnt list BSP first
-        * mpparse.c/MP_processor_info() allocates logical cpu numbers.
-        */
-       cpu_set(0, cpu_present_map);
 #ifdef CONFIG_ACPI
        /*
         * Read APIC and some other early information from ACPI tables.
@@ -438,10 +501,12 @@ void __init setup_arch(char **cmdline_p)
        init_apic_mappings();
        ioapic_init_mappings();
 
+       kvm_guest_init();
+
        /*
         * We trust e820 completely. No explicit ROM probing in memory.
         */
-       e820_reserve_resources(&code_resource, &data_resource, &bss_resource);
+       e820_reserve_resources();
        e820_mark_nosave_regions();
 
        /* request I/O space for devices used on all i[345]86 PCs */
@@ -452,11 +517,15 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
-       conswitchp = &vga_con;
+       if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+               conswitchp = &vga_con;
 #elif defined(CONFIG_DUMMY_CONSOLE)
        conswitchp = &dummy_con;
 #endif
 #endif
+
+       /* do this before identify_cpu for boot cpu */
+       check_enable_amd_mmconf_dmi();
 }
 
 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
@@ -500,9 +569,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
                printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
                c->x86_cache_size, ecx & 0xFF);
        }
-
-       if (n >= 0x80000007)
-               cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
        if (n >= 0x80000008) {
                cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
                c->x86_virt_bits = (eax >> 8) & 0xff;
@@ -511,7 +577,7 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_NUMA
-static int nearby_node(int apicid)
+static int __cpuinit nearby_node(int apicid)
 {
        int i, node;
 
@@ -533,7 +599,7 @@ static int nearby_node(int apicid)
  * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
  * Assumes number of cores is a power of two.
  */
-static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
+static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
        unsigned bits;
@@ -545,9 +611,9 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
        bits = c->x86_coreid_bits;
 
        /* Low order bits define the core id (index of core in socket) */
-       c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
-       /* Convert the APIC ID into the socket ID */
-       c->phys_proc_id = phys_pkg_id(bits);
+       c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
+       /* Convert the initial APIC ID into the socket ID */
+       c->phys_proc_id = c->initial_apicid >> bits;
 
 #ifdef CONFIG_NUMA
        node = c->phys_proc_id;
@@ -564,7 +630,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
                   If that doesn't result in a usable node fall back to the
                   path for the previous case.  */
 
-               int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
+               int ht_nodeid = c->initial_apicid;
 
                if (ht_nodeid >= 0 &&
                    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
@@ -580,7 +646,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
        unsigned bits, ecx;
@@ -638,6 +704,15 @@ static __cpuinit int amd_apic_timer_broken(void)
        return 0;
 }
 
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+       early_init_amd_mc(c);
+
+       /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
+       if (c->x86_power & (1<<8))
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
        unsigned level;
@@ -661,19 +736,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
        /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
           3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-       clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
+       clear_cpu_cap(c, 0*32+31);
 
        /* On C+ stepping K8 rep microcode works well for copy/memset */
        level = cpuid_eax(1);
        if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
                             level >= 0x0f58))
-               set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
+               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
        if (c->x86 == 0x10 || c->x86 == 0x11)
-               set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
+               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 
        /* Enable workaround for FXSAVE leak */
        if (c->x86 >= 6)
-               set_bit(X86_FEATURE_FXSAVE_LEAK, (unsigned long *)&c->x86_capability);
+               set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
 
        level = get_model_name(c);
        if (!level) {
@@ -687,10 +762,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
        }
        display_cacheinfo(c);
 
-       /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
-       if (c->x86_power & (1<<8))
-               set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability);
-
        /* Multi core CPU? */
        if (c->extended_cpuid_level >= 0x80000008)
                amd_detect_cmp(c);
@@ -702,20 +773,32 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                num_cache_leaves = 3;
 
        if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
-               set_bit(X86_FEATURE_K8, (unsigned long *)&c->x86_capability);
+               set_cpu_cap(c, X86_FEATURE_K8);
 
-       /* RDTSC can be speculated around */
-       clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
+       /* MFENCE stops RDTSC speculation */
+       set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 
-       /* Family 10 doesn't support C states in MWAIT so don't use it */
-       if (c->x86 == 0x10 && !force_mwait)
-               clear_bit(X86_FEATURE_MWAIT, (unsigned long *)&c->x86_capability);
+       if (c->x86 == 0x10)
+               fam10h_check_enable_mmcfg();
 
        if (amd_apic_timer_broken())
                disable_apic_timer = 1;
+
+       if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
+               unsigned long long tseg;
+
+               /*
+                * Split up direct mapping around the TSEG SMM area.
+                * Don't do it for gbpages because there seems very little
+                * benefit in doing so.
+                */
+               if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
+               (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+                       set_memory_4k((unsigned long)__va(tseg), 1);
+       }
 }
 
-static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
        u32 eax, ebx, ecx, edx;
@@ -783,7 +866,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
                return 1;
 }
 
-static void srat_detect_node(void)
+static void __cpuinit srat_detect_node(void)
 {
 #ifdef CONFIG_NUMA
        unsigned node;
@@ -793,7 +876,7 @@ static void srat_detect_node(void)
        /* Don't do the funky fallback heuristics the AMD version employs
           for now. */
        node = apicid_to_node[apicid];
-       if (node == NUMA_NO_NODE)
+       if (node == NUMA_NO_NODE || !node_online(node))
                node = first_node(node_online_map);
        numa_set_node(cpu, node);
 
@@ -801,6 +884,13 @@ static void srat_detect_node(void)
 #endif
 }
 
+static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+{
+       if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+           (c->x86 == 0x6 && c->x86_model >= 0x0e))
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
        /* Cache sizes */
@@ -811,19 +901,22 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
                unsigned eax = cpuid_eax(10);
                /* Check for version and the number of counters */
                if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-                       set_bit(X86_FEATURE_ARCH_PERFMON,
-                               (unsigned long *)&c->x86_capability);
+                       set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
        }
 
        if (cpu_has_ds) {
                unsigned int l1, l2;
                rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
                if (!(l1 & (1<<11)))
-                       set_bit(X86_FEATURE_BTS, (unsigned long *)c->x86_capability);
+                       set_cpu_cap(c, X86_FEATURE_BTS);
                if (!(l1 & (1<<12)))
-                       set_bit(X86_FEATURE_PEBS, (unsigned long *)c->x86_capability);
+                       set_cpu_cap(c, X86_FEATURE_PEBS);
        }
 
+
+       if (cpu_has_bts)
+               ds_init_intel(c);
+
        n = c->extended_cpuid_level;
        if (n >= 0x80000008) {
                unsigned eax = cpuid_eax(0x80000008);
@@ -838,20 +931,40 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
        if (c->x86 == 15)
                c->x86_cache_alignment = c->x86_clflush_size * 2;
-       if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
-           (c->x86 == 0x6 && c->x86_model >= 0x0e))
-               set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability);
        if (c->x86 == 6)
-               set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
-       if (c->x86 == 15)
-               set_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
-       else
-               clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
+               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
        c->x86_max_cores = intel_num_cpu_cores(c);
 
        srat_detect_node();
 }
 
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+       if (c->x86 == 0x6 && c->x86_model >= 0xf)
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
+{
+       /* Cache sizes */
+       unsigned n;
+
+       n = c->extended_cpuid_level;
+       if (n >= 0x80000008) {
+               unsigned eax = cpuid_eax(0x80000008);
+               c->x86_virt_bits = (eax >> 8) & 0xff;
+               c->x86_phys_bits = eax & 0xff;
+       }
+
+       if (c->x86 == 0x6 && c->x86_model >= 0xf) {
+               c->x86_cache_alignment = c->x86_clflush_size * 2;
+               set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+       }
+       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+}
+
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
        char *v = c->x86_vendor_id;
@@ -860,16 +973,12 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
                c->x86_vendor = X86_VENDOR_AMD;
        else if (!strcmp(v, "GenuineIntel"))
                c->x86_vendor = X86_VENDOR_INTEL;
+       else if (!strcmp(v, "CentaurHauls"))
+               c->x86_vendor = X86_VENDOR_CENTAUR;
        else
                c->x86_vendor = X86_VENDOR_UNKNOWN;
 }
 
-struct cpu_model_info {
-       int vendor;
-       int family;
-       char *model_names[16];
-};
-
 /* Do some early cpuid on the boot CPU to get some parameter that are
    needed before check_bugs. Everything advanced is in identify_cpu
    below. */
@@ -913,15 +1022,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
                        c->x86 += (tfms >> 20) & 0xff;
                if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               if (c->x86_capability[0] & (1<<19))
+               if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
        } else {
                /* Have CPUID level 0 only - unheard of */
                c->x86 = 4;
        }
 
+       c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
 #ifdef CONFIG_SMP
-       c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+       c->phys_proc_id = c->initial_apicid;
 #endif
        /* AMD-defined flags: level 0x80000001 */
        xlvl = cpuid_eax(0x80000000);
@@ -943,12 +1053,23 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
                        c->x86_capability[2] = cpuid_edx(0x80860001);
        }
 
+       c->extended_cpuid_level = cpuid_eax(0x80000000);
+       if (c->extended_cpuid_level >= 0x80000007)
+               c->x86_power = cpuid_edx(0x80000007);
+
        switch (c->x86_vendor) {
        case X86_VENDOR_AMD:
                early_init_amd(c);
                break;
+       case X86_VENDOR_INTEL:
+               early_init_intel(c);
+               break;
+       case X86_VENDOR_CENTAUR:
+               early_init_centaur(c);
+               break;
        }
 
+       validate_pat_support(c);
 }
 
 /*
@@ -983,13 +1104,16 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
                init_intel(c);
                break;
 
+       case X86_VENDOR_CENTAUR:
+               init_centaur(c);
+               break;
+
        case X86_VENDOR_UNKNOWN:
        default:
                display_cacheinfo(c);
                break;
        }
 
-       select_idle_routine(c);
        detect_ht(c);
 
        /*
@@ -1004,219 +1128,58 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
                        boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
        }
 
+       /* Clear all flags overriden by options */
+       for (i = 0; i < NCAPINTS; i++)
+               c->x86_capability[i] &= ~cleared_cpu_caps[i];
+
 #ifdef CONFIG_X86_MCE
        mcheck_init(c);
 #endif
-       if (c != &boot_cpu_data)
-               mtrr_ap_init();
+       select_idle_routine(c);
+
 #ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
 #endif
+
 }
 
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+void __cpuinit identify_boot_cpu(void)
 {
-       if (c->x86_model_id[0])
-               printk(KERN_INFO "%s", c->x86_model_id);
-
-       if (c->x86_mask || c->cpuid_level >= 0)
-               printk(KERN_CONT " stepping %02x\n", c->x86_mask);
-       else
-               printk(KERN_CONT "\n");
+       identify_cpu(&boot_cpu_data);
 }
 
-/*
- *     Get CPU information for use by the procfs.
- */
-
-static int show_cpuinfo(struct seq_file *m, void *v)
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
 {
-       struct cpuinfo_x86 *c = v;
-       int cpu = 0, i;
-
-       /*
-        * These flag bits must match the definitions in <asm/cpufeature.h>.
-        * NULL means this bit is undefined or reserved; either way it doesn't
-        * have meaning as far as Linux is concerned.  Note that it's important
-        * to realize there is a difference between this table and CPUID -- if
-        * applications want to get the raw CPUID data, they should access
-        * /dev/cpu/<cpu_nr>/cpuid instead.
-        */
-       static const char *const x86_cap_flags[] = {
-               /* Intel-defined */
-               "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-               "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-               "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-               "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-               /* AMD-defined */
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
-               NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-               "3dnowext", "3dnow",
-
-               /* Transmeta-defined */
-               "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-               /* Other (Linux-defined) */
-               "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-               NULL, NULL, NULL, NULL,
-               "constant_tsc", "up", NULL, "arch_perfmon",
-               "pebs", "bts", NULL, "sync_rdtsc",
-               "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-               /* Intel-defined (#2) */
-               "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-               "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-               NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-               /* VIA/Cyrix/Centaur-defined */
-               NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-               "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-               /* AMD-defined (#2) */
-               "lahf_lm", "cmp_legacy", "svm", "extapic",
-               "cr8_legacy", "abm", "sse4a", "misalignsse",
-               "3dnowprefetch", "osvw", "ibs", "sse5",
-               "skinit", "wdt", NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-               /* Auxiliary (Linux-defined) */
-               "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-       };
-       static const char *const x86_power_flags[] = {
-               "ts",   /* temperature sensor */
-               "fid",  /* frequency id control */
-               "vid",  /* voltage id control */
-               "ttp",  /* thermal trip */
-               "tm",
-               "stc",
-               "100mhzsteps",
-               "hwpstate",
-               "",     /* tsc invariant mapped to constant_tsc */
-               /* nothing */
-       };
-
-
-#ifdef CONFIG_SMP
-       cpu = c->cpu_index;
-#endif
-
-       seq_printf(m, "processor\t: %u\n"
-                  "vendor_id\t: %s\n"
-                  "cpu family\t: %d\n"
-                  "model\t\t: %d\n"
-                  "model name\t: %s\n",
-                  (unsigned)cpu,
-                  c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
-                  c->x86,
-                  (int)c->x86_model,
-                  c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
-       if (c->x86_mask || c->cpuid_level >= 0)
-               seq_printf(m, "stepping\t: %d\n", c->x86_mask);
-       else
-               seq_printf(m, "stepping\t: unknown\n");
-
-       if (cpu_has(c, X86_FEATURE_TSC)) {
-               unsigned int freq = cpufreq_quick_get((unsigned)cpu);
-
-               if (!freq)
-                       freq = cpu_khz;
-               seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-                          freq / 1000, (freq % 1000));
-       }
-
-       /* Cache size */
-       if (c->x86_cache_size >= 0)
-               seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-
-#ifdef CONFIG_SMP
-       if (smp_num_siblings * c->x86_max_cores > 1) {
-               seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-               seq_printf(m, "siblings\t: %d\n",
-                              cpus_weight(per_cpu(cpu_core_map, cpu)));
-               seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
-               seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
-       }
-#endif
-
-       seq_printf(m,
-                  "fpu\t\t: yes\n"
-                  "fpu_exception\t: yes\n"
-                  "cpuid level\t: %d\n"
-                  "wp\t\t: yes\n"
-                  "flags\t\t:",
-                  c->cpuid_level);
-
-       for (i = 0; i < 32*NCAPINTS; i++)
-               if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
-                       seq_printf(m, " %s", x86_cap_flags[i]);
-
-       seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
-                  c->loops_per_jiffy/(500000/HZ),
-                  (c->loops_per_jiffy/(5000/HZ)) % 100);
-
-       if (c->x86_tlbsize > 0)
-               seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
-       seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
-       seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
-
-       seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
-                  c->x86_phys_bits, c->x86_virt_bits);
-
-       seq_printf(m, "power management:");
-       for (i = 0; i < 32; i++) {
-               if (c->x86_power & (1 << i)) {
-                       if (i < ARRAY_SIZE(x86_power_flags) &&
-                           x86_power_flags[i])
-                               seq_printf(m, "%s%s",
-                                          x86_power_flags[i][0]?" ":"",
-                                          x86_power_flags[i]);
-                       else
-                               seq_printf(m, " [%d]", i);
-               }
-       }
-
-       seq_printf(m, "\n\n");
-
-       return 0;
+       BUG_ON(c == &boot_cpu_data);
+       identify_cpu(c);
+       mtrr_ap_init();
 }
 
-static void *c_start(struct seq_file *m, loff_t *pos)
+static __init int setup_noclflush(char *arg)
 {
-       if (*pos == 0)  /* just in case, cpu 0 is not the first */
-               *pos = first_cpu(cpu_online_map);
-       if ((*pos) < NR_CPUS && cpu_online(*pos))
-               return &cpu_data(*pos);
-       return NULL;
+       setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+       return 1;
 }
+__setup("noclflush", setup_noclflush);
 
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
-       *pos = next_cpu(*pos, cpu_online_map);
-       return c_start(m, pos);
+       if (c->x86_model_id[0])
+               printk(KERN_CONT "%s", c->x86_model_id);
+
+       if (c->x86_mask || c->cpuid_level >= 0)
+               printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+       else
+               printk(KERN_CONT "\n");
 }
 
-static void c_stop(struct seq_file *m, void *v)
+static __init int setup_disablecpuid(char *arg)
 {
+       int bit;
+       if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+               setup_clear_cpu_cap(bit);
+       else
+               return 0;
+       return 1;
 }
-
-struct seq_operations cpuinfo_op = {
-       .start = c_start,
-       .next = c_next,
-       .stop = c_stop,
-       .show = show_cpuinfo,
-};
+__setup("clearcpuid=", setup_disablecpuid);