Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[safe/jmp/linux-2.6] / arch / x86 / kernel / setup.c
index 172a83e..c461f6d 100644 (file)
 #include <linux/slab.h>
 #include <linux/user.h>
 #include <linux/delay.h>
-#include <linux/highmem.h>
 
 #include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/iscsi_ibft.h>
-#include <linux/kexec.h>
 #include <linux/cpufreq.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/dma.h>
+#include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 
 #include <mach_apic.h>
-#ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
-#else
-#define ARCH_SETUP
-#endif
+#include <asm/hypervisor.h>
 
 #include <asm/percpu.h>
-#include <asm/sections.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
-#ifdef CONFIG_X86_32
-#include <asm/highmem.h>
+
+#ifndef ARCH_SETUP
+#define ARCH_SETUP
 #endif
 
 #ifndef CONFIG_DEBUG_BOOT_PARAMS
@@ -231,6 +225,9 @@ unsigned long saved_video_mode;
 #define RAMDISK_LOAD_FLAG              0x4000
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -307,7 +304,7 @@ static void __init relocate_initrd(void)
                if (clen > MAX_MAP_CHUNK-slop)
                        clen = MAX_MAP_CHUNK-slop;
                mapaddr = ramdisk_image & PAGE_MASK;
-               p = early_ioremap(mapaddr, clen+slop);
+               p = early_memremap(mapaddr, clen+slop);
                memcpy(q, p+slop, clen);
                early_iounmap(p, clen+slop);
                q += clen;
@@ -384,7 +381,7 @@ static void __init parse_setup_data(void)
                return;
        pa_data = boot_params.hdr.setup_data;
        while (pa_data) {
-               data = early_ioremap(pa_data, PAGE_SIZE);
+               data = early_memremap(pa_data, PAGE_SIZE);
                switch (data->type) {
                case SETUP_E820_EXT:
                        parse_e820_ext(data, pa_data);
@@ -392,19 +389,89 @@ static void __init parse_setup_data(void)
                default:
                        break;
                }
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-               free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
                pa_data = data->next;
                early_iounmap(data, PAGE_SIZE);
        }
 }
 
+/*
+ * Walk the setup_data list rooted at boot_params.hdr.setup_data and convert
+ * each node's range (header plus data->len payload bytes) in the e820 map
+ * from E820_RAM to E820_RESERVED_KERN.
+ *
+ * Does nothing when boot_params.hdr.version is below 0x0209.  If at least
+ * one node was visited, the map is sanitized, copied into e820_saved and
+ * printed.
+ */
+static void __init e820_reserve_setup_data(void)
+{
+       struct setup_data *node;
+       u64 pa_cur, pa_next;
+       int seen = 0;
+
+       if (boot_params.hdr.version < 0x0209)
+               return;
+
+       for (pa_cur = boot_params.hdr.setup_data; pa_cur; pa_cur = pa_next) {
+               /* only the node header is mapped; len and next live in it */
+               node = early_memremap(pa_cur, sizeof(*node));
+               e820_update_range(pa_cur, sizeof(*node) + node->len,
+                                 E820_RAM, E820_RESERVED_KERN);
+               seen = 1;
+               /* read the link before the temporary mapping is dropped */
+               pa_next = node->next;
+               early_iounmap(node, sizeof(*node));
+       }
+
+       if (!seen)
+               return;
+
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+       memcpy(&e820_saved, &e820, sizeof(struct e820map));
+       printk(KERN_INFO "extended physical RAM map:\n");
+       e820_print_map("reserve setup_data");
+}
+
+/*
+ * Walk the setup_data list rooted at boot_params.hdr.setup_data and pass
+ * each node's physical range, [pa, pa + header + data->len), to
+ * reserve_early() under the label "setup data <type in hex>".
+ *
+ * Does nothing when boot_params.hdr.version is below 0x0209.
+ */
+static void __init reserve_early_setup_data(void)
+{
+       struct setup_data *data;
+       u64 pa_data;
+       char buf[32];
+
+       if (boot_params.hdr.version < 0x0209)
+               return;
+       pa_data = boot_params.hdr.setup_data;
+       while (pa_data) {
+               /* only the node header is mapped; type, len and next live in it */
+               data = early_memremap(pa_data, sizeof(*data));
+               /* bounded write: the label can never overrun buf */
+               snprintf(buf, sizeof(buf), "setup data %x", data->type);
+               reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+               /* read the link before the temporary mapping is dropped */
+               pa_data = data->next;
+               early_iounmap(data, sizeof(*data));
+       }
+}
+
 /*
  * --------- Crashkernel reservation ------------------------------
  */
 
 #ifdef CONFIG_KEXEC
+
+/**
+ * find_and_reserve_crashkernel - reserve crashkernel memory at any free offset
+ * @size: Size of the crashkernel memory to reserve.
+ *
+ * Asks find_e820_area() for a 16M-aligned candidate and tries to reserve it
+ * exclusively in bootmem; when the reservation fails, the search resumes one
+ * alignment step past the rejected candidate.
+ *
+ * Returns the base address on success, and -1ULL on failure.
+ */
+static unsigned long long __init
+find_and_reserve_crashkernel(unsigned long long size)
+{
+       const unsigned long long align = 16<<20;        /* 16M */
+       unsigned long long base = 0LL;
+       int err;
+
+       for (;;) {
+               base = find_e820_area(base, ULONG_MAX, size, align);
+               if (base == -1ULL)
+                       break;          /* nothing left to try */
+
+               /* try to reserve it */
+               err = reserve_bootmem_generic(base, size, BOOTMEM_EXCLUSIVE);
+               if (err >= 0)
+                       break;          /* reserved: base is the answer */
+
+               base += align;
+       }
+
+       return base;
+}
+
 static inline unsigned long long get_total_mem(void)
 {
        unsigned long long total;
@@ -427,30 +494,36 @@ static void __init reserve_crashkernel(void)
 
        ret = parse_crashkernel(boot_command_line, total_mem,
                        &crash_size, &crash_base);
-       if (ret == 0 && crash_size > 0) {
-               if (crash_base <= 0) {
-                       printk(KERN_INFO "crashkernel reservation failed - "
-                                       "you have to specify a base address\n");
+       if (ret != 0 || crash_size <= 0)
+               return;
+
+       /* 0 means: find the address automatically */
+       if (crash_base <= 0) {
+               crash_base = find_and_reserve_crashkernel(crash_size);
+               if (crash_base == -1ULL) {
+                       pr_info("crashkernel reservation failed. "
+                               "No suitable area found.\n");
                        return;
                }
-
-               if (reserve_bootmem_generic(crash_base, crash_size,
-                                       BOOTMEM_EXCLUSIVE) < 0) {
-                       printk(KERN_INFO "crashkernel reservation failed - "
-                                       "memory is in use\n");
+       } else {
+               ret = reserve_bootmem_generic(crash_base, crash_size,
+                                       BOOTMEM_EXCLUSIVE);
+               if (ret < 0) {
+                       pr_info("crashkernel reservation failed - "
+                               "memory is in use\n");
                        return;
                }
+       }
 
-               printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-                               "for crashkernel (System RAM: %ldMB)\n",
-                               (unsigned long)(crash_size >> 20),
-                               (unsigned long)(crash_base >> 20),
-                               (unsigned long)(total_mem >> 20));
+       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                       "for crashkernel (System RAM: %ldMB)\n",
+                       (unsigned long)(crash_size >> 20),
+                       (unsigned long)(crash_base >> 20),
+                       (unsigned long)(total_mem >> 20));
 
-               crashk_res.start = crash_base;
-               crashk_res.end   = crash_base + crash_size - 1;
-               insert_resource(&iomem_resource, &crashk_res);
-       }
+       crashk_res.start = crash_base;
+       crashk_res.end   = crash_base + crash_size - 1;
+       insert_resource(&iomem_resource, &crashk_res);
 }
 #else
 static void __init reserve_crashkernel(void)
@@ -491,7 +564,13 @@ static void __init reserve_standard_io_resources(void)
 
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel. This option will be passed
  * by kexec loader to the capture kernel.
@@ -507,6 +586,58 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
+/*
+ * Default update_genapic quirk.  On CONFIG_X86_SMP builds that are also
+ * CONFIG_X86_GENERICARCH or CONFIG_X86_64 it points genapic->wakeup_cpu at
+ * wakeup_secondary_cpu_via_init; on every other configuration it does
+ * nothing.  Always returns 0.
+ */
+static int __init default_update_genapic(void)
+{
+#if defined(CONFIG_X86_SMP) && \
+       (defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64))
+       genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
+#endif
+       return 0;
+}
+
+/* Quirk set whose only populated hook is default_update_genapic(). */
+static struct x86_quirks default_x86_quirks __initdata = {
+       .update_genapic         = default_update_genapic,
+};
+
+/* Quirk set pointer; initialized to the defaults above. */
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+
+#ifdef CONFIG_X86_RESERVE_LOW_64K
+/*
+ * DMI callback: log which table entry matched, then convert the range
+ * starting at 0 with size 0x10000 (the low 64K) in the e820 map from
+ * E820_RAM to E820_RESERVED and re-sanitize the map.  Always returns 0.
+ */
+static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
+{
+       const unsigned long low_start = 0;
+       const unsigned long low_size = 0x10000;
+
+       printk(KERN_NOTICE
+               "%s detected: BIOS may corrupt low RAM, working around it.\n",
+               d->ident);
+
+       e820_update_range(low_start, low_size, E820_RAM, E820_RESERVED);
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
+       return 0;
+}
+#endif
+
+/*
+ * List of systems that have known low memory corruption BIOS problems.
+ *
+ * Each entry matches on the DMI_BIOS_VENDOR string and names
+ * dmi_low_memory_corruption() as its callback.  The entries are compiled in
+ * only under CONFIG_X86_RESERVE_LOW_64K; without it the table holds just the
+ * final empty entry.
+ */
+static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
+#ifdef CONFIG_X86_RESERVE_LOW_64K
+       {
+               .callback = dmi_low_memory_corruption,
+               .ident = "AMI BIOS",
+               .matches = {
+                       DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+               },
+       },
+       {
+               .callback = dmi_low_memory_corruption,
+               .ident = "Phoenix BIOS",
+               .matches = {
+                       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
+               },
+       },
+#endif
+       {}
+};
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -524,14 +655,18 @@ void __init setup_arch(char **cmdline_p)
 {
 #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+       visws_early_detect();
        pre_setup_arch_hook();
-       early_cpu_init();
-       early_ioremap_init();
-       reserve_setup_data();
 #else
        printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
 
+       /* VMI may relocate the fixmap; do this before touching ioremap area */
+       vmi_init();
+
+       early_cpu_init();
+       early_ioremap_init();
+
        ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
        screen_info = boot_params.screen_info;
        edid_info = boot_params.edid_info;
@@ -569,6 +704,10 @@ void __init setup_arch(char **cmdline_p)
        ARCH_SETUP
 
        setup_memory_map();
+       parse_setup_data();
+       /* update the e820_saved too */
+       e820_reserve_setup_data();
+
        copy_edd();
 
        if (!boot_params.hdr.root_flags)
@@ -589,36 +728,60 @@ void __init setup_arch(char **cmdline_p)
        bss_resource.start = virt_to_phys(&__bss_start);
        bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
-#ifdef CONFIG_X86_64
-       early_cpu_init();
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+       strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+       if (builtin_cmdline[0]) {
+               /* append boot loader cmdline to builtin */
+               strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+               strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+               strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+       }
 #endif
+#endif
+
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
-       parse_setup_data();
-
        parse_early_param();
 
+#ifdef CONFIG_X86_64
+       check_efer();
+#endif
+
+       /* Must be before kernel pagetables are setup */
+       vmi_activate();
+
+       /* after early param, so could get panic from serial */
+       reserve_early_setup_data();
+
        if (acpi_mps_check()) {
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifdef CONFIG_X86_32
-               enable_local_apic = -1;
-#else
                disable_apic = 1;
 #endif
-#endif
-               clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+               setup_clear_cpu_cap(X86_FEATURE_APIC);
        }
 
+#ifdef CONFIG_PCI
+       if (pci_early_dump_regs)
+               early_dump_pci_devices();
+#endif
+
        finish_e820_parsing();
 
+       dmi_scan_machine();
+
+       dmi_check_system(bad_bios_dmi_table);
+
+       /*
+        * VMware detection requires dmi to be available, so this
+        * needs to be done after dmi_scan_machine, for the BP.
+        */
+       init_hypervisor(&boot_cpu_data);
+
 #ifdef CONFIG_X86_32
        probe_roms();
-#else
-# ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-       if (init_ohci1394_dma_early)
-               init_ohci1394_dma_on_all_controllers();
-# endif
 #endif
 
        /* after parse_early_param, so could debug it */
@@ -641,22 +804,18 @@ void __init setup_arch(char **cmdline_p)
        early_gart_iommu_check();
 #endif
 
-       e820_register_active_regions(0, 0, -1UL);
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       max_pfn = e820_end_of_ram();
+       max_pfn = e820_end_of_ram_pfn();
 
        /* preallocate 4k for mptable mpc */
        early_reserve_e820_mpc_new();
        /* update e820 for memory not covered by WB MTRRs */
        mtrr_bp_init();
-       if (mtrr_trim_uncached_memory(max_pfn)) {
-               remove_all_active_ranges();
-               e820_register_active_regions(0, 0, -1UL);
-               max_pfn = e820_end_of_ram();
-       }
+       if (mtrr_trim_uncached_memory(max_pfn))
+               max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
        /* max_low_pfn get updated here */
@@ -664,16 +823,44 @@ void __init setup_arch(char **cmdline_p)
 #else
        num_physpages = max_pfn;
 
-       check_efer();
+       if (cpu_has_x2apic)
+               check_x2apic();
 
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
-       max_low_pfn = max_pfn;
+       if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+               max_low_pfn = e820_end_of_low_ram_pfn();
+       else
+               max_low_pfn = max_pfn;
+
        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+       setup_bios_corruption_check();
+#endif
+
        /* max_pfn_mapped is updated here */
-       max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+       max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+       max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+       if (max_pfn > max_low_pfn) {
+               max_pfn_mapped = init_memory_mapping(1UL<<32,
+                                                    max_pfn<<PAGE_SHIFT);
+               /* can we preserve max_low_pfn? */
+               max_low_pfn = max_pfn;
+       }
+#endif
+
+       /*
+        * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
+        */
+
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+       if (init_ohci1394_dma_early)
+               init_ohci1394_dma_on_all_controllers();
+#endif
 
        reserve_initrd();
 
@@ -681,8 +868,6 @@ void __init setup_arch(char **cmdline_p)
        vsmp_init();
 #endif
 
-       dmi_scan_machine();
-
        io_delay_init();
 
        /*
@@ -690,10 +875,7 @@ void __init setup_arch(char **cmdline_p)
         */
        acpi_boot_table_init();
 
-#ifdef CONFIG_X86_64
-       /* Remove active ranges so rediscovery with NUMA-awareness happens */
-       remove_all_active_ranges();
-#endif
+       early_acpi_boot_init();
 
 #ifdef CONFIG_ACPI_NUMA
        /*
@@ -704,10 +886,6 @@ void __init setup_arch(char **cmdline_p)
 
        initmem_init(0, max_pfn);
 
-#ifdef CONFIG_X86_64
-       dma32_reserve_bootmem();
-#endif
-
 #ifdef CONFIG_ACPI_SLEEP
        /*
         * Reserve low memory region for sleep support.
@@ -722,35 +900,30 @@ void __init setup_arch(char **cmdline_p)
 #endif
        reserve_crashkernel();
 
+#ifdef CONFIG_X86_64
+       /*
+        * dma32_reserve_bootmem() allocates bootmem which may conflict
+        * with the crashkernel command line, so do that after
+        * reserve_crashkernel()
+        */
+       dma32_reserve_bootmem();
+#endif
+
        reserve_ibft_region();
 
 #ifdef CONFIG_KVM_CLOCK
        kvmclock_init();
 #endif
 
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
-       /*
-        * Must be after max_low_pfn is determined, and before kernel
-        * pagetables are setup.
-        */
-       vmi_init();
-#endif
-
+       paravirt_pagetable_setup_start(swapper_pg_dir);
        paging_init();
+       paravirt_pagetable_setup_done(swapper_pg_dir);
+       paravirt_post_allocator_init();
 
 #ifdef CONFIG_X86_64
        map_vsyscall();
 #endif
 
-       /*
-        * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
-        */
-
-#if defined(CONFIG_PROVIDE_OHCI1394_DMA_INIT) && defined(CONFIG_X86_32)
-       if (init_ohci1394_dma_early)
-               init_ohci1394_dma_on_all_controllers();
-#endif
-
 #ifdef CONFIG_X86_GENERICARCH
        generic_apic_probe();
 #endif
@@ -762,10 +935,6 @@ void __init setup_arch(char **cmdline_p)
         */
        acpi_boot_init();
 
-#ifdef CONFIG_X86_64
-       init_cpu_to_node();
-#endif
-
 #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
        /*
         * get boot-time SMP configuration:
@@ -774,17 +943,18 @@ void __init setup_arch(char **cmdline_p)
                get_smp_config();
 #endif
 
+       prefill_possible_map();
+
 #ifdef CONFIG_X86_64
+       init_cpu_to_node();
+#endif
+
        init_apic_mappings();
        ioapic_init_mappings();
-#else
-# if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
-       if (def_to_bigsmp)
-               printk(KERN_WARNING "More than 8 CPUs detected and "
-                       "CONFIG_X86_PC cannot handle it.\nUse "
-                       "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-# endif
-#endif
+
+       /* must wait until the io_apic is mapped */
+       probe_nr_irqs_gsi();
+
        kvm_guest_init();
 
        e820_reserve_resources();
@@ -806,3 +976,5 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 }
+
+