Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes...
[safe/jmp/linux-2.6] / arch / x86 / xen / enlighten.c
index 80f4c53..2b26dd5 100644 (file)
 #include <linux/delay.h>
 #include <linux/start_kernel.h>
 #include <linux/sched.h>
+#include <linux/kprobes.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/page-flags.h>
 #include <linux/highmem.h>
 #include <linux/console.h>
+#include <linux/pci.h>
 
+#include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 #include <xen/interface/physdev.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
 #include <asm/msr-index.h>
+#include <asm/traps.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/stackprotector.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -135,24 +140,23 @@ static void xen_vcpu_setup(int cpu)
  */
 void xen_vcpu_restore(void)
 {
-       if (have_vcpu_info_placement) {
-               int cpu;
+       int cpu;
 
-               for_each_online_cpu(cpu) {
-                       bool other_cpu = (cpu != smp_processor_id());
+       for_each_online_cpu(cpu) {
+               bool other_cpu = (cpu != smp_processor_id());
 
-                       if (other_cpu &&
-                           HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
-                               BUG();
+               /* Take remote vcpus offline while their state is re-registered. */
+               if (other_cpu &&
+                   HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+                       BUG();
 
-                       xen_vcpu_setup(cpu);
+               /* Re-register this vcpu's runstate area with the hypervisor. */
+               xen_setup_runstate_info(cpu);
 
-                       if (other_cpu &&
-                           HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
-                               BUG();
-               }
+               /* vcpu_info placement is optional; only redo it when in use. */
+               if (have_vcpu_info_placement)
+                       xen_vcpu_setup(cpu);
 
-               BUG_ON(!have_vcpu_info_placement);
+               if (other_cpu &&
+                   HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+                       BUG();
        }
 }
 
@@ -175,6 +179,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
                      unsigned int *cx, unsigned int *dx)
 {
+       /* A mask of ~0 leaves the corresponding output register untouched. */
+       unsigned maskebx = ~0;
        unsigned maskecx = ~0;
        unsigned maskedx = ~0;
 
@@ -182,9 +187,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
         * Mask out inconvenient features, to try and disable as many
         * unsupported kernel subsystems as possible.
         */
-       if (*ax == 1) {
+       switch (*ax) {
+       case 1:
                maskecx = cpuid_leaf1_ecx_mask;
                maskedx = cpuid_leaf1_edx_mask;
+               break;
+
+       case 0xb:
+               /* Suppress extended topology stuff */
+               maskebx = 0;
+               break;
        }
 
        asm(XEN_EMULATE_PREFIX "cpuid"
@@ -194,6 +206,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
                  "=d" (*dx)
                : "0" (*ax), "2" (*cx));
 
+       *bx &= maskebx;
        *cx &= maskecx;
        *dx &= maskedx;
 }
@@ -213,6 +226,7 @@ static __init void xen_init_cpuid_mask(void)
                          (1 << X86_FEATURE_ACPI));  /* disable ACPI */
 
        ax = 1;
+       cx = 0;
        xen_cpuid(&ax, &bx, &cx, &dx);
 
        /* cpuid claims we support xsave; try enabling it to see what happens */
@@ -327,18 +341,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
        unsigned long frames[pages];
        int f;
 
-       /* A GDT can be up to 64k in size, which corresponds to 8192
-          8-byte entries, or 16 4k pages.. */
+       /*
+        * A GDT can be up to 64k in size, which corresponds to 8192
+        * 8-byte entries, or 16 4k pages..
+        */
 
        BUG_ON(size > 65536);
        BUG_ON(va & ~PAGE_MASK);
 
        for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
                int level;
-               pte_t *ptep = lookup_address(va, &level);
+               pte_t *ptep;
                unsigned long pfn, mfn;
                void *virt;
 
+               /*
+                * The GDT is per-cpu and is in the percpu data area.
+                * That can be virtually mapped, so we need to do a
+                * page-walk to get the underlying MFN for the
+                * hypercall.  The page can also be in the kernel's
+                * linear range, so we need to RO that mapping too.
+                */
+               ptep = lookup_address(va, &level);
                BUG_ON(ptep == NULL);
 
                pfn = pte_pfn(*ptep);
@@ -355,6 +379,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
                BUG();
 }
 
+/*
+ * load_gdt for early boot, when the gdt is only mapped once
+ */
+static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+{
+       unsigned long va = dtr->address;
+       unsigned int size = dtr->size + 1;
+       unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+       unsigned long frames[pages];
+       int f;
+
+       /*
+        * A GDT can be up to 64k in size, which corresponds to 8192
+        * 8-byte entries, or 16 4k pages..
+        */
+
+       BUG_ON(size > 65536);
+       BUG_ON(va & ~PAGE_MASK);
+
+       for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
+               pte_t pte;
+               unsigned long pfn, mfn;
+
+               /*
+                * At early boot the GDT is in the kernel linear map,
+                * so the pfn can be computed directly -- no pagetable
+                * walk (as in xen_load_gdt) is needed.
+                */
+               pfn = virt_to_pfn(va);
+               mfn = pfn_to_mfn(pfn);
+
+               /* Remap the frame read-only before registering it as GDT. */
+               pte = pfn_pte(pfn, PAGE_KERNEL_RO);
+
+               if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+                       BUG();
+
+               frames[f] = mfn;
+       }
+
+       if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+               BUG();
+}
+
 static void load_TLS_descriptor(struct thread_struct *t,
                                unsigned int cpu, unsigned int i)
 {
@@ -428,16 +490,49 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
                            struct trap_info *info)
 {
-       if (val->type != 0xf && val->type != 0xe)
+       unsigned long addr;
+
+       /* Only trap and interrupt gates can be converted; skip the rest. */
+       if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
                return 0;
 
        info->vector = vector;
-       info->address = gate_offset(*val);
+
+       addr = gate_offset(*val);
+#ifdef CONFIG_X86_64
+       /*
+        * Look for known traps using IST, and substitute them
+        * appropriately.  The debugger ones are the only ones we care
+        * about.  Xen will handle faults like double_fault and
+        * machine_check, so we should never see them.  Warn if
+        * there's an unexpected IST-using fault handler.
+        */
+       if (addr == (unsigned long)debug)
+               addr = (unsigned long)xen_debug;
+       else if (addr == (unsigned long)int3)
+               addr = (unsigned long)xen_int3;
+       else if (addr == (unsigned long)stack_segment)
+               addr = (unsigned long)xen_stack_segment;
+       else if (addr == (unsigned long)double_fault ||
+                addr == (unsigned long)nmi) {
+               /* Don't need to handle these */
+               return 0;
+#ifdef CONFIG_X86_MCE
+       } else if (addr == (unsigned long)machine_check) {
+               return 0;
+#endif
+       } else {
+               /* Some other trap using IST? */
+               if (WARN_ON(val->ist != 0))
+                       return 0;
+       }
+#endif /* CONFIG_X86_64 */
+       info->address = addr;
+
        info->cs = gate_segment(*val);
        info->flags = val->dpl;
        /* interrupt gates clear IF */
-       if (val->type == 0xe)
-               info->flags |= 4;
+       if (val->type == GATE_INTERRUPT)
+               info->flags |= 1 << 2;
 
        return 1;
 }
@@ -545,6 +640,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
        preempt_enable();
 }
 
+/*
+ * Version of write_gdt_entry for use at early boot-time needed to
+ * update an entry as simply as possible.
+ */
+static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
+                                           const void *desc, int type)
+{
+       switch (type) {
+       case DESC_LDT:
+       case DESC_TSS:
+               /* ignore */
+               break;
+
+       default: {
+               xmaddr_t maddr = virt_to_machine(&dt[entry]);
+
+               /* If the hypercall fails, fall back to a direct write. */
+               if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
+                       dt[entry] = *(struct desc_struct *)desc;
+       }
+
+       }
+}
+
 static void xen_load_sp0(struct tss_struct *tss,
                         struct thread_struct *thread)
 {
@@ -623,10 +741,26 @@ static void xen_clts(void)
        xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+/* Per-cpu cache of the last cr0 value; 0 means "not cached yet". */
+static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
+
+static unsigned long xen_read_cr0(void)
+{
+       unsigned long cr0 = percpu_read(xen_cr0_value);
+
+       if (unlikely(cr0 == 0)) {
+               /* Cache miss: read the real cr0 and remember it. */
+               cr0 = native_read_cr0();
+               percpu_write(xen_cr0_value, cr0);
+       }
+
+       return cr0;
+}
+
 static void xen_write_cr0(unsigned long cr0)
 {
        struct multicall_space mcs;
 
+       percpu_write(xen_cr0_value, cr0);
+
        /* Only pay attention to cr0.TS; everything else is
           ignored. */
        mcs = xen_mc_entry(0);
@@ -662,7 +796,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
        set:
                base = ((u64)high << 32) | low;
                if (HYPERVISOR_set_segment_base(which, base) != 0)
-                       ret = -EFAULT;
+                       ret = -EIO;
                break;
 #endif
 
@@ -788,19 +922,9 @@ static const struct pv_info xen_info __initdata = {
 
 static const struct pv_init_ops xen_init_ops __initdata = {
        .patch = xen_patch,
-
-       .banner = xen_banner,
-       .memory_setup = xen_memory_setup,
-       .arch_setup = xen_arch_setup,
-       .post_allocator_init = xen_post_allocator_init,
+       /* Remaining setup hooks are now installed via x86_init in xen_start_kernel. */
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
-       .time_init = xen_time_init,
-
-       .set_wallclock = xen_set_wallclock,
-       .get_wallclock = xen_get_wallclock,
-       .get_tsc_khz = xen_tsc_khz,
+       /* Clock/wallclock hooks moved to x86_init/x86_platform in xen_start_kernel. */
        .sched_clock = xen_sched_clock,
 };
 
@@ -812,7 +936,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
        .clts = xen_clts,
 
-       .read_cr0 = native_read_cr0,
+       .read_cr0 = xen_read_cr0,
        .write_cr0 = xen_write_cr0,
 
        .read_cr4 = native_read_cr4,
@@ -866,8 +990,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
-       .setup_boot_clock = paravirt_nop,
-       .setup_secondary_clock = paravirt_nop,
+       /* Clock-setup hooks dropped; replaced by setup_percpu_clockev noops in xen_start_kernel. */
        .startup_ipi_hook = paravirt_nop,
 #endif
 };
@@ -913,6 +1035,23 @@ static const struct machine_ops __initdata xen_machine_ops = {
        .emergency_restart = xen_emergency_restart,
 };
 
+/*
+ * Set up the GDT and segment registers for -fstack-protector.  Until
+ * we do this, we have to be careful not to call any stack-protected
+ * function, which is most of the kernel.
+ */
+static void __init xen_setup_stackprotector(void)
+{
+       /* Temporarily switch to the simplified early-boot GDT ops. */
+       pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
+       pv_cpu_ops.load_gdt = xen_load_gdt_boot;
+
+       setup_stack_canary_segment(0);
+       switch_to_new_gdt(0);
+
+       /* Restore the normal runtime GDT ops. */
+       pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
+       pv_cpu_ops.load_gdt = xen_load_gdt;
+}
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -923,20 +1062,54 @@ asmlinkage void __init xen_start_kernel(void)
 
        xen_domain_type = XEN_PV_DOMAIN;
 
-       BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
-
-       xen_setup_features();
-
        /* Install Xen paravirt ops */
        pv_info = xen_info;
        pv_init_ops = xen_init_ops;
        pv_time_ops = xen_time_ops;
        pv_cpu_ops = xen_cpu_ops;
        pv_apic_ops = xen_apic_ops;
-       pv_mmu_ops = xen_mmu_ops;
 
-       xen_init_irq_ops();
+       x86_init.resources.memory_setup = xen_memory_setup;
+       x86_init.oem.arch_setup = xen_arch_setup;
+       x86_init.oem.banner = xen_banner;
+
+       x86_init.timers.timer_init = xen_time_init;
+       x86_init.timers.setup_percpu_clockev = x86_init_noop;
+       x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+
+       x86_platform.calibrate_tsc = xen_tsc_khz;
+       x86_platform.get_wallclock = xen_get_wallclock;
+       x86_platform.set_wallclock = xen_set_wallclock;
+
+       /*
+        * Set up some pagetable state before starting to set any ptes.
+        */
+
+       xen_init_mmu_ops();
+
+       /* Prevent unwanted bits from being set in PTEs. */
+       __supported_pte_mask &= ~_PAGE_GLOBAL;
+       if (!xen_initial_domain())
+               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
+       __supported_pte_mask |= _PAGE_IOMAP;
 
+       /* Work out if we support NX */
+       x86_configure_nx();
+
+       xen_setup_features();
+
+       /* Get mfn list */
+       if (!xen_feature(XENFEAT_auto_translated_physmap))
+               xen_build_dynamic_phys_to_machine();
+
+       /*
+        * Set up kernel GDT and segment registers, mainly so that
+        * -fstack-protector code can be executed.
+        */
+       xen_setup_stackprotector();
+
+       xen_init_irq_ops();
        xen_init_cpuid_mask();
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -953,13 +1126,6 @@ asmlinkage void __init xen_start_kernel(void)
 
        machine_ops = xen_machine_ops;
 
-#ifdef CONFIG_X86_64
-       /*
-        * Setup percpu state.  We only need to do this for 64-bit
-        * because 32-bit already has %fs set properly.
-        */
-       load_percpu_segment(0);
-#endif
        /*
         * The only reliable way to retain the initial address of the
         * percpu gdt_page is to remember it here, so we can go and
@@ -969,22 +1135,8 @@ asmlinkage void __init xen_start_kernel(void)
 
        xen_smp_init();
 
-       /* Get mfn list */
-       if (!xen_feature(XENFEAT_auto_translated_physmap))
-               xen_build_dynamic_phys_to_machine();
-
        pgd = (pgd_t *)xen_start_info->pt_base;
 
-       /* Prevent unwanted bits from being set in PTEs. */
-       __supported_pte_mask &= ~_PAGE_GLOBAL;
-       if (!xen_initial_domain())
-               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
-#ifdef CONFIG_X86_64
-       /* Work out if we support NX */
-       check_efer();
-#endif
-
        /* Don't do the full vcpu_info placement stuff until we have a
           possible map and a non-dummy shared_info. */
        per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1010,6 +1162,7 @@ asmlinkage void __init xen_start_kernel(void)
        /* set up basic CPUID stuff */
        cpu_detect(&new_cpu_data);
        new_cpu_data.hard_math = 1;
+       new_cpu_data.wp_works_ok = 1;
        new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
 
@@ -1024,10 +1177,16 @@ asmlinkage void __init xen_start_kernel(void)
                add_preferred_console("xenboot", 0, NULL);
                add_preferred_console("tty", 0, NULL);
                add_preferred_console("hvc", 0, NULL);
+       } else {
+               /* Make sure ACS will be enabled */
+               pci_request_acs();
        }
 
        xen_raw_console_write("about to get started...\n");
 
+       xen_setup_runstate_info(0);
+
        /* Start the world */
 #ifdef CONFIG_X86_32
        i386_start_kernel();