X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fxen%2Fenlighten.c;h=2b26dd5930c6e82e87f0e20505ab5954e6213a83;hb=11bd04f6f35621193311c32e0721142b073a7794;hp=5e4686d70f629de22f81adc96b658ab06216be24;hpb=d05fdf316067cd311d5e7add08da26ded8a58080;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5e4686d..2b26dd5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -20,14 +20,18 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include +#include #include #include #include @@ -41,12 +45,15 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include #include "xen-ops.h" #include "mmu.h" @@ -60,40 +67,13 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; - -#ifdef CONFIG_X86_64 -/* l3 pud for userspace vsyscall mapping */ -static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ - -/* - * Note about cr3 (pagetable base) values: - * - * xen_cr3 contains the current logical cr3 value; it contains the - * last set cr3. This may not be the current effective cr3, because - * its update may be being lazily deferred. However, a vcpu looking - * at its own cr3 can use this value knowing that it everything will - * be self-consistent. - * - * xen_current_cr3 contains the actual vcpu cr3; it is set once the - * hypercall to set the vcpu cr3 is complete (so it may be a little - * out of date, but it will never be set early). If one vcpu is - * looking at another vcpu's cr3 value, it should use this variable. - */ -DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ -DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; +void *xen_initial_gdt; + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -113,14 +93,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; * * 0: not available, 1: available */ -static int have_vcpu_info_placement = -#ifdef CONFIG_X86_32 - 1 -#else - 0 -#endif - ; - +static int have_vcpu_info_placement = 1; static void xen_vcpu_setup(int cpu) { @@ -136,7 +109,7 @@ static void xen_vcpu_setup(int cpu) vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = virt_to_mfn(vcpup); + info.mfn = arbitrary_virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", @@ -167,24 +140,23 @@ static void xen_vcpu_setup(int cpu) */ void xen_vcpu_restore(void) { - if (have_vcpu_info_placement) { - int cpu; + int cpu; - for_each_online_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) - BUG(); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); - xen_vcpu_setup(cpu); + xen_setup_runstate_info(cpu); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) - BUG(); - } + if (have_vcpu_info_placement) + xen_vcpu_setup(cpu); - BUG_ON(!have_vcpu_info_placement); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); } } @@ -201,21 +173,31 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskebx = ~0; + unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ - if (*ax == 1) - maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ - (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + switch (*ax) { + case 1: + maskecx = cpuid_leaf1_ecx_mask; + maskedx = cpuid_leaf1_edx_mask; + break; + + case 0xb: + /* Suppress extended topology stuff */ + maskebx = 0; + break; + } asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -223,9 +205,45 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); + + *bx &= maskebx; + *cx &= maskecx; *dx &= maskedx; } +static __init void xen_init_cpuid_mask(void) +{ + unsigned int ax, bx, cx, dx; + + cpuid_leaf1_edx_mask = + ~((1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + if (!xen_initial_domain()) + cpuid_leaf1_edx_mask &= + ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ + (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + + ax = 1; + cx = 0; + xen_cpuid(&ax, &bx, &cx, &dx); + + /* cpuid claims we support xsave; try enabling it to see what happens */ + if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { + unsigned long cr4; + + set_in_cr4(X86_CR4_OSXSAVE); + + cr4 = read_cr4(); + + if ((cr4 & X86_CR4_OSXSAVE) == 0) + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); + + clear_in_cr4(X86_CR4_OSXSAVE); + } +} + static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -236,10 +254,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy(void) +static void xen_end_context_switch(struct task_struct *next) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); + paravirt_end_context_switch(next); } static unsigned long xen_store_tr(void) @@ -317,37 +335,93 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { - unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; int f; - struct multicall_space mcs; - /* A GDT can be up to 64k in size, which corresponds to 8192 - 8-byte entries, or 16 4k pages.. */ + /* + * A GDT can be up to 64k in size, which corresponds to 8192 + * 8-byte entries, or 16 4k pages.. + */ BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - mcs = xen_mc_entry(sizeof(*frames) * pages); - frames = mcs.args; - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = virt_to_mfn(va); + int level; + pte_t *ptep; + unsigned long pfn, mfn; + void *virt; + + /* + * The GDT is per-cpu and is in the percpu data area. + * That can be virtually mapped, so we need to do a + * page-walk to get the underlying MFN for the + * hypercall. The page can also be in the kernel's + * linear range, so we need to RO that mapping too. + */ + ptep = lookup_address(va, &level); + BUG_ON(ptep == NULL); + + pfn = pte_pfn(*ptep); + mfn = pfn_to_mfn(pfn); + virt = __va(PFN_PHYS(pfn)); + + frames[f] = mfn; + make_lowmem_page_readonly((void *)va); + make_lowmem_page_readonly(virt); } - MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); +} - xen_mc_issue(PARAVIRT_LAZY_CPU); +/* + * load_gdt for early boot, when the gdt is only mapped once + */ +static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) +{ + unsigned long va = dtr->address; + unsigned int size = dtr->size + 1; + unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; + int f; + + /* + * A GDT can be up to 64k in size, which corresponds to 8192 + * 8-byte entries, or 16 4k pages.. + */ + + BUG_ON(size > 65536); + BUG_ON(va & ~PAGE_MASK); + + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { + pte_t pte; + unsigned long pfn, mfn; + + pfn = virt_to_pfn(va); + mfn = pfn_to_mfn(pfn); + + pte = pfn_pte(pfn, PAGE_KERNEL_RO); + + if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) + BUG(); + + frames[f] = mfn; + } + + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); } static void load_TLS_descriptor(struct thread_struct *t, unsigned int cpu, unsigned int i) { struct desc_struct *gdt = get_cpu_gdt_table(cpu); - xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); struct multicall_space mc = __xen_mc_entry(0); MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); @@ -356,13 +430,14 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone, - * it means we're in a context switch, and %gs has just been - * saved. This means we can zero it out to prevent faults on - * exit from the hypervisor if the next process has no %gs. - * Either way, it has been saved, and the new value will get - * loaded properly. This will go away as soon as Xen has been - * modified to not save/restore %gs for normal hypercalls. + * XXX sleazy hack: If we're being called in a lazy-cpu zone + * and lazy gs handling is enabled, it means we're in a + * context switch, and %gs has just been saved. This means we + * can zero it out to prevent faults on exit from the + * hypervisor if the next process has no %gs. Either way, it + * has been saved, and the new value will get loaded properly. + * This will go away as soon as Xen has been modified to not + * save/restore %gs for normal hypercalls. * * On x86_64, this hack is not used for %gs, because gs points * to KERNEL_GS_BASE (and uses it for PDA references), so we @@ -374,7 +449,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) */ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { #ifdef CONFIG_X86_32 - loadsegment(gs, 0); + lazy_load_gs(0); #else loadsegment(fs, 0); #endif @@ -415,16 +490,49 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != 0xf && val->type != 0xe) + unsigned long addr; + + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; - info->address = gate_offset(*val); + + addr = gate_offset(*val); +#ifdef CONFIG_X86_64 + /* + * Look for known traps using IST, and substitute them + * appropriately. The debugger ones are the only ones we care + * about. Xen will handle faults like double_fault and + * machine_check, so we should never see them. Warn if + * there's an unexpected IST-using fault handler. + */ + if (addr == (unsigned long)debug) + addr = (unsigned long)xen_debug; + else if (addr == (unsigned long)int3) + addr = (unsigned long)xen_int3; + else if (addr == (unsigned long)stack_segment) + addr = (unsigned long)xen_stack_segment; + else if (addr == (unsigned long)double_fault || + addr == (unsigned long)nmi) { + /* Don't need to handle these */ + return 0; +#ifdef CONFIG_X86_MCE + } else if (addr == (unsigned long)machine_check) { + return 0; +#endif + } else { + /* Some other trap using IST? */ + if (WARN_ON(val->ist != 0)) + return 0; + } +#endif /* CONFIG_X86_64 */ + info->address = addr; + info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == 0xe) - info->flags |= 4; + if (val->type == GATE_INTERRUPT) + info->flags |= 1 << 2; return 1; } @@ -520,7 +628,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, break; default: { - xmaddr_t maddr = virt_to_machine(&dt[entry]); + xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); xen_mc_flush(); if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) @@ -532,6 +640,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, preempt_enable(); } +/* + * Version of write_gdt_entry for use at early boot-time needed to + * update an entry as simply as possible. + */ +static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, + const void *desc, int type) +{ + switch (type) { + case DESC_LDT: + case DESC_TSS: + /* ignore */ + break; + + default: { + xmaddr_t maddr = virt_to_machine(&dt[entry]); + + if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) + dt[entry] = *(struct desc_struct *)desc; + } + + } +} + static void xen_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -586,110 +717,50 @@ static u32 xen_safe_apic_wait_icr_idle(void) return 0; } -static struct apic_ops xen_basic_apic_ops = { - .read = xen_apic_read, - .write = xen_apic_write, - .icr_read = xen_apic_icr_read, - .icr_write = xen_apic_icr_write, - .wait_icr_idle = xen_apic_wait_icr_idle, - .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, -}; - -#endif - -static void xen_flush_tlb(void) +static void set_xen_basic_apic_ops(void) { - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); + apic->read = xen_apic_read; + apic->write = xen_apic_write; + apic->icr_read = xen_apic_icr_read; + apic->icr_write = xen_apic_icr_write; + apic->wait_icr_idle = xen_apic_wait_icr_idle; + apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; } -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); +#endif - xen_mc_issue(PARAVIRT_LAZY_MMU); - preempt_enable(); -} - -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) +static void xen_clts(void) { - struct { - struct mmuext_op op; - cpumask_t mask; - } *args; - cpumask_t cpumask = *cpus; struct multicall_space mcs; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } + mcs = xen_mc_entry(0); - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); + MULTI_fpu_taskswitch(mcs.mc, 0); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(PARAVIRT_LAZY_CPU); } -static void xen_clts(void) -{ - struct multicall_space mcs; +static DEFINE_PER_CPU(unsigned long, xen_cr0_value); - mcs = xen_mc_entry(0); +static unsigned long xen_read_cr0(void) +{ + unsigned long cr0 = percpu_read(xen_cr0_value); - MULTI_fpu_taskswitch(mcs.mc, 0); + if (unlikely(cr0 == 0)) { + cr0 = native_read_cr0(); + percpu_write(xen_cr0_value, cr0); + } - xen_mc_issue(PARAVIRT_LAZY_CPU); + return cr0; } static void xen_write_cr0(unsigned long cr0) { struct multicall_space mcs; + percpu_write(xen_cr0_value, cr0); + /* Only pay attention to cr0.TS; everything else is ignored. */ mcs = xen_mc_entry(0); @@ -699,21 +770,6 @@ static void xen_write_cr0(unsigned long cr0) xen_mc_issue(PARAVIRT_LAZY_CPU); } -static void xen_write_cr2(unsigned long cr2) -{ - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return x86_read_percpu(xen_vcpu)->arch.cr2; -} - -static unsigned long xen_read_cr2_direct(void) -{ - return x86_read_percpu(xen_vcpu_info.arch.cr2); -} - static void xen_write_cr4(unsigned long cr4) { cr4 &= ~X86_CR4_PGE; @@ -722,78 +778,13 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static unsigned long xen_read_cr3(void) -{ - return x86_read_percpu(xen_cr3); -} - -static void set_current_cr3(void *v) -{ - x86_write_percpu(xen_current_cr3, (unsigned long)v); -} - -static void __xen_write_cr3(bool kernel, unsigned long cr3) -{ - struct mmuext_op *op; - struct multicall_space mcs; - unsigned long mfn; - - if (cr3) - mfn = pfn_to_mfn(PFN_DOWN(cr3)); - else - mfn = 0; - - WARN_ON(mfn == 0 && kernel); - - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; - op->arg1.mfn = mfn; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - if (kernel) { - x86_write_percpu(xen_cr3, cr3); - - /* Update xen_current_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); - } -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - xen_mc_batch(); /* disables interrupts */ - - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - x86_write_percpu(xen_cr3, cr3); - - __xen_write_cr3(true, cr3); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif - - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ -} - static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; ret = 0; - switch(msr) { + switch (msr) { #ifdef CONFIG_X86_64 unsigned which; u64 base; @@ -805,7 +796,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) set: base = ((u64)high << 32) | low; if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EFAULT; + ret = -EIO; break; #endif @@ -828,185 +819,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* Early release_pte assumes that all pts are pinned, since there's - only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) -{ - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - - vm_unmap_aliases(); - if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } - } -} - -static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PTE); -} - -static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PMD); -} - -static int xen_pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = mm->pgd; - int ret = 0; - - BUG_ON(PagePinned(virt_to_page(pgd))); - -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { - user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); - } -#endif - - return ret; -} - -static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifdef CONFIG_X86_64 - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) - free_page((unsigned long)user_pgd); -#endif -} - -/* This should never happen until we're OK to use struct page */ -static void xen_release_ptpage(unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(page)) { - if (!PageHighMem(page)) { - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); - } - ClearPagePinned(page); - } -} - -static void xen_release_pte(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PTE); -} - -static void xen_release_pmd(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PMD); -} - -#if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PUD); -} - -static void xen_release_pud(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PUD); -} -#endif - -#ifdef CONFIG_HIGHPTE -static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) -{ - pgprot_t prot = PAGE_KERNEL; - - if (PagePinned(page)) - prot = PAGE_KERNEL_RO; - - if (0 && PageHighMem(page)) - printk("mapping highpte %lx type %d prot %s\n", - page_to_pfn(page), type, - (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); - - return kmap_atomic_prot(page, type, prot); -} -#endif - -#ifdef CONFIG_X86_32 -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} -#endif - -static __init void xen_pagetable_setup_start(pgd_t *base) -{ -} - void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1027,37 +839,6 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -static __init void xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); -} - -static __init void xen_post_allocator_init(void) -{ - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = xen_set_pgd; -#endif - - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - -#ifdef CONFIG_X86_64 - SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif - xen_mark_init_mm_pinned(); -} - /* This is called once we have the cpu_possible_map */ void xen_setup_vcpu_info_placement(void) { @@ -1071,10 +852,10 @@ void xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - pv_irq_ops.save_fl = xen_save_fl_direct; - pv_irq_ops.restore_fl = xen_restore_fl_direct; - pv_irq_ops.irq_disable = xen_irq_disable_direct; - pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); + pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } } @@ -1132,49 +913,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) -{ - pte_t pte; - - phys >>= PAGE_SHIFT; - - switch (idx) { - case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_F00F_BUG - case FIX_F00F_IDT: -#endif -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: - case FIX_VDSO: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#else - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: -#endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif - pte = pfn_pte(phys, prot); - break; - - default: - pte = mfn_pte(phys, prot); - break; - } - - __native_set_fixmap(idx, pte); - -#ifdef CONFIG_X86_64 - /* Replicate changes to map the vsyscall page into the user - pagetable vsyscall mapping. */ - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); - set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } -#endif -} - static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, @@ -1184,19 +922,9 @@ static const struct pv_info xen_info __initdata = { static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, - - .banner = xen_banner, - .memory_setup = xen_memory_setup, - .arch_setup = xen_arch_setup, - .post_allocator_init = xen_post_allocator_init, }; static const struct pv_time_ops xen_time_ops __initdata = { - .time_init = xen_time_init, - - .set_wallclock = xen_set_wallclock, - .get_wallclock = xen_get_wallclock, - .get_tsc_khz = xen_tsc_khz, .sched_clock = xen_sched_clock, }; @@ -1208,7 +936,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .clts = xen_clts, - .read_cr0 = native_read_cr0, + .read_cr0 = xen_read_cr0, .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, @@ -1256,101 +984,16 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { /* Xen takes care of %gs when switching to usermode for us */ .swapgs = paravirt_nop, - .lazy_mode = { - .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy, - }, + .start_context_switch = paravirt_start_context_switch, + .end_context_switch = xen_end_context_switch, }; static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC - .setup_boot_clock = paravirt_nop, - .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, #endif }; -static const struct pv_mmu_ops xen_mmu_ops __initdata = { - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, - .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = xen_kmap_atomic_pte, -#endif - -#ifdef CONFIG_X86_64 - .set_pte = xen_set_pte, -#else - .set_pte = xen_set_pte_init, -#endif - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - - .pte_val = xen_pte_val, - .pte_flags = native_pte_flags, - .pgd_val = xen_pgd_val, - - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ - .set_pud = xen_set_pud_hyper, - - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, - -#if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, - .set_pgd = xen_set_pgd_hyper, - - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, -#endif /* PAGETABLE_LEVELS == 4 */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, - }, - - .set_fixmap = xen_set_fixmap, -}; - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1392,251 +1035,88 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; - -static void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} - /* - * Like __va(), but returns address in the kernel mapping (which is - * all we have until the physical memory mapping has been set up. + * Set up the GDT and segment registers for -fstack-protector. Until + * we do this, we have to be careful not to call any stack-protected + * function, which is most of the kernel. */ -static void *__ka(phys_addr_t paddr) -{ -#ifdef CONFIG_X86_64 - return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif -} - -/* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) +static void __init xen_setup_stackprotector(void) { - phys_addr_t paddr; + pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; + pv_cpu_ops.load_gdt = xen_load_gdt_boot; - maddr &= PTE_PFN_MASK; - paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + setup_stack_canary_segment(0); + switch_to_new_gdt(0); - return paddr; -} - -/* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) -{ - return __ka(m2p(maddr)); -} - -static void set_page_prot(void *addr, pgprot_t prot) -{ - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; - pte_t pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) - BUG(); + pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; + pv_cpu_ops.load_gdt = xen_load_gdt; } -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - ident_pte = 0; - pfn = 0; - for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} - -#ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for(i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - -/* - * Set up the inital kernel pagetable. - * - * We can construct this by grafting the Xen provided pagetable into - * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - */ -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) +/* First C function to be called on Xen boot */ +asmlinkage void __init xen_start_kernel(void) { - pud_t *l3; - pmd_t *l2; - - /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); - - /* Pre-constructed entries are in pfn, so convert to mfn */ - convert_pfn_mfn(init_level4_pgt); - convert_pfn_mfn(level3_ident_pgt); - convert_pfn_mfn(level3_kernel_pgt); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + pgd_t *pgd; - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + if (!xen_start_info) + return; - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); + xen_domain_type = XEN_PV_DOMAIN; - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + /* Install Xen paravirt ops */ + pv_info = xen_info; + pv_init_ops = xen_init_ops; + pv_time_ops = xen_time_ops; + pv_cpu_ops = xen_cpu_ops; + pv_apic_ops = xen_apic_ops; - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); + x86_init.resources.memory_setup = xen_memory_setup; + x86_init.oem.arch_setup = xen_arch_setup; + x86_init.oem.banner = xen_banner; - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + x86_init.timers.timer_init = xen_time_init; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + x86_cpuinit.setup_percpu_clockev = x86_init_noop; - /* Switch over */ - pgd = init_level4_pgt; + x86_platform.calibrate_tsc = xen_tsc_khz; + x86_platform.get_wallclock = xen_get_wallclock; + x86_platform.set_wallclock = xen_set_wallclock; /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. + * Set up some pagetable state before starting to set any ptes. */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - - reserve_early(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); - - return pgd; -} -#else /* !CONFIG_X86_64 */ -static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; - -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + xen_init_mmu_ops(); - xen_map_identity_early(level2_kernel_pgt, max_pfn); - - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); - - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); - - return swapper_pg_dir; -} -#endif /* CONFIG_X86_64 */ - -/* First C function to be called on Xen boot */ -asmlinkage void __init xen_start_kernel(void) -{ - pgd_t *pgd; - - if (!xen_start_info) - return; + /* Prevent unwanted bits from being set in PTEs. */ + __supported_pte_mask &= ~_PAGE_GLOBAL; + if (!xen_initial_domain()) + __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - xen_domain_type = XEN_PV_DOMAIN; + __supported_pte_mask |= _PAGE_IOMAP; - BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); + /* Work out if we support NX */ + x86_configure_nx(); xen_setup_features(); - /* Install Xen paravirt ops */ - pv_info = xen_info; - pv_init_ops = xen_init_ops; - pv_time_ops = xen_time_ops; - pv_cpu_ops = xen_cpu_ops; - pv_apic_ops = xen_apic_ops; - pv_mmu_ops = xen_mmu_ops; + /* Get mfn list */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + xen_build_dynamic_phys_to_machine(); + + /* + * Set up kernel GDT and segment registers, mainly so that + * -fstack-protector code can be executed. + */ + xen_setup_stackprotector(); xen_init_irq_ops(); + xen_init_cpuid_mask(); #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. */ - apic_ops = &xen_basic_apic_ops; + set_xen_basic_apic_ops(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { @@ -1646,29 +1126,24 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; -#ifdef CONFIG_X86_64 - /* Disable until direct per-cpu data access. */ - have_vcpu_info_placement = 0; - x86_64_init_pda(); -#endif + /* + * The only reliable way to retain the initial address of the + * percpu gdt_page is to remember it here, so we can go and + * mark it RW later, when the initial percpu area is freed. + */ + xen_initial_gdt = &per_cpu(gdt_page, 0); xen_smp_init(); - /* Get mfn list */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - xen_build_dynamic_phys_to_machine(); - pgd = (pgd_t *)xen_start_info->pt_base; - /* Prevent unwanted bits from being set in PTEs. */ - __supported_pte_mask &= ~_PAGE_GLOBAL; - if (!xen_initial_domain()) - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + local_irq_disable(); + early_boot_irqs_off(); + xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); @@ -1687,6 +1162,7 @@ asmlinkage void __init xen_start_kernel(void) /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); new_cpu_data.hard_math = 1; + new_cpu_data.wp_works_ok = 1; new_cpu_data.x86_capability[0] = cpuid_edx(1); #endif @@ -1701,10 +1177,16 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } else { + /* Make sure ACS will be enabled */ + pci_request_acs(); } + xen_raw_console_write("about to get started...\n"); + xen_setup_runstate_info(0); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel();