#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
-#include <linux/smp.h>
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-DEFINE_PER_CPU(unsigned long, xen_cr3);
+
+/*
+ * Note about cr3 (pagetable base) values:
+ *
+ * xen_cr3 contains the current logical cr3 value; it contains the
+ * last set cr3. This may not be the current effective cr3, because
+ * its update may be being lazily deferred. However, a vcpu looking
+ * at its own cr3 can use this value knowing that everything will
+ * be self-consistent.
+ *
+ * xen_current_cr3 contains the actual vcpu cr3; it is set once the
+ * hypercall to set the vcpu cr3 is complete (so it may be a little
+ * out of date, but it will never be set early). If one vcpu is
+ * looking at another vcpu's cr3 value, it should use this variable.
+ */
+DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
+DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
*
* 0: not available, 1: available
*/
-static int have_vcpu_info_placement = 1;
+static int have_vcpu_info_placement = 0;
static void __init xen_vcpu_setup(int cpu)
{
info.mfn = virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
- printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
+ printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
cpu, vcpup, info.mfn, info.offset);
/* Check to see if the hypervisor will put the vcpu_info
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
}
-static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+ unsigned int *cx, unsigned int *dx)
{
unsigned maskedx = ~0;
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
- if (*eax == 1)
+ if (*ax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
- *edx &= maskedx;
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx));
+ *dx &= maskedx;
}
static void xen_set_debugreg(int reg, unsigned long val)
preempt_enable();
}
-static void xen_load_esp0(struct tss_struct *tss,
+static void xen_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
+ MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
}
#ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(unsigned long reg)
{
return 0;
}
-static void xen_apic_write(unsigned long reg, unsigned long val)
+static void xen_apic_write(unsigned long reg, u32 val)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
return x86_read_percpu(xen_cr3);
}
+static void set_current_cr3(void *v)
+{ /* multicall callback registered by xen_write_cr3(); v is the cr3 value cast to a pointer, never dereferenced */
+ x86_write_percpu(xen_current_cr3, (unsigned long)v); /* undo the cast and record it in the per-cpu xen_current_cr3 */
+}
+
static void xen_write_cr3(unsigned long cr3)
{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+ unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+
BUG_ON(preemptible());
- if (cr3 == x86_read_percpu(xen_cr3)) {
- /* just a simple tlb flush */
- xen_flush_tlb();
- return;
- }
+ mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
+ /* Update while interrupts are disabled, so it's atomic with
+ respect to IPIs */
x86_write_percpu(xen_cr3, cr3);
+ op = mcs.args;
+ op->cmd = MMUEXT_NEW_BASEPTR;
+ op->arg1.mfn = mfn;
- {
- struct mmuext_op *op;
- struct multicall_space mcs = xen_mc_entry(sizeof(*op));
- unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
-
- op = mcs.args;
- op->cmd = MMUEXT_NEW_BASEPTR;
- op->arg1.mfn = mfn;
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+ /* Update xen_current_cr3 once the batch has actually
+ been submitted. */
+ xen_mc_callback(set_current_cr3, (void *)cr3);
- xen_mc_issue(PARAVIRT_LAZY_CPU);
- }
+ xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
}
/* Early in boot, while setting up the initial pagetable, assume
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
}
+static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
+{ /* issue one mmuext_op on the page at pfn; despite the name, callers also pass MMUEXT_UNPIN_TABLE */
+ struct mmuext_op op;
+ op.cmd = level; /* level is used directly as the MMUEXT_* command */
+ op.arg1.mfn = pfn_to_mfn(pfn); /* the op's argument is an mfn, so convert from the pfn */
+ if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+ BUG(); /* a non-zero hypercall result is treated as fatal */
+}
+
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
if (PagePinned(virt_to_page(mm->pgd))) {
SetPagePinned(page);
- if (!PageHighMem(page))
+ if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- else
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ } else
/* make sure there are no stray mappings of
this page */
kmap_flush_unused();
struct page *page = pfn_to_page(pfn);
if (PagePinned(page)) {
- if (!PageHighMem(page))
+ if (!PageHighMem(page)) {
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+ }
}
}
/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
{
- struct mmuext_op op;
+ unsigned level;
+
#ifdef CONFIG_X86_PAE
- op.cmd = MMUEXT_PIN_L3_TABLE;
+ level = MMUEXT_PIN_L3_TABLE;
#else
- op.cmd = MMUEXT_PIN_L3_TABLE;
+ level = MMUEXT_PIN_L2_TABLE;
#endif
- op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
- if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
- BUG();
+
+ pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
}
}
.read_pmc = native_read_pmc,
.iret = (void *)&hypercall_page[__HYPERVISOR_iret],
- .irq_enable_sysexit = NULL, /* never called */
+ .irq_enable_syscall_ret = NULL, /* never called */
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.write_ldt_entry = xen_write_ldt_entry,
.write_gdt_entry = xen_write_gdt_entry,
.write_idt_entry = xen_write_idt_entry,
- .load_esp0 = xen_load_esp0,
+ .load_sp0 = xen_load_sp0,
.set_iopl_mask = xen_set_iopl_mask,
.io_delay = xen_io_delay,
};
+static void __init xen_reserve_top(void)
+{ /* pick a 'top' address, asking Xen first, and hand the resulting size to reserve_top_address() */
+ unsigned long top = HYPERVISOR_VIRT_START; /* fallback used when the query below fails */
+ struct xen_platform_parameters pp;
+
+ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
+ top = pp.virt_start; /* pp is only read when the hypercall returned 0 */
+
+ reserve_top_address(-top + 2 * PAGE_SIZE); /* -top on an unsigned long is the span from top to the wrap point; two extra pages are added */
+}
+
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
if (!xen_start_info)
return;
- BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
+ BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
/* Install Xen paravirt ops */
pv_info = xen_info;
/* keep using Xen gdt for now; no urgent need to change it */
x86_write_percpu(xen_cr3, __pa(pgd));
+ x86_write_percpu(xen_current_cr3, __pa(pgd));
#ifdef CONFIG_SMP
/* Don't do the full vcpu_info placement stuff until we have a
pv_info.kernel_rpl = 0;
/* set the limit of our address space */
- reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
+ xen_reserve_top();
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
new_cpu_data.x86_capability[0] = cpuid_edx(1);
/* Poke various useful things into boot_params */
- LOADER_TYPE = (9 << 4) | 0;
- INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0;
- INITRD_SIZE = xen_start_info->mod_len;
+ boot_params.hdr.type_of_loader = (9 << 4) | 0;
+ boot_params.hdr.ramdisk_image = xen_start_info->mod_start
+ ? __pa(xen_start_info->mod_start) : 0;
+ boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
/* Start the world */
start_kernel();