KVM: Add instruction emulation statistics
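
Extend the debugfs statistics with instruction emulation counters:
"insn_emulation" counts how often the emulator is invoked and
"insn_emulation_fail" how often instruction decoding fails, both
updated in emulate_instruction().  The "light_exits" counter is
renamed to the more descriptive "host_state_reload", and a new
"fpu_reload" counter is bumped in kvm_put_guest_fpu().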
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 2cf7eba..a46b95b 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -16,6 +16,7 @@
 
 #include "kvm.h"
 #include "x86.h"
+#include "x86_emulate.h"
 #include "segment_descriptor.h"
 #include "irq.h"
 
@@ -25,6 +26,7 @@
 #include <linux/module.h>
 
 #include <asm/uaccess.h>
+#include <asm/msr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS                                              \
@@ -42,6 +44,8 @@
 
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
+struct kvm_x86_ops *kvm_x86_ops;
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pf_fixed", STAT_OFFSET(pf_fixed) },
        { "pf_guest", STAT_OFFSET(pf_guest) },
@@ -56,8 +60,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
        { "request_irq", STAT_OFFSET(request_irq_exits) },
        { "irq_exits", STAT_OFFSET(irq_exits) },
-       { "light_exits", STAT_OFFSET(light_exits) },
+       { "host_state_reload", STAT_OFFSET(host_state_reload) },
        { "efer_reload", STAT_OFFSET(efer_reload) },
+       { "fpu_reload", STAT_OFFSET(fpu_reload) },
+       { "insn_emulation", STAT_OFFSET(insn_emulation) },
+       { "insn_emulation_fail", STAT_OFFSET(insn_emulation_fail) },
        { NULL }
 };
 
@@ -560,6 +567,61 @@ out:
        return r;
 }
 
+/*
+ * Make sure that a cpu that is being hot-unplugged does not have any vcpus
+ * cached on it.
+ */
+void decache_vcpus_on_cpu(int cpu)
+{
+       struct kvm *vm;
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       spin_lock(&kvm_lock);
+       list_for_each_entry(vm, &vm_list, vm_list)
+               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+                       vcpu = vm->vcpus[i];
+                       if (!vcpu)
+                               continue;
+                       /*
+                        * If the vcpu is locked, then it is running on some
+                        * other cpu and therefore it is not cached on the
+                        * cpu in question.
+                        *
+                        * If it's not locked, check the last cpu it executed
+                        * on.
+                        */
+                       if (mutex_trylock(&vcpu->mutex)) {
+                               if (vcpu->cpu == cpu) {
+                                       kvm_x86_ops->vcpu_decache(vcpu);
+                                       vcpu->cpu = -1;
+                               }
+                               mutex_unlock(&vcpu->mutex);
+                       }
+               }
+       spin_unlock(&kvm_lock);
+}
+
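+/*
+ * Report whether a KVM extension is supported; userspace probes these
+ * capabilities via the KVM_CHECK_EXTENSION ioctl.
+ */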
+int kvm_dev_ioctl_check_extension(long ext)
+{
+       int r;
+
+       switch (ext) {
+       case KVM_CAP_IRQCHIP:
+       case KVM_CAP_HLT:
+       case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
+       case KVM_CAP_USER_MEMORY:
+       case KVM_CAP_SET_TSS_ADDR:
+               r = 1;
+               break;
+       default:
+               r = 0;
+               break;
+       }
+       return r;
+}
+
 long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
@@ -990,7 +1052,7 @@ out:
        return r;
 }
 
-static __init void kvm_init_msr_list(void)
+static void kvm_init_msr_list(void)
 {
        u32 dummy[2];
        unsigned i, j;
@@ -1321,7 +1383,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                                        get_segment_base(vcpu, VCPU_SREG_FS);
 
                r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops);
+               ++vcpu->stat.insn_emulation;
                if (r)  {
+                       ++vcpu->stat.insn_emulation_fail;
                        if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
                                return EMULATE_DONE;
                        return EMULATE_FAIL;
@@ -1606,11 +1670,36 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
-__init void kvm_arch_init(void)
+int kvm_arch_init(void *opaque)
 {
+       struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
+
        kvm_init_msr_list();
+
+       if (kvm_x86_ops) {
+               printk(KERN_ERR "kvm: already loaded the other module\n");
+               return -EEXIST;
+       }
+
+       if (!ops->cpu_has_kvm_support()) {
+               printk(KERN_ERR "kvm: no hardware support\n");
+               return -EOPNOTSUPP;
+       }
+       if (ops->disabled_by_bios()) {
+               printk(KERN_ERR "kvm: disabled by bios\n");
+               return -EOPNOTSUPP;
+       }
+
+       kvm_x86_ops = ops;
+
+       return 0;
 }
 
+void kvm_arch_exit(void)
+{
+       kvm_x86_ops = NULL;
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
@@ -1785,3 +1874,723 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
        kvm_x86_ops->skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
+
+/*
+ * Check if userspace requested an interrupt window, and that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
+                                         struct kvm_run *kvm_run)
+{
+       return (!vcpu->irq_summary &&
+               kvm_run->request_interrupt_window &&
+               vcpu->interrupt_window_open &&
+               (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
+}
+
+static void post_kvm_run_save(struct kvm_vcpu *vcpu,
+                             struct kvm_run *kvm_run)
+{
+       kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       kvm_run->cr8 = get_cr8(vcpu);
+       kvm_run->apic_base = kvm_get_apic_base(vcpu);
+       if (irqchip_in_kernel(vcpu->kvm))
+               kvm_run->ready_for_interrupt_injection = 1;
+       else
+               kvm_run->ready_for_interrupt_injection =
+                                       (vcpu->interrupt_window_open &&
+                                        vcpu->irq_summary == 0);
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       int r;
+
+       if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
+               pr_debug("vcpu %d received sipi with vector # %x\n",
+                      vcpu->vcpu_id, vcpu->sipi_vector);
+               kvm_lapic_reset(vcpu);
+               r = kvm_x86_ops->vcpu_reset(vcpu);
+               if (r)
+                       return r;
+               vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+       }
+
+preempted:
+       if (vcpu->guest_debug.enabled)
+               kvm_x86_ops->guest_debug_pre(vcpu);
+
+again:
+       r = kvm_mmu_reload(vcpu);
+       if (unlikely(r))
+               goto out;
+
+       kvm_inject_pending_timer_irqs(vcpu);
+
+       preempt_disable();
+
+       kvm_x86_ops->prepare_guest_switch(vcpu);
+       kvm_load_guest_fpu(vcpu);
+
+       local_irq_disable();
+
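+       /*
+        * With interrupts off, bail out to userspace if a signal is pending
+        * rather than entering the guest with it undelivered.
+        */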
+       if (signal_pending(current)) {
+               local_irq_enable();
+               preempt_enable();
+               r = -EINTR;
+               kvm_run->exit_reason = KVM_EXIT_INTR;
+               ++vcpu->stat.signal_exits;
+               goto out;
+       }
+
+       if (irqchip_in_kernel(vcpu->kvm))
+               kvm_x86_ops->inject_pending_irq(vcpu);
+       else if (!vcpu->mmio_read_completed)
+               kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
+
+       vcpu->guest_mode = 1;
+       kvm_guest_enter();
+
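+       /* Service deferred vcpu requests; only TLB flushes are handled here. */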
+       if (vcpu->requests)
+               if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+                       kvm_x86_ops->tlb_flush(vcpu);
+
+       kvm_x86_ops->run(vcpu, kvm_run);
+
+       vcpu->guest_mode = 0;
+       local_irq_enable();
+
+       ++vcpu->stat.exits;
+
+       /*
+        * We must have an instruction between local_irq_enable() and
+        * kvm_guest_exit(), so the timer interrupt isn't delayed by
+        * the interrupt shadow.  The stat.exits increment will do nicely.
+        * But we need to prevent reordering, hence this barrier():
+        */
+       barrier();
+
+       kvm_guest_exit();
+
+       preempt_enable();
+
+       /*
+        * Profile KVM exit RIPs:
+        */
+       if (unlikely(prof_on == KVM_PROFILING)) {
+               kvm_x86_ops->cache_regs(vcpu);
+               profile_hit(KVM_PROFILING, (void *)vcpu->rip);
+       }
+
+       r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+
+       if (r > 0) {
+               if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+                       r = -EINTR;
+                       kvm_run->exit_reason = KVM_EXIT_INTR;
+                       ++vcpu->stat.request_irq_exits;
+                       goto out;
+               }
+               if (!need_resched())
+                       goto again;
+       }
+
+out:
+       if (r > 0) {
+               kvm_resched(vcpu);
+               goto preempted;
+       }
+
+       post_kvm_run_save(vcpu, kvm_run);
+
+       return r;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       int r;
+       sigset_t sigsaved;
+
+       vcpu_load(vcpu);
+
+       if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+               kvm_vcpu_block(vcpu);
+               vcpu_put(vcpu);
+               return -EAGAIN;
+       }
+
+       if (vcpu->sigset_active)
+               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+       /* re-sync apic's tpr */
+       if (!irqchip_in_kernel(vcpu->kvm))
+               set_cr8(vcpu, kvm_run->cr8);
+
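+       /* Finish an in-flight PIO operation now that userspace has serviced it. */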
+       if (vcpu->pio.cur_count) {
+               r = complete_pio(vcpu);
+               if (r)
+                       goto out;
+       }
+#ifdef CONFIG_HAS_IOMEM
+       if (vcpu->mmio_needed) {
+               memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+               vcpu->mmio_read_completed = 1;
+               vcpu->mmio_needed = 0;
+               r = emulate_instruction(vcpu, kvm_run,
+                                       vcpu->mmio_fault_cr2, 0, 1);
+               if (r == EMULATE_DO_MMIO) {
+                       /*
+                        * Read-modify-write.  Back to userspace.
+                        */
+                       r = 0;
+                       goto out;
+               }
+       }
+#endif
+       if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
+               kvm_x86_ops->cache_regs(vcpu);
+               vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
+               kvm_x86_ops->decache_regs(vcpu);
+       }
+
+       r = __vcpu_run(vcpu, kvm_run);
+
+out:
+       if (vcpu->sigset_active)
+               sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+       vcpu_put(vcpu);
+       return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+       vcpu_load(vcpu);
+
+       kvm_x86_ops->cache_regs(vcpu);
+
+       regs->rax = vcpu->regs[VCPU_REGS_RAX];
+       regs->rbx = vcpu->regs[VCPU_REGS_RBX];
+       regs->rcx = vcpu->regs[VCPU_REGS_RCX];
+       regs->rdx = vcpu->regs[VCPU_REGS_RDX];
+       regs->rsi = vcpu->regs[VCPU_REGS_RSI];
+       regs->rdi = vcpu->regs[VCPU_REGS_RDI];
+       regs->rsp = vcpu->regs[VCPU_REGS_RSP];
+       regs->rbp = vcpu->regs[VCPU_REGS_RBP];
+#ifdef CONFIG_X86_64
+       regs->r8 = vcpu->regs[VCPU_REGS_R8];
+       regs->r9 = vcpu->regs[VCPU_REGS_R9];
+       regs->r10 = vcpu->regs[VCPU_REGS_R10];
+       regs->r11 = vcpu->regs[VCPU_REGS_R11];
+       regs->r12 = vcpu->regs[VCPU_REGS_R12];
+       regs->r13 = vcpu->regs[VCPU_REGS_R13];
+       regs->r14 = vcpu->regs[VCPU_REGS_R14];
+       regs->r15 = vcpu->regs[VCPU_REGS_R15];
+#endif
+
+       regs->rip = vcpu->rip;
+       regs->rflags = kvm_x86_ops->get_rflags(vcpu);
+
+       /*
+        * Don't leak debug flags in case they were set for guest debugging
+        */
+       if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+               regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+       vcpu_load(vcpu);
+
+       vcpu->regs[VCPU_REGS_RAX] = regs->rax;
+       vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
+       vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
+       vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
+       vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
+       vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
+       vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
+       vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
+#ifdef CONFIG_X86_64
+       vcpu->regs[VCPU_REGS_R8] = regs->r8;
+       vcpu->regs[VCPU_REGS_R9] = regs->r9;
+       vcpu->regs[VCPU_REGS_R10] = regs->r10;
+       vcpu->regs[VCPU_REGS_R11] = regs->r11;
+       vcpu->regs[VCPU_REGS_R12] = regs->r12;
+       vcpu->regs[VCPU_REGS_R13] = regs->r13;
+       vcpu->regs[VCPU_REGS_R14] = regs->r14;
+       vcpu->regs[VCPU_REGS_R15] = regs->r15;
+#endif
+
+       vcpu->rip = regs->rip;
+       kvm_x86_ops->set_rflags(vcpu, regs->rflags);
+
+       kvm_x86_ops->decache_regs(vcpu);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+static void get_segment(struct kvm_vcpu *vcpu,
+                       struct kvm_segment *var, int seg)
+{
+       kvm_x86_ops->get_segment(vcpu, var, seg);
+}
+
+void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
+{
+       struct kvm_segment cs;
+
+       get_segment(vcpu, &cs, VCPU_SREG_CS);
+       *db = cs.db;
+       *l = cs.l;
+}
+EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                 struct kvm_sregs *sregs)
+{
+       struct descriptor_table dt;
+       int pending_vec;
+
+       vcpu_load(vcpu);
+
+       get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+       get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+       get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
+       get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
+       get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
+       get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
+
+       get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
+       get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
+
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       sregs->idt.limit = dt.limit;
+       sregs->idt.base = dt.base;
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       sregs->gdt.limit = dt.limit;
+       sregs->gdt.base = dt.base;
+
+       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+       sregs->cr0 = vcpu->cr0;
+       sregs->cr2 = vcpu->cr2;
+       sregs->cr3 = vcpu->cr3;
+       sregs->cr4 = vcpu->cr4;
+       sregs->cr8 = get_cr8(vcpu);
+       sregs->efer = vcpu->shadow_efer;
+       sregs->apic_base = kvm_get_apic_base(vcpu);
+
+       if (irqchip_in_kernel(vcpu->kvm)) {
+               memset(sregs->interrupt_bitmap, 0,
+                      sizeof sregs->interrupt_bitmap);
+               pending_vec = kvm_x86_ops->get_irq(vcpu);
+               if (pending_vec >= 0)
+                       set_bit(pending_vec,
+                               (unsigned long *)sregs->interrupt_bitmap);
+       } else
+               memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
+                      sizeof sregs->interrupt_bitmap);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+static void set_segment(struct kvm_vcpu *vcpu,
+                       struct kvm_segment *var, int seg)
+{
+       kvm_x86_ops->set_segment(vcpu, var, seg);
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                 struct kvm_sregs *sregs)
+{
+       int mmu_reset_needed = 0;
+       int i, pending_vec, max_bits;
+       struct descriptor_table dt;
+
+       vcpu_load(vcpu);
+
+       dt.limit = sregs->idt.limit;
+       dt.base = sregs->idt.base;
+       kvm_x86_ops->set_idt(vcpu, &dt);
+       dt.limit = sregs->gdt.limit;
+       dt.base = sregs->gdt.base;
+       kvm_x86_ops->set_gdt(vcpu, &dt);
+
+       vcpu->cr2 = sregs->cr2;
+       mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
+       vcpu->cr3 = sregs->cr3;
+
+       set_cr8(vcpu, sregs->cr8);
+
+       mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
+#ifdef CONFIG_X86_64
+       kvm_x86_ops->set_efer(vcpu, sregs->efer);
+#endif
+       kvm_set_apic_base(vcpu, sregs->apic_base);
+
+       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+
+       mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
+       vcpu->cr0 = sregs->cr0;
+       kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
+
+       mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
+       kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
+       if (!is_long_mode(vcpu) && is_pae(vcpu))
+               load_pdptrs(vcpu, vcpu->cr3);
+
+       if (mmu_reset_needed)
+               kvm_mmu_reset_context(vcpu);
+
+       if (!irqchip_in_kernel(vcpu->kvm)) {
+               memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
+                      sizeof vcpu->irq_pending);
+               vcpu->irq_summary = 0;
+               for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
+                       if (vcpu->irq_pending[i])
+                               __set_bit(i, &vcpu->irq_summary);
+       } else {
+               max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+               pending_vec = find_first_bit(
+                       (const unsigned long *)sregs->interrupt_bitmap,
+                       max_bits);
+               /* Only pending external irq is handled here */
+               if (pending_vec < max_bits) {
+                       kvm_x86_ops->set_irq(vcpu, pending_vec);
+                       pr_debug("Set back pending irq %d\n",
+                                pending_vec);
+               }
+       }
+
+       set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+       set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+       set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
+       set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
+       set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
+       set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
+
+       set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
+       set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+                                   struct kvm_debug_guest *dbg)
+{
+       int r;
+
+       vcpu_load(vcpu);
+
+       r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+
+       vcpu_put(vcpu);
+
+       return r;
+}
+
+/*
+ * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
+ * we have asm/x86/processor.h
+ */
+struct fxsave {
+       u16     cwd;
+       u16     swd;
+       u16     twd;
+       u16     fop;
+       u64     rip;
+       u64     rdp;
+       u32     mxcsr;
+       u32     mxcsr_mask;
+       u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+#ifdef CONFIG_X86_64
+       u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 256 bytes */
+#else
+       u32     xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
+#endif
+};
+
+/*
+ * Translate a guest virtual address to a guest physical address.
+ */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                   struct kvm_translation *tr)
+{
+       unsigned long vaddr = tr->linear_address;
+       gpa_t gpa;
+
+       vcpu_load(vcpu);
+       mutex_lock(&vcpu->kvm->lock);
+       gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
+       tr->physical_address = gpa;
+       tr->valid = gpa != UNMAPPED_GVA;
+       tr->writeable = 1;
+       tr->usermode = 0;
+       mutex_unlock(&vcpu->kvm->lock);
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+       struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
+
+       vcpu_load(vcpu);
+
+       memcpy(fpu->fpr, fxsave->st_space, 128);
+       fpu->fcw = fxsave->cwd;
+       fpu->fsw = fxsave->swd;
+       fpu->ftwx = fxsave->twd;
+       fpu->last_opcode = fxsave->fop;
+       fpu->last_ip = fxsave->rip;
+       fpu->last_dp = fxsave->rdp;
+       memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+       struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
+
+       vcpu_load(vcpu);
+
+       memcpy(fxsave->st_space, fpu->fpr, 128);
+       fxsave->cwd = fpu->fcw;
+       fxsave->swd = fpu->fsw;
+       fxsave->twd = fpu->ftwx;
+       fxsave->fop = fpu->last_opcode;
+       fxsave->rip = fpu->last_ip;
+       fxsave->rdp = fpu->last_dp;
+       memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
+void fx_init(struct kvm_vcpu *vcpu)
+{
+       unsigned after_mxcsr_mask;
+
+       /* Initialize guest FPU by resetting ours and saving into guest's */
+       preempt_disable();
+       fx_save(&vcpu->host_fx_image);
+       fpu_init();
+       fx_save(&vcpu->guest_fx_image);
+       fx_restore(&vcpu->host_fx_image);
+       preempt_enable();
+
+       vcpu->cr0 |= X86_CR0_ET;
+       after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
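+       /* 0x1f80 is the power-on MXCSR value: all SIMD exceptions masked. */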
+       vcpu->guest_fx_image.mxcsr = 0x1f80;
+       memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
+              0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
+}
+EXPORT_SYMBOL_GPL(fx_init);
+
+void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
+{
+       if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
+               return;
+
+       vcpu->guest_fpu_loaded = 1;
+       fx_save(&vcpu->host_fx_image);
+       fx_restore(&vcpu->guest_fx_image);
+}
+EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
+
+void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
+{
+       if (!vcpu->guest_fpu_loaded)
+               return;
+
+       vcpu->guest_fpu_loaded = 0;
+       fx_save(&vcpu->guest_fx_image);
+       fx_restore(&vcpu->host_fx_image);
+       ++vcpu->stat.fpu_reload;
+}
+EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->vcpu_free(vcpu);
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+                                               unsigned int id)
+{
+       int r;
+       struct kvm_vcpu *vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+       if (IS_ERR(vcpu)) {
+               r = -ENOMEM;
+               goto fail;
+       }
+
+       /* We do fxsave: this must be aligned. */
+       BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
+
+       vcpu_load(vcpu);
+       r = kvm_arch_vcpu_reset(vcpu);
+       if (r == 0)
+               r = kvm_mmu_setup(vcpu);
+       vcpu_put(vcpu);
+       if (r < 0)
+               goto free_vcpu;
+
+       return vcpu;
+free_vcpu:
+       kvm_x86_ops->vcpu_free(vcpu);
+fail:
+       return ERR_PTR(r);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+       vcpu_load(vcpu);
+       kvm_mmu_unload(vcpu);
+       vcpu_put(vcpu);
+
+       kvm_x86_ops->vcpu_free(vcpu);
+}
+
+int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+       return kvm_x86_ops->vcpu_reset(vcpu);
+}
+
+void kvm_arch_hardware_enable(void *garbage)
+{
+       kvm_x86_ops->hardware_enable(garbage);
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+       kvm_x86_ops->hardware_disable(garbage);
+}
+
+int kvm_arch_hardware_setup(void)
+{
+       return kvm_x86_ops->hardware_setup();
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+       kvm_x86_ops->hardware_unsetup();
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+       kvm_x86_ops->check_processor_compatibility(rtn);
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+       struct page *page;
+       struct kvm *kvm;
+       int r;
+
+       BUG_ON(vcpu->kvm == NULL);
+       kvm = vcpu->kvm;
+
+       vcpu->mmu.root_hpa = INVALID_PAGE;
+       if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
+               vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+       else
+               vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
+
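+       /* One zeroed page for exchanging PIO data with userspace. */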
+       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       if (!page) {
+               r = -ENOMEM;
+               goto fail;
+       }
+       vcpu->pio_data = page_address(page);
+
+       r = kvm_mmu_create(vcpu);
+       if (r < 0)
+               goto fail_free_pio_data;
+
+       if (irqchip_in_kernel(kvm)) {
+               r = kvm_create_lapic(vcpu);
+               if (r < 0)
+                       goto fail_mmu_destroy;
+       }
+
+       return 0;
+
+fail_mmu_destroy:
+       kvm_mmu_destroy(vcpu);
+fail_free_pio_data:
+       free_page((unsigned long)vcpu->pio_data);
+fail:
+       return r;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+       kvm_free_lapic(vcpu);
+       kvm_mmu_destroy(vcpu);
+       free_page((unsigned long)vcpu->pio_data);
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+       struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+
+       if (!kvm)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&kvm->active_mmu_pages);
+
+       return kvm;
+}
+
+static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
+{
+       vcpu_load(vcpu);
+       kvm_mmu_unload(vcpu);
+       vcpu_put(vcpu);
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+       unsigned int i;
+
+       /*
+        * Unpin any mmu pages first.
+        */
+       for (i = 0; i < KVM_MAX_VCPUS; ++i)
+               if (kvm->vcpus[i])
+                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+               if (kvm->vcpus[i]) {
+                       kvm_arch_vcpu_free(kvm->vcpus[i]);
+                       kvm->vcpus[i] = NULL;
+               }
+       }
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+       kfree(kvm->vpic);
+       kfree(kvm->vioapic);
+       kvm_free_vcpus(kvm);
+       kvm_free_physmem(kvm);
+       kfree(kvm);
+}