KVM: x86: Add KVM_GET/SET_VCPU_EVENTS
[safe/jmp/linux-2.6] / arch / x86 / kvm / svm.c
index 78c0463..3de0b37 100644 (file)
@@ -46,17 +46,13 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
-#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
+#define NESTED_EXIT_HOST       0       /* Exit handled on host level */
+#define NESTED_EXIT_DONE       1       /* Exit caused nested vmexit  */
+#define NESTED_EXIT_CONTINUE   2       /* Further checks needed      */
 
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
@@ -81,6 +77,9 @@ struct nested_state {
        /* gpa pointers to the real vectors */
        u64 vmcb_msrpm;
 
+       /* A VMEXIT is required but not yet emulated */
+       bool exit_required;
+
        /* cache for intercepts of the guest */
        u16 intercept_cr_read;
        u16 intercept_cr_write;
@@ -108,6 +107,8 @@ struct vcpu_svm {
        u32 *msrpm;
 
        struct nested_state nested;
+
+       bool nmi_singlestep;
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
@@ -120,13 +121,13 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
-static int nested = 0;
+static int nested = 1;
 module_param(nested, int, S_IRUGO);
 
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_exit_handled(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                      bool has_error_code, u32 error_code);
@@ -282,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
 
        if (!svm->next_rip) {
-               if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+               if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
                                EMULATE_DONE)
                        printk(KERN_DEBUG "%s: NOP\n", __func__);
                return;
@@ -312,7 +313,7 @@ static void svm_hardware_disable(void *garbage)
        cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
        struct svm_cpu_data *svm_data;
@@ -321,16 +322,21 @@ static void svm_hardware_enable(void *garbage)
        struct desc_struct *gdt;
        int me = raw_smp_processor_id();
 
+       rdmsrl(MSR_EFER, efer);
+       if (efer & EFER_SVME)
+               return -EBUSY;
+
        if (!has_svm()) {
-               printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-               return;
+               printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+                      me);
+               return -EINVAL;
        }
        svm_data = per_cpu(svm_data, me);
 
        if (!svm_data) {
-               printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+               printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
                       me);
-               return;
+               return -EINVAL;
        }
 
        svm_data->asid_generation = 1;
@@ -341,11 +347,12 @@ static void svm_hardware_enable(void *garbage)
        gdt = (struct desc_struct *)gdt_descr.base;
        svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-       rdmsrl(MSR_EFER, efer);
        wrmsrl(MSR_EFER, efer | EFER_SVME);
 
        wrmsrl(MSR_VM_HSAVE_PA,
               page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+       return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
@@ -472,7 +479,7 @@ static __init int svm_hardware_setup(void)
                kvm_enable_efer_bits(EFER_SVME);
        }
 
-       for_each_online_cpu(cpu) {
+       for_each_possible_cpu(cpu) {
                r = svm_cpu_init(cpu);
                if (r)
                        goto err;
@@ -506,7 +513,7 @@ static __exit void svm_hardware_unsetup(void)
 {
        int cpu;
 
-       for_each_online_cpu(cpu)
+       for_each_possible_cpu(cpu)
                svm_cpu_uninit(cpu);
 
        __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -621,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm)
        save->rip = 0x0000fff0;
        svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
-       /*
-        * cr0 val on cpu init should be 0x60000010, we enable cpu
-        * cache by default. the orderly way is to enable cache in bios.
+       /* This is the guest-visible cr0 value.
+        * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
         */
-       save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+       svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+       kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
        save->cr4 = X86_CR4_PAE;
        /* rdx = ?? */
 
@@ -640,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm)
                control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
                                                 INTERCEPT_CR3_MASK);
                save->g_pat = 0x0007040600070406ULL;
-               /* enable caching because the QEMU Bios doesn't enable it */
-               save->cr0 = X86_CR0_ET;
                save->cr3 = 0;
                save->cr4 = 0;
        }
@@ -650,6 +656,11 @@ static void init_vmcb(struct vcpu_svm *svm)
        svm->nested.vmcb = 0;
        svm->vcpu.arch.hflags = 0;
 
+       if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+               control->pause_filter_count = 3000;
+               control->intercept |= (1ULL << INTERCEPT_PAUSE);
+       }
+
        enable_gif(svm);
 }
 
@@ -754,15 +765,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        int i;
 
        if (unlikely(cpu != vcpu->cpu)) {
-               u64 tsc_this, delta;
+               u64 delta;
 
                /*
                 * Make sure that the guest sees a monotonically
                 * increasing TSC.
                 */
-               rdtscll(tsc_this);
-               delta = vcpu->arch.host_tsc - tsc_this;
+               delta = vcpu->arch.host_tsc - native_read_tsc();
                svm->vmcb->control.tsc_offset += delta;
+               if (is_nested(svm))
+                       svm->nested.hsave->control.tsc_offset += delta;
                vcpu->cpu = cpu;
                kvm_migrate_timers(vcpu);
                svm->asid_generation = 0;
@@ -781,7 +793,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
        for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
                wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-       rdtscll(vcpu->arch.host_tsc);
+       vcpu->arch.host_tsc = native_read_tsc();
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1039,7 +1051,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
        svm->vmcb->control.intercept_exceptions &=
                ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
 
-       if (vcpu->arch.singlestep)
+       if (svm->nmi_singlestep)
                svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
 
        if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -1054,26 +1066,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
                vcpu->guest_debug = 0;
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-       int old_debug = vcpu->guest_debug;
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       vcpu->guest_debug = dbg->control;
-
-       update_db_intercept(vcpu);
-
        if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
                svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
        else
                svm->vmcb->save.dr7 = vcpu->arch.dr7;
 
-       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-               svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-       else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
-               svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
-       return 0;
+       update_db_intercept(vcpu);
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1174,7 +1176,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
        }
 }
 
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
 {
        u64 fault_address;
        u32 error_code;
@@ -1183,31 +1185,24 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        error_code = svm->vmcb->control.exit_info_1;
 
        trace_kvm_page_fault(fault_address, error_code);
-       /*
-        * FIXME: Tis shouldn't be necessary here, but there is a flush
-        * missing in the MMU code. Until we find this bug, flush the
-        * complete TLB here on an NPF
-        */
-       if (npt_enabled)
-               svm_flush_tlb(&svm->vcpu);
-       else {
-               if (kvm_event_needs_reinjection(&svm->vcpu))
-                       kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-       }
+       if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
+               kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
        return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
 {
+       struct kvm_run *kvm_run = svm->vcpu.run;
+
        if (!(svm->vcpu.guest_debug &
              (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
-               !svm->vcpu.arch.singlestep) {
+               !svm->nmi_singlestep) {
                kvm_queue_exception(&svm->vcpu, DB_VECTOR);
                return 1;
        }
 
-       if (svm->vcpu.arch.singlestep) {
-               svm->vcpu.arch.singlestep = false;
+       if (svm->nmi_singlestep) {
+               svm->nmi_singlestep = false;
                if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
                        svm->vmcb->save.rflags &=
                                ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@ -1226,25 +1221,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
 {
+       struct kvm_run *kvm_run = svm->vcpu.run;
+
        kvm_run->exit_reason = KVM_EXIT_DEBUG;
        kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
        kvm_run->debug.arch.exception = BP_VECTOR;
        return 0;
 }
 
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
 {
        int er;
 
-       er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+       er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
        if (er != EMULATE_DONE)
                kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
 }
 
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nm_interception(struct vcpu_svm *svm)
 {
        svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
        if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@ -1254,7 +1251,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
 {
        /*
         * On an #MC intercept the MCE handler is not called automatically in
@@ -1267,8 +1264,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
 {
+       struct kvm_run *kvm_run = svm->vcpu.run;
+
        /*
         * VMCB is undefined after a SHUTDOWN intercept
         * so reinitialize it.
@@ -1280,7 +1279,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 0;
 }
 
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
 {
        u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
        int size, in, string;
@@ -1294,7 +1293,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
        if (string) {
                if (emulate_instruction(&svm->vcpu,
-                                       kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+                                       0, 0, 0) == EMULATE_DO_MMIO)
                        return 0;
                return 1;
        }
@@ -1304,33 +1303,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 
        skip_emulated_instruction(&svm->vcpu);
-       return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+       return kvm_emulate_pio(&svm->vcpu, in, size, port);
 }
 
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
 {
        return 1;
 }
 
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
 {
        ++svm->vcpu.stat.irq_exits;
        return 1;
 }
 
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
 {
        return 1;
 }
 
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
        skip_emulated_instruction(&svm->vcpu);
        return kvm_emulate_halt(&svm->vcpu);
 }
 
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
@@ -1365,24 +1364,32 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
        svm->vmcb->control.exit_info_1 = error_code;
        svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
-       return nested_svm_exit_handled(svm, false);
+       return nested_svm_exit_handled(svm);
 }
 
 static inline int nested_svm_intr(struct vcpu_svm *svm)
 {
-       if (is_nested(svm)) {
-               if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
-                       return 0;
+       if (!is_nested(svm))
+               return 0;
 
-               if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
-                       return 0;
+       if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+               return 0;
 
-               svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+       if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+               return 0;
 
-               if (nested_svm_exit_handled(svm, false)) {
-                       nsvm_printk("VMexit -> INTR\n");
-                       return 1;
-               }
+       svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+       if (svm->nested.intercept & 1ULL) {
+               /*
+                * The #vmexit can't be emulated here directly because this
+                * code path runs with irqs and preemtion disabled. A
+                * #vmexit emulation might sleep. Only signal request for
+                * the #vmexit here.
+                */
+               svm->nested.exit_required = true;
+               trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+               return 1;
        }
 
        return 0;
@@ -1392,10 +1399,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
 {
        struct page *page;
 
-       down_read(&current->mm->mmap_sem);
        page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
-       up_read(&current->mm->mmap_sem);
-
        if (is_error_page(page))
                goto error;
 
@@ -1421,66 +1425,6 @@ static void nested_svm_unmap(void *addr, enum km_type idx)
        kvm_release_page_dirty(page);
 }
 
-static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
-{
-       struct page *page;
-
-       down_read(&current->mm->mmap_sem);
-       page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
-       up_read(&current->mm->mmap_sem);
-
-       if (is_error_page(page)) {
-               printk(KERN_INFO "%s: could not find page at 0x%llx\n",
-                      __func__, gpa);
-               kvm_release_page_clean(page);
-               kvm_inject_gp(&svm->vcpu, 0);
-               return NULL;
-       }
-       return page;
-}
-
-static int nested_svm_do(struct vcpu_svm *svm,
-                        u64 arg1_gpa, u64 arg2_gpa, void *opaque,
-                        int (*handler)(struct vcpu_svm *svm,
-                                       void *arg1,
-                                       void *arg2,
-                                       void *opaque))
-{
-       struct page *arg1_page;
-       struct page *arg2_page = NULL;
-       void *arg1;
-       void *arg2 = NULL;
-       int retval;
-
-       arg1_page = nested_svm_get_page(svm, arg1_gpa);
-       if(arg1_page == NULL)
-               return 1;
-
-       if (arg2_gpa) {
-               arg2_page = nested_svm_get_page(svm, arg2_gpa);
-               if(arg2_page == NULL) {
-                       kvm_release_page_clean(arg1_page);
-                       return 1;
-               }
-       }
-
-       arg1 = kmap_atomic(arg1_page, KM_USER0);
-       if (arg2_gpa)
-               arg2 = kmap_atomic(arg2_page, KM_USER1);
-
-       retval = handler(svm, arg1, arg2, opaque);
-
-       kunmap_atomic(arg1, KM_USER0);
-       if (arg2_gpa)
-               kunmap_atomic(arg2, KM_USER1);
-
-       kvm_release_page_dirty(arg1_page);
-       if (arg2_gpa)
-               kvm_release_page_dirty(arg2_page);
-
-       return retval;
-}
-
 static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm)
 {
        u32 param = svm->vmcb->control.exit_info_1 & 1;
@@ -1525,31 +1469,39 @@ out:
        return ret;
 }
 
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+static int nested_svm_exit_special(struct vcpu_svm *svm)
 {
        u32 exit_code = svm->vmcb->control.exit_code;
-       bool vmexit = false;
 
-       if (kvm_override) {
-               switch (exit_code) {
-               case SVM_EXIT_INTR:
-               case SVM_EXIT_NMI:
-                       return 0;
+       switch (exit_code) {
+       case SVM_EXIT_INTR:
+       case SVM_EXIT_NMI:
+               return NESTED_EXIT_HOST;
                /* For now we are always handling NPFs when using them */
-               case SVM_EXIT_NPF:
-                       if (npt_enabled)
-                               return 0;
-                       break;
-               /* When we're shadowing, trap PFs */
-               case SVM_EXIT_EXCP_BASE + PF_VECTOR:
-                       if (!npt_enabled)
-                               return 0;
-                       break;
-               default:
-                       break;
-               }
+       case SVM_EXIT_NPF:
+               if (npt_enabled)
+                       return NESTED_EXIT_HOST;
+               break;
+       /* When we're shadowing, trap PFs */
+       case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+               if (!npt_enabled)
+                       return NESTED_EXIT_HOST;
+               break;
+       default:
+               break;
        }
 
+       return NESTED_EXIT_CONTINUE;
+}
+
+/*
+ * If this function returns true, this #vmexit was already handled
+ */
+static int nested_svm_exit_handled(struct vcpu_svm *svm)
+{
+       u32 exit_code = svm->vmcb->control.exit_code;
+       int vmexit = NESTED_EXIT_HOST;
+
        switch (exit_code) {
        case SVM_EXIT_MSR:
                vmexit = nested_svm_exit_handled_msr(svm);
@@ -1557,43 +1509,41 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
        case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
                u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
                if (svm->nested.intercept_cr_read & cr_bits)
-                       vmexit = true;
+                       vmexit = NESTED_EXIT_DONE;
                break;
        }
        case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
                u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
                if (svm->nested.intercept_cr_write & cr_bits)
-                       vmexit = true;
+                       vmexit = NESTED_EXIT_DONE;
                break;
        }
        case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
                u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
                if (svm->nested.intercept_dr_read & dr_bits)
-                       vmexit = true;
+                       vmexit = NESTED_EXIT_DONE;
                break;
        }
        case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
                u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
                if (svm->nested.intercept_dr_write & dr_bits)
-                       vmexit = true;
+                       vmexit = NESTED_EXIT_DONE;
                break;
        }
        case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
                u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
                if (svm->nested.intercept_exceptions & excp_bits)
-                       vmexit = true;
+                       vmexit = NESTED_EXIT_DONE;
                break;
        }
        default: {
                u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
-               nsvm_printk("exit code: 0x%x\n", exit_code);
                if (svm->nested.intercept & exit_bits)
-                       vmexit = true;
+                       vmexit = NESTED_EXIT_DONE;
        }
        }
 
-       if (vmexit) {
-               nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
+       if (vmexit == NESTED_EXIT_DONE) {
                nested_svm_vmexit(svm);
        }
 
@@ -1638,6 +1588,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        struct vmcb *hsave = svm->nested.hsave;
        struct vmcb *vmcb = svm->vmcb;
 
+       trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+                                      vmcb->control.exit_info_1,
+                                      vmcb->control.exit_info_2,
+                                      vmcb->control.exit_int_info,
+                                      vmcb->control.exit_int_info_err);
+
        nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
        if (!nested_vmcb)
                return 1;
@@ -1671,6 +1627,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
        nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
        nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+       /*
+        * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+        * to make sure that we do not lose injected events. So check event_inj
+        * here and copy it to exit_int_info if it is valid.
+        * Exit_int_info and event_inj can't be both valid because the case
+        * below only happens on a VMRUN instruction intercept which has
+        * no valid exit_int_info set.
+        */
+       if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+               struct vmcb_control_area *nc = &nested_vmcb->control;
+
+               nc->exit_int_info     = vmcb->control.event_inj;
+               nc->exit_int_info_err = vmcb->control.event_inj_err;
+       }
+
        nested_vmcb->control.tlb_ctl           = 0;
        nested_vmcb->control.event_inj         = 0;
        nested_vmcb->control.event_inj_err     = 0;
@@ -1682,10 +1654,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        /* Restore the original control entries */
        copy_vmcb_control_area(vmcb, hsave);
 
-       /* Kill any pending exceptions */
-       if (svm->vcpu.arch.exception.pending == true)
-               nsvm_printk("WARNING: Pending Exception\n");
-
        kvm_clear_exception_queue(&svm->vcpu);
        kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -1724,28 +1692,44 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        return 0;
 }
 
-static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
-                                 void *arg2, void *opaque)
+static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
+       u32 *nested_msrpm;
        int i;
-       u32 *nested_msrpm = (u32*)arg1;
+
+       nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
+       if (!nested_msrpm)
+               return false;
+
        for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
                svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+
        svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
 
-       return 0;
+       nested_svm_unmap(nested_msrpm, KM_USER0);
+
+       return true;
 }
 
-static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
-                           void *arg2, void *opaque)
+static bool nested_svm_vmrun(struct vcpu_svm *svm)
 {
-       struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+       struct vmcb *nested_vmcb;
        struct vmcb *hsave = svm->nested.hsave;
        struct vmcb *vmcb = svm->vmcb;
 
+       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+       if (!nested_vmcb)
+               return false;
+
        /* nested_vmcb is our indicator if nested SVM is activated */
        svm->nested.vmcb = svm->vmcb->save.rax;
 
+       trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+                              nested_vmcb->save.rip,
+                              nested_vmcb->control.int_ctl,
+                              nested_vmcb->control.event_inj,
+                              nested_vmcb->control.nested_ctl);
+
        /* Clear internal status */
        kvm_clear_exception_queue(&svm->vcpu);
        kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1833,34 +1817,23 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
        svm->nested.intercept            = nested_vmcb->control.intercept;
 
        force_new_asid(&svm->vcpu);
-       svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
-       svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
        svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
-       if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
-               nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
-                               nested_vmcb->control.int_ctl);
-       }
        if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                svm->vcpu.arch.hflags |= HF_VINTR_MASK;
        else
                svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
-       nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
-                       nested_vmcb->control.exit_int_info,
-                       nested_vmcb->control.int_state);
-
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
        svm->vmcb->control.int_state = nested_vmcb->control.int_state;
        svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
-       if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
-               nsvm_printk("Injecting Event: 0x%x\n",
-                               nested_vmcb->control.event_inj);
        svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
        svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
+       nested_svm_unmap(nested_vmcb, KM_USER0);
+
        enable_gif(svm);
 
-       return 0;
+       return true;
 }
 
 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
@@ -1879,7 +1852,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
        to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
 }
 
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmload_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
 
@@ -1899,7 +1872,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
 
@@ -1919,27 +1892,35 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
 {
-       nsvm_printk("VMrun\n");
        if (nested_svm_check_permissions(svm))
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
 
-       if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
-                         NULL, nested_svm_vmrun))
+       if (!nested_svm_vmrun(svm))
                return 1;
 
-       if (nested_svm_do(svm, svm->nested.vmcb_msrpm, 0,
-                     NULL, nested_svm_vmrun_msrpm))
-               return 1;
+       if (!nested_svm_vmrun_msrpm(svm))
+               goto failed;
+
+       return 1;
+
+failed:
+
+       svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
+       svm->vmcb->control.exit_code_hi = 0;
+       svm->vmcb->control.exit_info_1  = 0;
+       svm->vmcb->control.exit_info_2  = 0;
+
+       nested_svm_vmexit(svm);
 
        return 1;
 }
 
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
 {
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -1952,7 +1933,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
 {
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -1969,10 +1950,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       nsvm_printk("INVLPGA\n");
+
+       trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+                         vcpu->arch.regs[VCPU_REGS_RAX]);
 
        /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
        kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -1982,15 +1965,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int invalid_op_interception(struct vcpu_svm *svm,
-                                  struct kvm_run *kvm_run)
+static int skinit_interception(struct vcpu_svm *svm)
 {
+       trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
 }
 
-static int task_switch_interception(struct vcpu_svm *svm,
-                                   struct kvm_run *kvm_run)
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+       return 1;
+}
+
+static int task_switch_interception(struct vcpu_svm *svm)
 {
        u16 tss_selector;
        int reason;
@@ -2040,14 +2029,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
        return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
        kvm_emulate_cpuid(&svm->vcpu);
        return 1;
 }
 
-static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
 {
        ++svm->vcpu.stat.nmi_window_exits;
        svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
@@ -2055,26 +2044,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpg_interception(struct vcpu_svm *svm)
 {
-       if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+       if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
                pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
        return 1;
 }
 
-static int emulate_on_interception(struct vcpu_svm *svm,
-                                  struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
 {
-       if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+       if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
                pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
        return 1;
 }
 
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
 {
+       struct kvm_run *kvm_run = svm->vcpu.run;
+
        u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
        /* instruction emulation calls kvm_set_cr8() */
-       emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+       emulate_instruction(&svm->vcpu, 0, 0, 0);
        if (irqchip_in_kernel(svm->vcpu.kvm)) {
                svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
                return 1;
@@ -2091,10 +2081,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
        switch (ecx) {
        case MSR_IA32_TSC: {
-               u64 tsc;
+               u64 tsc_offset;
 
-               rdtscll(tsc);
-               *data = svm->vmcb->control.tsc_offset + tsc;
+               if (is_nested(svm))
+                       tsc_offset = svm->nested.hsave->control.tsc_offset;
+               else
+                       tsc_offset = svm->vmcb->control.tsc_offset;
+
+               *data = tsc_offset + native_read_tsc();
                break;
        }
        case MSR_K6_STAR:
@@ -2156,7 +2150,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
        return 0;
 }
 
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
 {
        u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
        u64 data;
@@ -2180,10 +2174,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 
        switch (ecx) {
        case MSR_IA32_TSC: {
-               u64 tsc;
+               u64 tsc_offset = data - native_read_tsc();
+               u64 g_tsc_offset = 0;
+
+               if (is_nested(svm)) {
+                       g_tsc_offset = svm->vmcb->control.tsc_offset -
+                                      svm->nested.hsave->control.tsc_offset;
+                       svm->nested.hsave->control.tsc_offset = tsc_offset;
+               }
+
+               svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
 
-               rdtscll(tsc);
-               svm->vmcb->control.tsc_offset = data - tsc;
                break;
        }
        case MSR_K6_STAR:
@@ -2242,7 +2243,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
        return 0;
 }
 
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
 {
        u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
        u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
@@ -2258,17 +2259,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
 {
        if (svm->vmcb->control.exit_info_1)
-               return wrmsr_interception(svm, kvm_run);
+               return wrmsr_interception(svm);
        else
-               return rdmsr_interception(svm, kvm_run);
+               return rdmsr_interception(svm);
 }
 
-static int interrupt_window_interception(struct vcpu_svm *svm,
-                                  struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
 {
+       struct kvm_run *kvm_run = svm->vcpu.run;
+
        svm_clear_vintr(svm);
        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
        /*
@@ -2286,8 +2288,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
        return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
-                                     struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+       kvm_vcpu_on_spin(&(svm->vcpu));
+       return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
        [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
        [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
@@ -2322,6 +2329,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
        [SVM_EXIT_CPUID]                        = cpuid_interception,
        [SVM_EXIT_IRET]                         = iret_interception,
        [SVM_EXIT_INVD]                         = emulate_on_interception,
+       [SVM_EXIT_PAUSE]                        = pause_interception,
        [SVM_EXIT_HLT]                          = halt_interception,
        [SVM_EXIT_INVLPG]                       = invlpg_interception,
        [SVM_EXIT_INVLPGA]                      = invlpga_interception,
@@ -2335,25 +2343,43 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
        [SVM_EXIT_VMSAVE]                       = vmsave_interception,
        [SVM_EXIT_STGI]                         = stgi_interception,
        [SVM_EXIT_CLGI]                         = clgi_interception,
-       [SVM_EXIT_SKINIT]                       = invalid_op_interception,
+       [SVM_EXIT_SKINIT]                       = skinit_interception,
        [SVM_EXIT_WBINVD]                       = emulate_on_interception,
        [SVM_EXIT_MONITOR]                      = invalid_op_interception,
        [SVM_EXIT_MWAIT]                        = invalid_op_interception,
        [SVM_EXIT_NPF]                          = pf_interception,
 };
 
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_run *kvm_run = vcpu->run;
        u32 exit_code = svm->vmcb->control.exit_code;
 
        trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
+       if (unlikely(svm->nested.exit_required)) {
+               nested_svm_vmexit(svm);
+               svm->nested.exit_required = false;
+
+               return 1;
+       }
+
        if (is_nested(svm)) {
-               nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
-                           exit_code, svm->vmcb->control.exit_info_1,
-                           svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
-               if (nested_svm_exit_handled(svm, true))
+               int vmexit;
+
+               trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+                                       svm->vmcb->control.exit_info_1,
+                                       svm->vmcb->control.exit_info_2,
+                                       svm->vmcb->control.exit_int_info,
+                                       svm->vmcb->control.exit_int_info_err);
+
+               vmexit = nested_svm_exit_special(svm);
+
+               if (vmexit == NESTED_EXIT_CONTINUE)
+                       vmexit = nested_svm_exit_handled(svm);
+
+               if (vmexit == NESTED_EXIT_DONE)
                        return 1;
        }
 
@@ -2396,7 +2422,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       return svm_exit_handlers[exit_code](svm, kvm_run);
+       return svm_exit_handlers[exit_code](svm);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -2473,20 +2499,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
                !(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (masked) {
+               svm->vcpu.arch.hflags |= HF_NMI_MASK;
+               svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+       } else {
+               svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+               svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+       }
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb;
-       return (vmcb->save.rflags & X86_EFLAGS_IF) &&
-               !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-               gif_set(svm) &&
-               !is_nested(svm);
+       int ret;
+
+       if (!gif_set(svm) ||
+            (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+               return 0;
+
+       ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+
+       if (is_nested(svm))
+               return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
+
+       return ret;
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       nsvm_printk("Trying to open IRQ window\n");
 
        nested_svm_intr(svm);
 
@@ -2511,7 +2564,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
        /* Something prevents NMI from been injected. Single step over
           possible problem (IRET or exception injection or interrupt
           shadow) */
-       vcpu->arch.singlestep = true;
+       svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
        update_db_intercept(vcpu);
 }
@@ -2601,13 +2654,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 #define R "e"
 #endif
 
-static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        u16 fs_selector;
        u16 gs_selector;
        u16 ldt_selector;
 
+       /*
+        * A vmexit emulation is required before the vcpu can be executed
+        * again.
+        */
+       if (unlikely(svm->nested.exit_required))
+               return;
+
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
        svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
        svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2620,8 +2680,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        fs_selector = kvm_read_fs();
        gs_selector = kvm_read_gs();
        ldt_selector = kvm_read_ldt();
-       if (!is_nested(svm))
-               svm->vmcb->save.cr2 = vcpu->arch.cr2;
+       svm->vmcb->save.cr2 = vcpu->arch.cr2;
        /* required for live migration with NPT */
        if (npt_enabled)
                svm->vmcb->save.cr3 = vcpu->arch.cr3;
@@ -2907,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = {
        .queue_exception = svm_queue_exception,
        .interrupt_allowed = svm_interrupt_allowed,
        .nmi_allowed = svm_nmi_allowed,
+       .get_nmi_mask = svm_get_nmi_mask,
+       .set_nmi_mask = svm_set_nmi_mask,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,