KVM: VMX: initialize TSC offset relative to vm creation time
[safe/jmp/linux-2.6] / arch / x86 / kvm / x86.c
index f8bde01..3b2acfd 100644 (file)
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS                                              \
@@ -67,6 +69,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                                    struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+                                             u32 function, u32 index);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -86,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "hypercalls", VCPU_STAT(hypercalls) },
        { "request_irq", VCPU_STAT(request_irq_exits) },
+       { "request_nmi", VCPU_STAT(request_nmi_exits) },
        { "irq_exits", VCPU_STAT(irq_exits) },
        { "host_state_reload", VCPU_STAT(host_state_reload) },
        { "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "insn_emulation", VCPU_STAT(insn_emulation) },
        { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
        { "irq_injections", VCPU_STAT(irq_injections) },
+       { "nmi_injections", VCPU_STAT(nmi_injections) },
        { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
        { "mmu_pte_write", VM_STAT(mmu_pte_write) },
        { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -101,6 +107,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "mmu_recycled", VM_STAT(mmu_recycled) },
        { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
        { "mmu_unsync", VM_STAT(mmu_unsync) },
+       { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
        { "largepages", VM_STAT(lpages) },
        { NULL }
@@ -168,6 +175,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           u32 error_code)
 {
        ++vcpu->stat.pf_guest;
+
        if (vcpu->arch.exception.pending) {
                if (vcpu->arch.exception.nr == PF_VECTOR) {
                        printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -312,6 +320,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        kvm_x86_ops->set_cr0(vcpu, cr0);
        vcpu->arch.cr0 = cr0;
 
+       kvm_mmu_sync_global(vcpu);
        kvm_mmu_reset_context(vcpu);
        return;
 }
@@ -355,6 +364,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        }
        kvm_x86_ops->set_cr4(vcpu, cr4);
        vcpu->arch.cr4 = cr4;
+       vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE);
+       kvm_mmu_sync_global(vcpu);
        kvm_mmu_reset_context(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -435,6 +446,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static inline u32 bit(int bitno)
+{
+       return 1 << (bitno & 31);
+}
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -449,7 +465,7 @@ static u32 msrs_to_save[] = {
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
        MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-       MSR_IA32_PERF_STATUS,
+       MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
 static unsigned num_msrs_to_save;
@@ -474,6 +490,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
                return;
        }
 
+       if (efer & EFER_SVME) {
+               struct kvm_cpuid_entry2 *feat;
+
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+                       printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       }
+
        kvm_x86_ops->set_efer(vcpu, efer);
 
        efer &= ~EFER_LMA;
@@ -575,7 +602,7 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
        hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
 
        pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
-                __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+                __func__, tsc_khz, hv_clock->tsc_shift,
                 hv_clock->tsc_to_system_mul);
 }
 
@@ -648,10 +675,38 @@ static bool msr_mtrr_valid(unsigned msr)
 
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
+       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
        if (!msr_mtrr_valid(msr))
                return 1;
 
-       vcpu->arch.mtrr[msr - 0x200] = data;
+       if (msr == MSR_MTRRdefType) {
+               vcpu->arch.mtrr_state.def_type = data;
+               vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+       } else if (msr == MSR_MTRRfix64K_00000)
+               p[0] = data;
+       else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+               p[1 + msr - MSR_MTRRfix16K_80000] = data;
+       else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+               p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+       else if (msr == MSR_IA32_CR_PAT)
+               vcpu->arch.pat = data;
+       else {  /* Variable MTRRs */
+               int idx, is_mtrr_mask;
+               u64 *pt;
+
+               idx = (msr - 0x200) / 2;
+               is_mtrr_mask = msr - 0x200 - 2 * idx;
+               if (!is_mtrr_mask)
+                       pt =
+                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+               else
+                       pt =
+                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+               *pt = data;
+       }
+
+       kvm_mmu_reset_context(vcpu);
        return 0;
 }
 
@@ -747,10 +802,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 
 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
+       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
        if (!msr_mtrr_valid(msr))
                return 1;
 
-       *pdata = vcpu->arch.mtrr[msr - 0x200];
+       if (msr == MSR_MTRRdefType)
+               *pdata = vcpu->arch.mtrr_state.def_type +
+                        (vcpu->arch.mtrr_state.enabled << 10);
+       else if (msr == MSR_MTRRfix64K_00000)
+               *pdata = p[0];
+       else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+               *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+       else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+               *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+       else if (msr == MSR_IA32_CR_PAT)
+               *pdata = vcpu->arch.pat;
+       else {  /* Variable MTRRs */
+               int idx, is_mtrr_mask;
+               u64 *pt;
+
+               idx = (msr - 0x200) / 2;
+               is_mtrr_mask = msr - 0x200 - 2 * idx;
+               if (!is_mtrr_mask)
+                       pt =
+                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+               else
+                       pt =
+                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+               *pdata = *pt;
+       }
+
        return 0;
 }
 
@@ -903,10 +985,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IRQCHIP:
        case KVM_CAP_HLT:
        case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
-       case KVM_CAP_USER_MEMORY:
        case KVM_CAP_SET_TSS_ADDR:
        case KVM_CAP_EXT_CPUID:
-       case KVM_CAP_CLOCKSOURCE:
        case KVM_CAP_PIT:
        case KVM_CAP_NOP_IO_DELAY:
        case KVM_CAP_MP_STATE:
@@ -929,7 +1009,10 @@ int kvm_dev_ioctl_check_extension(long ext)
                r = !tdp_enabled;
                break;
        case KVM_CAP_IOMMU:
-               r = intel_iommu_found();
+               r = iommu_found();
+               break;
+       case KVM_CAP_CLOCKSOURCE:
+               r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
                break;
        default:
                r = 0;
@@ -1118,11 +1201,6 @@ out:
        return r;
 }
 
-static inline u32 bit(int bitno)
-{
-       return 1 << (bitno & 31);
-}
-
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                          u32 index)
 {
@@ -1165,7 +1243,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        const u32 kvm_supported_word3_x86_features =
                bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
        const u32 kvm_supported_word6_x86_features =
-               bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+               bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+               bit(X86_FEATURE_SVM);
 
        /* all func 2 cpuid_count() should be called on the same cpu */
        get_cpu();
@@ -1188,6 +1267,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                int t, times = entry->eax & 0xff;
 
                entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+               entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
                for (t = 1; t < times && *nent < maxnent; ++t) {
                        do_cpuid_1_ent(&entry[t], function, 0);
                        entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1218,7 +1298,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until level_type is zero */
                for (i = 1; *nent < maxnent; ++i) {
-                       level_type = entry[i - 1].ecx & 0xff;
+                       level_type = entry[i - 1].ecx & 0xff00;
                        if (!level_type)
                                break;
                        do_cpuid_1_ent(&entry[i], function, i);
@@ -1318,6 +1398,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+       vcpu_load(vcpu);
+       kvm_inject_nmi(vcpu);
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
                                           struct kvm_tpr_access_ctl *tac)
 {
@@ -1377,6 +1466,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_NMI: {
+               r = kvm_vcpu_ioctl_nmi(vcpu);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        case KVM_SET_CPUID: {
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;
@@ -1742,7 +1838,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        mutex_lock(&kvm->lock);
-                       kvm_set_irq(kvm, irq_event.irq, irq_event.level);
+                       kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                   irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
                        r = 0;
                }
@@ -1967,7 +2064,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
        ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
        if (ret < 0)
                return 0;
-       kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+       kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
        return 1;
 }
 
@@ -2403,8 +2500,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        val = kvm_register_read(vcpu, VCPU_REGS_RAX);
        memcpy(vcpu->arch.pio_data, &val, 4);
 
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
-
        pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
        if (pio_dev) {
                kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2540,7 +2635,7 @@ int kvm_arch_init(void *opaque)
        kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
        kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-                       PT_DIRTY_MASK, PT64_NX_MASK, 0);
+                       PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
        return 0;
 
 out:
@@ -2728,7 +2823,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
 
        e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
        /* when no next entry is found, the current entry[i] is reselected */
-       for (j = i + 1; j == i; j = (j + 1) % nent) {
+       for (j = i + 1; ; j = (j + 1) % nent) {
                struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
                if (ej->function == e->function) {
                        ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2753,20 +2848,15 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
        return 1;
 }
 
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+                                             u32 function, u32 index)
 {
        int i;
-       u32 function, index;
-       struct kvm_cpuid_entry2 *e, *best;
+       struct kvm_cpuid_entry2 *best = NULL;
 
-       function = kvm_register_read(vcpu, VCPU_REGS_RAX);
-       index = kvm_register_read(vcpu, VCPU_REGS_RCX);
-       kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
-       best = NULL;
        for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+               struct kvm_cpuid_entry2 *e;
+
                e = &vcpu->arch.cpuid_entries[i];
                if (is_matching_cpuid_entry(e, function, index)) {
                        if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2781,6 +2871,22 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
                        if (!best || e->function > best->function)
                                best = e;
        }
+
+       return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+       u32 function, index;
+       struct kvm_cpuid_entry2 *best;
+
+       function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+       index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+       kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+       kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+       best = kvm_find_cpuid_entry(vcpu, function, index);
        if (best) {
                kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
                kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -2900,9 +3006,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                goto out;
        }
 
-       if (vcpu->guest_debug.enabled)
-               kvm_x86_ops->guest_debug_pre(vcpu);
-
        vcpu->guest_mode = 1;
        /*
         * Make sure that guest_mode assignment won't happen after
@@ -2923,10 +3026,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        kvm_guest_enter();
 
+       get_debugreg(vcpu->arch.host_dr6, 6);
+       get_debugreg(vcpu->arch.host_dr7, 7);
+       if (unlikely(vcpu->arch.switch_db_regs)) {
+               get_debugreg(vcpu->arch.host_db[0], 0);
+               get_debugreg(vcpu->arch.host_db[1], 1);
+               get_debugreg(vcpu->arch.host_db[2], 2);
+               get_debugreg(vcpu->arch.host_db[3], 3);
+
+               set_debugreg(0, 7);
+               set_debugreg(vcpu->arch.eff_db[0], 0);
+               set_debugreg(vcpu->arch.eff_db[1], 1);
+               set_debugreg(vcpu->arch.eff_db[2], 2);
+               set_debugreg(vcpu->arch.eff_db[3], 3);
+       }
 
        KVMTRACE_0D(VMENTRY, vcpu, entryexit);
        kvm_x86_ops->run(vcpu, kvm_run);
 
+       if (unlikely(vcpu->arch.switch_db_regs)) {
+               set_debugreg(0, 7);
+               set_debugreg(vcpu->arch.host_db[0], 0);
+               set_debugreg(vcpu->arch.host_db[1], 1);
+               set_debugreg(vcpu->arch.host_db[2], 2);
+               set_debugreg(vcpu->arch.host_db[3], 3);
+       }
+       set_debugreg(vcpu->arch.host_dr6, 6);
+       set_debugreg(vcpu->arch.host_dr7, 7);
+
        vcpu->guest_mode = 0;
        local_irq_enable();
 
@@ -2972,7 +3099,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                pr_debug("vcpu %d received sipi with vector # %x\n",
                         vcpu->vcpu_id, vcpu->arch.sipi_vector);
                kvm_lapic_reset(vcpu);
-               r = kvm_x86_ops->vcpu_reset(vcpu);
+               r = kvm_arch_vcpu_reset(vcpu);
                if (r)
                        return r;
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3113,7 +3240,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        /*
         * Don't leak debug flags in case they were set for guest debugging
         */
-       if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
 
        vcpu_put(vcpu);
@@ -3274,9 +3401,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
        kvm_desct->padding = 0;
 }
 
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
-                                          u16 selector,
-                                          struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+                                         u16 selector,
+                                         struct descriptor_table *dtable)
 {
        if (selector & 1 << 2) {
                struct kvm_segment kvm_seg;
@@ -3301,7 +3428,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
        struct descriptor_table dtable;
        u16 index = selector >> 3;
 
-       get_segment_descritptor_dtable(vcpu, selector, &dtable);
+       get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
        if (dtable.limit < index * 8 + 7) {
                kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3320,7 +3447,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
        struct descriptor_table dtable;
        u16 index = selector >> 3;
 
-       get_segment_descritptor_dtable(vcpu, selector, &dtable);
+       get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
        if (dtable.limit < index * 8 + 7)
                return 1;
@@ -3732,15 +3859,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                   struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                       struct kvm_guest_debug *dbg)
 {
-       int r;
+       int i, r;
 
        vcpu_load(vcpu);
 
+       if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+               for (i = 0; i < KVM_NR_DB_REGS; ++i)
+                       vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+               vcpu->arch.switch_db_regs =
+                       (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+       } else {
+               for (i = 0; i < KVM_NR_DB_REGS; i++)
+                       vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+               vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+       }
+
        r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
 
+       if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+               kvm_queue_exception(vcpu, DB_VECTOR);
+       else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+               kvm_queue_exception(vcpu, BP_VECTOR);
+
        vcpu_put(vcpu);
 
        return r;
@@ -3899,6 +4043,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        /* We do fxsave: this must be aligned. */
        BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
 
+       vcpu->arch.mtrr_state.have_fixed = 1;
        vcpu_load(vcpu);
        r = kvm_arch_vcpu_reset(vcpu);
        if (r == 0)
@@ -3924,6 +4069,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 {
+       vcpu->arch.nmi_pending = false;
+       vcpu->arch.nmi_injected = false;
+
+       vcpu->arch.switch_db_regs = 0;
+       memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+       vcpu->arch.dr6 = DR6_FIXED_1;
+       vcpu->arch.dr7 = DR7_FIXED_1;
+
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -4011,8 +4164,14 @@ struct  kvm *kvm_arch_create_vm(void)
                return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
+       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
+       rdtscll(kvm->arch.vm_init_tsc);
+
        return kvm;
 }
 
@@ -4042,10 +4201,14 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+       kvm_free_all_assigned_devices(kvm);
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        kvm_iommu_unmap_guest(kvm);
-       kvm_free_all_assigned_devices(kvm);
        kvm_free_pit(kvm);
        kfree(kvm->arch.vpic);
        kfree(kvm->arch.vioapic);
@@ -4123,7 +4286,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-              || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
+              || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+              || vcpu->arch.nmi_pending;
 }
 
 static void vcpu_kick_intr(void *info)