KVM: ignore AMDs HWCR register access to set the FFDIS bit
[safe/jmp/linux-2.6] / arch / x86 / kvm / x86.c
index d2a4eca..cae5b12 100644 (file)
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
+#include <asm/mce.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS                                              \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+#define KVM_MAX_MCE_BANKS 32
+#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
@@ -176,16 +183,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
        ++vcpu->stat.pf_guest;
 
        if (vcpu->arch.exception.pending) {
-               if (vcpu->arch.exception.nr == PF_VECTOR) {
-                       printk(KERN_DEBUG "kvm: inject_page_fault:"
-                                       " double fault 0x%lx\n", addr);
-                       vcpu->arch.exception.nr = DF_VECTOR;
-                       vcpu->arch.exception.error_code = 0;
-               } else if (vcpu->arch.exception.nr == DF_VECTOR) {
+               switch(vcpu->arch.exception.nr) {
+               case DF_VECTOR:
                        /* triple fault -> shutdown */
                        set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+                       return;
+               case PF_VECTOR:
+                       vcpu->arch.exception.nr = DF_VECTOR;
+                       vcpu->arch.exception.error_code = 0;
+                       return;
+               default:
+                       /* replace previous exception with a new one in a hope
+                          that instruction re-execution will regenerate lost
+                          exception */
+                       vcpu->arch.exception.pending = false;
+                       break;
                }
-               return;
        }
        vcpu->arch.cr2 = addr;
        kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
@@ -232,7 +245,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
                goto out;
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-               if (is_present_pte(pdpte[i]) &&
+               if (is_present_gpte(pdpte[i]) &&
                    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
                        ret = 0;
                        goto out;
@@ -241,6 +254,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
        ret = 1;
 
        memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_avail);
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_dirty);
 out:
 
        return ret;
@@ -256,6 +273,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
        if (is_long_mode(vcpu) || !is_pae(vcpu))
                return false;
 
+       if (!test_bit(VCPU_EXREG_PDPTR,
+                     (unsigned long *)&vcpu->arch.regs_avail))
+               return true;
+
        r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
        if (r < 0)
                goto out;
@@ -328,9 +349,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
        kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
-       KVMTRACE_1D(LMSW, vcpu,
-                   (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
-                   handler);
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
@@ -466,7 +484,7 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
@@ -644,8 +662,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
-       kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
-                         &vcpu->hv_clock.tsc_timestamp);
+       kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
        ktime_get_ts(&ts);
        local_irq_restore(flags);
 
@@ -704,11 +721,48 @@ static bool msr_mtrr_valid(unsigned msr)
        return false;
 }
 
+static bool valid_pat_type(unsigned t)
+{
+       return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+       return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
+}
+
+static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+       int i;
+
+       if (!msr_mtrr_valid(msr))
+               return false;
+
+       if (msr == MSR_IA32_CR_PAT) {
+               for (i = 0; i < 8; i++)
+                       if (!valid_pat_type((data >> (i * 8)) & 0xff))
+                               return false;
+               return true;
+       } else if (msr == MSR_MTRRdefType) {
+               if (data & ~0xcff)
+                       return false;
+               return valid_mtrr_type(data & 0xff);
+       } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+               for (i = 0; i < 8 ; i++)
+                       if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+                               return false;
+               return true;
+       }
+
+       /* variable MTRRs */
+       return valid_mtrr_type(data & 0xff);
+}
+
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
 
-       if (!msr_mtrr_valid(msr))
+       if (!mtrr_valid(vcpu, msr, data))
                return 1;
 
        if (msr == MSR_MTRRdefType) {
@@ -741,23 +795,51 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        return 0;
 }
 
+static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
+
+       switch (msr) {
+       case MSR_IA32_MCG_STATUS:
+               vcpu->arch.mcg_status = data;
+               break;
+       case MSR_IA32_MCG_CTL:
+               if (!(mcg_cap & MCG_CTL_P))
+                       return 1;
+               if (data != 0 && data != ~(u64)0)
+                       return -1;
+               vcpu->arch.mcg_ctl = data;
+               break;
+       default:
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+                       /* only 0 or all 1s can be written to IA32_MCi_CTL */
+                       if ((offset & 0x3) == 0 &&
+                           data != 0 && data != ~(u64)0)
+                               return -1;
+                       vcpu->arch.mce_banks[offset] = data;
+                       break;
+               }
+               return 1;
+       }
+       return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        switch (msr) {
        case MSR_EFER:
                set_efer(vcpu, data);
                break;
-       case MSR_IA32_MC0_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-                      __func__, data);
-               break;
-       case MSR_IA32_MCG_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-                       __func__, data);
-               break;
-       case MSR_IA32_MCG_CTL:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
-                       __func__, data);
+       case MSR_K7_HWCR:
+               data &= ~(u64)0x40;     /* ignore flush filter disable */
+               if (data != 0) {
+                       pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
+                               data);
+                       return 1;
+               }
                break;
        case MSR_IA32_DEBUGCTLMSR:
                if (!data) {
@@ -813,6 +895,40 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                kvm_request_guest_time_update(vcpu);
                break;
        }
+       case MSR_IA32_MCG_CTL:
+       case MSR_IA32_MCG_STATUS:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+               return set_msr_mce(vcpu, msr, data);
+
+       /* Performance counters are not protected by a CPUID bit,
+        * so we should check all of them in the generic path for the sake of
+        * cross vendor migration.
+        * Writing a zero into the event select MSRs disables them,
+        * which we perfectly emulate ;-). Any other value should be at least
+        * reported, some guests depend on them.
+        */
+       case MSR_P6_EVNTSEL0:
+       case MSR_P6_EVNTSEL1:
+       case MSR_K7_EVNTSEL0:
+       case MSR_K7_EVNTSEL1:
+       case MSR_K7_EVNTSEL2:
+       case MSR_K7_EVNTSEL3:
+               if (data != 0)
+                       pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+                               "0x%x data 0x%llx\n", msr, data);
+               break;
+       /* at least RHEL 4 unconditionally writes to the perfctr registers,
+        * so we ignore writes to make it happy.
+        */
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
+       case MSR_K7_PERFCTR0:
+       case MSR_K7_PERFCTR1:
+       case MSR_K7_PERFCTR2:
+       case MSR_K7_PERFCTR3:
+               pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+                       "0x%x data 0x%llx\n", msr, data);
+               break;
        default:
                pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
                return 1;
@@ -868,26 +984,47 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        return 0;
 }
 
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
        u64 data;
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
 
        switch (msr) {
-       case 0xc0010010: /* SYSCFG */
-       case 0xc0010015: /* HWCR */
-       case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
-       case MSR_IA32_MC0_CTL:
-       case MSR_IA32_MCG_STATUS:
+               data = 0;
+               break;
        case MSR_IA32_MCG_CAP:
+               data = vcpu->arch.mcg_cap;
+               break;
        case MSR_IA32_MCG_CTL:
-       case MSR_IA32_MC0_MISC:
-       case MSR_IA32_MC0_MISC+4:
-       case MSR_IA32_MC0_MISC+8:
-       case MSR_IA32_MC0_MISC+12:
-       case MSR_IA32_MC0_MISC+16:
-       case MSR_IA32_MC0_MISC+20:
+               if (!(mcg_cap & MCG_CTL_P))
+                       return 1;
+               data = vcpu->arch.mcg_ctl;
+               break;
+       case MSR_IA32_MCG_STATUS:
+               data = vcpu->arch.mcg_status;
+               break;
+       default:
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+                       data = vcpu->arch.mce_banks[offset];
+                       break;
+               }
+               return 1;
+       }
+       *pdata = data;
+       return 0;
+}
+
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+       u64 data;
+
+       switch (msr) {
+       case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_EBL_CR_POWERON:
        case MSR_IA32_DEBUGCTLMSR:
@@ -895,9 +1032,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_LASTBRANCHTOIP:
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
+       case MSR_K8_SYSCFG:
+       case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
        case MSR_P6_EVNTSEL0:
        case MSR_P6_EVNTSEL1:
+       case MSR_K7_EVNTSEL0:
                data = 0;
                break;
        case MSR_MTRRcap:
@@ -929,6 +1069,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_KVM_SYSTEM_TIME:
                data = vcpu->arch.time;
                break;
+       case MSR_IA32_P5_MC_ADDR:
+       case MSR_IA32_P5_MC_TYPE:
+       case MSR_IA32_MCG_CAP:
+       case MSR_IA32_MCG_CTL:
+       case MSR_IA32_MCG_STATUS:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+               return get_msr_mce(vcpu, msr, pdata);
        default:
                pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
                return 1;
@@ -1030,6 +1177,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_REINJECT_CONTROL:
        case KVM_CAP_IRQ_INJECT_STATUS:
        case KVM_CAP_ASSIGN_DEV_IRQ:
+       case KVM_CAP_IRQFD:
+       case KVM_CAP_PIT2:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -1050,6 +1199,9 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IOMMU:
                r = iommu_found();
                break;
+       case KVM_CAP_MCE:
+               r = KVM_MAX_MCE_BANKS;
+               break;
        default:
                r = 0;
                break;
@@ -1078,14 +1230,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
                if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
                        goto out;
                r = -E2BIG;
-               if (n < num_msrs_to_save)
+               if (n < msr_list.nmsrs)
                        goto out;
                r = -EFAULT;
                if (copy_to_user(user_msr_list->indices, &msrs_to_save,
                                 num_msrs_to_save * sizeof(u32)))
                        goto out;
-               if (copy_to_user(user_msr_list->indices
-                                + num_msrs_to_save * sizeof(u32),
+               if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
                                 &emulated_msrs,
                                 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
                        goto out;
@@ -1110,6 +1261,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_X86_GET_MCE_CAP_SUPPORTED: {
+               u64 mce_cap;
+
+               mce_cap = KVM_MCE_CAP_SUPPORTED;
+               r = -EFAULT;
+               if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+                       goto out;
+               r = 0;
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -1398,6 +1559,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
        for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
                do_cpuid_ent(&cpuid_entries[nent], func, 0,
                             &nent, cpuid->nent);
+       r = -E2BIG;
+       if (nent >= cpuid->nent)
+               goto out_free;
+
        r = -EFAULT;
        if (copy_to_user(entries, cpuid_entries,
                         nent * sizeof(struct kvm_cpuid_entry2)))
@@ -1441,8 +1606,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
                return -ENXIO;
        vcpu_load(vcpu);
 
-       set_bit(irq->irq, vcpu->arch.irq_pending);
-       set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+       kvm_queue_interrupt(vcpu, irq->irq, false);
 
        vcpu_put(vcpu);
 
@@ -1467,6 +1631,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
+                                       u64 mcg_cap)
+{
+       int r;
+       unsigned bank_num = mcg_cap & 0xff, bank;
+
+       r = -EINVAL;
+       if (!bank_num)
+               goto out;
+       if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+               goto out;
+       r = 0;
+       vcpu->arch.mcg_cap = mcg_cap;
+       /* Init IA32_MCG_CTL to all 1s */
+       if (mcg_cap & MCG_CTL_P)
+               vcpu->arch.mcg_ctl = ~(u64)0;
+       /* Init IA32_MCi_CTL to all 1s */
+       for (bank = 0; bank < bank_num; bank++)
+               vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+out:
+       return r;
+}
+
+static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
+                                     struct kvm_x86_mce *mce)
+{
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
+       u64 *banks = vcpu->arch.mce_banks;
+
+       if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
+               return -EINVAL;
+       /*
+        * if IA32_MCG_CTL is not all 1s, the uncorrected error
+        * reporting is disabled
+        */
+       if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+           vcpu->arch.mcg_ctl != ~(u64)0)
+               return 0;
+       banks += 4 * mce->bank;
+       /*
+        * if IA32_MCi_CTL is not all 1s, the uncorrected error
+        * reporting is disabled for the bank
+        */
+       if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
+               return 0;
+       if (mce->status & MCI_STATUS_UC) {
+               if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
+                   !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+                       printk(KERN_DEBUG "kvm: set_mce: "
+                              "injects mce exception while "
+                              "previous one is in progress!\n");
+                       set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+                       return 0;
+               }
+               if (banks[1] & MCI_STATUS_VAL)
+                       mce->status |= MCI_STATUS_OVER;
+               banks[2] = mce->addr;
+               banks[3] = mce->misc;
+               vcpu->arch.mcg_status = mce->mcg_status;
+               banks[1] = mce->status;
+               kvm_queue_exception(vcpu, MC_VECTOR);
+       } else if (!(banks[1] & MCI_STATUS_VAL)
+                  || !(banks[1] & MCI_STATUS_UC)) {
+               if (banks[1] & MCI_STATUS_VAL)
+                       mce->status |= MCI_STATUS_OVER;
+               banks[2] = mce->addr;
+               banks[3] = mce->misc;
+               banks[1] = mce->status;
+       } else
+               banks[1] |= MCI_STATUS_OVER;
+       return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -1600,6 +1838,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
                break;
        }
+       case KVM_X86_SETUP_MCE: {
+               u64 mcg_cap;
+
+               r = -EFAULT;
+               if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+                       goto out;
+               r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
+               break;
+       }
+       case KVM_X86_SET_MCE: {
+               struct kvm_x86_mce mce;
+
+               r = -EFAULT;
+               if (copy_from_user(&mce, argp, sizeof mce))
+                       goto out;
+               r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -1739,19 +1995,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
+               spin_lock(&pic_irqchip(kvm)->lock);
                memcpy(&pic_irqchip(kvm)->pics[0],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
+               spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
+               spin_lock(&pic_irqchip(kvm)->lock);
                memcpy(&pic_irqchip(kvm)->pics[1],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
+               spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
+               mutex_lock(&kvm->irq_lock);
                memcpy(ioapic_irqchip(kvm),
                        &chip->chip.ioapic,
                        sizeof(struct kvm_ioapic_state));
+               mutex_unlock(&kvm->irq_lock);
                break;
        default:
                r = -EINVAL;
@@ -1765,7 +2027,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
        int r = 0;
 
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
 
@@ -1773,8 +2037,10 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
        int r = 0;
 
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
        kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
 
@@ -1783,7 +2049,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 {
        if (!kvm->arch.vpit)
                return -ENXIO;
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
        kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return 0;
 }
 
@@ -1834,6 +2102,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
        union {
                struct kvm_pit_state ps;
                struct kvm_memory_alias alias;
+               struct kvm_pit_config pit_config;
        } u;
 
        switch (ioctl) {
@@ -1894,12 +2163,20 @@ long kvm_arch_vm_ioctl(struct file *filp,
                }
                break;
        case KVM_CREATE_PIT:
+               u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
+               goto create_pit;
+       case KVM_CREATE_PIT2:
+               r = -EFAULT;
+               if (copy_from_user(&u.pit_config, argp,
+                                  sizeof(struct kvm_pit_config)))
+                       goto out;
+       create_pit:
                mutex_lock(&kvm->lock);
                r = -EEXIST;
                if (kvm->arch.vpit)
                        goto create_pit_unlock;
                r = -ENOMEM;
-               kvm->arch.vpit = kvm_create_pit(kvm);
+               kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
                if (kvm->arch.vpit)
                        r = 0;
        create_pit_unlock:
@@ -1914,10 +2191,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        __s32 status;
-                       mutex_lock(&kvm->lock);
+                       mutex_lock(&kvm->irq_lock);
                        status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                        irq_event.irq, irq_event.level);
-                       mutex_unlock(&kvm->lock);
+                       mutex_unlock(&kvm->irq_lock);
                        if (ioctl == KVM_IRQ_LINE_STATUS) {
                                irq_event.status = status;
                                if (copy_to_user(argp, &irq_event,
@@ -2050,7 +2327,7 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
 
        if (vcpu->arch.apic) {
                dev = &vcpu->arch.apic->dev;
-               if (dev->in_range(dev, addr, len, is_write))
+               if (kvm_iodevice_in_range(dev, addr, len, is_write))
                        return dev;
        }
        return NULL;
@@ -2163,12 +2440,11 @@ mmio:
         */
        mutex_lock(&vcpu->kvm->lock);
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
+       mutex_unlock(&vcpu->kvm->lock);
        if (mmio_dev) {
                kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-               mutex_unlock(&vcpu->kvm->lock);
                return X86EMUL_CONTINUE;
        }
-       mutex_unlock(&vcpu->kvm->lock);
 
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
@@ -2218,12 +2494,11 @@ mmio:
         */
        mutex_lock(&vcpu->kvm->lock);
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
+       mutex_unlock(&vcpu->kvm->lock);
        if (mmio_dev) {
                kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-               mutex_unlock(&vcpu->kvm->lock);
                return X86EMUL_CONTINUE;
        }
-       mutex_unlock(&vcpu->kvm->lock);
 
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
@@ -2312,7 +2587,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-       KVMTRACE_0D(CLTS, vcpu, handler);
        kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
        return X86EMUL_CONTINUE;
 }
@@ -2413,14 +2687,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 
                r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
-               /* Reject the instructions other than VMCALL/VMMCALL when
-                * try to emulate invalid opcode */
+               /* Only allow emulation of specific instructions on #UD
+                * (namely VMMCALL, sysenter, sysexit, syscall)*/
                c = &vcpu->arch.emulate_ctxt.decode;
-               if ((emulation_type & EMULTYPE_TRAP_UD) &&
-                   (!(c->twobyte && c->b == 0x01 &&
-                     (c->modrm_reg == 0 || c->modrm_reg == 3) &&
-                      c->modrm_mod == 3 && c->modrm_rm == 1)))
-                       return EMULATE_FAIL;
+               if (emulation_type & EMULTYPE_TRAP_UD) {
+                       if (!c->twobyte)
+                               return EMULATE_FAIL;
+                       switch (c->b) {
+                       case 0x01: /* VMMCALL */
+                               if (c->modrm_mod != 3 || c->modrm_rm != 1)
+                                       return EMULATE_FAIL;
+                               break;
+                       case 0x34: /* sysenter */
+                       case 0x35: /* sysexit */
+                               if (c->modrm_mod != 0 || c->modrm_rm != 0)
+                                       return EMULATE_FAIL;
+                               break;
+                       case 0x05: /* syscall */
+                               if (c->modrm_mod != 0 || c->modrm_rm != 0)
+                                       return EMULATE_FAIL;
+                               break;
+                       default:
+                               return EMULATE_FAIL;
+                       }
+
+                       if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
+                               return EMULATE_FAIL;
+               }
 
                ++vcpu->stat.insn_emulation;
                if (r)  {
@@ -2546,7 +2839,6 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
 {
        /* TODO: String I/O for in kernel device */
 
-       mutex_lock(&vcpu->kvm->lock);
        if (vcpu->arch.pio.in)
                kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
                                  vcpu->arch.pio.size,
@@ -2555,7 +2847,6 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
                kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
                                   vcpu->arch.pio.size,
                                   pd);
-       mutex_unlock(&vcpu->kvm->lock);
 }
 
 static void pio_string_write(struct kvm_io_device *pio_dev,
@@ -2565,14 +2856,12 @@ static void pio_string_write(struct kvm_io_device *pio_dev,
        void *pd = vcpu->arch.pio_data;
        int i;
 
-       mutex_lock(&vcpu->kvm->lock);
        for (i = 0; i < io->cur_count; i++) {
                kvm_iodevice_write(pio_dev, io->port,
                                   io->size,
                                   pd);
                pd += io->size;
        }
-       mutex_unlock(&vcpu->kvm->lock);
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2599,17 +2888,15 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        vcpu->arch.pio.down = 0;
        vcpu->arch.pio.rep = 0;
 
-       if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-               KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
-       else
-               KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
+       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+                     size, 1);
 
        val = kvm_register_read(vcpu, VCPU_REGS_RAX);
        memcpy(vcpu->arch.pio_data, &val, 4);
 
+       mutex_lock(&vcpu->kvm->lock);
        pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
+       mutex_unlock(&vcpu->kvm->lock);
        if (pio_dev) {
                kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
                complete_pio(vcpu);
@@ -2638,12 +2925,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        vcpu->arch.pio.down = down;
        vcpu->arch.pio.rep = rep;
 
-       if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-               KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
-       else
-               KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
+       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+                     size, count);
 
        if (!count) {
                kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -2673,9 +2956,12 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
        vcpu->arch.pio.guest_gva = address;
 
+       mutex_lock(&vcpu->kvm->lock);
        pio_dev = vcpu_find_pio_dev(vcpu, port,
                                    vcpu->arch.pio.cur_count,
                                    !vcpu->arch.pio.in);
+       mutex_unlock(&vcpu->kvm->lock);
+
        if (!vcpu->arch.pio.in) {
                /* string PIO write */
                ret = pio_copy_data(vcpu);
@@ -2725,10 +3011,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
-               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                       vcpu = kvm->vcpus[i];
-                       if (!vcpu)
-                               continue;
+               kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->cpu != freq->cpu)
                                continue;
                        if (!kvm_request_guest_time_update(vcpu))
@@ -2821,7 +3104,6 @@ void kvm_arch_exit(void)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
-       KVMTRACE_0D(HLT, vcpu, handler);
        if (irqchip_in_kernel(vcpu->kvm)) {
                vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
                return 1;
@@ -2852,7 +3134,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
        a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
 
-       KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+       trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
        if (!is_long_mode(vcpu)) {
                nr &= 0xFFFFFFFF;
@@ -2952,8 +3234,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
                return 0;
        }
-       KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
-                   (u32)((u64)value >> 32), handler);
 
        return value;
 }
@@ -2961,9 +3241,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                     unsigned long *rflags)
 {
-       KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
-                   (u32)((u64)val >> 32), handler);
-
        switch (cr) {
        case 0:
                kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -3073,11 +3350,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
                kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
        }
        kvm_x86_ops->skip_emulated_instruction(vcpu);
-       KVMTRACE_5D(CPUID, vcpu, function,
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
+       trace_kvm_cpuid(function,
+                       kvm_register_read(vcpu, VCPU_REGS_RAX),
+                       kvm_register_read(vcpu, VCPU_REGS_RBX),
+                       kvm_register_read(vcpu, VCPU_REGS_RCX),
+                       kvm_register_read(vcpu, VCPU_REGS_RDX));
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -3105,8 +3382,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
                kvm_run->ready_for_interrupt_injection = 1;
        else
                kvm_run->ready_for_interrupt_injection =
-                                       (kvm_arch_interrupt_allowed(vcpu) &&
-                                        !kvm_cpu_has_interrupt(vcpu));
+                       kvm_arch_interrupt_allowed(vcpu) &&
+                       !kvm_cpu_has_interrupt(vcpu) &&
+                       !kvm_event_needs_reinjection(vcpu);
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3142,7 +3420,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
        if (!kvm_x86_ops->update_cr8_intercept)
                return;
 
-       max_irr = kvm_lapic_find_highest_irr(vcpu);
+       if (!vcpu->arch.apic->vapic_addr)
+               max_irr = kvm_lapic_find_highest_irr(vcpu);
+       else
+               max_irr = -1;
 
        if (max_irr != -1)
                max_irr >>= 4;
@@ -3152,7 +3433,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
        kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_irq(struct kvm_vcpu *vcpu)
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        /* try to reinject previous events if any */
        if (vcpu->arch.nmi_injected) {
@@ -3161,7 +3442,7 @@ static void inject_irq(struct kvm_vcpu *vcpu)
        }
 
        if (vcpu->arch.interrupt.pending) {
-               kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+               kvm_x86_ops->set_irq(vcpu);
                return;
        }
 
@@ -3174,32 +3455,18 @@ static void inject_irq(struct kvm_vcpu *vcpu)
                }
        } else if (kvm_cpu_has_interrupt(vcpu)) {
                if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-                       kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-                       kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+                       kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+                                           false);
+                       kvm_x86_ops->set_irq(vcpu);
                }
        }
 }
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-               kvm_run->request_interrupt_window;
-
-       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-
-       inject_irq(vcpu);
-
-       /* enable NMI/IRQ window open exits if needed */
-       if (vcpu->arch.nmi_pending)
-               kvm_x86_ops->enable_nmi_window(vcpu);
-       else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-               kvm_x86_ops->enable_irq_window(vcpu);
-}
-
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        int r;
+       bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+               kvm_run->request_interrupt_window;
 
        if (vcpu->requests)
                if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3253,11 +3520,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        else
                inject_pending_irq(vcpu, kvm_run);
 
+       /* enable NMI/IRQ window open exits if needed */
+       if (vcpu->arch.nmi_pending)
+               kvm_x86_ops->enable_nmi_window(vcpu);
+       else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+               kvm_x86_ops->enable_irq_window(vcpu);
+
        if (kvm_lapic_enabled(vcpu)) {
-               if (!vcpu->arch.apic->vapic_addr)
-                       update_cr8_intercept(vcpu);
-               else
-                       kvm_lapic_sync_to_vapic(vcpu);
+               update_cr8_intercept(vcpu);
+               kvm_lapic_sync_to_vapic(vcpu);
        }
 
        up_read(&vcpu->kvm->slots_lock);
@@ -3279,7 +3550,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                set_debugreg(vcpu->arch.eff_db[3], 3);
        }
 
-       KVMTRACE_0D(VMENTRY, vcpu, entryexit);
+       trace_kvm_entry(vcpu->vcpu_id);
        kvm_x86_ops->run(vcpu, kvm_run);
 
        if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -3582,14 +3853,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->efer = vcpu->arch.shadow_efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);
 
-       if (irqchip_in_kernel(vcpu->kvm))
-               memset(sregs->interrupt_bitmap, 0,
-                      sizeof sregs->interrupt_bitmap);
-       else
-               memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
-                      sizeof sregs->interrupt_bitmap);
+       memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
 
-       if (vcpu->arch.interrupt.pending)
+       if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
                set_bit(vcpu->arch.interrupt.nr,
                        (unsigned long *)sregs->interrupt_bitmap);
 
@@ -4057,7 +4323,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
 {
        int mmu_reset_needed = 0;
-       int i, pending_vec, max_bits;
+       int pending_vec, max_bits;
        struct descriptor_table dt;
 
        vcpu_load(vcpu);
@@ -4099,24 +4365,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
-       if (!irqchip_in_kernel(vcpu->kvm)) {
-               memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
-                      sizeof vcpu->arch.irq_pending);
-               vcpu->arch.irq_summary = 0;
-               for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
-                       if (vcpu->arch.irq_pending[i])
-                               __set_bit(i, &vcpu->arch.irq_summary);
-       } else {
-               max_bits = (sizeof sregs->interrupt_bitmap) << 3;
-               pending_vec = find_first_bit(
-                       (const unsigned long *)sregs->interrupt_bitmap,
-                       max_bits);
-               /* Only pending external irq is handled here */
-               if (pending_vec < max_bits) {
-                       kvm_queue_interrupt(vcpu, pending_vec);
-                       pr_debug("Set back pending irq %d\n", pending_vec);
-               }
-               kvm_pic_clear_isr_ack(vcpu->kvm);
+       max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+       pending_vec = find_first_bit(
+               (const unsigned long *)sregs->interrupt_bitmap, max_bits);
+       if (pending_vec < max_bits) {
+               kvm_queue_interrupt(vcpu, pending_vec, false);
+               pr_debug("Set back pending irq %d\n", pending_vec);
+               if (irqchip_in_kernel(vcpu->kvm))
+                       kvm_pic_clear_isr_ack(vcpu->kvm);
        }
 
        kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
@@ -4130,7 +4386,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
        /* Older userspace won't unhalt the vcpu on reset. */
-       if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+       if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
            sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
            !(vcpu->arch.cr0 & X86_CR0_PE))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -4401,7 +4657,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        kvm = vcpu->kvm;
 
        vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-       if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        else
                vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -4423,6 +4679,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                        goto fail_mmu_destroy;
        }
 
+       vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+                                      GFP_KERNEL);
+       if (!vcpu->arch.mce_banks) {
+               r = -ENOMEM;
+               goto fail_mmu_destroy;
+       }
+       vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+
        return 0;
 
 fail_mmu_destroy:
@@ -4470,20 +4734,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 static void kvm_free_vcpus(struct kvm *kvm)
 {
        unsigned int i;
+       struct kvm_vcpu *vcpu;
 
        /*
         * Unpin any mmu pages first.
         */
-       for (i = 0; i < KVM_MAX_VCPUS; ++i)
-               if (kvm->vcpus[i])
-                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_arch_vcpu_free(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_unload_vcpu_mmu(vcpu);
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_arch_vcpu_free(vcpu);
+
+       mutex_lock(&kvm->lock);
+       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+               kvm->vcpus[i] = NULL;
 
+       atomic_set(&kvm->online_vcpus, 0);
+       mutex_unlock(&kvm->lock);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -4568,6 +4834,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
        kvm_mmu_zap_all(kvm);
+       kvm_reload_remote_mmus(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@@ -4598,3 +4865,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
        return kvm_x86_ops->interrupt_allowed(vcpu);
 }
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);