include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit...
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6a31dfb..24cd0ee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <linux/user-return-notifier.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
 #include <trace/events/kvm.h>
 #undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -87,6 +91,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+#define KVM_NR_SHARED_MSRS 16
+
+struct kvm_shared_msrs_global {
+       int nr;
+       u32 msrs[KVM_NR_SHARED_MSRS];
+};
+
+struct kvm_shared_msrs {
+       struct user_return_notifier urn;
+       bool registered;
+       struct kvm_shared_msr_values {
+               u64 host;
+               u64 curr;
+       } values[KVM_NR_SHARED_MSRS];
+};
+
+static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
+static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pf_fixed", VCPU_STAT(pf_fixed) },
        { "pf_guest", VCPU_STAT(pf_guest) },
@@ -123,6 +146,83 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
+static void kvm_on_user_return(struct user_return_notifier *urn)
+{
+       unsigned slot;
+       struct kvm_shared_msrs *locals
+               = container_of(urn, struct kvm_shared_msrs, urn);
+       struct kvm_shared_msr_values *values;
+
+       for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
+               values = &locals->values[slot];
+               if (values->host != values->curr) {
+                       wrmsrl(shared_msrs_global.msrs[slot], values->host);
+                       values->curr = values->host;
+               }
+       }
+       locals->registered = false;
+       user_return_notifier_unregister(urn);
+}
+
+static void shared_msr_update(unsigned slot, u32 msr)
+{
+       struct kvm_shared_msrs *smsr;
+       u64 value;
+
+       smsr = &__get_cpu_var(shared_msrs);
+       /* read-only access; nobody should be modifying shared_msrs_global
+        * at this point, so no lock is needed */
+       if (slot >= shared_msrs_global.nr) {
+               printk(KERN_ERR "kvm: invalid MSR slot!");
+               return;
+       }
+       rdmsrl_safe(msr, &value);
+       smsr->values[slot].host = value;
+       smsr->values[slot].curr = value;
+}
+
+void kvm_define_shared_msr(unsigned slot, u32 msr)
+{
+       if (slot >= shared_msrs_global.nr)
+               shared_msrs_global.nr = slot + 1;
+       shared_msrs_global.msrs[slot] = msr;
+       /* ensure the update to shared_msrs_global is visible before use */
+       smp_wmb();
+}
+EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
+
+static void kvm_shared_msr_cpu_online(void)
+{
+       unsigned i;
+
+       for (i = 0; i < shared_msrs_global.nr; ++i)
+               shared_msr_update(i, shared_msrs_global.msrs[i]);
+}
+
+void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+{
+       struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+
+       if (((value ^ smsr->values[slot].curr) & mask) == 0)
+               return;
+       smsr->values[slot].curr = value;
+       wrmsrl(shared_msrs_global.msrs[slot], value);
+       if (!smsr->registered) {
+               smsr->urn.on_user_return = kvm_on_user_return;
+               user_return_notifier_register(&smsr->urn);
+               smsr->registered = true;
+       }
+}
+EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
+
+static void drop_user_return_notifiers(void *ignore)
+{
+       struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+
+       if (smsr->registered)
+               kvm_on_user_return(&smsr->urn);
+}
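
The shared-MSR machinery above lets vendor modules defer restoring host MSR
values until the CPU actually returns to userspace, instead of on every
vmexit. A minimal usage sketch, assuming a caller-chosen slot and MSR
(illustrative only, not taken from this patch):

    /* module init: reserve a slot for an MSR shared with the guest */
    #define SKETCH_SLOT_STAR 0
    kvm_define_shared_msr(SKETCH_SLOT_STAR, MSR_K6_STAR);

    /* before entering the guest: write only if the value changed, and
     * register a user-return notifier to restore the host value later */
    kvm_set_shared_msr(SKETCH_SLOT_STAR, guest_star_value, -1ull);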
+
 unsigned long segment_base(u16 selector)
 {
        struct descriptor_table gdt;
@@ -170,12 +270,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+#define EXCPT_BENIGN           0
+#define EXCPT_CONTRIBUTORY     1
+#define EXCPT_PF               2
+
+static int exception_class(int vector)
+{
+       switch (vector) {
+       case PF_VECTOR:
+               return EXCPT_PF;
+       case DE_VECTOR:
+       case TS_VECTOR:
+       case NP_VECTOR:
+       case SS_VECTOR:
+       case GP_VECTOR:
+               return EXCPT_CONTRIBUTORY;
+       default:
+               break;
+       }
+       return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+               unsigned nr, bool has_error, u32 error_code)
+{
+       u32 prev_nr;
+       int class1, class2;
+
+       if (!vcpu->arch.exception.pending) {
+       queue:
+               vcpu->arch.exception.pending = true;
+               vcpu->arch.exception.has_error_code = has_error;
+               vcpu->arch.exception.nr = nr;
+               vcpu->arch.exception.error_code = error_code;
+               return;
+       }
+
+       /* a previous exception is pending; decide how the two combine */
+       prev_nr = vcpu->arch.exception.nr;
+       if (prev_nr == DF_VECTOR) {
+               /* triple fault -> shutdown */
+               set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+               return;
+       }
+       class1 = exception_class(prev_nr);
+       class2 = exception_class(nr);
+       if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+               || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+               /* generate double fault per SDM Table 5-5 */
+               vcpu->arch.exception.pending = true;
+               vcpu->arch.exception.has_error_code = true;
+               vcpu->arch.exception.nr = DF_VECTOR;
+               vcpu->arch.exception.error_code = 0;
+       } else
+               /* replace the previous exception with the new one in the
+                  hope that instruction re-execution will regenerate the
+                  lost exception */
+               goto queue;
+}
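
Concretely, the rules above implement SDM Table 5-5: two contributory
exceptions, or a #PF followed by any non-benign exception, merge into #DF,
and a further fault while #DF is pending escalates to a triple fault. A
hypothetical call sequence:

    kvm_queue_exception_e(vcpu, PF_VECTOR, 2);  /* #PF pending */
    kvm_queue_exception_e(vcpu, GP_VECTOR, 0);  /* PF + contributory -> #DF */
    kvm_queue_exception_e(vcpu, GP_VECTOR, 0);  /* #DF pending -> triple fault */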
+
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-       WARN_ON(vcpu->arch.exception.pending);
-       vcpu->arch.exception.pending = true;
-       vcpu->arch.exception.has_error_code = false;
-       vcpu->arch.exception.nr = nr;
+       kvm_multiple_exception(vcpu, nr, false, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception);
 
@@ -183,25 +339,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           u32 error_code)
 {
        ++vcpu->stat.pf_guest;
-
-       if (vcpu->arch.exception.pending) {
-               switch(vcpu->arch.exception.nr) {
-               case DF_VECTOR:
-                       /* triple fault -> shutdown */
-                       set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-                       return;
-               case PF_VECTOR:
-                       vcpu->arch.exception.nr = DF_VECTOR;
-                       vcpu->arch.exception.error_code = 0;
-                       return;
-               default:
-                       /* replace previous exception with a new one in a hope
-                          that instruction re-execution will regenerate lost
-                          exception */
-                       vcpu->arch.exception.pending = false;
-                       break;
-               }
-       }
        vcpu->arch.cr2 = addr;
        kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 }
@@ -214,11 +351,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
-       WARN_ON(vcpu->arch.exception.pending);
-       vcpu->arch.exception.pending = true;
-       vcpu->arch.exception.has_error_code = true;
-       vcpu->arch.exception.nr = nr;
-       vcpu->arch.exception.error_code = error_code;
+       kvm_multiple_exception(vcpu, nr, true, error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
@@ -296,12 +429,18 @@ out:
 
 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
-       if (cr0 & CR0_RESERVED_BITS) {
+       cr0 |= X86_CR0_ET;
+
+#ifdef CONFIG_X86_64
+       if (cr0 & 0xffffffff00000000UL) {
                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-                      cr0, vcpu->arch.cr0);
+                      cr0, kvm_read_cr0(vcpu));
                kvm_inject_gp(vcpu, 0);
                return;
        }
+#endif
+
+       cr0 &= ~CR0_RESERVED_BITS;
 
        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
@@ -318,7 +457,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 #ifdef CONFIG_X86_64
-               if ((vcpu->arch.shadow_efer & EFER_LME)) {
+               if ((vcpu->arch.efer & EFER_LME)) {
                        int cs_db, cs_l;
 
                        if (!is_pae(vcpu)) {
@@ -356,13 +495,13 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
 
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
-       kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+       kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long old_cr4 = vcpu->arch.cr4;
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 
        if (cr4 & CR4_RESERVED_BITS) {
@@ -484,16 +623,21 @@ static inline u32 bit(int bitno)
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
  *
  * This list is modified at module load time to reflect the
- * capabilities of the host cpu.
+ * capabilities of the host cpu. This capabilities test skips MSRs that are
+ * kvm-specific; those are placed at the beginning of the list.
  */
+
+#define KVM_SAVE_MSRS_BEGIN    5
 static u32 msrs_to_save[] = {
+       MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+       HV_X64_MSR_APIC_ASSIST_PAGE,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_K6_STAR,
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-       MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+       MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
 static unsigned num_msrs_to_save;
@@ -512,7 +656,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
        }
 
        if (is_paging(vcpu)
-           && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+           && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
                printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
                kvm_inject_gp(vcpu, 0);
                return;
@@ -543,9 +687,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
        kvm_x86_ops->set_efer(vcpu, efer);
 
        efer &= ~EFER_LMA;
-       efer |= vcpu->arch.shadow_efer & EFER_LMA;
+       efer |= vcpu->arch.efer & EFER_LMA;
 
-       vcpu->arch.shadow_efer = efer;
+       vcpu->arch.efer = efer;
 
        vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
        kvm_mmu_reset_context(vcpu);
@@ -580,7 +724,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
        static int version;
        struct pvclock_wall_clock wc;
-       struct timespec now, sys, boot;
+       struct timespec boot;
 
        if (!wall_clock)
                return;
@@ -595,9 +739,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
         * wall clock specified here.  guest system time equals host
         * system time for us, thus we must fill in host boot time here.
         */
-       now = current_kernel_time();
-       ktime_get_ts(&sys);
-       boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+       getboottime(&boot);
 
        wc.sec = boot.tv_sec;
        wc.nsec = boot.tv_nsec;
@@ -672,12 +814,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
        local_irq_save(flags);
        kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
        ktime_get_ts(&ts);
+       monotonic_to_bootbased(&ts);
        local_irq_restore(flags);
 
        /* With all the info we got, fill in the values */
 
        vcpu->hv_clock.system_time = ts.tv_nsec +
-                                    (NSEC_PER_SEC * (u64)ts.tv_sec);
+                                    (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
+
        /*
         * The interface expects us to write an even number signaling that the
         * update is finished. Since the guest won't see the intermediate
@@ -835,6 +979,132 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        return 0;
 }
 
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+       struct kvm *kvm = vcpu->kvm;
+       int lm = is_long_mode(vcpu);
+       u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
+               : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
+       u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
+               : kvm->arch.xen_hvm_config.blob_size_32;
+       u32 page_num = data & ~PAGE_MASK;
+       u64 page_addr = data & PAGE_MASK;
+       u8 *page;
+       int r;
+
+       r = -E2BIG;
+       if (page_num >= blob_size)
+               goto out;
+       r = -ENOMEM;
+       page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!page)
+               goto out;
+       r = -EFAULT;
+       if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
+               goto out_free;
+       if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
+               goto out_free;
+       r = 0;
+out_free:
+       kfree(page);
+out:
+       return r;
+}
+
+static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+       return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+       bool r = false;
+       switch (msr) {
+       case HV_X64_MSR_GUEST_OS_ID:
+       case HV_X64_MSR_HYPERCALL:
+               r = true;
+               break;
+       }
+
+       return r;
+}
+
+static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+       struct kvm *kvm = vcpu->kvm;
+
+       switch (msr) {
+       case HV_X64_MSR_GUEST_OS_ID:
+               kvm->arch.hv_guest_os_id = data;
+               /* setting guest os id to zero disables hypercall page */
+               if (!kvm->arch.hv_guest_os_id)
+                       kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+               break;
+       case HV_X64_MSR_HYPERCALL: {
+               u64 gfn;
+               unsigned long addr;
+               u8 instructions[4];
+
+               /* if the guest os id is not set, the hypercall stays disabled */
+               if (!kvm->arch.hv_guest_os_id)
+                       break;
+               if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+                       kvm->arch.hv_hypercall = data;
+                       break;
+               }
+               gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+               addr = gfn_to_hva(kvm, gfn);
+               if (kvm_is_error_hva(addr))
+                       return 1;
+               kvm_x86_ops->patch_hypercall(vcpu, instructions);
+               ((unsigned char *)instructions)[3] = 0xc3; /* ret */
+               if (copy_to_user((void __user *)addr, instructions, 4))
+                       return 1;
+               kvm->arch.hv_hypercall = data;
+               break;
+       }
+       default:
+               pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+                         "data 0x%llx\n", msr, data);
+               return 1;
+       }
+       return 0;
+}
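
patch_hypercall() fills in the vendor's 3-byte hypercall instruction and the
code above appends a near RET, so the page the guest maps ends up starting
with roughly the following bytes (the vendor encodings are stated here as an
assumption; they are not part of this patch):

    /* Intel/VMX:  0f 01 c1   vmcall
     * AMD/SVM:    0f 01 d9   vmmcall
     * either way: c3         ret
     */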
+
+static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+       switch (msr) {
+       case HV_X64_MSR_APIC_ASSIST_PAGE: {
+               unsigned long addr;
+
+               if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+                       vcpu->arch.hv_vapic = data;
+                       break;
+               }
+               addr = gfn_to_hva(vcpu->kvm, data >>
+                                 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+               if (kvm_is_error_hva(addr))
+                       return 1;
+               if (clear_user((void __user *)addr, PAGE_SIZE))
+                       return 1;
+               vcpu->arch.hv_vapic = data;
+               break;
+       }
+       case HV_X64_MSR_EOI:
+               return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+       case HV_X64_MSR_ICR:
+               return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+       case HV_X64_MSR_TPR:
+               return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+       default:
+               pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+                         "data 0x%llx\n", msr, data);
+               return 1;
+       }
+
+       return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        switch (msr) {
@@ -949,7 +1219,19 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
                        "0x%x data 0x%llx\n", msr, data);
                break;
+       case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+               if (kvm_hv_msr_partition_wide(msr)) {
+                       int r;
+                       mutex_lock(&vcpu->kvm->lock);
+                       r = set_msr_hyperv_pw(vcpu, msr, data);
+                       mutex_unlock(&vcpu->kvm->lock);
+                       return r;
+               } else
+                       return set_msr_hyperv(vcpu, msr, data);
+               break;
        default:
+               if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
+                       return xen_hvm_config(vcpu, data);
                if (!ignore_msrs) {
                        pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
                                msr, data);
@@ -1046,6 +1328,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        return 0;
 }
 
+static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+       u64 data = 0;
+       struct kvm *kvm = vcpu->kvm;
+
+       switch (msr) {
+       case HV_X64_MSR_GUEST_OS_ID:
+               data = kvm->arch.hv_guest_os_id;
+               break;
+       case HV_X64_MSR_HYPERCALL:
+               data = kvm->arch.hv_hypercall;
+               break;
+       default:
+               pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+               return 1;
+       }
+
+       *pdata = data;
+       return 0;
+}
+
+static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+       u64 data = 0;
+
+       switch (msr) {
+       case HV_X64_MSR_VP_INDEX: {
+               int r;
+               struct kvm_vcpu *v;
+               kvm_for_each_vcpu(r, v, vcpu->kvm)
+                       if (v == vcpu)
+                               data = r;
+               break;
+       }
+       case HV_X64_MSR_EOI:
+               return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+       case HV_X64_MSR_ICR:
+               return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+       case HV_X64_MSR_TPR:
+               return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+       default:
+               pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+               return 1;
+       }
+       *pdata = data;
+       return 0;
+}
+
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
        u64 data;
@@ -1097,7 +1427,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
                data |= (((uint64_t)4ULL) << 40);
                break;
        case MSR_EFER:
-               data = vcpu->arch.shadow_efer;
+               data = vcpu->arch.efer;
                break;
        case MSR_KVM_WALL_CLOCK:
                data = vcpu->kvm->arch.wall_clock;
@@ -1112,6 +1442,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_MCG_STATUS:
        case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
                return get_msr_mce(vcpu, msr, pdata);
+       case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+               if (kvm_hv_msr_partition_wide(msr)) {
+                       int r;
+                       mutex_lock(&vcpu->kvm->lock);
+                       r = get_msr_hyperv_pw(vcpu, msr, pdata);
+                       mutex_unlock(&vcpu->kvm->lock);
+                       return r;
+               } else
+                       return get_msr_hyperv(vcpu, msr, pdata);
+               break;
        default:
                if (!ignore_msrs) {
                        pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1137,15 +1477,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
                    int (*do_msr)(struct kvm_vcpu *vcpu,
                                  unsigned index, u64 *data))
 {
-       int i;
+       int i, idx;
 
        vcpu_load(vcpu);
 
-       down_read(&vcpu->kvm->slots_lock);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
        for (i = 0; i < msrs->nmsrs; ++i)
                if (do_msr(vcpu, entries[i].index, &entries[i].data))
                        break;
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
        vcpu_put(vcpu);
 
@@ -1224,6 +1564,14 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_PIT2:
        case KVM_CAP_PIT_STATE2:
        case KVM_CAP_SET_IDENTITY_MAP_ADDR:
+       case KVM_CAP_XEN_HVM:
+       case KVM_CAP_ADJUST_CLOCK:
+       case KVM_CAP_VCPU_EVENTS:
+       case KVM_CAP_HYPERV:
+       case KVM_CAP_HYPERV_VAPIC:
+       case KVM_CAP_HYPERV_SPIN:
+       case KVM_CAP_PCI_SEGMENT:
+       case KVM_CAP_X86_ROBUST_SINGLESTEP:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -1238,8 +1586,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_MEMORY_SLOTS;
                break;
-       case KVM_CAP_PV_MMU:
-               r = !tdp_enabled;
+       case KVM_CAP_PV_MMU:    /* obsolete */
+               r = 0;
                break;
        case KVM_CAP_IOMMU:
                r = iommu_found();
@@ -1326,13 +1674,19 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        kvm_x86_ops->vcpu_load(vcpu, cpu);
+       if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
+               unsigned long khz = cpufreq_quick_get(cpu);
+               if (!khz)
+                       khz = tsc_khz;
+               per_cpu(cpu_tsc_khz, cpu) = khz;
+       }
        kvm_request_guest_time_update(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->vcpu_put(vcpu);
        kvm_put_guest_fpu(vcpu);
+       kvm_x86_ops->vcpu_put(vcpu);
 }
 
 static int is_efer_nx(void)
@@ -1397,6 +1751,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        cpuid_fix_nx_cap(vcpu);
        r = 0;
        kvm_apic_set_version(vcpu);
+       kvm_x86_ops->cpuid_update(vcpu);
 
 out_free:
        vfree(cpuid_entries);
@@ -1419,6 +1774,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
                goto out;
        vcpu->arch.cpuid_nent = cpuid->nent;
        kvm_apic_set_version(vcpu);
+       kvm_x86_ops->cpuid_update(vcpu);
        return 0;
 
 out:
@@ -1461,12 +1817,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                         u32 index, int *nent, int maxnent)
 {
        unsigned f_nx = is_efer_nx() ? F(NX) : 0;
-       unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
 #ifdef CONFIG_X86_64
+       unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
+                               ? F(GBPAGES) : 0;
        unsigned f_lm = F(LM);
 #else
+       unsigned f_gbpages = 0;
        unsigned f_lm = 0;
 #endif
+       unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 
        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
@@ -1486,7 +1845,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
                f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-               F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
+               F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_supported_word4_x86_features =
@@ -1733,7 +2092,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
                return 0;
        if (mce->status & MCI_STATUS_UC) {
                if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
-                   !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+                   !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
                        printk(KERN_DEBUG "kvm: set_mce: "
                               "injects mce exception while "
                               "previous one is in progress!\n");
@@ -1759,6 +2118,65 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+                                              struct kvm_vcpu_events *events)
+{
+       vcpu_load(vcpu);
+
+       events->exception.injected = vcpu->arch.exception.pending;
+       events->exception.nr = vcpu->arch.exception.nr;
+       events->exception.has_error_code = vcpu->arch.exception.has_error_code;
+       events->exception.error_code = vcpu->arch.exception.error_code;
+
+       events->interrupt.injected = vcpu->arch.interrupt.pending;
+       events->interrupt.nr = vcpu->arch.interrupt.nr;
+       events->interrupt.soft = vcpu->arch.interrupt.soft;
+
+       events->nmi.injected = vcpu->arch.nmi_injected;
+       events->nmi.pending = vcpu->arch.nmi_pending;
+       events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+
+       events->sipi_vector = vcpu->arch.sipi_vector;
+
+       events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
+                        | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
+
+       vcpu_put(vcpu);
+}
+
+static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
+                                             struct kvm_vcpu_events *events)
+{
+       if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
+                             | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
+               return -EINVAL;
+
+       vcpu_load(vcpu);
+
+       vcpu->arch.exception.pending = events->exception.injected;
+       vcpu->arch.exception.nr = events->exception.nr;
+       vcpu->arch.exception.has_error_code = events->exception.has_error_code;
+       vcpu->arch.exception.error_code = events->exception.error_code;
+
+       vcpu->arch.interrupt.pending = events->interrupt.injected;
+       vcpu->arch.interrupt.nr = events->interrupt.nr;
+       vcpu->arch.interrupt.soft = events->interrupt.soft;
+       if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
+               kvm_pic_clear_isr_ack(vcpu->kvm);
+
+       vcpu->arch.nmi_injected = events->nmi.injected;
+       if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
+               vcpu->arch.nmi_pending = events->nmi.pending;
+       kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
+
+       if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
+               vcpu->arch.sipi_vector = events->sipi_vector;
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
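
Userspace reaches the two helpers above through the new KVM_GET_VCPU_EVENTS
and KVM_SET_VCPU_EVENTS vcpu ioctls wired up further down. A rough usage
sketch (vcpu_fd plumbing and error handling assumed, not shown in this
patch):

    struct kvm_vcpu_events ev;

    ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &ev);   /* snapshot current state */
    ev.nmi.masked = 1;                          /* applied unconditionally */
    ev.flags = 0;            /* leave nmi.pending and sipi_vector untouched */
    ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev);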
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -1769,6 +2187,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 
        switch (ioctl) {
        case KVM_GET_LAPIC: {
+               r = -EINVAL;
+               if (!vcpu->arch.apic)
+                       goto out;
                lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
 
                r = -ENOMEM;
@@ -1784,6 +2205,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_SET_LAPIC: {
+               r = -EINVAL;
+               if (!vcpu->arch.apic)
+                       goto out;
                lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
                r = -ENOMEM;
                if (!lapic)
@@ -1910,6 +2334,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
                break;
        }
+       case KVM_GET_VCPU_EVENTS: {
+               struct kvm_vcpu_events events;
+
+               kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
+
+               r = -EFAULT;
+               if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
+                       break;
+               r = 0;
+               break;
+       }
+       case KVM_SET_VCPU_EVENTS: {
+               struct kvm_vcpu_events events;
+
+               r = -EFAULT;
+               if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
+                       break;
+
+               r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -1941,14 +2386,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
        if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                return -EINVAL;
 
-       down_write(&kvm->slots_lock);
+       mutex_lock(&kvm->slots_lock);
        spin_lock(&kvm->mmu_lock);
 
        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
        kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
        spin_unlock(&kvm->mmu_lock);
-       up_write(&kvm->slots_lock);
+       mutex_unlock(&kvm->slots_lock);
        return 0;
 }
 
@@ -1957,13 +2402,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
        return kvm->arch.n_alloc_mmu_pages;
 }
 
+gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
+{
+       int i;
+       struct kvm_mem_alias *alias;
+       struct kvm_mem_aliases *aliases;
+
+       aliases = rcu_dereference(kvm->arch.aliases);
+
+       for (i = 0; i < aliases->naliases; ++i) {
+               alias = &aliases->aliases[i];
+               if (alias->flags & KVM_ALIAS_INVALID)
+                       continue;
+               if (gfn >= alias->base_gfn
+                   && gfn < alias->base_gfn + alias->npages)
+                       return alias->target_gfn + gfn - alias->base_gfn;
+       }
+       return gfn;
+}
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
        int i;
        struct kvm_mem_alias *alias;
+       struct kvm_mem_aliases *aliases;
+
+       aliases = rcu_dereference(kvm->arch.aliases);
 
-       for (i = 0; i < kvm->arch.naliases; ++i) {
-               alias = &kvm->arch.aliases[i];
+       for (i = 0; i < aliases->naliases; ++i) {
+               alias = &aliases->aliases[i];
                if (gfn >= alias->base_gfn
                    && gfn < alias->base_gfn + alias->npages)
                        return alias->target_gfn + gfn - alias->base_gfn;
@@ -1981,6 +2448,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 {
        int r, n;
        struct kvm_mem_alias *p;
+       struct kvm_mem_aliases *aliases, *old_aliases;
 
        r = -EINVAL;
        /* General sanity checks */
@@ -1997,26 +2465,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
            < alias->target_phys_addr)
                goto out;
 
-       down_write(&kvm->slots_lock);
-       spin_lock(&kvm->mmu_lock);
+       r = -ENOMEM;
+       aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+       if (!aliases)
+               goto out;
+
+       mutex_lock(&kvm->slots_lock);
+
+       /* invalidate any gfn reference in case of deletion/shrinking */
+       memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
+       aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
+       old_aliases = kvm->arch.aliases;
+       rcu_assign_pointer(kvm->arch.aliases, aliases);
+       synchronize_srcu_expedited(&kvm->srcu);
+       kvm_mmu_zap_all(kvm);
+       kfree(old_aliases);
+
+       r = -ENOMEM;
+       aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+       if (!aliases)
+               goto out_unlock;
+
+       memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
 
-       p = &kvm->arch.aliases[alias->slot];
+       p = &aliases->aliases[alias->slot];
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
        p->npages = alias->memory_size >> PAGE_SHIFT;
        p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
+       p->flags &= ~(KVM_ALIAS_INVALID);
 
        for (n = KVM_ALIAS_SLOTS; n > 0; --n)
-               if (kvm->arch.aliases[n - 1].npages)
+               if (aliases->aliases[n - 1].npages)
                        break;
-       kvm->arch.naliases = n;
+       aliases->naliases = n;
 
-       spin_unlock(&kvm->mmu_lock);
-       kvm_mmu_zap_all(kvm);
-
-       up_write(&kvm->slots_lock);
-
-       return 0;
+       old_aliases = kvm->arch.aliases;
+       rcu_assign_pointer(kvm->arch.aliases, aliases);
+       synchronize_srcu_expedited(&kvm->srcu);
+       kfree(old_aliases);
+       r = 0;
 
+out_unlock:
+       mutex_unlock(&kvm->slots_lock);
 out:
        return r;
 }
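
The alias update above is the copy/publish/synchronize pattern that replaces
the old slots_lock write side throughout this series: build a private copy,
rcu_assign_pointer() it in, wait out SRCU readers, then free the old table.
As a generic sketch (names illustrative):

    new = kzalloc(sizeof(*new), GFP_KERNEL);
    if (!new)
            return -ENOMEM;
    memcpy(new, old, sizeof(*new));
    /* ... modify the private copy ... */
    rcu_assign_pointer(published_ptr, new);
    synchronize_srcu_expedited(&kvm->srcu);  /* srcu_read_lock() readers gone */
    kfree(old);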
@@ -2054,18 +2544,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
-               spin_lock(&pic_irqchip(kvm)->lock);
+               raw_spin_lock(&pic_irqchip(kvm)->lock);
                memcpy(&pic_irqchip(kvm)->pics[0],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
-               spin_unlock(&pic_irqchip(kvm)->lock);
+               raw_spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
-               spin_lock(&pic_irqchip(kvm)->lock);
+               raw_spin_lock(&pic_irqchip(kvm)->lock);
                memcpy(&pic_irqchip(kvm)->pics[1],
                        &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
-               spin_unlock(&pic_irqchip(kvm)->lock);
+               raw_spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
                r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
@@ -2145,29 +2635,62 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                                      struct kvm_dirty_log *log)
 {
-       int r;
-       int n;
+       int r, n, i;
        struct kvm_memory_slot *memslot;
-       int is_dirty = 0;
+       unsigned long is_dirty = 0;
+       unsigned long *dirty_bitmap = NULL;
 
-       down_write(&kvm->slots_lock);
+       mutex_lock(&kvm->slots_lock);
 
-       r = kvm_get_dirty_log(kvm, log, &is_dirty);
-       if (r)
+       r = -EINVAL;
+       if (log->slot >= KVM_MEMORY_SLOTS)
+               goto out;
+
+       memslot = &kvm->memslots->memslots[log->slot];
+       r = -ENOENT;
+       if (!memslot->dirty_bitmap)
+               goto out;
+
+       n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+
+       r = -ENOMEM;
+       dirty_bitmap = vmalloc(n);
+       if (!dirty_bitmap)
                goto out;
+       memset(dirty_bitmap, 0, n);
+
+       for (i = 0; !is_dirty && i < n/sizeof(long); i++)
+               is_dirty = memslot->dirty_bitmap[i];
 
        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
+               struct kvm_memslots *slots, *old_slots;
+
                spin_lock(&kvm->mmu_lock);
                kvm_mmu_slot_remove_write_access(kvm, log->slot);
                spin_unlock(&kvm->mmu_lock);
-               memslot = &kvm->memslots[log->slot];
-               n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
-               memset(memslot->dirty_bitmap, 0, n);
+
+               slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+               if (!slots)
+                       goto out_free;
+
+               memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+               slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+
+               old_slots = kvm->memslots;
+               rcu_assign_pointer(kvm->memslots, slots);
+               synchronize_srcu_expedited(&kvm->srcu);
+               dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
+               kfree(old_slots);
        }
+
        r = 0;
+       if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
+               r = -EFAULT;
+out_free:
+       vfree(dirty_bitmap);
 out:
-       up_write(&kvm->slots_lock);
+       mutex_unlock(&kvm->slots_lock);
        return r;
 }
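
Note that with memslots under SRCU the bitmap can no longer be cleared in
place while vcpus may still be setting bits in it; a zeroed bitmap is
published first, and the old, now-stable bitmap is what gets copied out. The
userspace side is unchanged; a minimal sketch (BITMAP_LONGS is a
hypothetical size derived from the slot's npages):

    unsigned long bitmap[BITMAP_LONGS];
    struct kvm_dirty_log log = {
            .slot = 0,
            .dirty_bitmap = bitmap,
    };

    ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);  /* fetch and reset dirty bits */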
 
@@ -2238,25 +2761,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (r)
                        goto out;
                break;
-       case KVM_CREATE_IRQCHIP:
+       case KVM_CREATE_IRQCHIP: {
+               struct kvm_pic *vpic;
+
+               mutex_lock(&kvm->lock);
+               r = -EEXIST;
+               if (kvm->arch.vpic)
+                       goto create_irqchip_unlock;
                r = -ENOMEM;
-               kvm->arch.vpic = kvm_create_pic(kvm);
-               if (kvm->arch.vpic) {
+               vpic = kvm_create_pic(kvm);
+               if (vpic) {
                        r = kvm_ioapic_init(kvm);
                        if (r) {
-                               kfree(kvm->arch.vpic);
-                               kvm->arch.vpic = NULL;
-                               goto out;
+                               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+                                                         &vpic->dev);
+                               kfree(vpic);
+                               goto create_irqchip_unlock;
                        }
                } else
-                       goto out;
+                       goto create_irqchip_unlock;
+               smp_wmb();
+               kvm->arch.vpic = vpic;
+               smp_wmb();
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
-                       kfree(kvm->arch.vpic);
-                       kfree(kvm->arch.vioapic);
-                       goto out;
+                       mutex_lock(&kvm->irq_lock);
+                       kvm_ioapic_destroy(kvm);
+                       kvm_destroy_pic(kvm);
+                       mutex_unlock(&kvm->irq_lock);
                }
+       create_irqchip_unlock:
+               mutex_unlock(&kvm->lock);
                break;
+       }
        case KVM_CREATE_PIT:
                u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
                goto create_pit;
@@ -2266,7 +2803,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                                   sizeof(struct kvm_pit_config)))
                        goto out;
        create_pit:
-               down_write(&kvm->slots_lock);
+               mutex_lock(&kvm->slots_lock);
                r = -EEXIST;
                if (kvm->arch.vpit)
                        goto create_pit_unlock;
@@ -2275,7 +2812,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (kvm->arch.vpit)
                        r = 0;
        create_pit_unlock:
-               up_write(&kvm->slots_lock);
+               mutex_unlock(&kvm->slots_lock);
                break;
        case KVM_IRQ_LINE_STATUS:
        case KVM_IRQ_LINE: {
@@ -2413,6 +2950,55 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_XEN_HVM_CONFIG: {
+               r = -EFAULT;
+               if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
+                                  sizeof(struct kvm_xen_hvm_config)))
+                       goto out;
+               r = -EINVAL;
+               if (kvm->arch.xen_hvm_config.flags)
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_SET_CLOCK: {
+               struct timespec now;
+               struct kvm_clock_data user_ns;
+               u64 now_ns;
+               s64 delta;
+
+               r = -EFAULT;
+               if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
+                       goto out;
+
+               r = -EINVAL;
+               if (user_ns.flags)
+                       goto out;
+
+               r = 0;
+               ktime_get_ts(&now);
+               now_ns = timespec_to_ns(&now);
+               delta = user_ns.clock - now_ns;
+               kvm->arch.kvmclock_offset = delta;
+               break;
+       }
+       case KVM_GET_CLOCK: {
+               struct timespec now;
+               struct kvm_clock_data user_ns;
+               u64 now_ns;
+
+               ktime_get_ts(&now);
+               now_ns = timespec_to_ns(&now);
+               user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
+               user_ns.flags = 0;
+
+               r = -EFAULT;
+               if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
+                       goto out;
+               r = 0;
+               break;
+       }
+
        default:
                ;
        }
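
KVM_GET_CLOCK and KVM_SET_CLOCK exist mainly so migration can carry the
kvmclock base between hosts: guest time is host monotonic time plus
kvmclock_offset. A save/restore sketch from userspace (vm fds assumed):

    struct kvm_clock_data clock;

    ioctl(src_vm_fd, KVM_GET_CLOCK, &clock);  /* on the source host */
    clock.flags = 0;
    ioctl(dst_vm_fd, KVM_SET_CLOCK, &clock);  /* on the destination */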
@@ -2425,7 +3011,8 @@ static void kvm_init_msr_list(void)
        u32 dummy[2];
        unsigned i, j;
 
-       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
+       /* skip the first MSRs in the list; they are KVM-specific */
+       for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
                if (j < i)
@@ -2442,7 +3029,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
            !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
                return 0;
 
-       return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
+       return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2451,17 +3038,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
            !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
                return 0;
 
-       return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
+       return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
-static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
-                              struct kvm_vcpu *vcpu)
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+       u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+       return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+       u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+       access |= PFERR_FETCH_MASK;
+       return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+       u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+       access |= PFERR_WRITE_MASK;
+       return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+/* used to access guest memory without checking the current CPL */
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+       return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
+}
+
+static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+                                     struct kvm_vcpu *vcpu, u32 access,
+                                     u32 *error)
 {
        void *data = val;
        int r = X86EMUL_CONTINUE;
 
        while (bytes) {
-               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
@@ -2484,14 +3098,37 @@ out:
        return r;
 }
 
+/* used for instruction fetching */
+static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
+                               struct kvm_vcpu *vcpu, u32 *error)
+{
+       u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
+                                         access | PFERR_FETCH_MASK, error);
+}
+
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+                              struct kvm_vcpu *vcpu, u32 *error)
+{
+       u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
+                                         error);
+}
+
+static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
+                              struct kvm_vcpu *vcpu, u32 *error)
+{
+       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
+}
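
The _system variant passes access = 0 on purpose: the CPU performs implicit
supervisor accesses (descriptor-table reads, for example) regardless of the
current CPL, so the emulator must not fail them with a user-mode permission
fault. A hypothetical caller:

    /* sketch: descriptor fetches ignore CPL, hence the _system variant */
    static int sketch_read_gdt_entry(struct kvm_vcpu *vcpu, gva_t gdt_base,
                                     u16 selector, struct desc_struct *desc)
    {
            return kvm_read_guest_virt_system(gdt_base + (selector & ~7),
                                              desc, sizeof(*desc), vcpu, NULL);
    }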
+
 static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
-                               struct kvm_vcpu *vcpu)
+                               struct kvm_vcpu *vcpu, u32 *error)
 {
        void *data = val;
        int r = X86EMUL_CONTINUE;
 
        while (bytes) {
-               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
@@ -2521,6 +3158,7 @@ static int emulator_read_emulated(unsigned long addr,
                                  struct kvm_vcpu *vcpu)
 {
        gpa_t                 gpa;
+       u32 error_code;
 
        if (vcpu->mmio_read_completed) {
                memcpy(val, vcpu->mmio_data, bytes);
@@ -2530,17 +3168,20 @@ static int emulator_read_emulated(unsigned long addr,
                return X86EMUL_CONTINUE;
        }
 
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+       gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
+
+       if (gpa == UNMAPPED_GVA) {
+               kvm_inject_page_fault(vcpu, addr, error_code);
+               return X86EMUL_PROPAGATE_FAULT;
+       }
 
        /* For APIC access vmexit */
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
                goto mmio;
 
-       if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+       if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
                                == X86EMUL_CONTINUE)
                return X86EMUL_CONTINUE;
-       if (gpa == UNMAPPED_GVA)
-               return X86EMUL_PROPAGATE_FAULT;
 
 mmio:
        /*
@@ -2579,11 +3220,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
                                           struct kvm_vcpu *vcpu)
 {
        gpa_t                 gpa;
+       u32 error_code;
 
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+       gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
 
        if (gpa == UNMAPPED_GVA) {
-               kvm_inject_page_fault(vcpu, addr, 2);
+               kvm_inject_page_fault(vcpu, addr, error_code);
                return X86EMUL_PROPAGATE_FAULT;
        }
 
@@ -2647,7 +3289,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
                char *kaddr;
                u64 val;
 
-               gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
 
                if (gpa == UNMAPPED_GVA ||
                   (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2684,35 +3326,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
+       kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
+       kvm_x86_ops->fpu_activate(vcpu);
        return X86EMUL_CONTINUE;
 }
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-       struct kvm_vcpu *vcpu = ctxt->vcpu;
-
-       switch (dr) {
-       case 0 ... 3:
-               *dest = kvm_x86_ops->get_dr(vcpu, dr);
-               return X86EMUL_CONTINUE;
-       default:
-               pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
-               return X86EMUL_UNHANDLEABLE;
-       }
+       return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
 }
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 {
        unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
-       int exception;
 
-       kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
-       if (exception) {
-               /* FIXME: better handling */
-               return X86EMUL_UNHANDLEABLE;
-       }
-       return X86EMUL_CONTINUE;
+       return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
 }
 
 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -2726,7 +3354,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 
        rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
-       kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
+       kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
 
        printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
               context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2734,7 +3362,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 static struct x86_emulate_ops emulate_ops = {
-       .read_std            = kvm_read_guest_virt,
+       .read_std            = kvm_read_guest_virt_system,
+       .fetch               = kvm_fetch_guest_virt,
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
@@ -2775,10 +3404,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
                vcpu->arch.emulate_ctxt.vcpu = vcpu;
-               vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+               vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
                vcpu->arch.emulate_ctxt.mode =
+                       (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
                        (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
-                       ? X86EMUL_MODE_REAL : cs_l
+                       ? X86EMUL_MODE_VM86 : cs_l
                        ? X86EMUL_MODE_PROT64 : cs_db
                        ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 
@@ -2853,7 +3483,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                return EMULATE_DO_MMIO;
        }
 
-       kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+       kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
        if (vcpu->mmio_is_write) {
                vcpu->mmio_needed = 0;
@@ -2870,12 +3500,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
        gva_t q = vcpu->arch.pio.guest_gva;
        unsigned bytes;
        int ret;
+       u32 error_code;
 
        bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
        if (vcpu->arch.pio.in)
-               ret = kvm_write_guest_virt(q, p, bytes, vcpu);
+               ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
        else
-               ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+               ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
+
+       if (ret == X86EMUL_PROPAGATE_FAULT)
+               kvm_inject_page_fault(vcpu, q, error_code);
+
        return ret;
 }
 
@@ -2896,7 +3531,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
                if (io->in) {
                        r = pio_copy_data(vcpu);
                        if (r)
-                               return r;
+                               goto out;
                }
 
                delta = 1;
@@ -2923,7 +3558,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
                        kvm_register_write(vcpu, VCPU_REGS_RSI, val);
                }
        }
-
+out:
        io->count -= io->cur_count;
        io->cur_count = 0;
 
@@ -2936,11 +3571,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
        int r;
 
        if (vcpu->arch.pio.in)
-               r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+               r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
                                    vcpu->arch.pio.size, pd);
        else
-               r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
-                                    vcpu->arch.pio.size, pd);
+               r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+                                    vcpu->arch.pio.port, vcpu->arch.pio.size,
+                                    pd);
        return r;
 }
 
@@ -2951,7 +3587,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
        int i, r = 0;
 
        for (i = 0; i < io->cur_count; i++) {
-               if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+               if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
                                     io->port, io->size, pd)) {
                        r = -EOPNOTSUPP;
                        break;
@@ -2965,6 +3601,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
 {
        unsigned long val;
 
+       trace_kvm_pio(!in, port, size, 1);
+
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
        vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -2976,11 +3614,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
        vcpu->arch.pio.down = 0;
        vcpu->arch.pio.rep = 0;
 
-       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
-                     size, 1);
-
-       val = kvm_register_read(vcpu, VCPU_REGS_RAX);
-       memcpy(vcpu->arch.pio_data, &val, 4);
+       if (!vcpu->arch.pio.in) {
+               val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+               memcpy(vcpu->arch.pio_data, &val, 4);
+       }
 
        if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
                complete_pio(vcpu);
@@ -2997,6 +3634,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
        unsigned now, in_page;
        int ret = 0;
 
+       trace_kvm_pio(!in, port, size, count);
+
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
        vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3008,9 +3647,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
        vcpu->arch.pio.down = down;
        vcpu->arch.pio.rep = rep;
 
-       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
-                     size, count);
-
        if (!count) {
                kvm_x86_ops->skip_emulated_instruction(vcpu);
                return 1;
@@ -3042,10 +3678,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
        if (!vcpu->arch.pio.in) {
                /* string PIO write */
                ret = pio_copy_data(vcpu);
-               if (ret == X86EMUL_PROPAGATE_FAULT) {
-                       kvm_inject_gp(vcpu, 0);
+               if (ret == X86EMUL_PROPAGATE_FAULT)
                        return 1;
-               }
                if (ret == 0 && !pio_string_write(vcpu)) {
                        complete_pio(vcpu);
                        if (vcpu->arch.pio.count == 0)
@@ -3063,9 +3697,6 @@ static void bounce_off(void *info)
        /* nothing */
 }
 
-static unsigned int  ref_freq;
-static unsigned long tsc_khz_ref;
-
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                     void *data)
 {
@@ -3074,14 +3705,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
        struct kvm_vcpu *vcpu;
        int i, send_ipi = 0;
 
-       if (!ref_freq)
-               ref_freq = freq->old;
-
        if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
                return 0;
        if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
                return 0;
-       per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+       per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3122,12 +3750,18 @@ static void kvm_timer_init(void)
 {
        int cpu;
 
-       for_each_possible_cpu(cpu)
-               per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-               tsc_khz_ref = tsc_khz;
                cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                          CPUFREQ_TRANSITION_NOTIFIER);
+               for_each_online_cpu(cpu) {
+                       unsigned long khz = cpufreq_get(cpu);
+                       if (!khz)
+                               khz = tsc_khz;
+                       per_cpu(cpu_tsc_khz, cpu) = khz;
+               }
+       } else {
+               for_each_possible_cpu(cpu)
+                       per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
        }
 }
 
@@ -3204,11 +3838,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
                return a0 | ((gpa_t)a1 << 32);
 }
 
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+       u64 param, ingpa, outgpa, ret;
+       uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+       bool fast, longmode;
+       int cs_db, cs_l;
+
+       /*
+        * per the Hyper-V spec, a hypercall generates #UD when issued
+        * from non-zero CPL or from real mode
+        */
+       if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 0;
+       }
+
+       kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+       longmode = is_long_mode(vcpu) && cs_l == 1;
+
+       if (!longmode) {
+               param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+                       (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
+               ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+                       (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
+               outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+                       (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+       }
+#ifdef CONFIG_X86_64
+       else {
+               param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+               ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+               outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+       }
+#endif
+
+       code = param & 0xffff;
+       fast = (param >> 16) & 0x1;
+       rep_cnt = (param >> 32) & 0xfff;
+       rep_idx = (param >> 48) & 0xfff;
+
+       trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+       switch (code) {
+       case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+               kvm_vcpu_on_spin(vcpu);
+               break;
+       default:
+               res = HV_STATUS_INVALID_HYPERCALL_CODE;
+               break;
+       }
+
+       ret = res | (((u64)rep_done & 0xfff) << 32);
+       if (longmode) {
+               kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+       } else {
+               kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+               kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+       }
+
+       return 1;
+}
+
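
The 64-bit hypercall input value is decoded above per the Hyper-V spec: call code in bits 0-15, the fast (register-based) flag in bit 16, rep count in bits 32-43, and rep start index in bits 48-59. A minimal sketch of the same extraction as a standalone helper (the struct and function names here are illustrative, not kernel API):

	/* Illustrative decode of the Hyper-V hypercall input value,
	 * mirroring kvm_hv_hypercall() above. */
	struct hv_hc_input {
		u16 code;	/* bits 0-15: hypercall code */
		bool fast;	/* bit 16: register-based calling convention */
		u16 rep_cnt;	/* bits 32-43: rep count */
		u16 rep_idx;	/* bits 48-59: rep start index */
	};

	static struct hv_hc_input hv_decode_input(u64 param)
	{
		struct hv_hc_input in = {
			.code    = param & 0xffff,
			.fast    = (param >> 16) & 0x1,
			.rep_cnt = (param >> 32) & 0xfff,
			.rep_idx = (param >> 48) & 0xfff,
		};
		return in;
	}
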
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
        int r = 1;
 
+       if (kvm_hv_hypercall_enabled(vcpu->kvm))
+               return kvm_hv_hypercall(vcpu);
+
        nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
        a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
        a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -3251,10 +3950,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 {
        char instruction[3];
-       int ret = 0;
        unsigned long rip = kvm_rip_read(vcpu);
 
-
        /*
         * Blow out the MMU to ensure that no other VCPU has an active mapping
         * to ensure that the updated hypercall appears atomically across all
@@ -3263,11 +3960,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
        kvm_mmu_zap_all(vcpu->kvm);
 
        kvm_x86_ops->patch_hypercall(vcpu, instruction);
-       if (emulator_write_emulated(rip, instruction, 3, vcpu)
-           != X86EMUL_CONTINUE)
-               ret = -EFAULT;
 
-       return ret;
+       return emulator_write_emulated(rip, instruction, 3, vcpu);
 }
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -3293,17 +3987,16 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
                   unsigned long *rflags)
 {
        kvm_lmsw(vcpu, msw);
-       *rflags = kvm_x86_ops->get_rflags(vcpu);
+       *rflags = kvm_get_rflags(vcpu);
 }
 
 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
        unsigned long value;
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        switch (cr) {
        case 0:
-               value = vcpu->arch.cr0;
+               value = kvm_read_cr0(vcpu);
                break;
        case 2:
                value = vcpu->arch.cr2;
@@ -3312,7 +4005,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
                value = vcpu->arch.cr3;
                break;
        case 4:
-               value = vcpu->arch.cr4;
+               value = kvm_read_cr4(vcpu);
                break;
        case 8:
                value = kvm_get_cr8(vcpu);
@@ -3330,8 +4023,8 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 {
        switch (cr) {
        case 0:
-               kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
-               *rflags = kvm_x86_ops->get_rflags(vcpu);
+               kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
+               *rflags = kvm_get_rflags(vcpu);
                break;
        case 2:
                vcpu->arch.cr2 = val;
@@ -3340,7 +4033,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                kvm_set_cr3(vcpu, val);
                break;
        case 4:
-               kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+               kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
                break;
        case 8:
                kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3407,6 +4100,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
        }
        return best;
 }
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 {
@@ -3462,7 +4156,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
        if (irqchip_in_kernel(vcpu->kvm))
@@ -3490,14 +4184,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
 static void vapic_exit(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
+       int idx;
 
        if (!apic || !apic->vapic_addr)
                return;
 
-       down_read(&vcpu->kvm->slots_lock);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
        kvm_release_page_dirty(apic->vapic_page);
        mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
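
vapic_exit() is one of many sites this patch converts from the slots_lock rwsem to SRCU on the memslot read side. The recurring pattern, sketched as a standalone reader (the function is illustrative; srcu_read_lock()/srcu_read_unlock() are the real primitives, and kvm->srcu is introduced by this series):

	static void example_memslots_reader(struct kvm *kvm)
	{
		int idx;

		idx = srcu_read_lock(&kvm->srcu);
		/* ... dereference memslot-backed state here ... */
		srcu_read_unlock(&kvm->srcu, idx);
	}
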
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3593,12 +4288,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        r = 0;
                        goto out;
                }
+               if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+                       vcpu->fpu_active = 0;
+                       kvm_x86_ops->fpu_deactivate(vcpu);
+               }
        }
 
        preempt_disable();
 
        kvm_x86_ops->prepare_guest_switch(vcpu);
-       kvm_load_guest_fpu(vcpu);
+       if (vcpu->fpu_active)
+               kvm_load_guest_fpu(vcpu);
 
        local_irq_disable();
 
@@ -3626,7 +4326,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                kvm_lapic_sync_to_vapic(vcpu);
        }
 
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
        kvm_guest_enter();
 
@@ -3641,14 +4341,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        trace_kvm_entry(vcpu->vcpu_id);
        kvm_x86_ops->run(vcpu);
 
-       if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-               set_debugreg(current->thread.debugreg0, 0);
-               set_debugreg(current->thread.debugreg1, 1);
-               set_debugreg(current->thread.debugreg2, 2);
-               set_debugreg(current->thread.debugreg3, 3);
-               set_debugreg(current->thread.debugreg6, 6);
-               set_debugreg(current->thread.debugreg7, 7);
-       }
+       /*
+        * If the guest has used debug registers, at least dr7
+        * will be disabled while returning to the host.
+        * If we don't have active breakpoints in the host, we don't
+        * care about the messed up debug address registers. But if
+        * we have some of them active, restore the old state.
+        */
+       if (hw_breakpoint_active())
+               hw_breakpoint_restore();
 
        set_bit(KVM_REQ_KICK, &vcpu->requests);
        local_irq_enable();
@@ -3667,7 +4368,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        preempt_enable();
 
-       down_read(&vcpu->kvm->slots_lock);
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
        /*
         * Profile KVM exit RIPs:
@@ -3689,6 +4390,7 @@ out:
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
+       struct kvm *kvm = vcpu->kvm;
 
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
                pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3700,7 +4402,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        }
 
-       down_read(&vcpu->kvm->slots_lock);
+       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
        vapic_enter(vcpu);
 
        r = 1;
@@ -3708,9 +4410,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
                        r = vcpu_enter_guest(vcpu);
                else {
-                       up_read(&vcpu->kvm->slots_lock);
+                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
                        kvm_vcpu_block(vcpu);
-                       down_read(&vcpu->kvm->slots_lock);
+                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
                        if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
                        {
                                switch(vcpu->arch.mp_state) {
@@ -3745,13 +4447,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        ++vcpu->stat.signal_exits;
                }
                if (need_resched()) {
-                       up_read(&vcpu->kvm->slots_lock);
+                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
                        kvm_resched(vcpu);
-                       down_read(&vcpu->kvm->slots_lock);
+                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
                }
        }
 
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
        post_kvm_run_save(vcpu);
 
        vapic_exit(vcpu);
@@ -3785,16 +4487,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                if (r)
                        goto out;
        }
-#if CONFIG_HAS_IOMEM
        if (vcpu->mmio_needed) {
                memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
                vcpu->mmio_read_completed = 1;
                vcpu->mmio_needed = 0;
 
-               down_read(&vcpu->kvm->slots_lock);
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
                                        EMULTYPE_NO_DECODE);
-               up_read(&vcpu->kvm->slots_lock);
+               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                if (r == EMULATE_DO_MMIO) {
                        /*
                         * Read-modify-write.  Back to userspace.
@@ -3803,7 +4504,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                        goto out;
                }
        }
-#endif
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
                kvm_register_write(vcpu, VCPU_REGS_RAX,
                                     kvm_run->hypercall.ret);
@@ -3842,13 +4542,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        regs->rip = kvm_rip_read(vcpu);
-       regs->rflags = kvm_x86_ops->get_rflags(vcpu);
-
-       /*
-        * Don't leak debug flags in case they were set for guest debugging
-        */
-       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-               regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+       regs->rflags = kvm_get_rflags(vcpu);
 
        vcpu_put(vcpu);
 
@@ -3876,12 +4570,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
        kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
        kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
-
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_x86_ops->set_rflags(vcpu, regs->rflags);
-
+       kvm_set_rflags(vcpu, regs->rflags);
 
        vcpu->arch.exception.pending = false;
 
@@ -3930,13 +4622,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->gdt.limit = dt.limit;
        sregs->gdt.base = dt.base;
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-       sregs->cr0 = vcpu->arch.cr0;
+       sregs->cr0 = kvm_read_cr0(vcpu);
        sregs->cr2 = vcpu->arch.cr2;
        sregs->cr3 = vcpu->arch.cr3;
-       sregs->cr4 = vcpu->arch.cr4;
+       sregs->cr4 = kvm_read_cr4(vcpu);
        sregs->cr8 = kvm_get_cr8(vcpu);
-       sregs->efer = vcpu->arch.shadow_efer;
+       sregs->efer = vcpu->arch.efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);
 
        memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
@@ -4024,14 +4715,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 {
        struct descriptor_table dtable;
        u16 index = selector >> 3;
+       int ret;
+       u32 err;
+       gva_t addr;
 
        get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
        if (dtable.limit < index * 8 + 7) {
                kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
-               return 1;
+               return X86EMUL_PROPAGATE_FAULT;
        }
-       return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
+       addr = dtable.base + index * 8;
+       ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
+                                        vcpu, &err);
+       if (ret == X86EMUL_PROPAGATE_FAULT)
+               kvm_inject_page_fault(vcpu, addr, err);
+
+       return ret;
 }
 
 /* allowed just for 8 bytes segments */
@@ -4045,15 +4745,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 
        if (dtable.limit < index * 8 + 7)
                return 1;
-       return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
+       return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
+}
+
+static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
+                              struct desc_struct *seg_desc)
+{
+       u32 base_addr = get_desc_base(seg_desc);
+
+       return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
 }
 
-static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
                             struct desc_struct *seg_desc)
 {
        u32 base_addr = get_desc_base(seg_desc);
 
-       return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
+       return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
 }
 
 static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4064,18 +4772,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
        return kvm_seg.selector;
 }
 
-static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
-                                               u16 selector,
-                                               struct kvm_segment *kvm_seg)
-{
-       struct desc_struct seg_desc;
-
-       if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
-               return 1;
-       seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
-       return 0;
-}
-
 static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
 {
        struct kvm_segment segvar = {
@@ -4093,34 +4789,122 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
                .unusable = 0,
        };
        kvm_x86_ops->set_segment(vcpu, &segvar, seg);
-       return 0;
+       return X86EMUL_CONTINUE;
 }
 
 static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
 {
        return (seg != VCPU_SREG_LDTR) &&
                (seg != VCPU_SREG_TR) &&
-               (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
+               (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
 }
 
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
-                               int type_bits, int seg)
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
 {
        struct kvm_segment kvm_seg;
+       struct desc_struct seg_desc;
+       u8 dpl, rpl, cpl;
+       unsigned err_vec = GP_VECTOR;
+       u32 err_code = 0;
+       bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
+       int ret;
 
-       if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
+       if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
                return kvm_load_realmode_segment(vcpu, selector, seg);
-       if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
-               return 1;
-       kvm_seg.type |= type_bits;
 
-       if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
-           seg != VCPU_SREG_LDTR)
-               if (!kvm_seg.s)
-                       kvm_seg.unusable = 1;
+       /* NULL selector is not valid for TR, CS and SS */
+       if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
+           && null_selector)
+               goto exception;
+
+       /* TR should be in GDT only */
+       if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
+               goto exception;
+
+       ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
+       if (ret)
+               return ret;
+
+       seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
+
+       if (null_selector) { /* for NULL selector skip all following checks */
+               kvm_seg.unusable = 1;
+               goto load;
+       }
+
+       err_code = selector & 0xfffc;
+       err_vec = GP_VECTOR;
+
+       /* can't load a system descriptor into an ordinary segment register */
+       if (seg <= VCPU_SREG_GS && !kvm_seg.s)
+               goto exception;
+
+       if (!kvm_seg.present) {
+               err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
+               goto exception;
+       }
+
+       rpl = selector & 3;
+       dpl = kvm_seg.dpl;
+       cpl = kvm_x86_ops->get_cpl(vcpu);
 
+       switch (seg) {
+       case VCPU_SREG_SS:
+               /*
+                * segment is not a writable data segment, or the
+                * selector's RPL != CPL, or the descriptor's DPL != CPL
+                */
+               if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
+                       goto exception;
+               break;
+       case VCPU_SREG_CS:
+               if (!(kvm_seg.type & 8))
+                       goto exception;
+
+               if (kvm_seg.type & 4) {
+                       /* conforming */
+                       if (dpl > cpl)
+                               goto exception;
+               } else {
+                       /* nonconforming */
+                       if (rpl > cpl || dpl != cpl)
+                               goto exception;
+               }
+               /* CS(RPL) <- CPL */
+               selector = (selector & 0xfffc) | cpl;
+               break;
+       case VCPU_SREG_TR:
+               if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
+                       goto exception;
+               break;
+       case VCPU_SREG_LDTR:
+               if (kvm_seg.s || kvm_seg.type != 2)
+                       goto exception;
+               break;
+       default: /*  DS, ES, FS, or GS */
+               /*
+                * fault if the segment is not a data or readable code
+                * segment, or if it is a data or nonconforming code
+                * segment and both RPL and CPL exceed DPL
+                */
+               if ((kvm_seg.type & 0xa) == 0x8 ||
+                   (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
+                       goto exception;
+               break;
+       }
+
+       if (!kvm_seg.unusable && kvm_seg.s) {
+               /* mark segment as accessed */
+               kvm_seg.type |= 1;
+               seg_desc.type |= 1;
+               save_guest_segment_descriptor(vcpu, selector, &seg_desc);
+       }
+load:
        kvm_set_segment(vcpu, &kvm_seg, seg);
-       return 0;
+       return X86EMUL_CONTINUE;
+exception:
+       kvm_queue_exception_e(vcpu, err_vec, err_code);
+       return X86EMUL_PROPAGATE_FAULT;
 }
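
For the default (DS/ES/FS/GS) case above, the rule in isolation: the load faults if the segment is execute-only code, or if it is a data or nonconforming code segment and both RPL and CPL exceed DPL. A standalone sketch of that check (the helper name is illustrative):

	static bool data_seg_load_ok(u8 type, u8 dpl, u8 rpl, u8 cpl)
	{
		if ((type & 0xa) == 0x8)	/* execute-only code segment */
			return false;
		if ((type & 0xc) != 0xc &&	/* data or nonconforming code */
		    rpl > dpl && cpl > dpl)	/* privilege check failed */
			return false;
		return true;
	}
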
 
 static void save_state_to_tss32(struct kvm_vcpu *vcpu,
@@ -4128,7 +4912,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
 {
        tss->cr3 = vcpu->arch.cr3;
        tss->eip = kvm_rip_read(vcpu);
-       tss->eflags = kvm_x86_ops->get_rflags(vcpu);
+       tss->eflags = kvm_get_rflags(vcpu);
        tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4146,13 +4930,21 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
        tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
 }
 
+static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
+{
+       struct kvm_segment kvm_seg;
+       kvm_get_segment(vcpu, &kvm_seg, seg);
+       kvm_seg.selector = sel;
+       kvm_set_segment(vcpu, &kvm_seg, seg);
+}
+
 static int load_state_from_tss32(struct kvm_vcpu *vcpu,
                                  struct tss_segment_32 *tss)
 {
        kvm_set_cr3(vcpu, tss->cr3);
 
        kvm_rip_write(vcpu, tss->eip);
-       kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
+       kvm_set_rflags(vcpu, tss->eflags | 2);
 
        kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
        kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
@@ -4163,25 +4955,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
        kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
        kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
 
-       if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+       /*
+        * The SDM says that segment selectors are loaded before
+        * segment descriptors.
+        */
+       kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
+       kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+       kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+       kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+       kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+       kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
+       kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
+
+       /*
+        * Now load segment descriptors. If a fault happens at this
+        * stage, it is handled in the context of the new task.
+        */
+       if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+       if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+       if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+       if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+       if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+       if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+       if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
                return 1;
        return 0;
 }
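
The two-phase ordering above means a #NP/#SS/#GP raised while loading a descriptor is delivered with the new task's raw selectors already in place. Per register, the pattern reduces to the following (helpers as defined in this file; DS shown as the example):

	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);  /* phase 1: raw selector */
	/* ... same for the remaining segment registers ... */
	if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))  /* phase 2: full load */
		return 1;  /* any fault was queued in the new task's context */
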
@@ -4190,7 +4998,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
                                struct tss_segment_16 *tss)
 {
        tss->ip = kvm_rip_read(vcpu);
-       tss->flag = kvm_x86_ops->get_rflags(vcpu);
+       tss->flag = kvm_get_rflags(vcpu);
        tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4205,14 +5013,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
        tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
        tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
        tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
-       tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
 }
 
 static int load_state_from_tss16(struct kvm_vcpu *vcpu,
                                 struct tss_segment_16 *tss)
 {
        kvm_rip_write(vcpu, tss->ip);
-       kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
+       kvm_set_rflags(vcpu, tss->flag | 2);
        kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
        kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
        kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
@@ -4222,19 +5029,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
        kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
        kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
 
-       if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+       /*
+        * The SDM says that segment selectors are loaded before
+        * segment descriptors.
+        */
+       kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
+       kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+       kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+       kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+       kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+
+       /*
+        * Now load segment descriptors. If a fault happens at this
+        * stage, it is handled in the context of the new task.
+        */
+       if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+       if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+       if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+       if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
                return 1;
 
-       if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+       if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
                return 1;
        return 0;
 }
@@ -4256,7 +5077,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
                            sizeof tss_segment_16))
                goto out;
 
-       if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+       if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
                           &tss_segment_16, sizeof tss_segment_16))
                goto out;
 
@@ -4264,7 +5085,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
                tss_segment_16.prev_task_link = old_tss_sel;
 
                if (kvm_write_guest(vcpu->kvm,
-                                   get_tss_base_addr(vcpu, nseg_desc),
+                                   get_tss_base_addr_write(vcpu, nseg_desc),
                                    &tss_segment_16.prev_task_link,
                                    sizeof tss_segment_16.prev_task_link))
                        goto out;
@@ -4295,7 +5116,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
                            sizeof tss_segment_32))
                goto out;
 
-       if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+       if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
                           &tss_segment_32, sizeof tss_segment_32))
                goto out;
 
@@ -4303,7 +5124,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
                tss_segment_32.prev_task_link = old_tss_sel;
 
                if (kvm_write_guest(vcpu->kvm,
-                                   get_tss_base_addr(vcpu, nseg_desc),
+                                   get_tss_base_addr_write(vcpu, nseg_desc),
                                    &tss_segment_32.prev_task_link,
                                    sizeof tss_segment_32.prev_task_link))
                        goto out;
@@ -4326,7 +5147,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
        u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
        u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
 
-       old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
+       old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
 
        /* FIXME: Handle errors. Failure to read either the TSS or its
         * descriptors should generate a page fault.
@@ -4358,8 +5179,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
        }
 
        if (reason == TASK_SWITCH_IRET) {
-               u32 eflags = kvm_x86_ops->get_rflags(vcpu);
-               kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
+               u32 eflags = kvm_get_rflags(vcpu);
+               kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
        }
 
        /* set back link to prev task only if NT bit is set in eflags;
           note that old_tss_sel is not used after this point */
@@ -4367,11 +5188,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
        if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
                old_tss_sel = 0xffff;
 
-       /* set back link to prev task only if NT bit is set in eflags
-          note that old_tss_sel is not used afetr this point */
-       if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
-               old_tss_sel = 0xffff;
-
        if (nseg_desc.type & 8)
                ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
                                         old_tss_base, &nseg_desc);
@@ -4380,8 +5196,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
                                         old_tss_base, &nseg_desc);
 
        if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
-               u32 eflags = kvm_x86_ops->get_rflags(vcpu);
-               kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
+               u32 eflags = kvm_get_rflags(vcpu);
+               kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
        }
 
        if (reason != TASK_SWITCH_IRET) {
@@ -4390,7 +5206,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
                                              &nseg_desc);
        }
 
-       kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
+       kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
        seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
        tr_seg.type = 11;
        kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
@@ -4421,20 +5237,20 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
        kvm_set_cr8(vcpu, sregs->cr8);
 
-       mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
+       mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
        kvm_set_apic_base(vcpu, sregs->apic_base);
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-
-       mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
+       mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
        vcpu->arch.cr0 = sregs->cr0;
 
-       mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
+       mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
-       if (!is_long_mode(vcpu) && is_pae(vcpu))
+       if (!is_long_mode(vcpu) && is_pae(vcpu)) {
                load_pdptrs(vcpu, vcpu->arch.cr3);
+               mmu_reset_needed = 1;
+       }
 
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
@@ -4464,7 +5280,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        /* Older userspace won't unhalt the vcpu on reset. */
        if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
            sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
-           !(vcpu->arch.cr0 & X86_CR0_PE))
+           !is_protmode(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
        vcpu_put(vcpu);
@@ -4475,12 +5291,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
 {
+       unsigned long rflags;
        int i, r;
 
        vcpu_load(vcpu);
 
-       if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
-           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+       if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
+               r = -EBUSY;
+               if (vcpu->arch.exception.pending)
+                       goto unlock_out;
+               if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+                       kvm_queue_exception(vcpu, DB_VECTOR);
+               else
+                       kvm_queue_exception(vcpu, BP_VECTOR);
+       }
+
+       /*
+        * Read rflags while potentially injected trace flags are still
+        * filtered out.
+        */
+       rflags = kvm_get_rflags(vcpu);
+
+       vcpu->guest_debug = dbg->control;
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+               vcpu->guest_debug = 0;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                for (i = 0; i < KVM_NR_DB_REGS; ++i)
                        vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
                vcpu->arch.switch_db_regs =
@@ -4491,13 +5327,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
        }
 
-       r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+               vcpu->arch.singlestep_cs =
+                       get_segment_selector(vcpu, VCPU_SREG_CS);
+               vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
+       }
 
-       if (dbg->control & KVM_GUESTDBG_INJECT_DB)
-               kvm_queue_exception(vcpu, DB_VECTOR);
-       else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
-               kvm_queue_exception(vcpu, BP_VECTOR);
+       /*
+        * Trigger an rflags update that will inject or remove the trace
+        * flags.
+        */
+       kvm_set_rflags(vcpu, rflags);
+
+       kvm_x86_ops->set_guest_debug(vcpu, dbg);
+
+       r = 0;
 
+unlock_out:
        vcpu_put(vcpu);
 
        return r;
@@ -4532,11 +5378,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 {
        unsigned long vaddr = tr->linear_address;
        gpa_t gpa;
+       int idx;
 
        vcpu_load(vcpu);
-       down_read(&vcpu->kvm->slots_lock);
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-       up_read(&vcpu->kvm->slots_lock);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+       gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
        tr->physical_address = gpa;
        tr->valid = gpa != UNMAPPED_GVA;
        tr->writeable = 1;
@@ -4617,14 +5464,14 @@ EXPORT_SYMBOL_GPL(fx_init);
 
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
+       if (vcpu->guest_fpu_loaded)
                return;
 
        vcpu->guest_fpu_loaded = 1;
        kvm_fx_save(&vcpu->arch.host_fx_image);
        kvm_fx_restore(&vcpu->arch.guest_fx_image);
+       trace_kvm_fpu(1);
 }
-EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
@@ -4635,8 +5482,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
        kvm_fx_save(&vcpu->arch.guest_fx_image);
        kvm_fx_restore(&vcpu->arch.host_fx_image);
        ++vcpu->stat.fpu_reload;
+       set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
+       trace_kvm_fpu(0);
 }
-EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
@@ -4700,12 +5548,24 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 int kvm_arch_hardware_enable(void *garbage)
 {
+       /*
+        * Since this may be called from a hotplug notification,
+        * we can't get the CPU frequency directly.
+        */
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               int cpu = raw_smp_processor_id();
+               per_cpu(cpu_tsc_khz, cpu) = 0;
+       }
+
+       kvm_shared_msr_cpu_online();
+
        return kvm_x86_ops->hardware_enable(garbage);
 }
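
Zeroing cpu_tsc_khz defers the frequency query to a context where cpufreq can be consulted safely. A plausible refill at vcpu-load time might look like this (the placement and the use of cpufreq_quick_get() are assumptions here, not part of this hunk):

	int cpu = raw_smp_processor_id();

	/* Sketch: re-derive the per-cpu TSC rate zeroed at hotplug time. */
	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
		unsigned long khz = cpufreq_quick_get(cpu);	/* 0 without a cpufreq driver */
		if (!khz)
			khz = tsc_khz;				/* fall back to the boot value */
		per_cpu(cpu_tsc_khz, cpu) = khz;
	}
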
 
 void kvm_arch_hardware_disable(void *garbage)
 {
        kvm_x86_ops->hardware_disable(garbage);
+       drop_user_return_notifiers(garbage);
 }
 
 int kvm_arch_hardware_setup(void)
@@ -4759,12 +5619,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                       GFP_KERNEL);
        if (!vcpu->arch.mce_banks) {
                r = -ENOMEM;
-               goto fail_mmu_destroy;
+               goto fail_free_lapic;
        }
        vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
        return 0;
-
+fail_free_lapic:
+       kvm_free_lapic(vcpu);
 fail_mmu_destroy:
        kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
@@ -4775,10 +5636,13 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+       int idx;
+
+       kfree(vcpu->arch.mce_banks);
        kvm_free_lapic(vcpu);
-       down_read(&vcpu->kvm->slots_lock);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
        kvm_mmu_destroy(vcpu);
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
        free_page((unsigned long)vcpu->arch.pio_data);
 }
 
@@ -4789,6 +5653,12 @@ struct  kvm *kvm_arch_create_vm(void)
        if (!kvm)
                return ERR_PTR(-ENOMEM);
 
+       kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+       if (!kvm->arch.aliases) {
+               kfree(kvm);
+               return ERR_PTR(-ENOMEM);
+       }
+
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
@@ -4845,16 +5715,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                put_page(kvm->arch.apic_access_page);
        if (kvm->arch.ept_identity_pagetable)
                put_page(kvm->arch.ept_identity_pagetable);
+       cleanup_srcu_struct(&kvm->srcu);
+       kfree(kvm->arch.aliases);
        kfree(kvm);
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                               struct kvm_memory_slot *memslot,
                                struct kvm_memory_slot old,
+                               struct kvm_userspace_memory_region *mem,
                                int user_alloc)
 {
-       int npages = mem->memory_size >> PAGE_SHIFT;
-       struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+       int npages = memslot->npages;
 
        /* To keep backward compatibility with older userspace,
         * x86 needs to handle the !user_alloc case.
@@ -4874,26 +5746,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
                        if (IS_ERR((void *)userspace_addr))
                                return PTR_ERR((void *)userspace_addr);
 
-                       /* set userspace_addr atomically for kvm_hva_to_rmapp */
-                       spin_lock(&kvm->mmu_lock);
                        memslot->userspace_addr = userspace_addr;
-                       spin_unlock(&kvm->mmu_lock);
-               } else {
-                       if (!old.user_alloc && old.rmap) {
-                               int ret;
-
-                               down_write(&current->mm->mmap_sem);
-                               ret = do_munmap(current->mm, old.userspace_addr,
-                                               old.npages * PAGE_SIZE);
-                               up_write(&current->mm->mmap_sem);
-                               if (ret < 0)
-                                       printk(KERN_WARNING
-                                      "kvm_vm_ioctl_set_memory_region: "
-                                      "failed to munmap memory\n");
-                       }
                }
        }
 
+       return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               struct kvm_memory_slot old,
+                               int user_alloc)
+{
+       int npages = mem->memory_size >> PAGE_SHIFT;
+
+       if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+               int ret;
+
+               down_write(&current->mm->mmap_sem);
+               ret = do_munmap(current->mm, old.userspace_addr,
+                               old.npages * PAGE_SIZE);
+               up_write(&current->mm->mmap_sem);
+               if (ret < 0)
+                       printk(KERN_WARNING
+                              "kvm_vm_ioctl_set_memory_region: "
+                              "failed to munmap memory\n");
+       }
+
        spin_lock(&kvm->mmu_lock);
        if (!kvm->arch.n_requested_mmu_pages) {
                unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -4902,8 +5783,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
        spin_unlock(&kvm->mmu_lock);
-
-       return 0;
 }
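
Splitting the old kvm_arch_set_memory_region() into prepare and commit halves lets the generic code publish the new memslots (and synchronize SRCU readers) between the two calls. A simplified sketch of the expected driver sequence (illustrative; the authoritative caller lives in virt/kvm/kvm_main.c):

	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
	if (r)
		goto out;
	/* install the new memslots array, then synchronize readers
	 * via kvm->srcu before telling the arch code to commit */
	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
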
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
@@ -4943,8 +5822,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
        return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
+{
+       unsigned long rflags;
+
+       rflags = kvm_x86_ops->get_rflags(vcpu);
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
+       return rflags;
+}
+EXPORT_SYMBOL_GPL(kvm_get_rflags);
+
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+{
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+           vcpu->arch.singlestep_cs ==
+                       get_segment_selector(vcpu, VCPU_SREG_CS) &&
+           vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
+               rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+       kvm_x86_ops->set_rflags(vcpu, rflags);
+}
+EXPORT_SYMBOL_GPL(kvm_set_rflags);
+
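
kvm_get_rflags()/kvm_set_rflags() hide the guest-debug trace flags (TF/RF) from readers and re-apply them on writes, which is what lets kvm_arch_vcpu_ioctl_set_guest_debug() above read-modify-write RFLAGS safely. A sketch of the intended usage (the wrapper function is illustrative):

	static void example_set_guest_debug(struct kvm_vcpu *vcpu, u32 control)
	{
		unsigned long rflags = kvm_get_rflags(vcpu);	/* TF/RF filtered out */

		vcpu->guest_debug = control;
		kvm_set_rflags(vcpu, rflags);	/* re-injects TF/RF if still single-stepping */
	}
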
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);