KVM: x86: Add KVM_GET/SET_VCPU_EVENTS
[safe/jmp/linux-2.6] / arch / x86 / kvm / vmx.c
index 364263a..778f059 100644 (file)
@@ -86,6 +86,11 @@ struct vmcs {
        char data[0];
 };
 
+struct shared_msr_entry {
+       unsigned index; /* slot in vmx_msr_index[]; also the shared-MSR slot */
+       u64 data;       /* guest value handed to kvm_set_shared_msr() */
+};
+
 struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        struct list_head      local_vcpus_link;
@@ -93,13 +98,12 @@ struct vcpu_vmx {
        int                   launched;
        u8                    fail;
        u32                   idt_vectoring_info;
-       struct kvm_msr_entry *guest_msrs;
-       struct kvm_msr_entry *host_msrs;
+       struct shared_msr_entry *guest_msrs;
        int                   nmsrs;
        int                   save_nmsrs;
-       int                   msr_offset_efer;
 #ifdef CONFIG_X86_64
-       int                   msr_offset_kernel_gs_base;
+       u64                   msr_host_kernel_gs_base;
+       u64                   msr_guest_kernel_gs_base;
 #endif
        struct vmcs          *vmcs;
        struct {
@@ -107,7 +111,6 @@ struct vcpu_vmx {
                u16           fs_sel, gs_sel, ldt_sel;
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
-               int           guest_efer_loaded;
        } host_state;
        struct {
                int vm86_active;
@@ -194,6 +197,8 @@ static struct kvm_vmx_segment_field {
        VMX_SEGMENT_FIELD(LDTR),
 };
 
+static u64 host_efer;
+
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 
 /*
@@ -202,28 +207,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
  */
 static const u32 vmx_msr_index[] = {
 #ifdef CONFIG_X86_64
-       MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
+       MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
        MSR_EFER, MSR_K6_STAR,
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
-static void load_msrs(struct kvm_msr_entry *e, int n)
-{
-       int i;
-
-       for (i = 0; i < n; ++i)
-               wrmsrl(e[i].index, e[i].data);
-}
-
-static void save_msrs(struct kvm_msr_entry *e, int n)
-{
-       int i;
-
-       for (i = 0; i < n; ++i)
-               rdmsrl(e[i].index, e[i].data);
-}
-
 static inline int is_page_fault(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -372,7 +361,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
        int i;
 
        for (i = 0; i < vmx->nmsrs; ++i)
-               if (vmx->guest_msrs[i].index == msr)
+               if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
                        return i;
        return -1;
 }
@@ -403,7 +392,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
                        : : "a" (&operand), "c" (ext) : "cc", "memory");
 }
 
-static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
+static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
 
@@ -594,17 +583,12 @@ static void reload_tss(void)
        load_TR_desc();
 }
 
-static void load_transition_efer(struct vcpu_vmx *vmx)
+static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-       int efer_offset = vmx->msr_offset_efer;
-       u64 host_efer;
        u64 guest_efer;
        u64 ignore_bits;
 
-       if (efer_offset < 0)
-               return;
-       host_efer = vmx->host_msrs[efer_offset].data;
-       guest_efer = vmx->guest_msrs[efer_offset].data;
+       guest_efer = vmx->vcpu.arch.shadow_efer;
 
        /*
         * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -618,26 +602,18 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
                ignore_bits &= ~(u64)EFER_SCE;
 #endif
        if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
-               return;
+               return false;
 
-       vmx->host_state.guest_efer_loaded = 1;
        guest_efer &= ~ignore_bits;
        guest_efer |= host_efer & ignore_bits;
-       wrmsrl(MSR_EFER, guest_efer);
-       vmx->vcpu.stat.efer_reload++;
-}
-
-static void reload_host_efer(struct vcpu_vmx *vmx)
-{
-       if (vmx->host_state.guest_efer_loaded) {
-               vmx->host_state.guest_efer_loaded = 0;
-               load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
-       }
+       vmx->guest_msrs[efer_offset].data = guest_efer;
+       return true;
 }
 
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int i;
 
        if (vmx->host_state.loaded)
                return;
@@ -674,13 +650,14 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-       if (is_long_mode(&vmx->vcpu))
-               save_msrs(vmx->host_msrs +
-                         vmx->msr_offset_kernel_gs_base, 1);
-
+       if (is_long_mode(&vmx->vcpu)) {
+               rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+               wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+       }
 #endif
-       load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
-       load_transition_efer(vmx);
+       for (i = 0; i < vmx->save_nmsrs; ++i)
+               kvm_set_shared_msr(vmx->guest_msrs[i].index,
+                                  vmx->guest_msrs[i].data);
 }
 
 static void __vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -708,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
                local_irq_restore(flags);
        }
        reload_tss();
-       save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
-       load_msrs(vmx->host_msrs, vmx->save_nmsrs);
-       reload_host_efer(vmx);
+#ifdef CONFIG_X86_64
+       if (is_long_mode(&vmx->vcpu)) {
+               rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+               wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+       }
+#endif
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -901,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
-#ifdef CONFIG_X86_64
 static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 {
-       struct kvm_msr_entry tmp;
+       struct shared_msr_entry tmp;
 
        tmp = vmx->guest_msrs[to];
        vmx->guest_msrs[to] = vmx->guest_msrs[from];
        vmx->guest_msrs[from] = tmp;
-       tmp = vmx->host_msrs[to];
-       vmx->host_msrs[to] = vmx->host_msrs[from];
-       vmx->host_msrs[from] = tmp;
 }
-#endif
 
 /*
  * Set up the vmcs to automatically save and restore system
@@ -922,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
  */
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
-       int save_nmsrs;
+       int save_nmsrs, index;
        unsigned long *msr_bitmap;
 
        vmx_load_host_state(vmx);
        save_nmsrs = 0;
 #ifdef CONFIG_X86_64
        if (is_long_mode(&vmx->vcpu)) {
-               int index;
-
                index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
                if (index >= 0)
                        move_msr_up(vmx, index, save_nmsrs++);
@@ -940,9 +913,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
                index = __find_msr_index(vmx, MSR_CSTAR);
                if (index >= 0)
                        move_msr_up(vmx, index, save_nmsrs++);
-               index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
-               if (index >= 0)
-                       move_msr_up(vmx, index, save_nmsrs++);
                /*
                 * MSR_K6_STAR is only needed on long mode guests, and only
                 * if efer.sce is enabled.
@@ -952,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx)
                        move_msr_up(vmx, index, save_nmsrs++);
        }
 #endif
-       vmx->save_nmsrs = save_nmsrs;
+       index = __find_msr_index(vmx, MSR_EFER);
+       if (index >= 0 && update_transition_efer(vmx, index))
+               move_msr_up(vmx, index, save_nmsrs++);
 
-#ifdef CONFIG_X86_64
-       vmx->msr_offset_kernel_gs_base =
-               __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
-#endif
-       vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
+       vmx->save_nmsrs = save_nmsrs;
 
        if (cpu_has_vmx_msr_bitmap()) {
                if (is_long_mode(&vmx->vcpu))
@@ -1000,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
        u64 data;
-       struct kvm_msr_entry *msr;
+       struct shared_msr_entry *msr;
 
        if (!pdata) {
                printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
@@ -1015,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
        case MSR_GS_BASE:
                data = vmcs_readl(GUEST_GS_BASE);
                break;
+       case MSR_KERNEL_GS_BASE:
+               vmx_load_host_state(to_vmx(vcpu));
+               data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+               break;
+#endif
        case MSR_EFER:
                return kvm_get_msr_common(vcpu, msr_index, pdata);
-#endif
        case MSR_IA32_TSC:
                data = guest_read_tsc();
                break;
@@ -1031,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        default:
+               vmx_load_host_state(to_vmx(vcpu));
                msr = find_msr_entry(to_vmx(vcpu), msr_index);
                if (msr) {
                        vmx_load_host_state(to_vmx(vcpu));
@@ -1052,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       struct kvm_msr_entry *msr;
+       struct shared_msr_entry *msr;
        u64 host_tsc;
        int ret = 0;
 
@@ -1068,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
        case MSR_GS_BASE:
                vmcs_writel(GUEST_GS_BASE, data);
                break;
+       case MSR_KERNEL_GS_BASE:
+               vmx_load_host_state(vmx);
+               vmx->msr_guest_kernel_gs_base = data;
+               break;
 #endif
        case MSR_IA32_SYSENTER_CS:
                vmcs_write32(GUEST_SYSENTER_CS, data);
@@ -1557,8 +1534,16 @@ continue_rmode:
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+       struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+       if (!msr)
+               return;
 
+       /*
+        * Force kernel_gs_base reloading before EFER changes, as control
+        * of this msr depends on is_long_mode().
+        */
+       vmx_load_host_state(to_vmx(vcpu));
        vcpu->arch.shadow_efer = efer;
        if (!msr)
                return;
@@ -1748,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                vmcs_write64(EPT_POINTER, eptp);
                guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
                        vcpu->kvm->arch.ept_identity_map_addr;
+               ept_load_pdptrs(vcpu);
        }
 
        vmx_flush_tlb(vcpu);
@@ -2404,10 +2390,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                if (wrmsr_safe(index, data_low, data_high) < 0)
                        continue;
                data = data_low | ((u64)data_high << 32);
-               vmx->host_msrs[j].index = index;
-               vmx->host_msrs[j].reserved = 0;
-               vmx->host_msrs[j].data = data;
-               vmx->guest_msrs[j] = vmx->host_msrs[j];
+               vmx->guest_msrs[j].index = i;
+               vmx->guest_msrs[j].data = 0;
                ++vmx->nmsrs;
        }
 
@@ -2538,7 +2522,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        if (vmx->vpid != 0)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
-       vmx->vcpu.arch.cr0 = 0x60000010;
+       vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
        vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
        vmx_set_cr4(&vmx->vcpu, 0);
        vmx_set_efer(&vmx->vcpu, 0);
@@ -2655,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
                                GUEST_INTR_STATE_NMI));
 }
 
+/* NMI mask state: VMCS bit if virtual NMIs exist, else the soft flag. */
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_virtual_nmis())
+               return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                         GUEST_INTR_STATE_NMI);
+       return to_vmx(vcpu)->soft_vnmi_blocked;
+}
+
+/* Set or clear NMI blocking, in the VMCS or in the software fallback. */
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (cpu_has_virtual_nmis()) {
+               if (masked)
+                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                     GUEST_INTR_STATE_NMI);
+               else
+                       vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                       GUEST_INTR_STATE_NMI);
+       } else if (vmx->soft_vnmi_blocked != masked) {
+               /* Reset vnmi_blocked_time whenever the soft mask flips. */
+               vmx->soft_vnmi_blocked = masked;
+               vmx->vnmi_blocked_time = 0;
+       }
+}
+
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
        return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2760,9 +2772,14 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                return handle_machine_check(vcpu);
 
        if ((vect_info & VECTORING_INFO_VALID_MASK) &&
-                                               !is_page_fault(intr_info))
-               printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
-                      "intr info 0x%x\n", __func__, vect_info, intr_info);
+           !is_page_fault(intr_info)) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.data[0] = vect_info;
+               vcpu->run->internal.data[1] = intr_info;
+               return 0;
+       }
 
        if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
                return 1;  /* already handled by vmx_vcpu_run() */
@@ -3368,6 +3385,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                        kvm_report_emulation_failure(vcpu, "emulation failure");
                        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                        vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+                       vcpu->run->internal.ndata = 0;
                        ret = 0;
                        goto out;
                }
@@ -3638,10 +3656,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       if (enable_ept && is_paging(vcpu)) {
-               vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
-               ept_load_pdptrs(vcpu);
-       }
        /* Record the guest's net vcpu time for enforced NMI injections. */
        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
                vmx->entry_time = ktime_get();
@@ -3808,7 +3822,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
                __clear_bit(vmx->vpid, vmx_vpid_bitmap);
        spin_unlock(&vmx_vpid_lock);
        vmx_free_vmcs(vcpu);
-       kfree(vmx->host_msrs);
        kfree(vmx->guest_msrs);
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -3835,10 +3848,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
                goto uninit_vcpu;
        }
 
-       vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!vmx->host_msrs)
-               goto free_guest_msrs;
-
        vmx->vmcs = alloc_vmcs();
        if (!vmx->vmcs)
                goto free_msrs;
@@ -3869,8 +3878,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 free_vmcs:
        free_vmcs(vmx->vmcs);
 free_msrs:
-       kfree(vmx->host_msrs);
-free_guest_msrs:
        kfree(vmx->guest_msrs);
 uninit_vcpu:
        kvm_vcpu_uninit(&vmx->vcpu);
@@ -4006,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .queue_exception = vmx_queue_exception,
        .interrupt_allowed = vmx_interrupt_allowed,
        .nmi_allowed = vmx_nmi_allowed,
+       .get_nmi_mask = vmx_get_nmi_mask,
+       .set_nmi_mask = vmx_set_nmi_mask,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
@@ -4020,7 +4029,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-       int r;
+       int r, i;
+
+       rdmsrl_safe(MSR_EFER, &host_efer);
+
+       for (i = 0; i < NR_VMX_MSR; ++i)
+               kvm_define_shared_msr(i, vmx_msr_index[i]);
 
        vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_io_bitmap_a)