KVM: x86: Add KVM_GET/SET_VCPU_EVENTS

[safe/jmp/linux-2.6] / arch / x86 / kvm / vmx.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index 364263a..778f059 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -86,6 +86,11 @@ struct vmcs {
         char data[0];
  };
  
+struct shared_msr_entry {      /* one guest MSR value, keyed by slot */
+       unsigned index;         /* slot: position in vmx_msr_index[] */
+       u64 data;               /* value handed to kvm_set_shared_msr() */
+};
+
  struct vcpu_vmx {
         struct kvm_vcpu       vcpu;
         struct list_head      local_vcpus_link;
@@ -93,13 +98,12 @@ struct vcpu_vmx {
         int                   launched;
         u8                    fail;
         u32                   idt_vectoring_info;
-       struct kvm_msr_entry *guest_msrs;
-       struct kvm_msr_entry *host_msrs;
+       struct shared_msr_entry *guest_msrs;
         int                   nmsrs;
         int                   save_nmsrs;
-       int                   msr_offset_efer;
  #ifdef CONFIG_X86_64
-       int                   msr_offset_kernel_gs_base;
+       u64                   msr_host_kernel_gs_base;
+       u64                   msr_guest_kernel_gs_base;
  #endif
         struct vmcs          *vmcs;
         struct {
@@ -107,7 +111,6 @@ struct vcpu_vmx {
                 u16           fs_sel, gs_sel, ldt_sel;
                 int           gs_ldt_reload_needed;
                 int           fs_reload_needed;
-               int           guest_efer_loaded;
         } host_state;
         struct {
                 int vm86_active;
@@ -194,6 +197,8 @@ static struct kvm_vmx_segment_field {
         VMX_SEGMENT_FIELD(LDTR),
  };
  
+static u64 host_efer;
+
  static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
  
  /*
@@ -202,28 +207,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
   */
  static const u32 vmx_msr_index[] = {
  #ifdef CONFIG_X86_64
-       MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
+       MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
  #endif
         MSR_EFER, MSR_K6_STAR,
  };
  #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
  
-static void load_msrs(struct kvm_msr_entry *e, int n)
-{
-       int i;
-
-       for (i = 0; i < n; ++i)
-               wrmsrl(e[i].index, e[i].data);
-}
-
-static void save_msrs(struct kvm_msr_entry *e, int n)
-{
-       int i;
-
-       for (i = 0; i < n; ++i)
-               rdmsrl(e[i].index, e[i].data);
-}
-
  static inline int is_page_fault(u32 intr_info)
  {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -372,7 +361,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
         int i;
  
         for (i = 0; i < vmx->nmsrs; ++i)
-               if (vmx->guest_msrs[i].index == msr)
+               if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
                         return i;
         return -1;
  }
@@ -403,7 +392,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
                         : : "a" (&operand), "c" (ext) : "cc", "memory");
  }
  
-static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
+static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
  {
         int i;
  
@@ -594,17 +583,12 @@ static void reload_tss(void)
         load_TR_desc();
  }
  
-static void load_transition_efer(struct vcpu_vmx *vmx)
+static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
  {
-       int efer_offset = vmx->msr_offset_efer;
-       u64 host_efer;
         u64 guest_efer;
         u64 ignore_bits;
  
-       if (efer_offset < 0)
-               return;
-       host_efer = vmx->host_msrs[efer_offset].data;
-       guest_efer = vmx->guest_msrs[efer_offset].data;
+       guest_efer = vmx->vcpu.arch.shadow_efer;
  
         /*
          * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -618,26 +602,18 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
                 ignore_bits &= ~(u64)EFER_SCE;
  #endif
         if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
-               return;
+               return false;
  
-       vmx->host_state.guest_efer_loaded = 1;
         guest_efer &= ~ignore_bits;
         guest_efer |= host_efer & ignore_bits;
-       wrmsrl(MSR_EFER, guest_efer);
-       vmx->vcpu.stat.efer_reload++;
-}
-
-static void reload_host_efer(struct vcpu_vmx *vmx)
-{
-       if (vmx->host_state.guest_efer_loaded) {
-               vmx->host_state.guest_efer_loaded = 0;
-               load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
-       }
+       vmx->guest_msrs[efer_offset].data = guest_efer;
+       return true;
  }
  
  static void vmx_save_host_state(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int i;
  
         if (vmx->host_state.loaded)
                 return;
@@ -674,13 +650,14 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
  #endif
  
  #ifdef CONFIG_X86_64
-       if (is_long_mode(&vmx->vcpu))
-               save_msrs(vmx->host_msrs +
-                         vmx->msr_offset_kernel_gs_base, 1);
-
+       if (is_long_mode(&vmx->vcpu)) {
+               rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+               wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+       }
  #endif
-       load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
-       load_transition_efer(vmx);
+       for (i = 0; i < vmx->save_nmsrs; ++i)
+               kvm_set_shared_msr(vmx->guest_msrs[i].index,
+                                  vmx->guest_msrs[i].data);
  }
  
  static void __vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -708,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
                 local_irq_restore(flags);
         }
         reload_tss();
-       save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
-       load_msrs(vmx->host_msrs, vmx->save_nmsrs);
-       reload_host_efer(vmx);
+#ifdef CONFIG_X86_64
+       if (is_long_mode(&vmx->vcpu)) {
+               rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+               wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+       }
+#endif
  }
  
  static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -901,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
  /*
   * Swap MSR entry in host/guest MSR entry array.
   */
-#ifdef CONFIG_X86_64
  static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
  {
-       struct kvm_msr_entry tmp;
+       struct shared_msr_entry tmp;
  
         tmp = vmx->guest_msrs[to];
         vmx->guest_msrs[to] = vmx->guest_msrs[from];
         vmx->guest_msrs[from] = tmp;
-       tmp = vmx->host_msrs[to];
-       vmx->host_msrs[to] = vmx->host_msrs[from];
-       vmx->host_msrs[from] = tmp;
  }
-#endif
  
  /*
   * Set up the vmcs to automatically save and restore system
@@ -922,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
   */
  static void setup_msrs(struct vcpu_vmx *vmx)
  {
-       int save_nmsrs;
+       int save_nmsrs, index;
         unsigned long *msr_bitmap;
  
         vmx_load_host_state(vmx);
         save_nmsrs = 0;
  #ifdef CONFIG_X86_64
         if (is_long_mode(&vmx->vcpu)) {
-               int index;
-
                 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
                 if (index >= 0)
                         move_msr_up(vmx, index, save_nmsrs++);
@@ -940,9 +913,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
                 index = __find_msr_index(vmx, MSR_CSTAR);
                 if (index >= 0)
                         move_msr_up(vmx, index, save_nmsrs++);
-               index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
-               if (index >= 0)
-                       move_msr_up(vmx, index, save_nmsrs++);
                 /*
                  * MSR_K6_STAR is only needed on long mode guests, and only
                  * if efer.sce is enabled.
@@ -952,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx)
                         move_msr_up(vmx, index, save_nmsrs++);
         }
  #endif
-       vmx->save_nmsrs = save_nmsrs;
+       index = __find_msr_index(vmx, MSR_EFER);
+       if (index >= 0 && update_transition_efer(vmx, index))
+               move_msr_up(vmx, index, save_nmsrs++);
  
-#ifdef CONFIG_X86_64
-       vmx->msr_offset_kernel_gs_base =
-               __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
-#endif
-       vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
+       vmx->save_nmsrs = save_nmsrs;
  
         if (cpu_has_vmx_msr_bitmap()) {
                 if (is_long_mode(&vmx->vcpu))
@@ -1000,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
  static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
  {
         u64 data;
-       struct kvm_msr_entry *msr;
+       struct shared_msr_entry *msr;
  
         if (!pdata) {
                 printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
@@ -1015,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
         case MSR_GS_BASE:
                 data = vmcs_readl(GUEST_GS_BASE);
                 break;
+       case MSR_KERNEL_GS_BASE:
+               vmx_load_host_state(to_vmx(vcpu));
+               data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+               break;
+#endif
         case MSR_EFER:
                 return kvm_get_msr_common(vcpu, msr_index, pdata);
-#endif
         case MSR_IA32_TSC:
                 data = guest_read_tsc();
                 break;
@@ -1031,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                 data = vmcs_readl(GUEST_SYSENTER_ESP);
                 break;
         default:
+               vmx_load_host_state(to_vmx(vcpu));
                 msr = find_msr_entry(to_vmx(vcpu), msr_index);
                 if (msr) {
                         vmx_load_host_state(to_vmx(vcpu));
@@ -1052,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
  static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-       struct kvm_msr_entry *msr;
+       struct shared_msr_entry *msr;
         u64 host_tsc;
         int ret = 0;
  
@@ -1068,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
         case MSR_GS_BASE:
                 vmcs_writel(GUEST_GS_BASE, data);
                 break;
+       case MSR_KERNEL_GS_BASE:
+               vmx_load_host_state(vmx);
+               vmx->msr_guest_kernel_gs_base = data;
+               break;
  #endif
         case MSR_IA32_SYSENTER_CS:
                 vmcs_write32(GUEST_SYSENTER_CS, data);
@@ -1557,8 +1534,16 @@ continue_rmode:
  static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-       struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+       struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+       if (!msr)
+               return;
  
+       /*
+        * Force kernel_gs_base reloading before EFER changes, as control
+        * of this msr depends on is_long_mode().
+        */
+       vmx_load_host_state(to_vmx(vcpu));
         vcpu->arch.shadow_efer = efer;
         if (!msr)
                 return;
@@ -1748,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                 vmcs_write64(EPT_POINTER, eptp);
                 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
                         vcpu->kvm->arch.ept_identity_map_addr;
+               ept_load_pdptrs(vcpu);
         }
  
         vmx_flush_tlb(vcpu);
@@ -2404,10 +2390,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                 if (wrmsr_safe(index, data_low, data_high) < 0)
                         continue;
                 data = data_low | ((u64)data_high << 32);
-               vmx->host_msrs[j].index = index;
-               vmx->host_msrs[j].reserved = 0;
-               vmx->host_msrs[j].data = data;
-               vmx->guest_msrs[j] = vmx->host_msrs[j];
+               vmx->guest_msrs[j].index = i;
+               vmx->guest_msrs[j].data = 0;
                 ++vmx->nmsrs;
         }
  
@@ -2538,7 +2522,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
         if (vmx->vpid != 0)
                 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
  
-       vmx->vcpu.arch.cr0 = 0x60000010;
+       vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
         vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
         vmx_set_cr4(&vmx->vcpu, 0);
         vmx_set_efer(&vmx->vcpu, 0);
@@ -2655,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
                                 GUEST_INTR_STATE_NMI));
  }
  
+/* NMI mask: GUEST_INTR_STATE_NMI with virtual NMIs, else soft_vnmi_blocked. */
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_virtual_nmis())
+               return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                         GUEST_INTR_STATE_NMI);
+       return to_vmx(vcpu)->soft_vnmi_blocked;
+}
+
+/* Apply @masked to GUEST_INTR_STATE_NMI (virtual NMIs) or soft_vnmi_blocked. */
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (cpu_has_virtual_nmis()) {
+               if (masked)
+                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                     GUEST_INTR_STATE_NMI);
+               else
+                       vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                       GUEST_INTR_STATE_NMI);
+       } else if (vmx->soft_vnmi_blocked != masked) {
+               /* Only on a real change; vnmi_blocked_time is zeroed with it. */
+               vmx->soft_vnmi_blocked = masked;
+               vmx->vnmi_blocked_time = 0;
+       }
+}
+
  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
  {
         return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2760,9 +2772,14 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                 return handle_machine_check(vcpu);
  
         if ((vect_info & VECTORING_INFO_VALID_MASK) &&
-                                               !is_page_fault(intr_info))
-               printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
-                      "intr info 0x%x\n", __func__, vect_info, intr_info);
+           !is_page_fault(intr_info)) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.data[0] = vect_info;
+               vcpu->run->internal.data[1] = intr_info;
+               return 0;
+       }
  
         if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
                 return 1;  /* already handled by vmx_vcpu_run() */
@@ -3368,6 +3385,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                         kvm_report_emulation_failure(vcpu, "emulation failure");
                         vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                         vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+                       vcpu->run->internal.ndata = 0;
                         ret = 0;
                         goto out;
                 }
@@ -3638,10 +3656,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       if (enable_ept && is_paging(vcpu)) {
-               vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
-               ept_load_pdptrs(vcpu);
-       }
         /* Record the guest's net vcpu time for enforced NMI injections. */
         if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
                 vmx->entry_time = ktime_get();
@@ -3808,7 +3822,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
                 __clear_bit(vmx->vpid, vmx_vpid_bitmap);
         spin_unlock(&vmx_vpid_lock);
         vmx_free_vmcs(vcpu);
-       kfree(vmx->host_msrs);
         kfree(vmx->guest_msrs);
         kvm_vcpu_uninit(vcpu);
         kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -3835,10 +3848,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
                 goto uninit_vcpu;
         }
  
-       vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!vmx->host_msrs)
-               goto free_guest_msrs;
-
         vmx->vmcs = alloc_vmcs();
         if (!vmx->vmcs)
                 goto free_msrs;
@@ -3869,8 +3878,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
  free_vmcs:
         free_vmcs(vmx->vmcs);
  free_msrs:
-       kfree(vmx->host_msrs);
-free_guest_msrs:
         kfree(vmx->guest_msrs);
  uninit_vcpu:
         kvm_vcpu_uninit(&vmx->vcpu);
@@ -4006,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .queue_exception = vmx_queue_exception,
         .interrupt_allowed = vmx_interrupt_allowed,
         .nmi_allowed = vmx_nmi_allowed,
+       .get_nmi_mask = vmx_get_nmi_mask,
+       .set_nmi_mask = vmx_set_nmi_mask,
         .enable_nmi_window = enable_nmi_window,
         .enable_irq_window = enable_irq_window,
         .update_cr8_intercept = update_cr8_intercept,
@@ -4020,7 +4029,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
  
  static int __init vmx_init(void)
  {
-       int r;
+       int r, i;
+
+       rdmsrl_safe(MSR_EFER, &host_efer);
+
+       for (i = 0; i < NR_VMX_MSR; ++i)
+               kvm_define_shared_msr(i, vmx_msr_index[i]);
  
         vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
         if (!vmx_io_bitmap_a)