Merge branch 'topic/core-cleanup' into for-linus
[safe/jmp/linux-2.6] / arch / x86 / kvm / vmx.c
index fad871c..2f8db0e 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
 #include <linux/ftrace_event.h>
+#include <linux/slab.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -66,7 +67,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 #define KVM_GUEST_CR0_MASK                                             \
        (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST                                \
-       (X86_CR0_WP | X86_CR0_NE | X86_CR0_MP)
+       (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON                                           \
        (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS                                     \
@@ -76,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
 
+#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -130,7 +133,7 @@ struct vcpu_vmx {
        } host_state;
        struct {
                int vm86_active;
-               u8 save_iopl;
+               ulong save_rflags;
                struct kvm_save_segment {
                        u16 selector;
                        unsigned long base;
@@ -358,9 +361,7 @@ static inline int cpu_has_vmx_ple(void)
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
-       return flexpriority_enabled &&
-               (cpu_has_vmx_virtualize_apic_accesses()) &&
-               (irqchip_in_kernel(kvm));
+       return flexpriority_enabled && irqchip_in_kernel(kvm);
 }
 
 static inline int cpu_has_vmx_vpid(void)
@@ -613,7 +614,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
        u64 guest_efer;
        u64 ignore_bits;
 
-       guest_efer = vmx->vcpu.arch.shadow_efer;
+       guest_efer = vmx->vcpu.arch.efer;
 
        /*
         * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -791,12 +792,15 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
 {
+       ulong cr0;
+
        if (vcpu->fpu_active)
                return;
        vcpu->fpu_active = 1;
-       vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
-       if (kvm_read_cr0_bits(vcpu, X86_CR0_TS))
-               vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
+       cr0 = vmcs_readl(GUEST_CR0);
+       cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
+       cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
+       vmcs_writel(GUEST_CR0, cr0);
        update_exception_bitmap(vcpu);
        vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
        vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
@@ -807,7 +811,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
        vmx_decache_cr0_guest_bits(vcpu);
-       vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
+       vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
        update_exception_bitmap(vcpu);
        vcpu->arch.cr0_guest_owned_bits = 0;
        vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
@@ -816,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
-       unsigned long rflags;
+       unsigned long rflags, save_rflags;
 
        rflags = vmcs_readl(GUEST_RFLAGS);
-       if (to_vmx(vcpu)->rmode.vm86_active)
-               rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+       if (to_vmx(vcpu)->rmode.vm86_active) {
+               rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+               save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+               rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+       }
        return rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-       if (to_vmx(vcpu)->rmode.vm86_active)
+       if (to_vmx(vcpu)->rmode.vm86_active) {
+               to_vmx(vcpu)->rmode.save_rflags = rflags;
                rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
+       }
        vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
@@ -955,7 +964,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
                 * if efer.sce is enabled.
                 */
                index = __find_msr_index(vmx, MSR_K6_STAR);
-               if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE))
+               if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
                        move_msr_up(vmx, index, save_nmsrs++);
        }
 #endif
@@ -1481,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
        vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
 
        flags = vmcs_readl(GUEST_RFLAGS);
-       flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
-       flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
+       flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+       flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
        vmcs_writel(GUEST_RFLAGS, flags);
 
        vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1555,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
        flags = vmcs_readl(GUEST_RFLAGS);
-       vmx->rmode.save_iopl
-               = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       vmx->rmode.save_rflags = flags;
 
        flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 
@@ -1600,9 +1608,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
         * of this msr depends on is_long_mode().
         */
        vmx_load_host_state(to_vmx(vcpu));
-       vcpu->arch.shadow_efer = efer;
-       if (!msr)
-               return;
+       vcpu->arch.efer = efer;
        if (efer & EFER_LMA) {
                vmcs_write32(VM_ENTRY_CONTROLS,
                             vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1632,13 +1638,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
                             (guest_tr_ar & ~AR_TYPE_MASK)
                             | AR_TYPE_BUSY_64_TSS);
        }
-       vcpu->arch.shadow_efer |= EFER_LMA;
-       vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
+       vcpu->arch.efer |= EFER_LMA;
+       vmx_set_efer(vcpu, vcpu->arch.efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.shadow_efer &= ~EFER_LMA;
+       vcpu->arch.efer &= ~EFER_LMA;
 
        vmcs_write32(VM_ENTRY_CONTROLS,
                     vmcs_read32(VM_ENTRY_CONTROLS)
@@ -1745,7 +1751,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
                enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
-       if (vcpu->arch.shadow_efer & EFER_LME) {
+       if (vcpu->arch.efer & EFER_LME) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
                        enter_lmode(vcpu);
                if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@@ -1757,7 +1763,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
                ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
        if (!vcpu->fpu_active)
-               hw_cr0 |= X86_CR0_TS;
+               hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
 
        vmcs_writel(CR0_READ_SHADOW, cr0);
        vmcs_writel(GUEST_CR0, hw_cr0);
@@ -1845,7 +1851,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 
 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
-       if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) /* if real mode */
+       if (!is_protmode(vcpu))
                return 0;
 
        if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@@ -2100,7 +2106,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
 static bool guest_state_valid(struct kvm_vcpu *vcpu)
 {
        /* real mode guest state checks */
-       if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+       if (!is_protmode(vcpu)) {
                if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
                        return false;
                if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -2697,8 +2703,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
                return 0;
 
        return  !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-                       (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
-                               GUEST_INTR_STATE_NMI));
+                       (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -2776,6 +2781,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
                kvm_queue_exception(vcpu, vec);
                return 1;
        case BP_VECTOR:
+               /*
+                * Update instruction length as we may reinject the exception
+                * from user space while in guest debugging mode.
+                */
+               to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
+                       vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
                        return 0;
                /* fall through */
@@ -2898,6 +2909,13 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
                /* fall through */
        case BP_VECTOR:
+               /*
+                * Update instruction length as we may reinject #BP from
+                * user space while in guest debugging mode. Reading it for
+                * #DB as well causes no harm, it is not used in that case.
+                */
+               vmx->vcpu.arch.event_exit_inst_len =
+                       vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
                kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
                kvm_run->debug.arch.exception = ex_no;
@@ -3155,6 +3173,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
        u64 data;
 
        if (vmx_get_msr(vcpu, ecx, &data)) {
+               trace_kvm_msr_read_ex(ecx);
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
@@ -3174,13 +3193,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
        u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
                | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-       trace_kvm_msr_write(ecx, data);
-
        if (vmx_set_msr(vcpu, ecx, data) != 0) {
+               trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
 
+       trace_kvm_msr_write(ecx, data);
        skip_emulated_instruction(vcpu);
        return 1;
 }
@@ -4001,7 +4020,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
         *   b. VT-d with snooping control feature: snooping control feature of
         *      VT-d engine can guarantee the cache correctness. Just set it
         *      to WB to keep consistent with host. So the same as item 3.
-        * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+        * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
         *    consistent with host MTRR
         */
        if (is_mmio)
@@ -4012,7 +4031,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
                      VMX_EPT_MT_EPTE_SHIFT;
        else
                ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
-                       | VMX_EPT_IGMT_BIT;
+                       | VMX_EPT_IPAT_BIT;
 
        return ret;
 }
@@ -4135,6 +4154,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .cache_reg = vmx_cache_reg,
        .get_rflags = vmx_get_rflags,
        .set_rflags = vmx_set_rflags,
+       .fpu_activate = vmx_fpu_activate,
        .fpu_deactivate = vmx_fpu_deactivate,
 
        .tlb_flush = vmx_flush_tlb,