X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fvmx.c;h=07491c9c6ed03c1306b4bf5f7dfe04db9e830441;hb=2aaf69dcee864f4fb6402638dd2f263324ac839f;hp=9591ca73191cfa4f43b89bdc15ab7967cf1fd8ef;hpb=60bd83a125030878665684f353c7d36fd54f09fd;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9591ca7..07491c9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -16,7 +16,6 @@ */ #include "irq.h" -#include "vmx.h" #include "mmu.h" #include @@ -31,6 +30,8 @@ #include #include +#include +#include #define __ex(x) __kvm_handle_fault_on_reboot(x) @@ -49,6 +50,9 @@ module_param(flexpriority_enabled, bool, 0); static int enable_ept = 1; module_param(enable_ept, bool, 0); +static int emulate_invalid_guest_state = 0; +module_param(emulate_invalid_guest_state, bool, 0); + struct vmcs { u32 revision_id; u32 abort; @@ -58,6 +62,7 @@ struct vmcs { struct vcpu_vmx { struct kvm_vcpu vcpu; struct list_head local_vcpus_link; + unsigned long host_rsp; int launched; u8 fail; u32 idt_vectoring_info; @@ -85,6 +90,12 @@ struct vcpu_vmx { } irq; } rmode; int vpid; + bool emulation_required; + + /* Support for vnmi-less CPUs */ + int soft_vnmi_blocked; + ktime_t entry_time; + s64 vnmi_blocked_time; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -117,7 +128,7 @@ static struct vmcs_config { u32 vmentry_ctrl; } vmcs_config; -struct vmx_capability { +static struct vmx_capability { u32 ept; u32 vpid; } vmx_capability; @@ -470,7 +481,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) if (!vcpu->fpu_active) eb |= 1u << NM_VECTOR; if (vcpu->guest_debug.enabled) - eb |= 1u << 1; + eb |= 1u << DB_VECTOR; if (vcpu->arch.rmode.active) eb = ~0; if (vm_need_ept()) @@ -735,12 +746,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (has_error_code) + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + + if (vcpu->arch.rmode.active) { + vmx->rmode.irq.pending = true; + vmx->rmode.irq.vector = nr; + vmx->rmode.irq.rip = kvm_rip_read(vcpu); + if (nr == BP_VECTOR) + vmx->rmode.irq.rip++; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + nr | INTR_TYPE_SOFT_INTR + | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) + | INTR_INFO_VALID_MASK); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); + return; + } + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, nr | INTR_TYPE_EXCEPTION | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | INTR_INFO_VALID_MASK); - if (has_error_code) - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); } static bool vmx_exception_injected(struct kvm_vcpu *vcpu) @@ -934,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + vmcs_write64(GUEST_IA32_PAT, data); + vcpu->arch.pat = data; + break; + } + /* Otherwise falls through to kvm_set_msr_common */ default: vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); @@ -1009,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) static __init int cpu_has_kvm_support(void) { - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ + return cpu_has_vmx(); } static __init int vmx_disabled_by_bios(void) @@ -1018,9 +1053,9 @@ static __init int vmx_disabled_by_bios(void) u64 msr; rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - return (msr & (IA32_FEATURE_CONTROL_LOCKED_BIT | - IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT)) - == IA32_FEATURE_CONTROL_LOCKED_BIT; + return (msr & (FEATURE_CONTROL_LOCKED | + FEATURE_CONTROL_VMXON_ENABLED)) + == FEATURE_CONTROL_LOCKED; /* locked but not enabled */ } @@ -1032,14 +1067,14 @@ static void hardware_enable(void *garbage) INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); - if ((old & (IA32_FEATURE_CONTROL_LOCKED_BIT | - IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT)) - != (IA32_FEATURE_CONTROL_LOCKED_BIT | - IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT)) + if ((old & (FEATURE_CONTROL_LOCKED | + FEATURE_CONTROL_VMXON_ENABLED)) + != (FEATURE_CONTROL_LOCKED | + FEATURE_CONTROL_VMXON_ENABLED)) /* enable and lock */ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | - IA32_FEATURE_CONTROL_LOCKED_BIT | - IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT); + FEATURE_CONTROL_LOCKED | + FEATURE_CONTROL_VMXON_ENABLED); write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) @@ -1056,13 +1091,22 @@ static void vmclear_local_vcpus(void) __vcpu_clear(vmx); } -static void hardware_disable(void *garbage) + +/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() + * tricks. + */ +static void kvm_cpu_vmxoff(void) { - vmclear_local_vcpus(); asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); write_cr4(read_cr4() & ~X86_CR4_VMXE); } +static void hardware_disable(void *garbage) +{ + vmclear_local_vcpus(); + kvm_cpu_vmxoff(); +} + static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { @@ -1107,7 +1151,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MOV_DR_EXITING | - CPU_BASED_USE_TSC_OFFSETING; + CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_INVLPG_EXITING; opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -1136,9 +1181,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; #endif if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { - /* CR3 accesses don't need to cause VM Exits when EPT enabled */ + /* CR3 accesses and invlpg don't need to cause VM Exits when EPT + enabled */ min &= ~(CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); + CPU_BASED_CR3_STORE_EXITING | + CPU_BASED_INVLPG_EXITING); if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &_cpu_based_exec_control) < 0) return -EIO; @@ -1150,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = 0; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; - min = opt = 0; + min = 0; + opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) return -EIO; @@ -1275,7 +1323,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) static void enter_pmode(struct kvm_vcpu *vcpu) { unsigned long flags; + struct vcpu_vmx *vmx = to_vmx(vcpu); + vmx->emulation_required = 1; vcpu->arch.rmode.active = 0; vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); @@ -1292,6 +1342,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); + if (emulate_invalid_guest_state) + return; + fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); @@ -1332,7 +1385,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) static void enter_rmode(struct kvm_vcpu *vcpu) { unsigned long flags; + struct vcpu_vmx *vmx = to_vmx(vcpu); + vmx->emulation_required = 1; vcpu->arch.rmode.active = 1; vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); @@ -1354,6 +1409,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); update_exception_bitmap(vcpu); + if (emulate_invalid_guest_state) + goto continue_rmode; + vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); vmcs_write32(GUEST_SS_LIMIT, 0xffff); vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); @@ -1369,6 +1427,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); +continue_rmode: kvm_mmu_reset_context(vcpu); init_rmode(vcpu->kvm); } @@ -1702,6 +1761,186 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) vmcs_writel(GUEST_GDTR_BASE, dt->base); } +static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment var; + u32 ar; + + vmx_get_segment(vcpu, &var, seg); + ar = vmx_segment_access_rights(&var); + + if (var.base != (var.selector << 4)) + return false; + if (var.limit != 0xffff) + return false; + if (ar != 0xf3) + return false; + + return true; +} + +static bool code_segment_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs; + unsigned int cs_rpl; + + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); + cs_rpl = cs.selector & SELECTOR_RPL_MASK; + + if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) + return false; + if (!cs.s) + return false; + if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { + if (cs.dpl > cs_rpl) + return false; + } else if (cs.type & AR_TYPE_CODE_MASK) { + if (cs.dpl != cs_rpl) + return false; + } + if (!cs.present) + return false; + + /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ + return true; +} + +static bool stack_segment_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment ss; + unsigned int ss_rpl; + + vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); + ss_rpl = ss.selector & SELECTOR_RPL_MASK; + + if ((ss.type != 3) || (ss.type != 7)) + return false; + if (!ss.s) + return false; + if (ss.dpl != ss_rpl) /* DPL != RPL */ + return false; + if (!ss.present) + return false; + + return true; +} + +static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment var; + unsigned int rpl; + + vmx_get_segment(vcpu, &var, seg); + rpl = var.selector & SELECTOR_RPL_MASK; + + if (!var.s) + return false; + if (!var.present) + return false; + if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) { + if (var.dpl < rpl) /* DPL < RPL */ + return false; + } + + /* TODO: Add other members to kvm_segment_field to allow checking for other access + * rights flags + */ + return true; +} + +static bool tr_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment tr; + + vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); + + if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + return false; + if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ + return false; + if (!tr.present) + return false; + + return true; +} + +static bool ldtr_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment ldtr; + + vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); + + if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + return false; + if (ldtr.type != 2) + return false; + if (!ldtr.present) + return false; + + return true; +} + +static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs, ss; + + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); + vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); + + return ((cs.selector & SELECTOR_RPL_MASK) == + (ss.selector & SELECTOR_RPL_MASK)); +} + +/* + * Check if guest state is valid. Returns true if valid, false if + * not. + * We assume that registers are always usable + */ +static bool guest_state_valid(struct kvm_vcpu *vcpu) +{ + /* real mode guest state checks */ + if (!(vcpu->arch.cr0 & X86_CR0_PE)) { + if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) + return false; + } else { + /* protected mode guest state checks */ + if (!cs_ss_rpl_check(vcpu)) + return false; + if (!code_segment_valid(vcpu)) + return false; + if (!stack_segment_valid(vcpu)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_DS)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_ES)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_FS)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_GS)) + return false; + if (!tr_valid(vcpu)) + return false; + if (!ldtr_valid(vcpu)) + return false; + } + /* TODO: + * - Add checks on RIP + * - Add checks on RFLAGS + */ + + return true; +} + static int init_rmode_tss(struct kvm *kvm) { gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; @@ -1713,7 +1952,8 @@ static int init_rmode_tss(struct kvm *kvm) if (r < 0) goto out; data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; - r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); + r = kvm_write_guest_page(kvm, fn++, &data, + TSS_IOPB_BASE_OFFSET, sizeof(u16)); if (r < 0) goto out; r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); @@ -1776,7 +2016,7 @@ static void seg_setup(int seg) vmcs_write16(sf->selector, 0); vmcs_writel(sf->base, 0); vmcs_write32(sf->limit, 0xffff); - vmcs_write32(sf->ar_bytes, 0x93); + vmcs_write32(sf->ar_bytes, 0xf3); } static int alloc_apic_access_page(struct kvm *kvm) @@ -1795,9 +2035,7 @@ static int alloc_apic_access_page(struct kvm *kvm) if (r) goto out; - down_read(¤t->mm->mmap_sem); kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); - up_read(¤t->mm->mmap_sem); out: up_write(&kvm->slots_lock); return r; @@ -1819,10 +2057,8 @@ static int alloc_identity_pagetable(struct kvm *kvm) if (r) goto out; - down_read(¤t->mm->mmap_sem); kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); out: up_write(&kvm->slots_lock); return r; @@ -1873,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) */ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { - u32 host_sysenter_cs; + u32 host_sysenter_cs, msr_low, msr_high; u32 junk; + u64 host_pat; unsigned long a; struct descriptor_table dt; int i; @@ -1904,7 +2141,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) } if (!vm_need_ept()) exec_control |= CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_CR3_LOAD_EXITING; + CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_INVLPG_EXITING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); if (cpu_has_secondary_exec_ctrls()) { @@ -1961,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) rdmsrl(MSR_IA32_SYSENTER_EIP, a); vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); + host_pat = msr_low | ((u64) msr_high << 32); + vmcs_write64(HOST_IA32_PAT, host_pat); + } + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); + host_pat = msr_low | ((u64) msr_high << 32); + /* Write the default value follow host pat */ + vmcs_write64(GUEST_IA32_PAT, host_pat); + /* Keep arch.pat sync with GUEST_IA32_PAT */ + vmx->vcpu.arch.pat = host_pat; + } + for (i = 0; i < NR_VMX_MSR; ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; @@ -2015,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; + vmx->soft_vnmi_blocked = 0; + vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; @@ -2024,6 +2278,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) fx_init(&vmx->vcpu); + seg_setup(VCPU_SREG_CS); /* * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. @@ -2035,8 +2290,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); } - vmcs_write32(GUEST_CS_LIMIT, 0xffff); - vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -2113,17 +2366,44 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) ret = 0; + /* HACK: Don't enable emulation on guest boot/reset */ + vmx->emulation_required = 0; + out: up_read(&vcpu->kvm->slots_lock); return ret; } +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) { + enable_irq_window(vcpu); + return; + } + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); + ++vcpu->stat.irq_injections; if (vcpu->arch.rmode.active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = irq; @@ -2140,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + /* + * Tracking the NMI-blocked state in software is built upon + * finding the next open IRQ window. This, in turn, depends on + * well-behaving guests: They have to keep IRQs disabled at + * least as long as the NMI handler runs. Otherwise we may + * cause NMI nesting, maybe breaking the guest. But as this is + * highly unlikely, we can live with the residual risk. + */ + vmx->soft_vnmi_blocked = 1; + vmx->vnmi_blocked_time = 0; + } + + ++vcpu->stat.nmi_injections; + if (vcpu->arch.rmode.active) { + vmx->rmode.irq.pending = true; + vmx->rmode.irq.vector = NMI_VECTOR; + vmx->rmode.irq.rip = kvm_rip_read(vcpu); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + NMI_VECTOR | INTR_TYPE_SOFT_INTR | + INTR_INFO_VALID_MASK); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); + return; + } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } +static void vmx_update_window_states(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + + vcpu->arch.nmi_window_open = + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_NMI)); + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) + vcpu->arch.nmi_window_open = 0; + + vcpu->arch.interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS))); +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2153,44 +2477,52 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); if (!vcpu->arch.irq_pending[word_index]) clear_bit(word_index, &vcpu->arch.irq_summary); - vmx_inject_irq(vcpu, irq); + kvm_queue_interrupt(vcpu, irq); } - static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 cpu_based_vm_exec_control; + vmx_update_window_states(vcpu); - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vcpu->arch.nmi_window_open) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_nmi_window(vcpu); + return; + } + } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (vcpu->arch.irq_summary + || kvm_run->request_interrupt_window) + enable_irq_window(vcpu); + return; + } - if (vcpu->arch.interrupt_window_open && - vcpu->arch.irq_summary && - !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) - /* - * If interrupts enabled, and not blocked by sti or mov ss. Good. - */ - kvm_do_inject_irq(vcpu); + if (vcpu->arch.interrupt_window_open) { + if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) + kvm_do_inject_irq(vcpu); - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + if (vcpu->arch.interrupt.pending) + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + } if (!vcpu->arch.interrupt_window_open && (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) - /* - * Interrupts blocked. Wait for unblock. - */ - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - else - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + enable_irq_window(vcpu); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; struct kvm_userspace_memory_region tss_mem = { - .slot = 8, + .slot = TSS_PRIVATE_MEMSLOT, .guest_phys_addr = addr, .memory_size = PAGE_SIZE * 3, .flags = 0, @@ -2231,6 +2563,25 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) return 1; + /* + * Forward all other exceptions that are valid in real mode. + * FIXME: Breaks guest debugging in real mode, needs to be fixed with + * the required debugging infrastructure rework. + */ + switch (vec) { + case DE_VECTOR: + case DB_VECTOR: + case BP_VECTOR: + case OF_VECTOR: + case BR_VECTOR: + case UD_VECTOR: + case DF_VECTOR: + case SS_VECTOR: + case GP_VECTOR: + case MF_VECTOR: + kvm_queue_exception(vcpu, vec); + return 1; + } return 0; } @@ -2256,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -2345,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; + skip_emulated_instruction(vcpu); return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } @@ -2531,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); KVMTRACE_0D(PEND_INTR, vcpu, handler); + ++vcpu->stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as @@ -2539,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, if (kvm_run->request_interrupt_window && !vcpu->arch.irq_summary) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - ++vcpu->stat.irq_window_exits; return 0; } return 1; @@ -2558,6 +2910,15 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + + kvm_mmu_invlpg(vcpu, exit_qualification); + skip_emulated_instruction(vcpu); + return 1; +} + static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { skip_emulated_instruction(vcpu); @@ -2587,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; int reason; @@ -2594,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; + if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && + (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) + == INTR_TYPE_NMI_INTR) { + vcpu->arch.nmi_injected = false; + if (cpu_has_virtual_nmis()) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } tss_selector = exit_qualification; return kvm_task_switch(vcpu, tss_selector, reason); @@ -2670,6 +3041,41 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int err; + + preempt_enable(); + local_irq_enable(); + + while (!guest_state_valid(vcpu)) { + err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + + if (err == EMULATE_DO_MMIO) + break; + + if (err != EMULATE_DONE) { + kvm_report_emulation_failure(vcpu, "emulation failure"); + return; + } + + if (signal_pending(current)) + break; + if (need_resched()) + schedule(); + } + + local_irq_disable(); + preempt_disable(); + + /* Guest state should be valid now except if we need to + * emulate an MMIO */ + if (guest_state_valid(vcpu)) + vmx->emulation_required = 0; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2689,6 +3095,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_MSR_WRITE] = handle_wrmsr, [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, [EXIT_REASON_HLT] = handle_halt, + [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_VMCALL] = handle_vmcall, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, @@ -2713,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); + /* If we need to emulate an MMIO from handle_invalid_guest_state + * we just return 0 */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return 0; + /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ if (vm_need_ept() && is_paging(vcpu)) { @@ -2729,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION)) - printk(KERN_WARNING "%s: unexpected, valid vectoring info and " - "exit reason is 0x%x\n", __func__, exit_reason); + exit_reason != EXIT_REASON_EPT_VIOLATION && + exit_reason != EXIT_REASON_TASK_SWITCH)) + printk(KERN_WARNING "%s: unexpected, valid vectoring info " + "(0x%x) and exit reason is 0x%x\n", + __func__, vectoring_info, exit_reason); + + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { + if (vcpu->arch.interrupt_window_open) { + vmx->soft_vnmi_blocked = 0; + vcpu->arch.nmi_window_open = 1; + } else if (vmx->vnmi_blocked_time > 1000000000LL && + vcpu->arch.nmi_pending) { + /* + * This CPU don't support us in finding the end of an + * NMI-blocked window if the guest runs with IRQs + * disabled. So we pull the trigger after 1 s of + * futile waiting, but inform the user about this. + */ + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " + "state on VCPU %d after 1 s timeout\n", + __func__, vcpu->vcpu_id); + vmx->soft_vnmi_blocked = 0; + vmx->vcpu.arch.nmi_window_open = 1; + } + } + if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); @@ -2759,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); } -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - if (!cpu_has_virtual_nmis()) - return; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return !(guest_intr & (GUEST_INTR_STATE_NMI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)); -} - -static int vmx_irq_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)) && - (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); -} - -static void enable_intr_window(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); -} - static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -2826,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) if (unblock_nmi && vector != DF_VECTOR) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - } + } else if (unlikely(vmx->soft_vnmi_blocked)) + vmx->vnmi_blocked_time += + ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); idt_vectoring_info = vmx->idt_vectoring_info; idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; @@ -2862,29 +3254,31 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) static void vmx_intr_assist(struct kvm_vcpu *vcpu) { - u32 intr_info_field; - update_tpr_threshold(vcpu); - intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); - if (cpu_has_virtual_nmis()) { - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vmx_nmi_enabled(vcpu)) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_intr_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - enable_intr_window(vcpu); + vmx_update_window_states(vcpu); + + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vcpu->arch.nmi_window_open) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_nmi_window(vcpu); return; } } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); + return; + } if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vmx_irq_enabled(vcpu)) + if (vcpu->arch.interrupt_window_open) kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); else enable_irq_window(vcpu); @@ -2892,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); + if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); } } @@ -2918,11 +3314,29 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | vmx->rmode.irq.vector; } +#ifdef CONFIG_X86_64 +#define R "r" +#define Q "q" +#else +#define R "e" +#define Q "l" +#endif + static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info; + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) + vmx->entry_time = ktime_get(); + + /* Handle invalid guest state instead of entering VMX */ + if (vmx->emulation_required && emulate_invalid_guest_state) { + handle_invalid_guest_state(vcpu, kvm_run); + return; + } + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) @@ -2935,26 +3349,25 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) asm( /* Store host registers */ -#ifdef CONFIG_X86_64 - "push %%rdx; push %%rbp;" - "push %%rcx \n\t" -#else - "push %%edx; push %%ebp;" - "push %%ecx \n\t" -#endif + "push %%"R"dx; push %%"R"bp;" + "push %%"R"cx \n\t" + "cmp %%"R"sp, %c[host_rsp](%0) \n\t" + "je 1f \n\t" + "mov %%"R"sp, %c[host_rsp](%0) \n\t" __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" + "1: \n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags. */ + "mov %c[cr2](%0), %%"R"ax \n\t" + "mov %%"R"ax, %%cr2 \n\t" + "mov %c[rax](%0), %%"R"ax \n\t" + "mov %c[rbx](%0), %%"R"bx \n\t" + "mov %c[rdx](%0), %%"R"dx \n\t" + "mov %c[rsi](%0), %%"R"si \n\t" + "mov %c[rdi](%0), %%"R"di \n\t" + "mov %c[rbp](%0), %%"R"bp \n\t" #ifdef CONFIG_X86_64 - "mov %c[cr2](%0), %%rax \n\t" - "mov %%rax, %%cr2 \n\t" - "mov %c[rax](%0), %%rax \n\t" - "mov %c[rbx](%0), %%rbx \n\t" - "mov %c[rdx](%0), %%rdx \n\t" - "mov %c[rsi](%0), %%rsi \n\t" - "mov %c[rdi](%0), %%rdi \n\t" - "mov %c[rbp](%0), %%rbp \n\t" "mov %c[r8](%0), %%r8 \n\t" "mov %c[r9](%0), %%r9 \n\t" "mov %c[r10](%0), %%r10 \n\t" @@ -2963,18 +3376,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) "mov %c[r13](%0), %%r13 \n\t" "mov %c[r14](%0), %%r14 \n\t" "mov %c[r15](%0), %%r15 \n\t" - "mov %c[rcx](%0), %%rcx \n\t" /* kills %0 (rcx) */ -#else - "mov %c[cr2](%0), %%eax \n\t" - "mov %%eax, %%cr2 \n\t" - "mov %c[rax](%0), %%eax \n\t" - "mov %c[rbx](%0), %%ebx \n\t" - "mov %c[rdx](%0), %%edx \n\t" - "mov %c[rsi](%0), %%esi \n\t" - "mov %c[rdi](%0), %%edi \n\t" - "mov %c[rbp](%0), %%ebp \n\t" - "mov %c[rcx](%0), %%ecx \n\t" /* kills %0 (ecx) */ #endif + "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ + /* Enter guest mode */ "jne .Llaunched \n\t" __ex(ASM_VMX_VMLAUNCH) "\n\t" @@ -2982,15 +3386,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" ".Lkvm_vmx_return: " /* Save guest registers, load host registers, keep flags */ + "xchg %0, (%%"R"sp) \n\t" + "mov %%"R"ax, %c[rax](%0) \n\t" + "mov %%"R"bx, %c[rbx](%0) \n\t" + "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" + "mov %%"R"dx, %c[rdx](%0) \n\t" + "mov %%"R"si, %c[rsi](%0) \n\t" + "mov %%"R"di, %c[rdi](%0) \n\t" + "mov %%"R"bp, %c[rbp](%0) \n\t" #ifdef CONFIG_X86_64 - "xchg %0, (%%rsp) \n\t" - "mov %%rax, %c[rax](%0) \n\t" - "mov %%rbx, %c[rbx](%0) \n\t" - "pushq (%%rsp); popq %c[rcx](%0) \n\t" - "mov %%rdx, %c[rdx](%0) \n\t" - "mov %%rsi, %c[rsi](%0) \n\t" - "mov %%rdi, %c[rdi](%0) \n\t" - "mov %%rbp, %c[rbp](%0) \n\t" "mov %%r8, %c[r8](%0) \n\t" "mov %%r9, %c[r9](%0) \n\t" "mov %%r10, %c[r10](%0) \n\t" @@ -2999,28 +3403,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" - "mov %%cr2, %%rax \n\t" - "mov %%rax, %c[cr2](%0) \n\t" - - "pop %%rbp; pop %%rbp; pop %%rdx \n\t" -#else - "xchg %0, (%%esp) \n\t" - "mov %%eax, %c[rax](%0) \n\t" - "mov %%ebx, %c[rbx](%0) \n\t" - "pushl (%%esp); popl %c[rcx](%0) \n\t" - "mov %%edx, %c[rdx](%0) \n\t" - "mov %%esi, %c[rsi](%0) \n\t" - "mov %%edi, %c[rdi](%0) \n\t" - "mov %%ebp, %c[rbp](%0) \n\t" - "mov %%cr2, %%eax \n\t" - "mov %%eax, %c[cr2](%0) \n\t" - - "pop %%ebp; pop %%ebp; pop %%edx \n\t" #endif + "mov %%cr2, %%"R"ax \n\t" + "mov %%"R"ax, %c[cr2](%0) \n\t" + + "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" "setbe %c[fail](%0) \n\t" : : "c"(vmx), "d"((unsigned long)HOST_RSP), [launched]"i"(offsetof(struct vcpu_vmx, launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), + [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), @@ -3040,11 +3432,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) #endif [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) : "cc", "memory" + , R"bx", R"di", R"si" #ifdef CONFIG_X86_64 - , "rbx", "rdi", "rsi" , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" -#else - , "ebx", "edi", "rsi" #endif ); @@ -3055,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vcpu->arch.interrupt_window_open = - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; + vmx_update_window_states(vcpu); asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -3065,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); @@ -3074,6 +3462,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vmx_complete_interrupts(vmx); } +#undef R +#undef Q + static void vmx_free_vmcs(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3180,6 +3571,11 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } +static int vmx_get_mt_mask_shift(void) +{ + return VMX_EPT_MT_EPTE_SHIFT; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -3235,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, + .get_mt_mask_shift = vmx_get_mt_mask_shift, }; static int __init vmx_init(void) @@ -3290,10 +3687,10 @@ static int __init vmx_init(void) if (vm_need_ept()) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK); + VMX_EPT_EXECUTABLE_MASK, + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); kvm_enable_tdp(); } else kvm_disable_tdp();