X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fx86.c;h=cae5b12bf93856f60a1345e5e258247eb8026060;hb=8f1589d95e5eab1ed287f217a33656e922cdbdd0;hp=a0faa4882f846ddd77e160ac2770d845e76b6566;hpb=d6a8c875f35a6e1b3fb3f21e93eabb183b1f39ee;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a0faa48..cae5b12 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -37,11 +37,14 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" #include #include #include #include +#include #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -55,6 +58,10 @@ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + +#define KVM_MAX_MCE_BANKS 32 +#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P + /* EFER defaults: * - enable syscall per default because its emulated by KVM * - enable LME and LMA per default on 64 bit KVM @@ -176,16 +183,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, ++vcpu->stat.pf_guest; if (vcpu->arch.exception.pending) { - if (vcpu->arch.exception.nr == PF_VECTOR) { - printk(KERN_DEBUG "kvm: inject_page_fault:" - " double fault 0x%lx\n", addr); - vcpu->arch.exception.nr = DF_VECTOR; - vcpu->arch.exception.error_code = 0; - } else if (vcpu->arch.exception.nr == DF_VECTOR) { + switch(vcpu->arch.exception.nr) { + case DF_VECTOR: /* triple fault -> shutdown */ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + return; + case PF_VECTOR: + vcpu->arch.exception.nr = DF_VECTOR; + vcpu->arch.exception.error_code = 0; + return; + default: + /* replace previous exception with a new one in a hope + that instruction re-execution will regenerate lost + exception */ + vcpu->arch.exception.pending = false; + break; } - return; } vcpu->arch.cr2 = addr; kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); @@ -232,7 +245,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) goto out; } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { - if (is_present_pte(pdpte[i]) && + if (is_present_gpte(pdpte[i]) && (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { ret = 0; goto out; @@ -241,6 +254,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) ret = 1; memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); + __set_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_avail); + __set_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_dirty); out: return ret; @@ -256,6 +273,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) if (is_long_mode(vcpu) || !is_pae(vcpu)) return false; + if (!test_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_avail)) + return true; + r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); if (r < 0) goto out; @@ -328,9 +349,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); - KVMTRACE_1D(LMSW, vcpu, - (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)), - handler); } EXPORT_SYMBOL_GPL(kvm_lmsw); @@ -466,7 +484,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; @@ -644,8 +662,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, - &vcpu->hv_clock.tsc_timestamp); + kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); ktime_get_ts(&ts); local_irq_restore(flags); @@ -704,11 +721,48 @@ static bool msr_mtrr_valid(unsigned msr) return false; } +static bool valid_pat_type(unsigned t) +{ + return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ +} + +static bool valid_mtrr_type(unsigned t) +{ + return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ +} + +static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + int i; + + if (!msr_mtrr_valid(msr)) + return false; + + if (msr == MSR_IA32_CR_PAT) { + for (i = 0; i < 8; i++) + if (!valid_pat_type((data >> (i * 8)) & 0xff)) + return false; + return true; + } else if (msr == MSR_MTRRdefType) { + if (data & ~0xcff) + return false; + return valid_mtrr_type(data & 0xff); + } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { + for (i = 0; i < 8 ; i++) + if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) + return false; + return true; + } + + /* variable MTRRs */ + return valid_mtrr_type(data & 0xff); +} + static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) + if (!mtrr_valid(vcpu, msr, data)) return 1; if (msr == MSR_MTRRdefType) { @@ -741,23 +795,51 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 0; } +static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + u64 mcg_cap = vcpu->arch.mcg_cap; + unsigned bank_num = mcg_cap & 0xff; + + switch (msr) { + case MSR_IA32_MCG_STATUS: + vcpu->arch.mcg_status = data; + break; + case MSR_IA32_MCG_CTL: + if (!(mcg_cap & MCG_CTL_P)) + return 1; + if (data != 0 && data != ~(u64)0) + return -1; + vcpu->arch.mcg_ctl = data; + break; + default: + if (msr >= MSR_IA32_MC0_CTL && + msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + u32 offset = msr - MSR_IA32_MC0_CTL; + /* only 0 or all 1s can be written to IA32_MCi_CTL */ + if ((offset & 0x3) == 0 && + data != 0 && data != ~(u64)0) + return -1; + vcpu->arch.mce_banks[offset] = data; + break; + } + return 1; + } + return 0; +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { case MSR_EFER: set_efer(vcpu, data); break; - case MSR_IA32_MC0_STATUS: - pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", - __func__, data); - break; - case MSR_IA32_MCG_STATUS: - pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", - __func__, data); - break; - case MSR_IA32_MCG_CTL: - pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", - __func__, data); + case MSR_K7_HWCR: + data &= ~(u64)0x40; /* ignore flush filter disable */ + if (data != 0) { + pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", + data); + return 1; + } break; case MSR_IA32_DEBUGCTLMSR: if (!data) { @@ -813,6 +895,40 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) kvm_request_guest_time_update(vcpu); break; } + case MSR_IA32_MCG_CTL: + case MSR_IA32_MCG_STATUS: + case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + return set_msr_mce(vcpu, msr, data); + + /* Performance counters are not protected by a CPUID bit, + * so we should check all of them in the generic path for the sake of + * cross vendor migration. + * Writing a zero into the event select MSRs disables them, + * which we perfectly emulate ;-). Any other value should be at least + * reported, some guests depend on them. + */ + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: + case MSR_K7_EVNTSEL0: + case MSR_K7_EVNTSEL1: + case MSR_K7_EVNTSEL2: + case MSR_K7_EVNTSEL3: + if (data != 0) + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " + "0x%x data 0x%llx\n", msr, data); + break; + /* at least RHEL 4 unconditionally writes to the perfctr registers, + * so we ignore writes to make it happy. + */ + case MSR_P6_PERFCTR0: + case MSR_P6_PERFCTR1: + case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " + "0x%x data 0x%llx\n", msr, data); + break; default: pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); return 1; @@ -868,26 +984,47 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data; + u64 mcg_cap = vcpu->arch.mcg_cap; + unsigned bank_num = mcg_cap & 0xff; switch (msr) { - case 0xc0010010: /* SYSCFG */ - case 0xc0010015: /* HWCR */ - case MSR_IA32_PLATFORM_ID: case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_TYPE: - case MSR_IA32_MC0_CTL: - case MSR_IA32_MCG_STATUS: + data = 0; + break; case MSR_IA32_MCG_CAP: + data = vcpu->arch.mcg_cap; + break; case MSR_IA32_MCG_CTL: - case MSR_IA32_MC0_MISC: - case MSR_IA32_MC0_MISC+4: - case MSR_IA32_MC0_MISC+8: - case MSR_IA32_MC0_MISC+12: - case MSR_IA32_MC0_MISC+16: - case MSR_IA32_MC0_MISC+20: + if (!(mcg_cap & MCG_CTL_P)) + return 1; + data = vcpu->arch.mcg_ctl; + break; + case MSR_IA32_MCG_STATUS: + data = vcpu->arch.mcg_status; + break; + default: + if (msr >= MSR_IA32_MC0_CTL && + msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + u32 offset = msr - MSR_IA32_MC0_CTL; + data = vcpu->arch.mce_banks[offset]; + break; + } + return 1; + } + *pdata = data; + return 0; +} + +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data; + + switch (msr) { + case MSR_IA32_PLATFORM_ID: case MSR_IA32_UCODE_REV: case MSR_IA32_EBL_CR_POWERON: case MSR_IA32_DEBUGCTLMSR: @@ -895,9 +1032,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: + case MSR_K8_SYSCFG: + case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: + case MSR_K7_EVNTSEL0: data = 0; break; case MSR_MTRRcap: @@ -929,6 +1069,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_KVM_SYSTEM_TIME: data = vcpu->arch.time; break; + case MSR_IA32_P5_MC_ADDR: + case MSR_IA32_P5_MC_TYPE: + case MSR_IA32_MCG_CAP: + case MSR_IA32_MCG_CTL: + case MSR_IA32_MCG_STATUS: + case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + return get_msr_mce(vcpu, msr, pdata); default: pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1030,6 +1177,8 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_ASSIGN_DEV_IRQ: + case KVM_CAP_IRQFD: + case KVM_CAP_PIT2: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1050,6 +1199,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; + case KVM_CAP_MCE: + r = KVM_MAX_MCE_BANKS; + break; default: r = 0; break; @@ -1078,14 +1230,13 @@ long kvm_arch_dev_ioctl(struct file *filp, if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) goto out; r = -E2BIG; - if (n < num_msrs_to_save) + if (n < msr_list.nmsrs) goto out; r = -EFAULT; if (copy_to_user(user_msr_list->indices, &msrs_to_save, num_msrs_to_save * sizeof(u32))) goto out; - if (copy_to_user(user_msr_list->indices - + num_msrs_to_save * sizeof(u32), + if (copy_to_user(user_msr_list->indices + num_msrs_to_save, &emulated_msrs, ARRAY_SIZE(emulated_msrs) * sizeof(u32))) goto out; @@ -1110,6 +1261,16 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } + case KVM_X86_GET_MCE_CAP_SUPPORTED: { + u64 mce_cap; + + mce_cap = KVM_MCE_CAP_SUPPORTED; + r = -EFAULT; + if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) + goto out; + r = 0; + break; + } default: r = -EINVAL; } @@ -1247,41 +1408,53 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags = 0; } +#define F(x) bit(X86_FEATURE_##x) + static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, u32 index, int *nent, int maxnent) { - const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | - bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | - bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | - bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | - bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | - bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | - bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | - bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | - bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | - bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); - const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) | - bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | - bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | - bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | - bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | - bit(X86_FEATURE_PGE) | - bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | - bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) | - bit(X86_FEATURE_SYSCALL) | - (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) | + unsigned f_nx = is_efer_nx() ? F(NX) : 0; #ifdef CONFIG_X86_64 - bit(X86_FEATURE_LM) | + unsigned f_lm = F(LM); +#else + unsigned f_lm = 0; #endif - bit(X86_FEATURE_FXSR_OPT) | - bit(X86_FEATURE_MMXEXT) | - bit(X86_FEATURE_3DNOWEXT) | - bit(X86_FEATURE_3DNOW); - const u32 kvm_supported_word3_x86_features = - bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); + + /* cpuid 1.edx */ + const u32 kvm_supported_word0_x86_features = + F(FPU) | F(VME) | F(DE) | F(PSE) | + F(TSC) | F(MSR) | F(PAE) | F(MCE) | + F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | + F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | + F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | + 0 /* Reserved, DS, ACPI */ | F(MMX) | + F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | + 0 /* HTT, TM, Reserved, PBE */; + /* cpuid 0x80000001.edx */ + const u32 kvm_supported_word1_x86_features = + F(FPU) | F(VME) | F(DE) | F(PSE) | + F(TSC) | F(MSR) | F(PAE) | F(MCE) | + F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | + F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | + F(PAT) | F(PSE36) | 0 /* Reserved */ | + f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | + F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ | + 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); + /* cpuid 1.ecx */ + const u32 kvm_supported_word4_x86_features = + F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | + 0 /* DS-CPL, VMX, SMX, EST */ | + 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | + 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | + 0 /* Reserved, DCA */ | F(XMM4_1) | + F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) | + 0 /* Reserved, XSAVE, OSXSAVE */; + /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = - bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | - bit(X86_FEATURE_SVM); + F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | + F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | + F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | + 0 /* SKINIT */ | 0 /* WDT */; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -1294,7 +1467,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 1: entry->edx &= kvm_supported_word0_x86_features; - entry->ecx &= kvm_supported_word3_x86_features; + entry->ecx &= kvm_supported_word4_x86_features; break; /* function 2 entries are STATEFUL. That is, repeated cpuid commands * may return different values. This forces us to get_cpu() before @@ -1356,6 +1529,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, put_cpu(); } +#undef F + static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { @@ -1384,6 +1559,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, &nent, cpuid->nent); + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + r = -EFAULT; if (copy_to_user(entries, cpuid_entries, nent * sizeof(struct kvm_cpuid_entry2))) @@ -1427,8 +1606,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return -ENXIO; vcpu_load(vcpu); - set_bit(irq->irq, vcpu->arch.irq_pending); - set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary); + kvm_queue_interrupt(vcpu, irq->irq, false); vcpu_put(vcpu); @@ -1453,6 +1631,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, return 0; } +static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, + u64 mcg_cap) +{ + int r; + unsigned bank_num = mcg_cap & 0xff, bank; + + r = -EINVAL; + if (!bank_num) + goto out; + if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) + goto out; + r = 0; + vcpu->arch.mcg_cap = mcg_cap; + /* Init IA32_MCG_CTL to all 1s */ + if (mcg_cap & MCG_CTL_P) + vcpu->arch.mcg_ctl = ~(u64)0; + /* Init IA32_MCi_CTL to all 1s */ + for (bank = 0; bank < bank_num; bank++) + vcpu->arch.mce_banks[bank*4] = ~(u64)0; +out: + return r; +} + +static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, + struct kvm_x86_mce *mce) +{ + u64 mcg_cap = vcpu->arch.mcg_cap; + unsigned bank_num = mcg_cap & 0xff; + u64 *banks = vcpu->arch.mce_banks; + + if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL)) + return -EINVAL; + /* + * if IA32_MCG_CTL is not all 1s, the uncorrected error + * reporting is disabled + */ + if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && + vcpu->arch.mcg_ctl != ~(u64)0) + return 0; + banks += 4 * mce->bank; + /* + * if IA32_MCi_CTL is not all 1s, the uncorrected error + * reporting is disabled for the bank + */ + if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0) + return 0; + if (mce->status & MCI_STATUS_UC) { + if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || + !(vcpu->arch.cr4 & X86_CR4_MCE)) { + printk(KERN_DEBUG "kvm: set_mce: " + "injects mce exception while " + "previous one is in progress!\n"); + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + return 0; + } + if (banks[1] & MCI_STATUS_VAL) + mce->status |= MCI_STATUS_OVER; + banks[2] = mce->addr; + banks[3] = mce->misc; + vcpu->arch.mcg_status = mce->mcg_status; + banks[1] = mce->status; + kvm_queue_exception(vcpu, MC_VECTOR); + } else if (!(banks[1] & MCI_STATUS_VAL) + || !(banks[1] & MCI_STATUS_UC)) { + if (banks[1] & MCI_STATUS_VAL) + mce->status |= MCI_STATUS_OVER; + banks[2] = mce->addr; + banks[3] = mce->misc; + banks[1] = mce->status; + } else + banks[1] |= MCI_STATUS_OVER; + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1586,6 +1838,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp, kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); break; } + case KVM_X86_SETUP_MCE: { + u64 mcg_cap; + + r = -EFAULT; + if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) + goto out; + r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); + break; + } + case KVM_X86_SET_MCE: { + struct kvm_x86_mce mce; + + r = -EFAULT; + if (copy_from_user(&mce, argp, sizeof mce)) + goto out; + r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); + break; + } default: r = -EINVAL; } @@ -1611,10 +1881,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return -EINVAL; down_write(&kvm->slots_lock); + spin_lock(&kvm->mmu_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; + spin_unlock(&kvm->mmu_lock); up_write(&kvm->slots_lock); return 0; } @@ -1723,19 +1995,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: + spin_lock(&pic_irqchip(kvm)->lock); memcpy(&pic_irqchip(kvm)->pics[0], &chip->chip.pic, sizeof(struct kvm_pic_state)); + spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_PIC_SLAVE: + spin_lock(&pic_irqchip(kvm)->lock); memcpy(&pic_irqchip(kvm)->pics[1], &chip->chip.pic, sizeof(struct kvm_pic_state)); + spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_IOAPIC: + mutex_lock(&kvm->irq_lock); memcpy(ioapic_irqchip(kvm), &chip->chip.ioapic, sizeof(struct kvm_ioapic_state)); + mutex_unlock(&kvm->irq_lock); break; default: r = -EINVAL; @@ -1749,7 +2027,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) { int r = 0; + mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); return r; } @@ -1757,8 +2037,10 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { int r = 0; + mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); kvm_pit_load_count(kvm, 0, ps->channels[0].count); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); return r; } @@ -1767,7 +2049,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, { if (!kvm->arch.vpit) return -ENXIO; + mutex_lock(&kvm->arch.vpit->pit_state.lock); kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -1790,7 +2074,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { + spin_lock(&kvm->mmu_lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); memslot = &kvm->memslots[log->slot]; n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; @@ -1816,6 +2102,7 @@ long kvm_arch_vm_ioctl(struct file *filp, union { struct kvm_pit_state ps; struct kvm_memory_alias alias; + struct kvm_pit_config pit_config; } u; switch (ioctl) { @@ -1876,12 +2163,20 @@ long kvm_arch_vm_ioctl(struct file *filp, } break; case KVM_CREATE_PIT: + u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; + goto create_pit; + case KVM_CREATE_PIT2: + r = -EFAULT; + if (copy_from_user(&u.pit_config, argp, + sizeof(struct kvm_pit_config))) + goto out; + create_pit: mutex_lock(&kvm->lock); r = -EEXIST; if (kvm->arch.vpit) goto create_pit_unlock; r = -ENOMEM; - kvm->arch.vpit = kvm_create_pit(kvm); + kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); if (kvm->arch.vpit) r = 0; create_pit_unlock: @@ -1896,10 +2191,10 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { __s32 status; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->irq_lock); status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->irq_lock); if (ioctl == KVM_IRQ_LINE_STATUS) { irq_event.status = status; if (copy_to_user(argp, &irq_event, @@ -2032,7 +2327,7 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, if (vcpu->arch.apic) { dev = &vcpu->arch.apic->dev; - if (dev->in_range(dev, addr, len, is_write)) + if (kvm_iodevice_in_range(dev, addr, len, is_write)) return dev; } return NULL; @@ -2145,12 +2440,11 @@ mmio: */ mutex_lock(&vcpu->kvm->lock); mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); + mutex_unlock(&vcpu->kvm->lock); if (mmio_dev) { kvm_iodevice_read(mmio_dev, gpa, bytes, val); - mutex_unlock(&vcpu->kvm->lock); return X86EMUL_CONTINUE; } - mutex_unlock(&vcpu->kvm->lock); vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; @@ -2200,12 +2494,11 @@ mmio: */ mutex_lock(&vcpu->kvm->lock); mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); + mutex_unlock(&vcpu->kvm->lock); if (mmio_dev) { kvm_iodevice_write(mmio_dev, gpa, bytes, val); - mutex_unlock(&vcpu->kvm->lock); return X86EMUL_CONTINUE; } - mutex_unlock(&vcpu->kvm->lock); vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; @@ -2294,7 +2587,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { - KVMTRACE_0D(CLTS, vcpu, handler); kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); return X86EMUL_CONTINUE; } @@ -2365,7 +2657,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, u16 error_code, int emulation_type) { - int r; + int r, shadow_mask; struct decode_cache *c; kvm_clear_exception_queue(vcpu); @@ -2395,14 +2687,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu, r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); - /* Reject the instructions other than VMCALL/VMMCALL when - * try to emulate invalid opcode */ + /* Only allow emulation of specific instructions on #UD + * (namely VMMCALL, sysenter, sysexit, syscall)*/ c = &vcpu->arch.emulate_ctxt.decode; - if ((emulation_type & EMULTYPE_TRAP_UD) && - (!(c->twobyte && c->b == 0x01 && - (c->modrm_reg == 0 || c->modrm_reg == 3) && - c->modrm_mod == 3 && c->modrm_rm == 1))) - return EMULATE_FAIL; + if (emulation_type & EMULTYPE_TRAP_UD) { + if (!c->twobyte) + return EMULATE_FAIL; + switch (c->b) { + case 0x01: /* VMMCALL */ + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return EMULATE_FAIL; + break; + case 0x34: /* sysenter */ + case 0x35: /* sysexit */ + if (c->modrm_mod != 0 || c->modrm_rm != 0) + return EMULATE_FAIL; + break; + case 0x05: /* syscall */ + if (c->modrm_mod != 0 || c->modrm_rm != 0) + return EMULATE_FAIL; + break; + default: + return EMULATE_FAIL; + } + + if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) + return EMULATE_FAIL; + } ++vcpu->stat.insn_emulation; if (r) { @@ -2419,6 +2730,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); + shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; + + if (r == 0) + kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); if (vcpu->arch.pio.string) return EMULATE_DO_MMIO; @@ -2524,7 +2839,6 @@ static void kernel_pio(struct kvm_io_device *pio_dev, { /* TODO: String I/O for in kernel device */ - mutex_lock(&vcpu->kvm->lock); if (vcpu->arch.pio.in) kvm_iodevice_read(pio_dev, vcpu->arch.pio.port, vcpu->arch.pio.size, @@ -2533,7 +2847,6 @@ static void kernel_pio(struct kvm_io_device *pio_dev, kvm_iodevice_write(pio_dev, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); - mutex_unlock(&vcpu->kvm->lock); } static void pio_string_write(struct kvm_io_device *pio_dev, @@ -2543,14 +2856,12 @@ static void pio_string_write(struct kvm_io_device *pio_dev, void *pd = vcpu->arch.pio_data; int i; - mutex_lock(&vcpu->kvm->lock); for (i = 0; i < io->cur_count; i++) { kvm_iodevice_write(pio_dev, io->port, io->size, pd); pd += io->size; } - mutex_unlock(&vcpu->kvm->lock); } static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, @@ -2577,17 +2888,15 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.down = 0; vcpu->arch.pio.rep = 0; - if (vcpu->run->io.direction == KVM_EXIT_IO_IN) - KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, - handler); - else - KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, - handler); + trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, + size, 1); val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); + mutex_lock(&vcpu->kvm->lock); pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); + mutex_unlock(&vcpu->kvm->lock); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); complete_pio(vcpu); @@ -2616,12 +2925,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.down = down; vcpu->arch.pio.rep = rep; - if (vcpu->run->io.direction == KVM_EXIT_IO_IN) - KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, - handler); - else - KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, - handler); + trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, + size, count); if (!count) { kvm_x86_ops->skip_emulated_instruction(vcpu); @@ -2651,9 +2956,12 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.guest_gva = address; + mutex_lock(&vcpu->kvm->lock); pio_dev = vcpu_find_pio_dev(vcpu, port, vcpu->arch.pio.cur_count, !vcpu->arch.pio.in); + mutex_unlock(&vcpu->kvm->lock); + if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); @@ -2703,10 +3011,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; + kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) continue; if (!kvm_request_guest_time_update(vcpu)) @@ -2799,7 +3104,6 @@ void kvm_arch_exit(void) int kvm_emulate_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; - KVMTRACE_0D(HLT, vcpu, handler); if (irqchip_in_kernel(vcpu->kvm)) { vcpu->arch.mp_state = KVM_MP_STATE_HALTED; return 1; @@ -2830,7 +3134,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); - KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); + trace_kvm_hypercall(nr, a0, a1, a2, a3); if (!is_long_mode(vcpu)) { nr &= 0xFFFFFFFF; @@ -2930,8 +3234,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); return 0; } - KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value, - (u32)((u64)value >> 32), handler); return value; } @@ -2939,9 +3241,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, unsigned long *rflags) { - KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val, - (u32)((u64)val >> 32), handler); - switch (cr) { case 0: kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); @@ -3051,11 +3350,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); } kvm_x86_ops->skip_emulated_instruction(vcpu); - KVMTRACE_5D(CPUID, vcpu, function, - (u32)kvm_register_read(vcpu, VCPU_REGS_RAX), - (u32)kvm_register_read(vcpu, VCPU_REGS_RBX), - (u32)kvm_register_read(vcpu, VCPU_REGS_RCX), - (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler); + trace_kvm_cpuid(function, + kvm_register_read(vcpu, VCPU_REGS_RAX), + kvm_register_read(vcpu, VCPU_REGS_RBX), + kvm_register_read(vcpu, VCPU_REGS_RCX), + kvm_register_read(vcpu, VCPU_REGS_RDX)); } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); @@ -3083,8 +3382,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, kvm_run->ready_for_interrupt_injection = 1; else kvm_run->ready_for_interrupt_injection = - (kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu)); + kvm_arch_interrupt_allowed(vcpu) && + !kvm_cpu_has_interrupt(vcpu) && + !kvm_event_needs_reinjection(vcpu); } static void vapic_enter(struct kvm_vcpu *vcpu) @@ -3120,7 +3420,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!kvm_x86_ops->update_cr8_intercept) return; - max_irr = kvm_lapic_find_highest_irr(vcpu); + if (!vcpu->arch.apic->vapic_addr) + max_irr = kvm_lapic_find_highest_irr(vcpu); + else + max_irr = -1; if (max_irr != -1) max_irr >>= 4; @@ -3130,7 +3433,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); } -static void inject_irq(struct kvm_vcpu *vcpu) +static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { /* try to reinject previous events if any */ if (vcpu->arch.nmi_injected) { @@ -3139,7 +3442,7 @@ static void inject_irq(struct kvm_vcpu *vcpu) } if (vcpu->arch.interrupt.pending) { - kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr); + kvm_x86_ops->set_irq(vcpu); return; } @@ -3152,32 +3455,18 @@ static void inject_irq(struct kvm_vcpu *vcpu) } } else if (kvm_cpu_has_interrupt(vcpu)) { if (kvm_x86_ops->interrupt_allowed(vcpu)) { - kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); - kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr); + kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), + false); + kvm_x86_ops->set_irq(vcpu); } } } -static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && - kvm_run->request_interrupt_window; - - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - kvm_x86_ops->drop_interrupt_shadow(vcpu); - - inject_irq(vcpu); - - /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) - kvm_x86_ops->enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) - kvm_x86_ops->enable_irq_window(vcpu); -} - static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && + kvm_run->request_interrupt_window; if (vcpu->requests) if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) @@ -3216,6 +3505,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) local_irq_disable(); + clear_bit(KVM_REQ_KICK, &vcpu->requests); + smp_mb__after_clear_bit(); + if (vcpu->requests || need_resched() || signal_pending(current)) { local_irq_enable(); preempt_enable(); @@ -3223,23 +3515,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - vcpu->guest_mode = 1; - /* - * Make sure that guest_mode assignment won't happen after - * testing the pending IRQ vector bitmap. - */ - smp_wmb(); - if (vcpu->arch.exception.pending) __queue_exception(vcpu); else inject_pending_irq(vcpu, kvm_run); + /* enable NMI/IRQ window open exits if needed */ + if (vcpu->arch.nmi_pending) + kvm_x86_ops->enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) + kvm_x86_ops->enable_irq_window(vcpu); + if (kvm_lapic_enabled(vcpu)) { - if (!vcpu->arch.apic->vapic_addr) - update_cr8_intercept(vcpu); - else - kvm_lapic_sync_to_vapic(vcpu); + update_cr8_intercept(vcpu); + kvm_lapic_sync_to_vapic(vcpu); } up_read(&vcpu->kvm->slots_lock); @@ -3261,7 +3550,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_debugreg(vcpu->arch.eff_db[3], 3); } - KVMTRACE_0D(VMENTRY, vcpu, entryexit); + trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); if (unlikely(vcpu->arch.switch_db_regs)) { @@ -3274,7 +3563,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_debugreg(vcpu->arch.host_dr6, 6); set_debugreg(vcpu->arch.host_dr7, 7); - vcpu->guest_mode = 0; + set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); ++vcpu->stat.exits; @@ -3564,14 +3853,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->efer = vcpu->arch.shadow_efer; sregs->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) - memset(sregs->interrupt_bitmap, 0, - sizeof sregs->interrupt_bitmap); - else - memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending, - sizeof sregs->interrupt_bitmap); + memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); - if (vcpu->arch.interrupt.pending) + if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) set_bit(vcpu->arch.interrupt.nr, (unsigned long *)sregs->interrupt_bitmap); @@ -4039,7 +4323,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { int mmu_reset_needed = 0; - int i, pending_vec, max_bits; + int pending_vec, max_bits; struct descriptor_table dt; vcpu_load(vcpu); @@ -4081,24 +4365,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - if (!irqchip_in_kernel(vcpu->kvm)) { - memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap, - sizeof vcpu->arch.irq_pending); - vcpu->arch.irq_summary = 0; - for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i) - if (vcpu->arch.irq_pending[i]) - __set_bit(i, &vcpu->arch.irq_summary); - } else { - max_bits = (sizeof sregs->interrupt_bitmap) << 3; - pending_vec = find_first_bit( - (const unsigned long *)sregs->interrupt_bitmap, - max_bits); - /* Only pending external irq is handled here */ - if (pending_vec < max_bits) { - kvm_queue_interrupt(vcpu, pending_vec); - pr_debug("Set back pending irq %d\n", pending_vec); - } - kvm_pic_clear_isr_ack(vcpu->kvm); + max_bits = (sizeof sregs->interrupt_bitmap) << 3; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, max_bits); + if (pending_vec < max_bits) { + kvm_queue_interrupt(vcpu, pending_vec, false); + pr_debug("Set back pending irq %d\n", pending_vec); + if (irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); } kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); @@ -4112,7 +4386,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); /* Older userspace won't unhalt the vcpu on reset. */ - if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 && + if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && !(vcpu->arch.cr0 & X86_CR0_PE)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -4383,7 +4657,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm = vcpu->kvm; vcpu->arch.mmu.root_hpa = INVALID_PAGE; - if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) + if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; @@ -4405,6 +4679,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_mmu_destroy; } + vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, + GFP_KERNEL); + if (!vcpu->arch.mce_banks) { + r = -ENOMEM; + goto fail_mmu_destroy; + } + vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; + return 0; fail_mmu_destroy: @@ -4452,20 +4734,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) static void kvm_free_vcpus(struct kvm *kvm) { unsigned int i; + struct kvm_vcpu *vcpu; /* * Unpin any mmu pages first. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) - if (kvm->vcpus[i]) - kvm_unload_vcpu_mmu(kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (kvm->vcpus[i]) { - kvm_arch_vcpu_free(kvm->vcpus[i]); - kvm->vcpus[i] = NULL; - } - } + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_unload_vcpu_mmu(vcpu); + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_arch_vcpu_free(vcpu); + + mutex_lock(&kvm->lock); + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + atomic_set(&kvm->online_vcpus, 0); + mutex_unlock(&kvm->lock); } void kvm_arch_sync_events(struct kvm *kvm) @@ -4534,12 +4818,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, } } + spin_lock(&kvm->mmu_lock); if (!kvm->arch.n_requested_mmu_pages) { unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); } kvm_mmu_slot_remove_write_access(kvm, mem->slot); + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); return 0; @@ -4548,6 +4834,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, void kvm_arch_flush_shadow(struct kvm *kvm) { kvm_mmu_zap_all(kvm); + kvm_reload_remote_mmus(kvm); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) @@ -4557,30 +4844,20 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) || vcpu->arch.nmi_pending; } -static void vcpu_kick_intr(void *info) -{ -#ifdef DEBUG - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; - printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); -#endif -} - void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int ipi_pcpu = vcpu->cpu; - int cpu; + int me; + int cpu = vcpu->cpu; if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); ++vcpu->stat.halt_wakeup; } - /* - * We may be called synchronously with irqs disabled in guest mode, - * So need not to call smp_call_function_single() in that case. - */ - cpu = get_cpu(); - if (vcpu->guest_mode && vcpu->cpu != cpu) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); + + me = get_cpu(); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) + if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) + smp_send_reschedule(cpu); put_cpu(); } @@ -4588,3 +4865,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) { return kvm_x86_ops->interrupt_allowed(vcpu); } + +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);