X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fx86.c;h=b87d65d89a05ac8adce04b459b09c35cfdb9d854;hb=a1b37100d9e29c1f8dc3e2f5490a205c80180e01;hp=d8adc1da76ddb861c212983b893cdacf61aa8aea;hpb=c5af89b68abb26eea5e745f33228f4d672f115e5;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d8adc1d..b87d65d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -37,6 +37,10 @@ #include #include #include +#include +#undef TRACE_INCLUDE_FILE +#define CREATE_TRACE_POINTS +#include "trace.h" #include #include @@ -75,12 +79,13 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, - u32 function, u32 index); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); +int ignore_msrs = 0; +module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); + struct kvm_stats_debugfs_item debugfs_entries[] = { { "pf_fixed", VCPU_STAT(pf_fixed) }, { "pf_guest", VCPU_STAT(pf_guest) }, @@ -181,16 +186,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, ++vcpu->stat.pf_guest; if (vcpu->arch.exception.pending) { - if (vcpu->arch.exception.nr == PF_VECTOR) { - printk(KERN_DEBUG "kvm: inject_page_fault:" - " double fault 0x%lx\n", addr); - vcpu->arch.exception.nr = DF_VECTOR; - vcpu->arch.exception.error_code = 0; - } else if (vcpu->arch.exception.nr == DF_VECTOR) { + switch(vcpu->arch.exception.nr) { + case DF_VECTOR: /* triple fault -> shutdown */ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + return; + case PF_VECTOR: + vcpu->arch.exception.nr = DF_VECTOR; + vcpu->arch.exception.error_code = 0; + return; + default: + /* replace previous exception with a new one in a hope + that instruction re-execution will regenerate lost + exception */ + vcpu->arch.exception.pending = false; + break; } - return; } vcpu->arch.cr2 = addr; kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); @@ -212,13 +223,6 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) } EXPORT_SYMBOL_GPL(kvm_queue_exception_e); -static void __queue_exception(struct kvm_vcpu *vcpu) -{ - kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, - vcpu->arch.exception.has_error_code, - vcpu->arch.exception.error_code); -} - /* * Load the pae pdptrs. Return true is they are all valid. */ @@ -341,9 +345,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); - KVMTRACE_1D(LMSW, vcpu, - (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)), - handler); } EXPORT_SYMBOL_GPL(kvm_lmsw); @@ -828,6 +829,23 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_EFER: set_efer(vcpu, data); break; + case MSR_K7_HWCR: + data &= ~(u64)0x40; /* ignore flush filter disable */ + if (data != 0) { + pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", + data); + return 1; + } + break; + case MSR_FAM10H_MMIO_CONF_BASE: + if (data != 0) { + pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " + "0x%llx\n", data); + return 1; + } + break; + case MSR_AMD64_NB_CFG: + break; case MSR_IA32_DEBUGCTLMSR: if (!data) { /* We support the non-activated case already */ @@ -843,12 +861,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case MSR_VM_HSAVE_PA: + case MSR_AMD64_PATCH_LOADER: break; case 0x200 ... 0x2ff: return set_msr_mtrr(vcpu, msr, data); case MSR_IA32_APICBASE: kvm_set_apic_base(vcpu, data); break; + case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_MISC_ENABLE: vcpu->arch.ia32_misc_enable_msr = data; break; @@ -886,9 +907,46 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: return set_msr_mce(vcpu, msr, data); + + /* Performance counters are not protected by a CPUID bit, + * so we should check all of them in the generic path for the sake of + * cross vendor migration. + * Writing a zero into the event select MSRs disables them, + * which we perfectly emulate ;-). Any other value should be at least + * reported, some guests depend on them. + */ + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: + case MSR_K7_EVNTSEL0: + case MSR_K7_EVNTSEL1: + case MSR_K7_EVNTSEL2: + case MSR_K7_EVNTSEL3: + if (data != 0) + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " + "0x%x data 0x%llx\n", msr, data); + break; + /* at least RHEL 4 unconditionally writes to the perfctr registers, + * so we ignore writes to make it happy. + */ + case MSR_P6_PERFCTR0: + case MSR_P6_PERFCTR1: + case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " + "0x%x data 0x%llx\n", msr, data); + break; default: - pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); - return 1; + if (!ignore_msrs) { + pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", + msr, data); + return 1; + } else { + pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", + msr, data); + break; + } } return 0; } @@ -995,6 +1053,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: case MSR_K7_EVNTSEL0: + case MSR_K8_INT_PENDING_MSG: + case MSR_AMD64_NB_CFG: + case MSR_FAM10H_MMIO_CONF_BASE: data = 0; break; case MSR_MTRRcap: @@ -1008,6 +1069,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_APICBASE: data = kvm_get_apic_base(vcpu); break; + case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + return kvm_x2apic_msr_read(vcpu, msr, pdata); + break; case MSR_IA32_MISC_ENABLE: data = vcpu->arch.ia32_misc_enable_msr; break; @@ -1034,8 +1098,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: return get_msr_mce(vcpu, msr, pdata); default: - pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); - return 1; + if (!ignore_msrs) { + pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); + return 1; + } else { + pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); + data = 0; + } + break; } *pdata = data; return 0; @@ -1135,7 +1205,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: case KVM_CAP_PIT2: + case KVM_CAP_PIT_STATE2: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1308,6 +1380,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_nent = cpuid->nent; cpuid_fix_nx_cap(vcpu); r = 0; + kvm_apic_set_version(vcpu); out_free: vfree(cpuid_entries); @@ -1329,6 +1402,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry2))) goto out; vcpu->arch.cpuid_nent = cpuid->nent; + kvm_apic_set_version(vcpu); return 0; out: @@ -1404,7 +1478,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 0 /* Reserved, DCA */ | F(XMM4_1) | - F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) | + F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 0 /* Reserved, XSAVE, OSXSAVE */; /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = @@ -1425,6 +1499,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, case 1: entry->edx &= kvm_supported_word0_x86_features; entry->ecx &= kvm_supported_word4_x86_features; + /* we support x2apic emulation even if host does not support + * it since we emulate x2apic in software */ + entry->ecx |= F(X2APIC); break; /* function 2 entries are STATEFUL. That is, repeated cpuid commands * may return different values. This forces us to get_cpu() before @@ -1952,19 +2029,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: + spin_lock(&pic_irqchip(kvm)->lock); memcpy(&pic_irqchip(kvm)->pics[0], &chip->chip.pic, sizeof(struct kvm_pic_state)); + spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_PIC_SLAVE: + spin_lock(&pic_irqchip(kvm)->lock); memcpy(&pic_irqchip(kvm)->pics[1], &chip->chip.pic, sizeof(struct kvm_pic_state)); + spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_IOAPIC: + mutex_lock(&kvm->irq_lock); memcpy(ioapic_irqchip(kvm), &chip->chip.ioapic, sizeof(struct kvm_ioapic_state)); + mutex_unlock(&kvm->irq_lock); break; default: r = -EINVAL; @@ -1978,7 +2061,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) { int r = 0; + mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); return r; } @@ -1986,8 +2071,39 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { int r = 0; + mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); - kvm_pit_load_count(kvm, 0, ps->channels[0].count); + kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); + return r; +} + +static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) +{ + int r = 0; + + mutex_lock(&kvm->arch.vpit->pit_state.lock); + memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, + sizeof(ps->channels)); + ps->flags = kvm->arch.vpit->pit_state.flags; + mutex_unlock(&kvm->arch.vpit->pit_state.lock); + return r; +} + +static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) +{ + int r = 0, start = 0; + u32 prev_legacy, cur_legacy; + mutex_lock(&kvm->arch.vpit->pit_state.lock); + prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; + cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; + if (!prev_legacy && cur_legacy) + start = 1; + memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, + sizeof(kvm->arch.vpit->pit_state.channels)); + kvm->arch.vpit->pit_state.flags = ps->flags; + kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); return r; } @@ -1996,7 +2112,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, { if (!kvm->arch.vpit) return -ENXIO; + mutex_lock(&kvm->arch.vpit->pit_state.lock); kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -2046,6 +2164,7 @@ long kvm_arch_vm_ioctl(struct file *filp, */ union { struct kvm_pit_state ps; + struct kvm_pit_state2 ps2; struct kvm_memory_alias alias; struct kvm_pit_config pit_config; } u; @@ -2116,7 +2235,7 @@ long kvm_arch_vm_ioctl(struct file *filp, sizeof(struct kvm_pit_config))) goto out; create_pit: - mutex_lock(&kvm->lock); + down_write(&kvm->slots_lock); r = -EEXIST; if (kvm->arch.vpit) goto create_pit_unlock; @@ -2125,7 +2244,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.vpit) r = 0; create_pit_unlock: - mutex_unlock(&kvm->lock); + up_write(&kvm->slots_lock); break; case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { @@ -2228,6 +2347,32 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_PIT2: { + r = -ENXIO; + if (!kvm->arch.vpit) + goto out; + r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &u.ps2, sizeof(u.ps2))) + goto out; + r = 0; + break; + } + case KVM_SET_PIT2: { + r = -EFAULT; + if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) + goto out; + r = -ENXIO; + if (!kvm->arch.vpit) + goto out; + r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); + if (r) + goto out; + r = 0; + break; + } case KVM_REINJECT_CONTROL: { struct kvm_reinject_control control; r = -EFAULT; @@ -2261,35 +2406,23 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; } -/* - * Only apic need an MMIO device hook, so shortcut now.. - */ -static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, - gpa_t addr, int len, - int is_write) +static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, + const void *v) { - struct kvm_io_device *dev; + if (vcpu->arch.apic && + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) + return 0; - if (vcpu->arch.apic) { - dev = &vcpu->arch.apic->dev; - if (kvm_iodevice_in_range(dev, addr, len, is_write)) - return dev; - } - return NULL; + return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); } - -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr, int len, - int is_write) +static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) { - struct kvm_io_device *dev; + if (vcpu->arch.apic && + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) + return 0; - dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); - if (dev == NULL) - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, - is_write); - return dev; + return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); } static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, @@ -2358,11 +2491,12 @@ static int emulator_read_emulated(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu) { - struct kvm_io_device *mmio_dev; gpa_t gpa; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, + vcpu->mmio_phys_addr, *(u64 *)val); vcpu->mmio_read_completed = 0; return X86EMUL_CONTINUE; } @@ -2383,14 +2517,13 @@ mmio: /* * Is this MMIO handled locally? */ - mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); - mutex_unlock(&vcpu->kvm->lock); - if (mmio_dev) { - kvm_iodevice_read(mmio_dev, gpa, bytes, val); + if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { + trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); return X86EMUL_CONTINUE; } + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; vcpu->mmio_size = bytes; @@ -2416,7 +2549,6 @@ static int emulator_write_emulated_onepage(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu) { - struct kvm_io_device *mmio_dev; gpa_t gpa; gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); @@ -2434,16 +2566,12 @@ static int emulator_write_emulated_onepage(unsigned long addr, return X86EMUL_CONTINUE; mmio: + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); /* * Is this MMIO handled locally? */ - mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); - mutex_unlock(&vcpu->kvm->lock); - if (mmio_dev) { - kvm_iodevice_write(mmio_dev, gpa, bytes, val); + if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) return X86EMUL_CONTINUE; - } vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; @@ -2532,7 +2660,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { - KVMTRACE_0D(CLTS, vcpu, handler); kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); return X86EMUL_CONTINUE; } @@ -2633,14 +2760,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu, r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); - /* Reject the instructions other than VMCALL/VMMCALL when - * try to emulate invalid opcode */ + /* Only allow emulation of specific instructions on #UD + * (namely VMMCALL, sysenter, sysexit, syscall)*/ c = &vcpu->arch.emulate_ctxt.decode; - if ((emulation_type & EMULTYPE_TRAP_UD) && - (!(c->twobyte && c->b == 0x01 && - (c->modrm_reg == 0 || c->modrm_reg == 3) && - c->modrm_mod == 3 && c->modrm_rm == 1))) - return EMULATE_FAIL; + if (emulation_type & EMULTYPE_TRAP_UD) { + if (!c->twobyte) + return EMULATE_FAIL; + switch (c->b) { + case 0x01: /* VMMCALL */ + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return EMULATE_FAIL; + break; + case 0x34: /* sysenter */ + case 0x35: /* sysexit */ + if (c->modrm_mod != 0 || c->modrm_rm != 0) + return EMULATE_FAIL; + break; + case 0x05: /* syscall */ + if (c->modrm_mod != 0 || c->modrm_rm != 0) + return EMULATE_FAIL; + break; + default: + return EMULATE_FAIL; + } + + if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) + return EMULATE_FAIL; + } ++vcpu->stat.insn_emulation; if (r) { @@ -2760,48 +2906,40 @@ int complete_pio(struct kvm_vcpu *vcpu) return 0; } -static void kernel_pio(struct kvm_io_device *pio_dev, - struct kvm_vcpu *vcpu, - void *pd) +static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) { /* TODO: String I/O for in kernel device */ + int r; if (vcpu->arch.pio.in) - kvm_iodevice_read(pio_dev, vcpu->arch.pio.port, - vcpu->arch.pio.size, - pd); + r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, + vcpu->arch.pio.size, pd); else - kvm_iodevice_write(pio_dev, vcpu->arch.pio.port, - vcpu->arch.pio.size, - pd); + r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, + vcpu->arch.pio.size, pd); + return r; } -static void pio_string_write(struct kvm_io_device *pio_dev, - struct kvm_vcpu *vcpu) +static int pio_string_write(struct kvm_vcpu *vcpu) { struct kvm_pio_request *io = &vcpu->arch.pio; void *pd = vcpu->arch.pio_data; - int i; + int i, r = 0; for (i = 0; i < io->cur_count; i++) { - kvm_iodevice_write(pio_dev, io->port, - io->size, - pd); + if (kvm_io_bus_write(&vcpu->kvm->pio_bus, + io->port, io->size, pd)) { + r = -EOPNOTSUPP; + break; + } pd += io->size; } -} - -static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, - gpa_t addr, int len, - int is_write) -{ - return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write); + return r; } int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int size, unsigned port) { - struct kvm_io_device *pio_dev; unsigned long val; vcpu->run->exit_reason = KVM_EXIT_IO; @@ -2815,21 +2953,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.down = 0; vcpu->arch.pio.rep = 0; - if (vcpu->run->io.direction == KVM_EXIT_IO_IN) - KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, - handler); - else - KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, - handler); + trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, + size, 1); val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); - mutex_lock(&vcpu->kvm->lock); - pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); - mutex_unlock(&vcpu->kvm->lock); - if (pio_dev) { - kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { complete_pio(vcpu); return 1; } @@ -2843,7 +2973,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, { unsigned now, in_page; int ret = 0; - struct kvm_io_device *pio_dev; vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -2856,12 +2985,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.down = down; vcpu->arch.pio.rep = rep; - if (vcpu->run->io.direction == KVM_EXIT_IO_IN) - KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, - handler); - else - KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, - handler); + trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, + size, count); if (!count) { kvm_x86_ops->skip_emulated_instruction(vcpu); @@ -2891,12 +3016,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.guest_gva = address; - mutex_lock(&vcpu->kvm->lock); - pio_dev = vcpu_find_pio_dev(vcpu, port, - vcpu->arch.pio.cur_count, - !vcpu->arch.pio.in); - mutex_unlock(&vcpu->kvm->lock); - if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); @@ -2904,16 +3023,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_inject_gp(vcpu, 0); return 1; } - if (ret == 0 && pio_dev) { - pio_string_write(pio_dev, vcpu); + if (ret == 0 && !pio_string_write(vcpu)) { complete_pio(vcpu); if (vcpu->arch.pio.count == 0) ret = 1; } - } else if (pio_dev) - pr_unimpl(vcpu, "no string pio read support yet, " - "port %x size %d count %ld\n", - port, size, count); + } + /* no string PIO read support yet */ return ret; } @@ -2946,10 +3062,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; + kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) continue; if (!kvm_request_guest_time_update(vcpu)) @@ -3042,7 +3155,6 @@ void kvm_arch_exit(void) int kvm_emulate_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; - KVMTRACE_0D(HLT, vcpu, handler); if (irqchip_in_kernel(vcpu->kvm)) { vcpu->arch.mp_state = KVM_MP_STATE_HALTED; return 1; @@ -3073,7 +3185,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); - KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); + trace_kvm_hypercall(nr, a0, a1, a2, a3); if (!is_long_mode(vcpu)) { nr &= 0xFFFFFFFF; @@ -3173,8 +3285,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); return 0; } - KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value, - (u32)((u64)value >> 32), handler); return value; } @@ -3182,9 +3292,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, unsigned long *rflags) { - KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val, - (u32)((u64)val >> 32), handler); - switch (cr) { case 0: kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); @@ -3294,11 +3401,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); } kvm_x86_ops->skip_emulated_instruction(vcpu); - KVMTRACE_5D(CPUID, vcpu, function, - (u32)kvm_register_read(vcpu, VCPU_REGS_RAX), - (u32)kvm_register_read(vcpu, VCPU_REGS_RBX), - (u32)kvm_register_read(vcpu, VCPU_REGS_RCX), - (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler); + trace_kvm_cpuid(function, + kvm_register_read(vcpu, VCPU_REGS_RAX), + kvm_register_read(vcpu, VCPU_REGS_RBX), + kvm_register_read(vcpu, VCPU_REGS_RCX), + kvm_register_read(vcpu, VCPU_REGS_RDX)); } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); @@ -3377,9 +3484,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); } -static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { /* try to reinject previous events if any */ + if (vcpu->arch.exception.pending) { + kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, + vcpu->arch.exception.has_error_code, + vcpu->arch.exception.error_code); + return; + } + if (vcpu->arch.nmi_injected) { kvm_x86_ops->set_nmi(vcpu); return; @@ -3453,16 +3567,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) smp_mb__after_clear_bit(); if (vcpu->requests || need_resched() || signal_pending(current)) { + set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); preempt_enable(); r = 1; goto out; } - if (vcpu->arch.exception.pending) - __queue_exception(vcpu); - else - inject_pending_irq(vcpu, kvm_run); + inject_pending_event(vcpu, kvm_run); /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) @@ -3494,7 +3606,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_debugreg(vcpu->arch.eff_db[3], 3); } - KVMTRACE_0D(VMENTRY, vcpu, entryexit); + trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); if (unlikely(vcpu->arch.switch_db_regs)) { @@ -4281,13 +4393,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = sregs->cr2; mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; - - down_read(&vcpu->kvm->slots_lock); - if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT)) - vcpu->arch.cr3 = sregs->cr3; - else - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); - up_read(&vcpu->kvm->slots_lock); + vcpu->arch.cr3 = sregs->cr3; kvm_set_cr8(vcpu, sregs->cr8); @@ -4678,20 +4784,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) static void kvm_free_vcpus(struct kvm *kvm) { unsigned int i; + struct kvm_vcpu *vcpu; /* * Unpin any mmu pages first. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) - if (kvm->vcpus[i]) - kvm_unload_vcpu_mmu(kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (kvm->vcpus[i]) { - kvm_arch_vcpu_free(kvm->vcpus[i]); - kvm->vcpus[i] = NULL; - } - } + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_unload_vcpu_mmu(vcpu); + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_arch_vcpu_free(vcpu); + + mutex_lock(&kvm->lock); + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + atomic_set(&kvm->online_vcpus, 0); + mutex_unlock(&kvm->lock); } void kvm_arch_sync_events(struct kvm *kvm) @@ -4782,8 +4890,10 @@ void kvm_arch_flush_shadow(struct kvm *kvm) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED - || vcpu->arch.nmi_pending; + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || vcpu->arch.nmi_pending || + (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_has_interrupt(vcpu)); } void kvm_vcpu_kick(struct kvm_vcpu *vcpu) @@ -4807,3 +4917,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) { return kvm_x86_ops->interrupt_allowed(vcpu); } + +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);