#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
+#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/uaccess.h>
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
+ { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
u32 error_code)
{
++vcpu->stat.pf_guest;
+
if (vcpu->arch.exception.pending) {
if (vcpu->arch.exception.nr == PF_VECTOR) {
printk(KERN_DEBUG "kvm: inject_page_fault:"
kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
return;
}
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE);
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS,
+ MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
return;
}
+ if (efer & EFER_SVME) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
kvm_x86_ops->set_efer(vcpu, efer);
efer &= ~EFER_LMA;
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
- case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
- case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
r = !tdp_enabled;
break;
case KVM_CAP_IOMMU:
- r = intel_iommu_found();
+ r = iommu_found();
+ break;
+ case KVM_CAP_CLOCKSOURCE:
+ r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
break;
default:
r = 0;
return r;
}
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
const u32 kvm_supported_word3_x86_features =
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
const u32 kvm_supported_word6_x86_features =
- bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+ bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+ bit(X86_FEATURE_SVM);
/* all func 2 cpuid_count() should be called on the same cpu */
get_cpu();
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times && *nent < maxnent; ++t) {
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; *nent < maxnent; ++i) {
- level_type = entry[i - 1].ecx & 0xff;
+ level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
- kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+ kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
- kvm_x86_ops->skip_emulated_instruction(vcpu);
-
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0);
+ PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
return 0;
out:
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
- for (j = i + 1; j == i; j = (j + 1) % nent) {
+ for (j = i + 1; ; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
return 1;
}
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
int i;
- u32 function, index;
- struct kvm_cpuid_entry2 *e, *best;
+ struct kvm_cpuid_entry2 *best = NULL;
- function = kvm_register_read(vcpu, VCPU_REGS_RAX);
- index = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
- best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+ struct kvm_cpuid_entry2 *e;
+
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
if (!best || e->function > best->function)
best = e;
}
+
+ return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+ u32 function, index;
+ struct kvm_cpuid_entry2 *best;
+
+ function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+ best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}
-/*
- * Check if userspace requested a NMI window, and that the NMI window
- * is open.
- *
- * No need to exit to userspace if we already have a NMI queued.
- */
-static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
-{
- return (!vcpu->arch.nmi_pending &&
- kvm_run->request_nmi_window &&
- vcpu->arch.nmi_window_open);
-}
-
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (irqchip_in_kernel(vcpu->kvm)) {
+ if (irqchip_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
- kvm_run->ready_for_nmi_injection = 1;
- } else {
+ else
kvm_run->ready_for_interrupt_injection =
(vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary == 0);
- kvm_run->ready_for_nmi_injection =
- (vcpu->arch.nmi_window_open &&
- vcpu->arch.nmi_pending == 0);
- }
}
static void vapic_enter(struct kvm_vcpu *vcpu)
goto out;
}
- if (vcpu->guest_debug.enabled)
- kvm_x86_ops->guest_debug_pre(vcpu);
-
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
kvm_guest_enter();
+ get_debugreg(vcpu->arch.host_dr6, 6);
+ get_debugreg(vcpu->arch.host_dr7, 7);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ get_debugreg(vcpu->arch.host_db[0], 0);
+ get_debugreg(vcpu->arch.host_db[1], 1);
+ get_debugreg(vcpu->arch.host_db[2], 2);
+ get_debugreg(vcpu->arch.host_db[3], 3);
+
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.eff_db[0], 0);
+ set_debugreg(vcpu->arch.eff_db[1], 1);
+ set_debugreg(vcpu->arch.eff_db[2], 2);
+ set_debugreg(vcpu->arch.eff_db[3], 3);
+ }
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.host_db[0], 0);
+ set_debugreg(vcpu->arch.host_db[1], 1);
+ set_debugreg(vcpu->arch.host_db[2], 2);
+ set_debugreg(vcpu->arch.host_db[3], 3);
+ }
+ set_debugreg(vcpu->arch.host_dr6, 6);
+ set_debugreg(vcpu->arch.host_dr7, 7);
+
vcpu->guest_mode = 0;
local_irq_enable();
}
if (r > 0) {
- if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
- r = -EINTR;
- kvm_run->exit_reason = KVM_EXIT_NMI;
- ++vcpu->stat.request_nmi_exits;
- }
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
/*
* Don't leak debug flags in case they were set for guest debugging
*/
- if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
kvm_desct->padding = 0;
}
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
- u16 selector,
- struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+ u16 selector,
+ struct descriptor_table *dtable)
{
if (selector & 1 << 2) {
struct kvm_segment kvm_seg;
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descritptor_dtable(vcpu, selector, &dtable);
+ get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7) {
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descritptor_dtable(vcpu, selector, &dtable);
+ get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7)
return 1;
return 0;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int r;
+ int i, r;
vcpu_load(vcpu);
+ if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < KVM_NR_DB_REGS; ++i)
+ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+ vcpu->arch.switch_db_regs =
+ (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+ } else {
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+ }
+
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+ if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+ kvm_queue_exception(vcpu, BP_VECTOR);
+
vcpu_put(vcpu);
return r;
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
+ vcpu->arch.switch_db_regs = 0;
+ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+ vcpu->arch.dr6 = DR6_FIXED_1;
+ vcpu->arch.dr7 = DR7_FIXED_1;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+ rdtscll(kvm->arch.vm_init_tsc);
+
return kvm;
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+ kvm_free_all_assigned_devices(kvm);
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
- kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);