#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
-#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
+#include <linux/iommu.h>
+#include <linux/intel-iommu.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
+#include <asm/mtrr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
+ { "request_nmi", VCPU_STAT(request_nmi_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
{ "efer_reload", VCPU_STAT(efer_reload) },
{ "fpu_reload", VCPU_STAT(fpu_reload) },
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
+ { "irq_injections", VCPU_STAT(irq_injections) },
+ { "nmi_injections", VCPU_STAT(nmi_injections) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
{ "mmu_flooded", VM_STAT(mmu_flooded) },
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
+ { "mmu_unsync", VM_STAT(mmu_unsync) },
+ { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
};
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
- int assigned_dev_id)
-{
- struct list_head *ptr;
- struct kvm_assigned_dev_kernel *match;
-
- list_for_each(ptr, head) {
- match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
- if (match->assigned_dev_id == assigned_dev_id)
- return match;
- }
- return NULL;
-}
-
-static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
-{
- struct kvm_assigned_dev_kernel *assigned_dev;
-
- assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
- interrupt_work);
-
- /* This is taken to safely inject irq inside the guest. When
- * the interrupt injection (or the ioapic code) uses a
- * finer-grained lock, update this
- */
- mutex_lock(&assigned_dev->kvm->lock);
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->guest_irq, 1);
- mutex_unlock(&assigned_dev->kvm->lock);
- kvm_put_kvm(assigned_dev->kvm);
-}
-
-/* FIXME: Implement the OR logic needed to make shared interrupts on
- * this line behave properly
- */
-static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev =
- (struct kvm_assigned_dev_kernel *) dev_id;
-
- kvm_get_kvm(assigned_dev->kvm);
- schedule_work(&assigned_dev->interrupt_work);
- disable_irq_nosync(irq);
- return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_assigned_dev_kernel *dev;
-
- if (kian->gsi == -1)
- return;
-
- dev = container_of(kian, struct kvm_assigned_dev_kernel,
- ack_notifier);
- kvm_set_irq(dev->kvm, dev->guest_irq, 0);
- enable_irq(dev->host_irq);
-}
-
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
- struct kvm_assigned_irq
- *assigned_irq)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match) {
- mutex_unlock(&kvm->lock);
- return -EINVAL;
- }
-
- if (match->irq_requested) {
- match->guest_irq = assigned_irq->guest_irq;
- match->ack_notifier.gsi = assigned_irq->guest_irq;
- mutex_unlock(&kvm->lock);
- return 0;
- }
-
- INIT_WORK(&match->interrupt_work,
- kvm_assigned_dev_interrupt_work_handler);
-
- if (irqchip_in_kernel(kvm)) {
- if (!capable(CAP_SYS_RAWIO)) {
- return -EPERM;
- goto out;
- }
-
- if (assigned_irq->host_irq)
- match->host_irq = assigned_irq->host_irq;
- else
- match->host_irq = match->dev->irq;
- match->guest_irq = assigned_irq->guest_irq;
- match->ack_notifier.gsi = assigned_irq->guest_irq;
- match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
- kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
-
- /* Even though this is PCI, we don't want to use shared
- * interrupts. Sharing host devices with guest-assigned devices
- * on the same interrupt line is not a happy situation: there
- * are going to be long delays in accepting, acking, etc.
- */
- if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
- "kvm_assigned_device", (void *)match)) {
- printk(KERN_INFO "%s: couldn't allocate irq for pv "
- "device\n", __func__);
- r = -EIO;
- goto out;
- }
- }
-
- match->irq_requested = true;
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
- struct pci_dev *dev;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (match) {
- /* device already assigned */
- r = -EINVAL;
- goto out;
- }
-
- match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
- if (match == NULL) {
- printk(KERN_INFO "%s: Couldn't allocate memory\n",
- __func__);
- r = -ENOMEM;
- goto out;
- }
- dev = pci_get_bus_and_slot(assigned_dev->busnr,
- assigned_dev->devfn);
- if (!dev) {
- printk(KERN_INFO "%s: host device not found\n", __func__);
- r = -EINVAL;
- goto out_free;
- }
- if (pci_enable_device(dev)) {
- printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
- r = -EBUSY;
- goto out_put;
- }
- r = pci_request_regions(dev, "kvm_assigned_device");
- if (r) {
- printk(KERN_INFO "%s: Could not get access to device regions\n",
- __func__);
- goto out_disable;
- }
- match->assigned_dev_id = assigned_dev->assigned_dev_id;
- match->host_busnr = assigned_dev->busnr;
- match->host_devfn = assigned_dev->devfn;
- match->dev = dev;
-
- match->kvm = kvm;
-
- list_add(&match->list, &kvm->arch.assigned_dev_head);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-out_disable:
- pci_disable_device(dev);
-out_put:
- pci_dev_put(dev);
-out_free:
- kfree(match);
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static void kvm_free_assigned_devices(struct kvm *kvm)
-{
- struct list_head *ptr, *ptr2;
- struct kvm_assigned_dev_kernel *assigned_dev;
-
- list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
- assigned_dev = list_entry(ptr,
- struct kvm_assigned_dev_kernel,
- list);
-
- if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) {
- free_irq(assigned_dev->host_irq,
- (void *)assigned_dev);
-
- kvm_unregister_irq_ack_notifier(kvm,
- &assigned_dev->
- ack_notifier);
- }
-
- if (cancel_work_sync(&assigned_dev->interrupt_work))
- /* We had pending work. That means we will have to take
- * care of kvm_put_kvm.
- */
- kvm_put_kvm(kvm);
-
- pci_release_regions(assigned_dev->dev);
- pci_disable_device(assigned_dev->dev);
- pci_dev_put(assigned_dev->dev);
-
- list_del(&assigned_dev->list);
- kfree(assigned_dev);
- }
-}
-
unsigned long segment_base(u16 selector)
{
struct descriptor_table gdt;
u32 error_code)
{
++vcpu->stat.pf_guest;
+
if (vcpu->arch.exception.pending) {
if (vcpu->arch.exception.nr == PF_VECTOR) {
printk(KERN_DEBUG "kvm: inject_page_fault:"
kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
return;
}
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE);
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
+ kvm_mmu_sync_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
return;
}
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS,
+ MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
return;
}
+ if (efer & EFER_SVME) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
kvm_x86_ops->set_efer(vcpu, efer);
efer &= ~EFER_LMA;
hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
- __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+ __func__, tsc_khz, hv_clock->tsc_shift,
hv_clock->tsc_to_system_mul);
}
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
+ u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
if (!msr_mtrr_valid(msr))
return 1;
- vcpu->arch.mtrr[msr - 0x200] = data;
+ if (msr == MSR_MTRRdefType) {
+ vcpu->arch.mtrr_state.def_type = data;
+ vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+ } else if (msr == MSR_MTRRfix64K_00000)
+ p[0] = data;
+ else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+ p[1 + msr - MSR_MTRRfix16K_80000] = data;
+ else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+ p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+ else if (msr == MSR_IA32_CR_PAT)
+ vcpu->arch.pat = data;
+ else { /* Variable MTRRs */
+ int idx, is_mtrr_mask;
+ u64 *pt;
+
+ idx = (msr - 0x200) / 2;
+ is_mtrr_mask = msr - 0x200 - 2 * idx;
+ if (!is_mtrr_mask)
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+ else
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+ *pt = data;
+ }
+
+ kvm_mmu_reset_context(vcpu);
return 0;
}
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
- down_read(¤t->mm->mmap_sem);
vcpu->arch.time_page =
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
- up_read(¤t->mm->mmap_sem);
if (is_error_page(vcpu->arch.time_page)) {
kvm_release_page_clean(vcpu->arch.time_page);
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
+ u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
if (!msr_mtrr_valid(msr))
return 1;
- *pdata = vcpu->arch.mtrr[msr - 0x200];
+ if (msr == MSR_MTRRdefType)
+ *pdata = vcpu->arch.mtrr_state.def_type +
+ (vcpu->arch.mtrr_state.enabled << 10);
+ else if (msr == MSR_MTRRfix64K_00000)
+ *pdata = p[0];
+ else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+ *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+ else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+ *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+ else if (msr == MSR_IA32_CR_PAT)
+ *pdata = vcpu->arch.pat;
+ else { /* Variable MTRRs */
+ int idx, is_mtrr_mask;
+ u64 *pt;
+
+ idx = (msr - 0x200) / 2;
+ is_mtrr_mask = msr - 0x200 - 2 * idx;
+ if (!is_mtrr_mask)
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+ else
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+ *pdata = *pt;
+ }
+
return 0;
}
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
- case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
- case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_PV_MMU:
r = !tdp_enabled;
break;
+ case KVM_CAP_IOMMU:
+ r = iommu_found();
+ break;
+ case KVM_CAP_CLOCKSOURCE:
+ r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
+ break;
default:
r = 0;
break;
return r;
}
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
const u32 kvm_supported_word3_x86_features =
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
const u32 kvm_supported_word6_x86_features =
- bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+ bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+ bit(X86_FEATURE_SVM);
/* all func 2 cpuid_count() should be called on the same cpu */
get_cpu();
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times && *nent < maxnent; ++t) {
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; *nent < maxnent; ++i) {
- level_type = entry[i - 1].ecx & 0xff;
+ level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
return 0;
}
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+ vcpu_load(vcpu);
+ kvm_inject_nmi(vcpu);
+ vcpu_put(vcpu);
+
+ return 0;
+}
+
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
r = 0;
break;
}
+ case KVM_NMI: {
+ r = kvm_vcpu_ioctl_nmi(vcpu);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
goto out;
if (irqchip_in_kernel(kvm)) {
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, irq_event.irq, irq_event.level);
+ kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
r = 0;
}
goto out;
break;
}
- case KVM_ASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
case KVM_GET_PIT: {
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
- kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+ kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
val = *(u64 *)new;
- down_read(¤t->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
- up_read(¤t->mm->mmap_sem);
kaddr = kmap_atomic(page, KM_USER0);
set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
+ kvm_mmu_invlpg(vcpu, address);
return X86EMUL_CONTINUE;
}
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
- kvm_x86_ops->skip_emulated_instruction(vcpu);
-
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0);
+ PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
return 0;
out:
KVMTRACE_0D(HLT, vcpu, handler);
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
- up_read(&vcpu->kvm->slots_lock);
- kvm_vcpu_block(vcpu);
- down_read(&vcpu->kvm->slots_lock);
- if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
- return -EINTR;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
- for (j = i + 1; j == i; j = (j + 1) % nent) {
+ for (j = i + 1; ; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
return 1;
}
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
int i;
- u32 function, index;
- struct kvm_cpuid_entry2 *e, *best;
+ struct kvm_cpuid_entry2 *best = NULL;
- function = kvm_register_read(vcpu, VCPU_REGS_RAX);
- index = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
- best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+ struct kvm_cpuid_entry2 *e;
+
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
if (!best || e->function > best->function)
best = e;
}
+
+ return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+ u32 function, index;
+ struct kvm_cpuid_entry2 *best;
+
+ function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+ best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
if (!apic || !apic->vapic_addr)
return;
- down_read(¤t->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
- up_read(¤t->mm->mmap_sem);
vcpu->arch.apic->vapic_page = page;
}
up_read(&vcpu->kvm->slots_lock);
}
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
- if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
- pr_debug("vcpu %d received sipi with vector # %x\n",
- vcpu->vcpu_id, vcpu->arch.sipi_vector);
- kvm_lapic_reset(vcpu);
- r = kvm_x86_ops->vcpu_reset(vcpu);
- if (r)
- return r;
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
- }
-
- down_read(&vcpu->kvm->slots_lock);
- vapic_enter(vcpu);
-
-again:
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
kvm_mmu_unload(vcpu);
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
+ kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
kvm_x86_ops->tlb_flush(vcpu);
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
local_irq_disable();
- if (vcpu->requests || need_resched()) {
+ if (vcpu->requests || need_resched() || signal_pending(current)) {
local_irq_enable();
preempt_enable();
r = 1;
goto out;
}
- if (signal_pending(current)) {
- local_irq_enable();
- preempt_enable();
- r = -EINTR;
- kvm_run->exit_reason = KVM_EXIT_INTR;
- ++vcpu->stat.signal_exits;
- goto out;
- }
-
- if (vcpu->guest_debug.enabled)
- kvm_x86_ops->guest_debug_pre(vcpu);
-
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
kvm_guest_enter();
+ get_debugreg(vcpu->arch.host_dr6, 6);
+ get_debugreg(vcpu->arch.host_dr7, 7);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ get_debugreg(vcpu->arch.host_db[0], 0);
+ get_debugreg(vcpu->arch.host_db[1], 1);
+ get_debugreg(vcpu->arch.host_db[2], 2);
+ get_debugreg(vcpu->arch.host_db[3], 3);
+
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.eff_db[0], 0);
+ set_debugreg(vcpu->arch.eff_db[1], 1);
+ set_debugreg(vcpu->arch.eff_db[2], 2);
+ set_debugreg(vcpu->arch.eff_db[3], 3);
+ }
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.host_db[0], 0);
+ set_debugreg(vcpu->arch.host_db[1], 1);
+ set_debugreg(vcpu->arch.host_db[2], 2);
+ set_debugreg(vcpu->arch.host_db[3], 3);
+ }
+ set_debugreg(vcpu->arch.host_dr6, 6);
+ set_debugreg(vcpu->arch.host_dr7, 7);
+
vcpu->guest_mode = 0;
local_irq_enable();
kvm_lapic_sync_from_vapic(vcpu);
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+out:
+ return r;
+}
- if (r > 0) {
- if (dm_request_for_irq_injection(vcpu, kvm_run)) {
- r = -EINTR;
- kvm_run->exit_reason = KVM_EXIT_INTR;
- ++vcpu->stat.request_irq_exits;
- goto out;
- }
- if (!need_resched())
- goto again;
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ int r;
+
+ if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
+ pr_debug("vcpu %d received sipi with vector # %x\n",
+ vcpu->vcpu_id, vcpu->arch.sipi_vector);
+ kvm_lapic_reset(vcpu);
+ r = kvm_arch_vcpu_reset(vcpu);
+ if (r)
+ return r;
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
-out:
- up_read(&vcpu->kvm->slots_lock);
- if (r > 0) {
- kvm_resched(vcpu);
- down_read(&vcpu->kvm->slots_lock);
- goto again;
+ down_read(&vcpu->kvm->slots_lock);
+ vapic_enter(vcpu);
+
+ r = 1;
+ while (r > 0) {
+ if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+ r = vcpu_enter_guest(vcpu, kvm_run);
+ else {
+ up_read(&vcpu->kvm->slots_lock);
+ kvm_vcpu_block(vcpu);
+ down_read(&vcpu->kvm->slots_lock);
+ if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+ vcpu->arch.mp_state =
+ KVM_MP_STATE_RUNNABLE;
+ if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+ r = -EINTR;
+ }
+
+ if (r > 0) {
+ if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+ r = -EINTR;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ ++vcpu->stat.request_irq_exits;
+ }
+ if (signal_pending(current)) {
+ r = -EINTR;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ ++vcpu->stat.signal_exits;
+ }
+ if (need_resched()) {
+ up_read(&vcpu->kvm->slots_lock);
+ kvm_resched(vcpu);
+ down_read(&vcpu->kvm->slots_lock);
+ }
+ }
}
+ up_read(&vcpu->kvm->slots_lock);
post_kvm_run_save(vcpu, kvm_run);
vapic_exit(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
r = -EAGAIN;
goto out;
}
/*
* Don't leak debug flags in case they were set for guest debugging
*/
- if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
kvm_desct->padding = 0;
}
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
- u16 selector,
- struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+ u16 selector,
+ struct descriptor_table *dtable)
{
if (selector & 1 << 2) {
struct kvm_segment kvm_seg;
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descritptor_dtable(vcpu, selector, &dtable);
+ get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7) {
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descritptor_dtable(vcpu, selector, &dtable);
+ get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7)
return 1;
return 0;
}
-int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
+static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
{
struct kvm_segment segvar = {
.base = selector << 4,
pr_debug("Set back pending irq %d\n",
pending_vec);
}
+ kvm_pic_clear_isr_ack(vcpu->kvm);
}
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
+ /* Older userspace won't unhalt the vcpu on reset. */
+ if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+ sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
+ !(vcpu->arch.cr0 & X86_CR0_PE))
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
vcpu_put(vcpu);
return 0;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int r;
+ int i, r;
vcpu_load(vcpu);
+ if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < KVM_NR_DB_REGS; ++i)
+ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+ vcpu->arch.switch_db_regs =
+ (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+ } else {
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+ }
+
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+ if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+ kvm_queue_exception(vcpu, BP_VECTOR);
+
vcpu_put(vcpu);
return r;
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
+ vcpu->arch.mtrr_state.have_fixed = 1;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = false;
+
+ vcpu->arch.switch_db_regs = 0;
+ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+ vcpu->arch.dr6 = DR6_FIXED_1;
+ vcpu->arch.dr7 = DR7_FIXED_1;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
+ rdtscll(kvm->arch.vm_init_tsc);
+
return kvm;
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+ kvm_free_all_assigned_devices(kvm);
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- kvm_free_assigned_devices(kvm);
+ kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
- || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
+ || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+ || vcpu->arch.nmi_pending;
}
static void vcpu_kick_intr(void *info)