#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
+#include <linux/slab.h>
#include <trace/events/kvm.h>
#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
unsigned long segment_base(u16 selector)
{
- struct descriptor_table gdt;
+ struct desc_ptr gdt;
struct desc_struct *d;
unsigned long table_base;
unsigned long v;
- if (selector == 0)
+ if (!(selector & ~3))
return 0;
- kvm_get_gdt(&gdt);
- table_base = gdt.base;
+ native_store_gdt(&gdt);
+ table_base = gdt.address;
if (selector & 4) { /* from ldt */
u16 ldt_selector = kvm_read_ldt();
+ if (!(ldt_selector & ~3))
+ return 0;
table_base = segment_base(ldt_selector);
}
d = (struct desc_struct *)(table_base + (selector & ~7));
#ifdef CONFIG_X86_64
if (cr0 & 0xffffffff00000000UL) {
- printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
- cr0, kvm_read_cr0(vcpu));
kvm_inject_gp(vcpu, 0);
return;
}
cr0 &= ~CR0_RESERVED_BITS;
if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
- printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
kvm_inject_gp(vcpu, 0);
return;
}
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
- printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
- "and a clear PE flag\n");
kvm_inject_gp(vcpu, 0);
return;
}
int cs_db, cs_l;
if (!is_pae(vcpu)) {
- printk(KERN_DEBUG "set_cr0: #GP, start paging "
- "in long mode while PAE is disabled\n");
kvm_inject_gp(vcpu, 0);
return;
}
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
if (cs_l) {
- printk(KERN_DEBUG "set_cr0: #GP, start paging "
- "in long mode while CS.L == 1\n");
kvm_inject_gp(vcpu, 0);
return;
} else
#endif
if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
- printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
- "reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
}
kvm_x86_ops->set_cr0(vcpu, cr0);
- vcpu->arch.cr0 = cr0;
kvm_mmu_reset_context(vcpu);
return;
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
if (cr4 & CR4_RESERVED_BITS) {
- printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE)) {
- printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
- "in long mode\n");
kvm_inject_gp(vcpu, 0);
return;
}
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
&& !load_pdptrs(vcpu, vcpu->arch.cr3)) {
- printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (cr4 & X86_CR4_VMXE) {
- printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (is_long_mode(vcpu)) {
if (cr3 & CR3_L_MODE_RESERVED_BITS) {
- printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
} else {
if (is_pae(vcpu)) {
if (cr3 & CR3_PAE_RESERVED_BITS) {
- printk(KERN_DEBUG
- "set_cr3: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
- printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
- "reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
if (cr8 & CR8_RESERVED_BITS) {
- printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
kvm_inject_gp(vcpu, 0);
return;
}
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits) {
- printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
- efer);
kvm_inject_gp(vcpu, 0);
return;
}
if (is_paging(vcpu)
&& (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
- printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
kvm_inject_gp(vcpu, 0);
return;
}
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
- printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
kvm_inject_gp(vcpu, 0);
return;
}
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
- printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
u32 offset = msr - MSR_IA32_MC0_CTL;
- /* only 0 or all 1s can be written to IA32_MCi_CTL */
+ /* only 0 or all 1s can be written to IA32_MCi_CTL
+ * some Linux kernels though clear bit 10 in bank 4 to
+ * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+ * this to avoid an uncatched #GP in the guest
+ */
if ((offset & 0x3) == 0 &&
- data != 0 && data != ~(u64)0)
+ data != 0 && (data | (1 << 10)) != ~(u64)0)
return -1;
vcpu->arch.mce_banks[offset] = data;
break;
break;
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
+ data &= ~(u64)0x100; /* ignore ignne emulation enable */
if (data != 0) {
pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
case KVM_CAP_HYPERV_VAPIC:
case KVM_CAP_HYPERV_SPIN:
case KVM_CAP_PCI_SEGMENT:
+ case KVM_CAP_DEBUGREGS:
+ case KVM_CAP_X86_ROBUST_SINGLESTEP:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
{
vcpu_load(vcpu);
- events->exception.injected = vcpu->arch.exception.pending;
+ events->exception.injected =
+ vcpu->arch.exception.pending &&
+ !kvm_exception_is_soft(vcpu->arch.exception.nr);
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
events->exception.error_code = vcpu->arch.exception.error_code;
- events->interrupt.injected = vcpu->arch.interrupt.pending;
+ events->interrupt.injected =
+ vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
- events->interrupt.soft = vcpu->arch.interrupt.soft;
+ events->interrupt.soft = 0;
+ events->interrupt.shadow =
+ kvm_x86_ops->get_interrupt_shadow(vcpu,
+ KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending;
events->sipi_vector = vcpu->arch.sipi_vector;
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
- | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
+ | KVM_VCPUEVENT_VALID_SIPI_VECTOR
+ | KVM_VCPUEVENT_VALID_SHADOW);
vcpu_put(vcpu);
}
struct kvm_vcpu_events *events)
{
if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
- | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
+ | KVM_VCPUEVENT_VALID_SIPI_VECTOR
+ | KVM_VCPUEVENT_VALID_SHADOW))
return -EINVAL;
vcpu_load(vcpu);
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
kvm_pic_clear_isr_ack(vcpu->kvm);
+ if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
+ kvm_x86_ops->set_interrupt_shadow(vcpu,
+ events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
return 0;
}
+static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *dbgregs)
+{
+ vcpu_load(vcpu);
+
+ memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
+ dbgregs->dr6 = vcpu->arch.dr6;
+ dbgregs->dr7 = vcpu->arch.dr7;
+ dbgregs->flags = 0;
+
+ vcpu_put(vcpu);
+}
+
+static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *dbgregs)
+{
+ if (dbgregs->flags)
+ return -EINVAL;
+
+ vcpu_load(vcpu);
+
+ memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+ vcpu->arch.dr6 = dbgregs->dr6;
+ vcpu->arch.dr7 = dbgregs->dr7;
+
+ vcpu_put(vcpu);
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
break;
}
+ case KVM_GET_DEBUGREGS: {
+ struct kvm_debugregs dbgregs;
+
+ kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &dbgregs,
+ sizeof(struct kvm_debugregs)))
+ break;
+ r = 0;
+ break;
+ }
+ case KVM_SET_DEBUGREGS: {
+ struct kvm_debugregs dbgregs;
+
+ r = -EFAULT;
+ if (copy_from_user(&dbgregs, argp,
+ sizeof(struct kvm_debugregs)))
+ break;
+
+ r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
+ break;
+ }
default:
r = -EINVAL;
}
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- spin_lock(&pic_irqchip(kvm)->lock);
+ raw_spin_lock(&pic_irqchip(kvm)->lock);
memcpy(&pic_irqchip(kvm)->pics[0],
&chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ raw_spin_unlock(&pic_irqchip(kvm)->lock);
break;
case KVM_IRQCHIP_PIC_SLAVE:
- spin_lock(&pic_irqchip(kvm)->lock);
+ raw_spin_lock(&pic_irqchip(kvm)->lock);
memcpy(&pic_irqchip(kvm)->pics[1],
&chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ raw_spin_unlock(&pic_irqchip(kvm)->lock);
break;
case KVM_IRQCHIP_IOAPIC:
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- int r, n, i;
+ int r, i;
struct kvm_memory_slot *memslot;
+ unsigned long n;
unsigned long is_dirty = 0;
unsigned long *dirty_bitmap = NULL;
if (!memslot->dirty_bitmap)
goto out;
- n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+ n = kvm_dirty_bitmap_bytes(memslot);
r = -ENOMEM;
dirty_bitmap = vmalloc(n);
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
vcpu->arch.emulate_ctxt.vcpu = vcpu;
- vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
+ vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
vcpu->arch.emulate_ctxt.mode =
(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
if (vcpu->arch.pio.string)
return EMULATE_DO_MMIO;
- if ((r || vcpu->mmio_is_write) && run) {
+ if (r || vcpu->mmio_is_write) {
run->exit_reason = KVM_EXIT_MMIO;
run->mmio.phys_addr = vcpu->mmio_phys_addr;
memcpy(run->mmio.data, vcpu->mmio_data, 8);
return EMULATE_DO_MMIO;
}
- kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
{
unsigned long val;
+ trace_kvm_pio(!in, port, size, 1);
+
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->arch.pio.down = 0;
vcpu->arch.pio.rep = 0;
- trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
- size, 1);
-
if (!vcpu->arch.pio.in) {
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
unsigned now, in_page;
int ret = 0;
+ trace_kvm_pio(!in, port, size, count);
+
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->arch.pio.down = down;
vcpu->arch.pio.rep = rep;
- trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
- size, count);
-
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
- struct descriptor_table dt = { limit, base };
+ struct desc_ptr dt = { limit, base };
kvm_x86_ops->set_gdt(vcpu, &dt);
}
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
- struct descriptor_table dt = { limit, base };
+ struct desc_ptr dt = { limit, base };
kvm_x86_ops->set_idt(vcpu, &dt);
}
kvm_set_cr8(vcpu, kvm_run->cr8);
if (vcpu->arch.pio.cur_count) {
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = complete_pio(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r)
goto out;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- struct descriptor_table dt;
+ struct desc_ptr dt;
vcpu_load(vcpu);
kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
kvm_x86_ops->get_idt(vcpu, &dt);
- sregs->idt.limit = dt.limit;
- sregs->idt.base = dt.base;
+ sregs->idt.limit = dt.size;
+ sregs->idt.base = dt.address;
kvm_x86_ops->get_gdt(vcpu, &dt);
- sregs->gdt.limit = dt.limit;
- sregs->gdt.base = dt.base;
+ sregs->gdt.limit = dt.size;
+ sregs->gdt.base = dt.address;
sregs->cr0 = kvm_read_cr0(vcpu);
sregs->cr2 = vcpu->arch.cr2;
static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
u16 selector,
- struct descriptor_table *dtable)
+ struct desc_ptr *dtable)
{
if (selector & 1 << 2) {
struct kvm_segment kvm_seg;
kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
if (kvm_seg.unusable)
- dtable->limit = 0;
+ dtable->size = 0;
else
- dtable->limit = kvm_seg.limit;
- dtable->base = kvm_seg.base;
+ dtable->size = kvm_seg.limit;
+ dtable->address = kvm_seg.base;
}
else
kvm_x86_ops->get_gdt(vcpu, dtable);
static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct desc_struct *seg_desc)
{
- struct descriptor_table dtable;
+ struct desc_ptr dtable;
u16 index = selector >> 3;
+ int ret;
+ u32 err;
+ gva_t addr;
get_segment_descriptor_dtable(vcpu, selector, &dtable);
- if (dtable.limit < index * 8 + 7) {
+ if (dtable.size < index * 8 + 7) {
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
return X86EMUL_PROPAGATE_FAULT;
}
- return kvm_read_guest_virt_system(dtable.base + index*8,
- seg_desc, sizeof(*seg_desc),
- vcpu, NULL);
+ addr = dtable.base + index * 8;
+ ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
+ vcpu, &err);
+ if (ret == X86EMUL_PROPAGATE_FAULT)
+ kvm_inject_page_fault(vcpu, addr, err);
+
+ return ret;
}
/* allowed just for 8 bytes segments */
static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct desc_struct *seg_desc)
{
- struct descriptor_table dtable;
+ struct desc_ptr dtable;
u16 index = selector >> 3;
get_segment_descriptor_dtable(vcpu, selector, &dtable);
- if (dtable.limit < index * 8 + 7)
+ if (dtable.size < index * 8 + 7)
return 1;
- return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
+ return kvm_write_guest_virt(dtable.address + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
}
static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
.unusable = 0,
};
kvm_x86_ops->set_segment(vcpu, &segvar, seg);
- return 0;
+ return X86EMUL_CONTINUE;
}
static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
(kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
}
-static void kvm_check_segment_descriptor(struct kvm_vcpu *vcpu, int seg,
- u16 selector)
-{
- /* NULL selector is not valid for CS and SS */
- if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
- if (!selector)
- kvm_queue_exception_e(vcpu, TS_VECTOR, selector >> 3);
-}
-
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
- int type_bits, int seg)
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
{
struct kvm_segment kvm_seg;
struct desc_struct seg_desc;
+ u8 dpl, rpl, cpl;
+ unsigned err_vec = GP_VECTOR;
+ u32 err_code = 0;
+ bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
+ int ret;
if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
return kvm_load_realmode_segment(vcpu, selector, seg);
- if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
- return 1;
+ /* NULL selector is not valid for TR, CS and SS */
+ if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
+ && null_selector)
+ goto exception;
+
+ /* TR should be in GDT only */
+ if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
+ goto exception;
+
+ ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
+ if (ret)
+ return ret;
+
seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
- kvm_check_segment_descriptor(vcpu, seg, selector);
- kvm_seg.type |= type_bits;
+ if (null_selector) { /* for NULL selector skip all following checks */
+ kvm_seg.unusable = 1;
+ goto load;
+ }
+
+ err_code = selector & 0xfffc;
+ err_vec = GP_VECTOR;
- if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
- seg != VCPU_SREG_LDTR)
- if (!kvm_seg.s)
- kvm_seg.unusable = 1;
+ /* can't load system descriptor into segment selecor */
+ if (seg <= VCPU_SREG_GS && !kvm_seg.s)
+ goto exception;
- kvm_set_segment(vcpu, &kvm_seg, seg);
- if (selector && !kvm_seg.unusable && kvm_seg.s) {
+ if (!kvm_seg.present) {
+ err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
+ goto exception;
+ }
+
+ rpl = selector & 3;
+ dpl = kvm_seg.dpl;
+ cpl = kvm_x86_ops->get_cpl(vcpu);
+
+ switch (seg) {
+ case VCPU_SREG_SS:
+ /*
+ * segment is not a writable data segment or segment
+ * selector's RPL != CPL or segment selector's RPL != CPL
+ */
+ if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
+ goto exception;
+ break;
+ case VCPU_SREG_CS:
+ if (!(kvm_seg.type & 8))
+ goto exception;
+
+ if (kvm_seg.type & 4) {
+ /* conforming */
+ if (dpl > cpl)
+ goto exception;
+ } else {
+ /* nonconforming */
+ if (rpl > cpl || dpl != cpl)
+ goto exception;
+ }
+ /* CS(RPL) <- CPL */
+ selector = (selector & 0xfffc) | cpl;
+ break;
+ case VCPU_SREG_TR:
+ if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
+ goto exception;
+ break;
+ case VCPU_SREG_LDTR:
+ if (kvm_seg.s || kvm_seg.type != 2)
+ goto exception;
+ break;
+ default: /* DS, ES, FS, or GS */
+ /*
+ * segment is not a data or readable code segment or
+ * ((segment is a data or nonconforming code segment)
+ * and (both RPL and CPL > DPL))
+ */
+ if ((kvm_seg.type & 0xa) == 0x8 ||
+ (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
+ goto exception;
+ break;
+ }
+
+ if (!kvm_seg.unusable && kvm_seg.s) {
/* mark segment as accessed */
+ kvm_seg.type |= 1;
seg_desc.type |= 1;
save_guest_segment_descriptor(vcpu, selector, &seg_desc);
}
- return 0;
+load:
+ kvm_set_segment(vcpu, &kvm_seg, seg);
+ return X86EMUL_CONTINUE;
+exception:
+ kvm_queue_exception_e(vcpu, err_vec, err_code);
+ return X86EMUL_PROPAGATE_FAULT;
}
static void save_state_to_tss32(struct kvm_vcpu *vcpu,
tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
}
+static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
+{
+ struct kvm_segment kvm_seg;
+ kvm_get_segment(vcpu, &kvm_seg, seg);
+ kvm_seg.selector = sel;
+ kvm_set_segment(vcpu, &kvm_seg, seg);
+}
+
static int load_state_from_tss32(struct kvm_vcpu *vcpu,
struct tss_segment_32 *tss)
{
kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
- if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+ /*
+ * SDM says that segment selectors are loaded before segment
+ * descriptors
+ */
+ kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
+ kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+ kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+ kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+ kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+ kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
+ kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
+
+ /*
+ * Now load segment descriptors. If fault happenes at this stage
+ * it is handled in a context of new task
+ */
+ if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+ if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+ if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+ if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+ if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
return 1;
return 0;
}
kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
- if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+ /*
+ * SDM says that segment selectors are loaded before segment
+ * descriptors
+ */
+ kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
+ kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+ kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+ kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+ kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+
+ /*
+ * Now load segment descriptors. If fault happenes at this stage
+ * it is handled in a context of new task
+ */
+ if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+ if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+ if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
return 1;
return 0;
}
int ret = 0;
u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
+ u32 desc_limit;
old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
}
}
- if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
+ desc_limit = get_desc_limit(&nseg_desc);
+ if (!nseg_desc.p ||
+ ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
+ desc_limit < 0x2b)) {
kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
return 1;
}
{
int mmu_reset_needed = 0;
int pending_vec, max_bits;
- struct descriptor_table dt;
+ struct desc_ptr dt;
vcpu_load(vcpu);
- dt.limit = sregs->idt.limit;
- dt.base = sregs->idt.base;
+ dt.size = sregs->idt.limit;
+ dt.address = sregs->idt.base;
kvm_x86_ops->set_idt(vcpu, &dt);
- dt.limit = sregs->gdt.limit;
- dt.base = sregs->gdt.base;
+ dt.size = sregs->gdt.limit;
+ dt.address = sregs->gdt.base;
kvm_x86_ops->set_gdt(vcpu, &dt);
vcpu->arch.cr2 = sregs->cr2;
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
}
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- vcpu->arch.singlestep_cs =
- get_segment_selector(vcpu, VCPU_SREG_CS);
- vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
+ get_segment_base(vcpu, VCPU_SREG_CS);
/*
* Trigger an rflags update that will inject or remove the trace
return kvm_x86_ops->interrupt_allowed(vcpu);
}
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+{
+ unsigned long current_rip = kvm_rip_read(vcpu) +
+ get_segment_base(vcpu, VCPU_SREG_CS);
+
+ return current_rip == linear_rip;
+}
+EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
+
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags;
rflags = kvm_x86_ops->get_rflags(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ rflags &= ~X86_EFLAGS_TF;
return rflags;
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
- vcpu->arch.singlestep_cs ==
- get_segment_selector(vcpu, VCPU_SREG_CS) &&
- vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
- rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
+ rflags |= X86_EFLAGS_TF;
kvm_x86_ops->set_rflags(vcpu, rflags);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);