KVM: x86 emulator: Check IOPL level during io instruction emulation
[safe/jmp/linux-2.6] / arch / x86 / kvm / emulate.c
index d226dff..296e851 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/module.h>
 #include <asm/kvm_emulate.h>
 
-#include "mmu.h"               /* for is_long_mode() */
+#include "x86.h"
 
 /*
  * Opcode effective-address decode tables.
@@ -88,6 +88,7 @@
 enum {
        Group1_80, Group1_81, Group1_82, Group1_83,
        Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
+       Group8, Group9,
 };
 
 static u32 opcode_table[256] = {
@@ -267,11 +268,12 @@ static u32 twobyte_table[256] = {
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
            DstReg | SrcMem16 | ModRM | Mov,
        /* 0xB8 - 0xBF */
-       0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
+       0, 0, Group | Group8, DstMem | SrcReg | ModRM | BitOp,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
            DstReg | SrcMem16 | ModRM | Mov,
        /* 0xC0 - 0xCF */
-       0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+       0, 0, 0, DstMem | SrcReg | ModRM | Mov,
+       0, 0, 0, Group | GroupDual | Group9,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xD0 - 0xDF */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -323,6 +325,12 @@ static u32 group_table[] = {
        0, 0, ModRM | SrcMem, ModRM | SrcMem,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
+       [Group8*8] =
+       0, 0, 0, 0,
+       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+       [Group9*8] =
+       0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0,
 };
 
 static u32 group2_table[] = {
@@ -330,6 +338,8 @@ static u32 group2_table[] = {
        SrcNone | ModRM, 0, 0, SrcNone | ModRM,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov, 0,
+       [Group9*8] =
+       0, 0, 0, 0, 0, 0, 0, 0,
 };
 
 /* EFLAGS bit definitions. */
@@ -606,7 +616,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 
        if (linear < fc->start || linear >= fc->end) {
                size = min(15UL, PAGE_SIZE - offset_in_page(linear));
-               rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+               rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
                if (rc)
                        return rc;
                fc->start = linear;
@@ -622,6 +632,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
 {
        int rc = 0;
 
+       /* x86 instructions are limited to 15 bytes. */
+       if (eip + size - ctxt->decode.eip_orig > 15)
+               return X86EMUL_UNHANDLEABLE;
        eip += ctxt->cs_base;
        while (size--) {
                rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
@@ -658,11 +671,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                op_bytes = 3;
        *address = 0;
        rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
-                          ctxt->vcpu);
+                          ctxt->vcpu, NULL);
        if (rc)
                return rc;
        rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
-                          ctxt->vcpu);
+                          ctxt->vcpu, NULL);
        return rc;
 }
 
@@ -880,12 +893,13 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        /* Shadow copy of register state. Committed on successful emulation. */
 
        memset(c, 0, sizeof(struct decode_cache));
-       c->eip = kvm_rip_read(ctxt->vcpu);
+       c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
        ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
        memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
 
        switch (mode) {
        case X86EMUL_MODE_REAL:
+       case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
                def_op_bytes = def_ad_bytes = 2;
                break;
@@ -1193,7 +1207,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
        rc = ops->read_emulated(register_address(c, ss_base(ctxt),
                                                 c->regs[VCPU_REGS_RSP]),
                                dest, len, ctxt->vcpu);
-       if (rc != 0)
+       if (rc != X86EMUL_CONTINUE)
                return rc;
 
        register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
@@ -1367,7 +1381,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
        int rc;
 
        rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
-       if (rc != 0)
+       if (rc != X86EMUL_CONTINUE)
                return rc;
 
        if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
@@ -1382,7 +1396,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
                       (u32) c->regs[VCPU_REGS_RBX];
 
                rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
-               if (rc != 0)
+               if (rc != X86EMUL_CONTINUE)
                        return rc;
                ctxt->eflags |= EFLG_ZF;
        }
@@ -1448,7 +1462,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
                                        &c->dst.val,
                                        c->dst.bytes,
                                        ctxt->vcpu);
-               if (rc != 0)
+               if (rc != X86EMUL_CONTINUE)
                        return rc;
                break;
        case OP_NONE:
@@ -1512,7 +1526,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
 
        /* syscall is not available in real mode */
        if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
-               || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
+           || ctxt->mode == X86EMUL_MODE_VM86)
                return -1;
 
        setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1564,9 +1578,8 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
        if (c->lock_prefix)
                return -1;
 
-       /* inject #GP if in real mode or paging is disabled */
-       if (ctxt->mode == X86EMUL_MODE_REAL ||
-               !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+       /* inject #GP if in real mode */
+       if (ctxt->mode == X86EMUL_MODE_REAL) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return -1;
        }
@@ -1630,9 +1643,9 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
        if (c->lock_prefix)
                return -1;
 
-       /* inject #GP if in real mode or paging is disabled */
-       if (ctxt->mode == X86EMUL_MODE_REAL
-               || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+       /* inject #GP if in real mode or Virtual 8086 mode */
+       if (ctxt->mode == X86EMUL_MODE_REAL ||
+           ctxt->mode == X86EMUL_MODE_VM86) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return -1;
        }
@@ -1685,6 +1698,57 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
        return 0;
 }
 
+static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
+{
+       int iopl;
+       if (ctxt->mode == X86EMUL_MODE_REAL)
+               return false;
+       if (ctxt->mode == X86EMUL_MODE_VM86)
+               return true;
+       iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
+}
+
+static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
+                                           struct x86_emulate_ops *ops,
+                                           u16 port, u16 len)
+{
+       struct kvm_segment tr_seg;
+       int r;
+       u16 io_bitmap_ptr;
+       u8 perm, bit_idx = port & 0x7;
+       unsigned mask = (1 << len) - 1;
+
+       kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
+       if (tr_seg.unusable)
+               return false;
+       if (tr_seg.limit < 103)
+               return false;
+       r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
+                         NULL);
+       if (r != X86EMUL_CONTINUE)
+               return false;
+       if (io_bitmap_ptr + port/8 > tr_seg.limit)
+               return false;
+       r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
+                         ctxt->vcpu, NULL);
+       if (r != X86EMUL_CONTINUE)
+               return false;
+       if ((perm >> bit_idx) & mask)
+               return false;
+       return true;
+}
+
+static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
+                                struct x86_emulate_ops *ops,
+                                u16 port, u16 len)
+{
+       if (emulator_bad_iopl(ctxt))
+               if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
+                       return false;
+       return true;
+}
+
 int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -1746,7 +1810,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                                        &c->src.val,
                                        c->src.bytes,
                                        ctxt->vcpu);
-               if (rc != 0)
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
                c->src.orig_val = c->src.val;
        }
@@ -1765,12 +1829,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                        c->dst.ptr = (void *)c->dst.ptr +
                                                   (c->src.val & mask) / 8;
                }
-               if (!(c->d & Mov) &&
-                                  /* optimisation - avoid slow emulated read */
-                   ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
-                                          &c->dst.val,
-                                         c->dst.bytes, ctxt->vcpu)) != 0))
-                       goto done;
+               if (!(c->d & Mov)) {
+                       /* optimisation - avoid slow emulated read */
+                       rc = ops->read_emulated((unsigned long)c->dst.ptr,
+                                               &c->dst.val,
+                                               c->dst.bytes,
+                                               ctxt->vcpu);
+                       if (rc != X86EMUL_CONTINUE)
+                               goto done;
+               }
        }
        c->dst.orig_val = c->dst.val;
 
@@ -1873,7 +1940,12 @@ special_insn:
                break;
        case 0x6c:              /* insb */
        case 0x6d:              /* insw/insd */
-                if (kvm_emulate_pio_string(ctxt->vcpu,
+               if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+                                         (c->d & ByteOp) ? 1 : c->op_bytes)) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       goto done;
+               }
+               if (kvm_emulate_pio_string(ctxt->vcpu,
                                1,
                                (c->d & ByteOp) ? 1 : c->op_bytes,
                                c->rep_prefix ?
@@ -1889,6 +1961,11 @@ special_insn:
                return 0;
        case 0x6e:              /* outsb */
        case 0x6f:              /* outsw/outsd */
+               if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+                                         (c->d & ByteOp) ? 1 : c->op_bytes)) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       goto done;
+               }
                if (kvm_emulate_pio_string(ctxt->vcpu,
                                0,
                                (c->d & ByteOp) ? 1 : c->op_bytes,
@@ -2036,11 +2113,12 @@ special_insn:
                c->dst.ptr = (unsigned long *)register_address(c,
                                                   es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
-               if ((rc = ops->read_emulated(register_address(c,
-                                          seg_override_base(ctxt, c),
-                                       c->regs[VCPU_REGS_RSI]),
+               rc = ops->read_emulated(register_address(c,
+                                               seg_override_base(ctxt, c),
+                                               c->regs[VCPU_REGS_RSI]),
                                        &c->dst.val,
-                                       c->dst.bytes, ctxt->vcpu)) != 0)
+                                       c->dst.bytes, ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
                register_address_increment(c, &c->regs[VCPU_REGS_RSI],
                                       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2055,10 +2133,11 @@ special_insn:
                c->src.ptr = (unsigned long *)register_address(c,
                                       seg_override_base(ctxt, c),
                                                   c->regs[VCPU_REGS_RSI]);
-               if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
-                                               &c->src.val,
-                                               c->src.bytes,
-                                               ctxt->vcpu)) != 0)
+               rc = ops->read_emulated((unsigned long)c->src.ptr,
+                                       &c->src.val,
+                                       c->src.bytes,
+                                       ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
 
                c->dst.type = OP_NONE; /* Disable writeback. */
@@ -2066,10 +2145,11 @@ special_insn:
                c->dst.ptr = (unsigned long *)register_address(c,
                                                   es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
-               if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
-                                               &c->dst.val,
-                                               c->dst.bytes,
-                                               ctxt->vcpu)) != 0)
+               rc = ops->read_emulated((unsigned long)c->dst.ptr,
+                                       &c->dst.val,
+                                       c->dst.bytes,
+                                       ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
 
                DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
@@ -2099,12 +2179,13 @@ special_insn:
                c->dst.type = OP_REG;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-               if ((rc = ops->read_emulated(register_address(c,
-                                                seg_override_base(ctxt, c),
-                                                c->regs[VCPU_REGS_RSI]),
-                                                &c->dst.val,
-                                                c->dst.bytes,
-                                                ctxt->vcpu)) != 0)
+               rc = ops->read_emulated(register_address(c,
+                                               seg_override_base(ctxt, c),
+                                               c->regs[VCPU_REGS_RSI]),
+                                       &c->dst.val,
+                                       c->dst.bytes,
+                                       ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
                register_address_increment(c, &c->regs[VCPU_REGS_RSI],
                                       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2182,7 +2263,13 @@ special_insn:
        case 0xef: /* out (e/r)ax,dx */
                port = c->regs[VCPU_REGS_RDX];
                io_dir_in = 0;
-       do_io:  if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
+       do_io:
+               if (!emulator_io_permited(ctxt, ops, port,
+                                         (c->d & ByteOp) ? 1 : c->op_bytes)) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       goto done;
+               }
+               if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
                                   (c->d & ByteOp) ? 1 : c->op_bytes,
                                   port) != 0) {
                        c->eip = saved_eip;
@@ -2207,13 +2294,21 @@ special_insn:
                c->dst.type = OP_NONE;  /* Disable writeback. */
                break;
        case 0xfa: /* cli */
-               ctxt->eflags &= ~X86_EFLAGS_IF;
-               c->dst.type = OP_NONE;  /* Disable writeback. */
+               if (emulator_bad_iopl(ctxt))
+                       kvm_inject_gp(ctxt->vcpu, 0);
+               else {
+                       ctxt->eflags &= ~X86_EFLAGS_IF;
+                       c->dst.type = OP_NONE;  /* Disable writeback. */
+               }
                break;
        case 0xfb: /* sti */
-               toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
-               ctxt->eflags |= X86_EFLAGS_IF;
-               c->dst.type = OP_NONE;  /* Disable writeback. */
+               if (emulator_bad_iopl(ctxt))
+                       kvm_inject_gp(ctxt->vcpu, 0);
+               else {
+                       toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+                       ctxt->eflags |= X86_EFLAGS_IF;
+                       c->dst.type = OP_NONE;  /* Disable writeback. */
+               }
                break;
        case 0xfc: /* cld */
                ctxt->eflags &= ~EFLG_DF;