KVM: x86 emulator: address size and operand size overrides are sticky
[safe/jmp/linux-2.6] / drivers / kvm / x86_emulate.c
index 75fd23b..3be506a 100644 (file)
@@ -26,6 +26,7 @@
 #define DPRINTF(_f, _a ...) printf(_f , ## _a)
 #else
 #include "kvm.h"
+#include "x86.h"
 #define DPRINTF(x...) do {} while (0)
 #endif
 #include "x86_emulate.h"
@@ -62,8 +63,9 @@
 /* Destination is only written; never read. */
 #define Mov         (1<<7)
 #define BitOp       (1<<8)
+#define MemAbs      (1<<9)      /* Memory operand is absolute displacement */
 
-static u8 opcode_table[256] = {
+static u16 opcode_table[256] = {
        /* 0x00 - 0x07 */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -96,14 +98,14 @@ static u8 opcode_table[256] = {
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        0, 0, 0, 0,
-       /* 0x40 - 0x4F */
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       /* 0x40 - 0x47 */
+       DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
+       /* 0x48 - 0x4F */
+       DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        /* 0x50 - 0x57 */
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+       SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg,
        /* 0x58 - 0x5F */
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-       ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+       DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        /* 0x60 - 0x67 */
        0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
        0, 0, 0, 0,
@@ -129,8 +131,8 @@ static u8 opcode_table[256] = {
        /* 0x90 - 0x9F */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0,
        /* 0xA0 - 0xA7 */
-       ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov,
-       ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov,
+       ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
+       ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
        ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
        ByteOp | ImplicitOps, ImplicitOps,
        /* 0xA8 - 0xAF */
@@ -157,10 +159,10 @@ static u8 opcode_table[256] = {
        ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0,
        /* 0xF0 - 0xF7 */
        0, 0, 0, 0,
-       ImplicitOps, 0,
+       ImplicitOps, ImplicitOps,
        ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
        /* 0xF8 - 0xFF */
-       0, 0, 0, 0,
+       ImplicitOps, 0, ImplicitOps, ImplicitOps,
        0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
 };
 
@@ -412,8 +414,7 @@ static u16 twobyte_table[256] = {
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip)                                  \
 ({     unsigned long _x;                                               \
-       rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x,  \
-                          (_size), ctxt->vcpu);                        \
+       rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));            \
        if (rc != 0)                                                    \
                goto done;                                              \
        (_eip) += (_size);                                              \
@@ -444,6 +445,41 @@ static u16 twobyte_table[256] = {
                register_address_increment(c->eip, rel);                \
        } while (0)
 
+/* Fetch one insn byte via the cache; a miss refills up to 15 bytes in-page. */
+static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
+                             struct x86_emulate_ops *ops,
+                             unsigned long linear, u8 *dest)
+{
+       struct fetch_cache *cache = &ctxt->decode.fetch;
+
+       if (!(linear >= cache->start && linear < cache->end)) {
+               int len = min(15UL, PAGE_SIZE - offset_in_page(linear));
+               int err = ops->read_std(linear, cache->data, len, ctxt->vcpu);
+
+               if (err)
+                       return err;
+               cache->start = linear;
+               cache->end = linear + len;
+       }
+       *dest = cache->data[linear - cache->start];
+       return 0;
+}
+
+/* Copy size instruction bytes starting at cs_base + eip into dest. */
+static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
+                        struct x86_emulate_ops *ops,
+                        unsigned long eip, void *dest, unsigned size)
+{
+       /* Byte cursor: ISO C defines no arithmetic on void pointers. */
+       u8 *out = dest;
+       int rc = 0;
+
+       /* Stop at the first byte that fails and return its error code. */
+       for (eip += ctxt->cs_base; size && !rc; size--)
+               rc = do_fetch_insn_byte(ctxt, ops, eip++, out++);
+       return rc;
+}
+
 /*
  * Given the 'reg' portion of a ModRM byte, and a register block, return a
  * pointer into the block that addresses the relevant register.
@@ -514,14 +550,215 @@ static int test_cc(unsigned int condition, unsigned int flags)
        return (!!rc ^ (condition & 1));
 }
 
+/* Decode a register operand into *op and load its current value. */
+static void decode_register_operand(struct operand *op,
+                                   struct decode_cache *c,
+                                   int inhibit_bytereg)
+{
+       int byte_op = (c->d & ByteOp) && !inhibit_bytereg;
+       unsigned regno;
+
+       /* No ModRM byte: register is in the opcode's low bits plus REX.B. */
+       if (c->d & ModRM)
+               regno = c->modrm_reg;
+       else
+               regno = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+       op->type = OP_REG;
+       if (byte_op) {
+               /* highbyte_regs is requested only without a REX prefix. */
+               op->ptr = decode_register(regno, c->regs, c->rex_prefix == 0);
+               op->bytes = 1;
+               op->val = *(u8 *)op->ptr;
+       } else {
+               op->ptr = decode_register(regno, c->regs, 0);
+               op->bytes = c->op_bytes;
+               if (op->bytes == 2)
+                       op->val = *(u16 *)op->ptr;
+               else if (op->bytes == 4)
+                       op->val = *(u32 *)op->ptr;
+               else if (op->bytes == 8)
+                       op->val = *(u64 *)op->ptr;
+       }
+       op->orig_val = op->val;
+}
+
+/*
+ * Decode the ModRM byte, and any SIB byte and displacement that follow it,
+ * into ctxt->decode (modrm_mod/reg/rm, modrm_ea or modrm_val).  Returns 0,
+ * or the instruction-fetch error code if a byte could not be fetched.
+ */
+static int decode_modrm(struct x86_emulate_ctxt *ctxt,
+                       struct x86_emulate_ops *ops)
+{
+       struct decode_cache *c = &ctxt->decode;
+       u8 sib;
+       int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+       int rc = 0;
+
+       if (c->rex_prefix) {
+               c->modrm_reg = (c->rex_prefix & 4) << 1;        /* REX.R */
+               index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
+               c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
+       }
+
+       c->modrm = insn_fetch(u8, 1, c->eip);
+       c->modrm_mod |= (c->modrm & 0xc0) >> 6;
+       c->modrm_reg |= (c->modrm & 0x38) >> 3;
+       c->modrm_rm |= (c->modrm & 0x07);
+       c->modrm_ea = 0;
+       c->use_modrm_ea = 1;
+
+       if (c->modrm_mod == 3) {
+               c->modrm_val = *(unsigned long *)
+                       decode_register(c->modrm_rm, c->regs, c->d & ByteOp);
+               return rc;
+       }
+
+       if (c->ad_bytes == 2) {
+               unsigned bx = c->regs[VCPU_REGS_RBX];
+               unsigned bp = c->regs[VCPU_REGS_RBP];
+               unsigned si = c->regs[VCPU_REGS_RSI];
+               unsigned di = c->regs[VCPU_REGS_RDI];
+
+               /* 16-bit ModR/M decode. */
+               switch (c->modrm_mod) {
+               case 0:
+                       if (c->modrm_rm == 6)
+                               c->modrm_ea += insn_fetch(u16, 2, c->eip);
+                       break;
+               case 1:
+                       c->modrm_ea += insn_fetch(s8, 1, c->eip);
+                       break;
+               case 2:
+                       c->modrm_ea += insn_fetch(u16, 2, c->eip);
+                       break;
+               }
+               switch (c->modrm_rm) {
+               case 0:
+                       c->modrm_ea += bx + si;
+                       break;
+               case 1:
+                       c->modrm_ea += bx + di;
+                       break;
+               case 2:
+                       c->modrm_ea += bp + si;
+                       break;
+               case 3:
+                       c->modrm_ea += bp + di;
+                       break;
+               case 4:
+                       c->modrm_ea += si;
+                       break;
+               case 5:
+                       c->modrm_ea += di;
+                       break;
+               case 6:
+                       if (c->modrm_mod != 0)
+                               c->modrm_ea += bp;
+                       break;
+               case 7:
+                       c->modrm_ea += bx;
+                       break;
+               }
+               if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
+                   (c->modrm_rm == 6 && c->modrm_mod != 0))
+                       if (!c->override_base)
+                               c->override_base = &ctxt->ss_base;
+               c->modrm_ea = (u16)c->modrm_ea;
+       } else {
+               /* 32/64-bit ModR/M decode. */
+               switch (c->modrm_rm) {
+               case 4:
+               case 12:
+                       sib = insn_fetch(u8, 1, c->eip);
+                       index_reg |= (sib >> 3) & 7;
+                       base_reg |= sib & 7;
+                       scale = sib >> 6;
+
+                       /*
+                        * With mod == 0, base encodings 5 and 13 both mean
+                        * "no base, disp32": REX.B is not decoded there.
+                        */
+                       if ((base_reg & 7) == 5 && c->modrm_mod == 0)
+                               c->modrm_ea += insn_fetch(s32, 4, c->eip);
+                       else
+                               c->modrm_ea += c->regs[base_reg];
+                       /* An index field of 4 means "no index register". */
+                       if (index_reg != 4)
+                               c->modrm_ea += c->regs[index_reg] << scale;
+                       break;
+               case 5:
+               case 13:        /* REX.B is ignored when mod == 0 */
+                       if (c->modrm_mod != 0)
+                               c->modrm_ea += c->regs[c->modrm_rm];
+                       else if (ctxt->mode == X86EMUL_MODE_PROT64)
+                               rip_relative = 1;
+                       break;
+               default:
+                       c->modrm_ea += c->regs[c->modrm_rm];
+                       break;
+               }
+               switch (c->modrm_mod) {
+               case 0:
+                       if ((c->modrm_rm & 7) == 5)
+                               c->modrm_ea += insn_fetch(s32, 4, c->eip);
+                       break;
+               case 1:
+                       c->modrm_ea += insn_fetch(s8, 1, c->eip);
+                       break;
+               case 2:
+                       c->modrm_ea += insn_fetch(s32, 4, c->eip);
+                       break;
+               }
+       }
+       if (rip_relative) {
+               c->modrm_ea += c->eip;
+               switch (c->d & SrcMask) {
+               case SrcImmByte:
+                       c->modrm_ea += 1;
+                       break;
+               case SrcImm:
+                       if (c->d & ByteOp)
+                               c->modrm_ea += 1;
+                       else
+                               if (c->op_bytes == 8)
+                                       c->modrm_ea += 4;
+                               else
+                                       c->modrm_ea += c->op_bytes;
+               }
+       }
+done:
+       return rc;
+}
+
+/*
+ * Fetch the absolute memory offset that follows the opcode into modrm_ea;
+ * its width is the effective address size.  insn_fetch() sets rc and jumps
+ * to done when the fetch fails.
+ */
+static int decode_abs(struct x86_emulate_ctxt *ctxt,
+                     struct x86_emulate_ops *ops)
+{
+       struct decode_cache *c = &ctxt->decode;
+       int rc = 0;
+
+       if (c->ad_bytes == 2)
+               c->modrm_ea = insn_fetch(u16, 2, c->eip);
+       else if (c->ad_bytes == 4)
+               c->modrm_ea = insn_fetch(u32, 4, c->eip);
+       else if (c->ad_bytes == 8)
+               c->modrm_ea = insn_fetch(u64, 8, c->eip);
+done:
+       return rc;
+}
+
 int
 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
        struct decode_cache *c = &ctxt->decode;
-       u8 sib, rex_prefix = 0;
        int rc = 0;
        int mode = ctxt->mode;
-       int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+       int def_op_bytes, def_ad_bytes;
 
        /* Shadow copy of register state. Committed on successful emulation. */
 
@@ -532,34 +769,38 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        switch (mode) {
        case X86EMUL_MODE_REAL:
        case X86EMUL_MODE_PROT16:
-               c->op_bytes = c->ad_bytes = 2;
+               def_op_bytes = def_ad_bytes = 2;
                break;
        case X86EMUL_MODE_PROT32:
-               c->op_bytes = c->ad_bytes = 4;
+               def_op_bytes = def_ad_bytes = 4;
                break;
 #ifdef CONFIG_X86_64
        case X86EMUL_MODE_PROT64:
-               c->op_bytes = 4;
-               c->ad_bytes = 8;
+               def_op_bytes = 4;
+               def_ad_bytes = 8;
                break;
 #endif
        default:
                return -1;
        }
 
+       c->op_bytes = def_op_bytes;
+       c->ad_bytes = def_ad_bytes;
+
        /* Legacy prefixes. */
        for (;;) {
                switch (c->b = insn_fetch(u8, 1, c->eip)) {
                case 0x66:      /* operand-size override */
-                       c->op_bytes ^= 6;       /* switch between 2/4 bytes */
+                       /* switch between 2/4 bytes */
+                       c->op_bytes = def_op_bytes ^ 6;
                        break;
                case 0x67:      /* address-size override */
                        if (mode == X86EMUL_MODE_PROT64)
                                /* switch between 4/8 bytes */
-                               c->ad_bytes ^= 12;
+                               c->ad_bytes = def_ad_bytes ^ 12;
                        else
                                /* switch between 2/4 bytes */
-                               c->ad_bytes ^= 6;
+                               c->ad_bytes = def_ad_bytes ^ 6;
                        break;
                case 0x2e:      /* CS override */
                        c->override_base = &ctxt->cs_base;
@@ -582,14 +823,16 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                case 0x40 ... 0x4f: /* REX */
                        if (mode != X86EMUL_MODE_PROT64)
                                goto done_prefixes;
-                       rex_prefix = c->b;
+                       c->rex_prefix = c->b;
                        continue;
                case 0xf0:      /* LOCK */
                        c->lock_prefix = 1;
                        break;
                case 0xf2:      /* REPNE/REPNZ */
+                       c->rep_prefix = REPNE_PREFIX;
+                       break;
                case 0xf3:      /* REP/REPE/REPZ */
-                       c->rep_prefix = 1;
+                       c->rep_prefix = REPE_PREFIX;
                        break;
                default:
                        goto done_prefixes;
@@ -597,19 +840,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 
                /* Any legacy prefix after a REX prefix nullifies its effect. */
 
-               rex_prefix = 0;
+               c->rex_prefix = 0;
        }
 
 done_prefixes:
 
        /* REX prefix. */
-       if (rex_prefix) {
-               if (rex_prefix & 8)
+       if (c->rex_prefix)
+               if (c->rex_prefix & 8)
                        c->op_bytes = 8;        /* REX.W */
-               c->modrm_reg = (rex_prefix & 4) << 1;   /* REX.R */
-               index_reg = (rex_prefix & 2) << 2; /* REX.X */
-               c->modrm_rm = base_reg = (rex_prefix & 1) << 3; /* REG.B */
-       }
 
        /* Opcode byte(s). */
        c->d = opcode_table[c->b];
@@ -629,159 +868,25 @@ done_prefixes:
        }
 
        /* ModRM and SIB bytes. */
-       if (c->d & ModRM) {
-               c->modrm = insn_fetch(u8, 1, c->eip);
-               c->modrm_mod |= (c->modrm & 0xc0) >> 6;
-               c->modrm_reg |= (c->modrm & 0x38) >> 3;
-               c->modrm_rm |= (c->modrm & 0x07);
-               c->modrm_ea = 0;
-               c->use_modrm_ea = 1;
-
-               if (c->modrm_mod == 3) {
-                       c->modrm_val = *(unsigned long *)
-                         decode_register(c->modrm_rm, c->regs, c->d & ByteOp);
-                       goto modrm_done;
-               }
+       if (c->d & ModRM)
+               rc = decode_modrm(ctxt, ops);
+       else if (c->d & MemAbs)
+               rc = decode_abs(ctxt, ops);
+       if (rc)
+               goto done;
 
-               if (c->ad_bytes == 2) {
-                       unsigned bx = c->regs[VCPU_REGS_RBX];
-                       unsigned bp = c->regs[VCPU_REGS_RBP];
-                       unsigned si = c->regs[VCPU_REGS_RSI];
-                       unsigned di = c->regs[VCPU_REGS_RDI];
+       if (!c->override_base)
+               c->override_base = &ctxt->ds_base;
+       if (mode == X86EMUL_MODE_PROT64 &&
+           c->override_base != &ctxt->fs_base &&
+           c->override_base != &ctxt->gs_base)
+               c->override_base = NULL;
 
-                       /* 16-bit ModR/M decode. */
-                       switch (c->modrm_mod) {
-                       case 0:
-                               if (c->modrm_rm == 6)
-                                       c->modrm_ea +=
-                                               insn_fetch(u16, 2, c->eip);
-                               break;
-                       case 1:
-                               c->modrm_ea += insn_fetch(s8, 1, c->eip);
-                               break;
-                       case 2:
-                               c->modrm_ea += insn_fetch(u16, 2, c->eip);
-                               break;
-                       }
-                       switch (c->modrm_rm) {
-                       case 0:
-                               c->modrm_ea += bx + si;
-                               break;
-                       case 1:
-                               c->modrm_ea += bx + di;
-                               break;
-                       case 2:
-                               c->modrm_ea += bp + si;
-                               break;
-                       case 3:
-                               c->modrm_ea += bp + di;
-                               break;
-                       case 4:
-                               c->modrm_ea += si;
-                               break;
-                       case 5:
-                               c->modrm_ea += di;
-                               break;
-                       case 6:
-                               if (c->modrm_mod != 0)
-                                       c->modrm_ea += bp;
-                               break;
-                       case 7:
-                               c->modrm_ea += bx;
-                               break;
-                       }
-                       if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
-                           (c->modrm_rm == 6 && c->modrm_mod != 0))
-                               if (!c->override_base)
-                                       c->override_base = &ctxt->ss_base;
-                       c->modrm_ea = (u16)c->modrm_ea;
-               } else {
-                       /* 32/64-bit ModR/M decode. */
-                       switch (c->modrm_rm) {
-                       case 4:
-                       case 12:
-                               sib = insn_fetch(u8, 1, c->eip);
-                               index_reg |= (sib >> 3) & 7;
-                               base_reg |= sib & 7;
-                               scale = sib >> 6;
-
-                               switch (base_reg) {
-                               case 5:
-                                       if (c->modrm_mod != 0)
-                                               c->modrm_ea +=
-                                                       c->regs[base_reg];
-                                       else
-                                               c->modrm_ea +=
-                                                   insn_fetch(s32, 4, c->eip);
-                                       break;
-                               default:
-                                       c->modrm_ea += c->regs[base_reg];
-                               }
-                               switch (index_reg) {
-                               case 4:
-                                       break;
-                               default:
-                                       c->modrm_ea +=
-                                               c->regs[index_reg] << scale;
-
-                               }
-                               break;
-                       case 5:
-                               if (c->modrm_mod != 0)
-                                       c->modrm_ea += c->regs[c->modrm_rm];
-                               else if (mode == X86EMUL_MODE_PROT64)
-                                       rip_relative = 1;
-                               break;
-                       default:
-                               c->modrm_ea += c->regs[c->modrm_rm];
-                               break;
-                       }
-                       switch (c->modrm_mod) {
-                       case 0:
-                               if (c->modrm_rm == 5)
-                                       c->modrm_ea +=
-                                               insn_fetch(s32, 4, c->eip);
-                               break;
-                       case 1:
-                               c->modrm_ea += insn_fetch(s8, 1, c->eip);
-                               break;
-                       case 2:
-                               c->modrm_ea += insn_fetch(s32, 4, c->eip);
-                               break;
-                       }
-               }
-               if (!c->override_base)
-                       c->override_base = &ctxt->ds_base;
-               if (mode == X86EMUL_MODE_PROT64 &&
-                   c->override_base != &ctxt->fs_base &&
-                   c->override_base != &ctxt->gs_base)
-                       c->override_base = NULL;
-
-               if (c->override_base)
-                       c->modrm_ea += *c->override_base;
-
-               if (rip_relative) {
-                       c->modrm_ea += c->eip;
-                       switch (c->d & SrcMask) {
-                       case SrcImmByte:
-                               c->modrm_ea += 1;
-                               break;
-                       case SrcImm:
-                               if (c->d & ByteOp)
-                                       c->modrm_ea += 1;
-                               else
-                                       if (c->op_bytes == 8)
-                                               c->modrm_ea += 4;
-                                       else
-                                               c->modrm_ea += c->op_bytes;
-                       }
-               }
-               if (c->ad_bytes != 8)
-                       c->modrm_ea = (u32)c->modrm_ea;
-modrm_done:
-               ;
-       }
+       if (c->override_base)
+               c->modrm_ea += *c->override_base;
 
+       if (c->ad_bytes != 8)
+               c->modrm_ea = (u32)c->modrm_ea;
        /*
         * Decode and fetch the source operand: register, memory
         * or immediate.
@@ -790,31 +895,7 @@ modrm_done:
        case SrcNone:
                break;
        case SrcReg:
-               c->src.type = OP_REG;
-               if (c->d & ByteOp) {
-                       c->src.ptr =
-                               decode_register(c->modrm_reg, c->regs,
-                                                 (rex_prefix == 0));
-                       c->src.val = c->src.orig_val = *(u8 *)c->src.ptr;
-                       c->src.bytes = 1;
-               } else {
-                       c->src.ptr =
-                           decode_register(c->modrm_reg, c->regs, 0);
-                       switch ((c->src.bytes = c->op_bytes)) {
-                       case 2:
-                               c->src.val = c->src.orig_val =
-                                                      *(u16 *) c->src.ptr;
-                               break;
-                       case 4:
-                               c->src.val = c->src.orig_val =
-                                                      *(u32 *) c->src.ptr;
-                               break;
-                       case 8:
-                               c->src.val = c->src.orig_val =
-                                                      *(u64 *) c->src.ptr;
-                               break;
-                       }
-               }
+               decode_register_operand(&c->src, c, 0);
                break;
        case SrcMem16:
                c->src.bytes = 2;
@@ -872,30 +953,8 @@ modrm_done:
                /* Special instructions do their own operand decoding. */
                return 0;
        case DstReg:
-               c->dst.type = OP_REG;
-               if ((c->d & ByteOp)
-                   && !(c->twobyte &&
-                       (c->b == 0xb6 || c->b == 0xb7))) {
-                       c->dst.ptr =
-                               decode_register(c->modrm_reg, c->regs,
-                                                 (rex_prefix == 0));
-                       c->dst.val = *(u8 *) c->dst.ptr;
-                       c->dst.bytes = 1;
-               } else {
-                       c->dst.ptr =
-                           decode_register(c->modrm_reg, c->regs, 0);
-                       switch ((c->dst.bytes = c->op_bytes)) {
-                       case 2:
-                               c->dst.val = *(u16 *)c->dst.ptr;
-                               break;
-                       case 4:
-                               c->dst.val = *(u32 *)c->dst.ptr;
-                               break;
-                       case 8:
-                               c->dst.val = *(u64 *)c->dst.ptr;
-                               break;
-                       }
-               }
+               decode_register_operand(&c->dst, c,
+                        c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
                break;
        case DstMem:
                if ((c->d & ModRM) && c->modrm_mod == 3) {
@@ -1166,7 +1225,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        memcpy(c->regs, ctxt->vcpu->regs, sizeof c->regs);
        saved_eip = c->eip;
 
-       if ((c->d & ModRM) && (c->modrm_mod != 3))
+       if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
                cr2 = c->modrm_ea;
 
        if (c->src.type == OP_MEM) {
@@ -1257,6 +1316,32 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
              cmp:              /* cmp */
                emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
                break;
+       case 0x40 ... 0x47: /* inc r16/r32 */
+               emulate_1op("inc", c->dst, ctxt->eflags);
+               break;
+       case 0x48 ... 0x4f: /* dec r16/r32 */
+               emulate_1op("dec", c->dst, ctxt->eflags);
+               break;
+       case 0x50 ... 0x57:  /* push reg */
+               c->dst.type  = OP_MEM;
+               c->dst.bytes = c->op_bytes;
+               c->dst.val = c->src.val;
+               register_address_increment(c->regs[VCPU_REGS_RSP],
+                                          -c->op_bytes);
+               c->dst.ptr = (void *) register_address(
+                       ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
+               break;
+       case 0x58 ... 0x5f: /* pop reg */
+       pop_instruction:
+               if ((rc = ops->read_std(register_address(ctxt->ss_base,
+                       c->regs[VCPU_REGS_RSP]), c->dst.ptr,
+                       c->op_bytes, ctxt->vcpu)) != 0)
+                       goto done;
+
+               register_address_increment(c->regs[VCPU_REGS_RSP],
+                                          c->op_bytes);
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
        case 0x63:              /* movsxd */
                if (ctxt->mode != X86EMUL_MODE_PROT64)
                        goto cannot_emulate;
@@ -1321,13 +1406,9 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        case 0xa0 ... 0xa1:     /* mov */
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
                c->dst.val = c->src.val;
-               /* skip src displacement */
-               c->eip += c->ad_bytes;
                break;
        case 0xa2 ... 0xa3:     /* mov */
                c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
-               /* skip c->dst displacement */
-               c->eip += c->ad_bytes;
                break;
        case 0xc0 ... 0xc1:
                emulate_grp2(ctxt);
@@ -1376,31 +1457,6 @@ special_insn:
        if (c->twobyte)
                goto twobyte_special_insn;
        switch (c->b) {
-       case 0x50 ... 0x57:  /* push reg */
-               if (c->op_bytes == 2)
-                       c->src.val = (u16) c->regs[c->b & 0x7];
-               else
-                       c->src.val = (u32) c->regs[c->b & 0x7];
-               c->dst.type  = OP_MEM;
-               c->dst.bytes = c->op_bytes;
-               c->dst.val = c->src.val;
-               register_address_increment(c->regs[VCPU_REGS_RSP],
-                                          -c->op_bytes);
-               c->dst.ptr = (void *) register_address(
-                       ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
-               break;
-       case 0x58 ... 0x5f: /* pop reg */
-               c->dst.ptr = (unsigned long *)&c->regs[c->b & 0x7];
-       pop_instruction:
-               if ((rc = ops->read_std(register_address(ctxt->ss_base,
-                       c->regs[VCPU_REGS_RSP]), c->dst.ptr,
-                       c->op_bytes, ctxt->vcpu)) != 0)
-                       goto done;
-
-               register_address_increment(c->regs[VCPU_REGS_RSP],
-                                          c->op_bytes);
-               c->dst.type = OP_NONE;  /* Disable writeback. */
-               break;
        case 0x6a: /* push imm8 */
                c->src.val = 0L;
                c->src.val = insn_fetch(s8, 1, c->eip);
@@ -1460,6 +1516,23 @@ special_insn:
        case 0xf4:              /* hlt */
                ctxt->vcpu->halt_request = 1;
                goto done;
+       case 0xf5:      /* cmc */
+               /* complement carry flag from eflags reg */
+               ctxt->eflags ^= EFLG_CF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
+       case 0xf8: /* clc */
+               ctxt->eflags &= ~EFLG_CF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
+       case 0xfa: /* cli */
+               ctxt->eflags &= ~X86_EFLAGS_IF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
+       case 0xfb: /* sti */
+               ctxt->eflags |= X86_EFLAGS_IF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
        }
        if (c->rep_prefix) {
                if (c->regs[VCPU_REGS_RCX] == 0) {
@@ -1496,7 +1569,9 @@ special_insn:
        case 0xaa ... 0xab:     /* stos */
                c->dst.type = OP_MEM;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-               c->dst.ptr = (unsigned long *)cr2;
+               c->dst.ptr = (unsigned long *)register_address(
+                                                  ctxt->es_base,
+                                                  c->regs[VCPU_REGS_RDI]);
                c->dst.val = c->regs[VCPU_REGS_RAX];
                register_address_increment(c->regs[VCPU_REGS_RDI],
                                       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -1506,9 +1581,13 @@ special_insn:
                c->dst.type = OP_REG;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-               if ((rc = ops->read_emulated(cr2, &c->dst.val,
-                                            c->dst.bytes,
-                                            ctxt->vcpu)) != 0)
+               if ((rc = ops->read_emulated(register_address(
+                               c->override_base ? *c->override_base :
+                                                  ctxt->ds_base,
+                                                c->regs[VCPU_REGS_RSI]),
+                                                &c->dst.val,
+                                                c->dst.bytes,
+                                                ctxt->vcpu)) != 0)
                        goto done;
                register_address_increment(c->regs[VCPU_REGS_RSI],
                                       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -1526,9 +1605,6 @@ special_insn:
                case 4:
                        rel = insn_fetch(s32, 4, c->eip);
                        break;
-               case 8:
-                       rel = insn_fetch(s64, 8, c->eip);
-                       break;
                default:
                        DPRINTF("Call: Invalid op_bytes\n");
                        goto cannot_emulate;