KVM: SVM: Move EFER and MSR constants to generic x86 code
[safe/jmp/linux-2.6] / arch / x86 / kvm / x86_emulate.c
index ca7ab24..d174db7 100644 (file)
@@ -26,6 +26,7 @@
 #define DPRINTF(_f, _a ...) printf(_f , ## _a)
 #else
 #include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
 #define DPRINTF(x...) do {} while (0)
 #endif
 #include <linux/module.h>
 #define ImplicitOps (1<<1)     /* Implicit in opcode. No generic decode. */
 #define DstReg      (2<<1)     /* Register operand. */
 #define DstMem      (3<<1)     /* Memory operand. */
-#define DstMask     (3<<1)
+#define DstAcc      (4<<1)      /* Destination Accumulator */
+#define DstMask     (7<<1)
 /* Source operand type. */
-#define SrcNone     (0<<3)     /* No source operand. */
-#define SrcImplicit (0<<3)     /* Source operand is implicit in the opcode. */
-#define SrcReg      (1<<3)     /* Register operand. */
-#define SrcMem      (2<<3)     /* Memory operand. */
-#define SrcMem16    (3<<3)     /* Memory operand (16-bit). */
-#define SrcMem32    (4<<3)     /* Memory operand (32-bit). */
-#define SrcImm      (5<<3)     /* Immediate operand. */
-#define SrcImmByte  (6<<3)     /* 8-bit sign-extended immediate operand. */
-#define SrcMask     (7<<3)
+#define SrcNone     (0<<4)     /* No source operand. */
+#define SrcImplicit (0<<4)     /* Source operand is implicit in the opcode. */
+#define SrcReg      (1<<4)     /* Register operand. */
+#define SrcMem      (2<<4)     /* Memory operand. */
+#define SrcMem16    (3<<4)     /* Memory operand (16-bit). */
+#define SrcMem32    (4<<4)     /* Memory operand (32-bit). */
+#define SrcImm      (5<<4)     /* Immediate operand. */
+#define SrcImmByte  (6<<4)     /* 8-bit sign-extended immediate operand. */
+#define SrcOne      (7<<4)     /* Implied '1' */
+#define SrcMask     (7<<4)
 /* Generic ModRM decode. */
-#define ModRM       (1<<6)
+#define ModRM       (1<<7)
 /* Destination is only written; never read. */
-#define Mov         (1<<7)
-#define BitOp       (1<<8)
-#define MemAbs      (1<<9)      /* Memory operand is absolute displacement */
-#define String      (1<<10)     /* String instruction (rep capable) */
-#define Stack       (1<<11)     /* Stack instruction (push/pop) */
+#define Mov         (1<<8)
+#define BitOp       (1<<9)
+#define MemAbs      (1<<10)      /* Memory operand is absolute displacement */
+#define String      (1<<12)     /* String instruction (rep capable) */
+#define Stack       (1<<13)     /* Stack instruction (push/pop) */
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
+/* Source 2 operand type: selector kept in bits 29-31 of the u32 flag word. */
+#define Src2None    (0<<29)     /* No second source operand. */
+#define Src2CL      (1<<29)     /* Second source comes from RCX (CL). */
+#define Src2ImmByte (2<<29)     /* Second source is an 8-bit immediate. */
+#define Src2One     (3<<29)     /* Implied '1' */
+#define Src2Mask    (7u<<29)    /* Unsigned: 7<<29 does not fit in a signed int. */
 
 enum {
        Group1_80, Group1_81, Group1_82, Group1_83,
        Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
 };
 
-static u16 opcode_table[256] = {
+static u32 opcode_table[256] = {
        /* 0x00 - 0x07 */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-       0, 0, 0, 0,
+       ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
        /* 0x08 - 0x0F */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -94,7 +103,7 @@ static u16 opcode_table[256] = {
        /* 0x20 - 0x27 */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-       SrcImmByte, SrcImm, 0, 0,
+       DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
        /* 0x28 - 0x2F */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -106,7 +115,8 @@ static u16 opcode_table[256] = {
        /* 0x38 - 0x3F */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-       0, 0, 0, 0,
+       ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+       0, 0,
        /* 0x40 - 0x47 */
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        /* 0x48 - 0x4F */
@@ -153,9 +163,16 @@ static u16 opcode_table[256] = {
        0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
        ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
        ByteOp | ImplicitOps | String, ImplicitOps | String,
-       /* 0xB0 - 0xBF */
-       0, 0, 0, 0, 0, 0, 0, 0,
-       DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0,
+       /* 0xB0 - 0xB7 */
+       ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+       ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+       ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+       ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+       /* 0xB8 - 0xBF */
+       DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
+       DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
+       DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
+       DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        /* 0xC0 - 0xC7 */
        ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
        0, ImplicitOps | Stack, 0, 0,
@@ -169,20 +186,23 @@ static u16 opcode_table[256] = {
        /* 0xD8 - 0xDF */
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xE0 - 0xE7 */
-       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0,
+       SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+       SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
        /* 0xE8 - 0xEF */
        ImplicitOps | Stack, SrcImm | ImplicitOps,
        ImplicitOps, SrcImmByte | ImplicitOps,
-       0, 0, 0, 0,
+       SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+       SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
        /* 0xF0 - 0xF7 */
        0, 0, 0, 0,
        ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
        /* 0xF8 - 0xFF */
        ImplicitOps, 0, ImplicitOps, ImplicitOps,
-       0, 0, Group | Group4, Group | Group5,
+       ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
 };
 
-static u16 twobyte_table[256] = {
+static u32 twobyte_table[256] = {
        /* 0x00 - 0x0F */
        0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
        ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -217,9 +237,14 @@ static u16 twobyte_table[256] = {
        /* 0x90 - 0x9F */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xA0 - 0xA7 */
-       0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+       0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+       DstMem | SrcReg | Src2ImmByte | ModRM,
+       DstMem | SrcReg | Src2CL | ModRM, 0, 0,
        /* 0xA8 - 0xAF */
-       0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+       0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+       DstMem | SrcReg | Src2ImmByte | ModRM,
+       DstMem | SrcReg | Src2CL | ModRM,
+       ModRM, 0,
        /* 0xB0 - 0xB7 */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
            DstMem | SrcReg | ModRM | BitOp,
@@ -240,7 +265,7 @@ static u16 twobyte_table[256] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static u16 group_table[] = {
+static u32 group_table[] = {
        [Group1_80*8] =
        ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
        ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -268,24 +293,25 @@ static u16 group_table[] = {
        ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
        0, 0, 0, 0,
        [Group3*8] =
-       DstMem | SrcImm | ModRM | SrcImm, 0,
-       DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+       DstMem | SrcImm | ModRM, 0,
+       DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
        0, 0, 0, 0,
        [Group4*8] =
        ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
        0, 0, 0, 0, 0, 0,
        [Group5*8] =
-       DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
-       SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
+       DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
+       SrcMem | ModRM | Stack, 0,
+       SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
        [Group7*8] =
        0, 0, ModRM | SrcMem, ModRM | SrcMem,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
 };
 
-static u16 group2_table[] = {
+static u32 group2_table[] = {
        [Group7*8] =
-       SrcNone | ModRM, 0, 0, 0,
+       SrcNone | ModRM, 0, 0, SrcNone | ModRM,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov, 0,
 };
@@ -345,49 +371,48 @@ static u16 group2_table[] = {
        "andl %"_msk",%"_LO32 _tmp"; "          \
        "orl  %"_LO32 _tmp",%"_sav"; "
 
+#ifdef CONFIG_X86_64
+#define ON64(x) x       /* 64-bit build: emit the wrapped code unchanged. */
+#else
+#define ON64(x)         /* Otherwise: the wrapped code expands to nothing. */
+#endif
+
+#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)     \
+       do { /* One operand size; _tmp must be declared by the user. */ \
+               __asm__ __volatile__ (                                  \
+                       _PRE_EFLAGS("0", "4", "2")                      \
+                       _op _suffix " %"_x"3,%1; " /* _op src,dst */    \
+                       _POST_EFLAGS("0", "4", "2")                     \
+                       : "=m" (_eflags), "+m" ((_dst).val), /* dst is read-modify-write */ \
+                         "=&r" (_tmp)                                  \
+                       : _y ((_src).val), "i" (EFLAGS_MASK));          \
+       } while (0)
+
+
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
-       do {                                                                \
-               unsigned long _tmp;                                         \
-                                                                           \
-               switch ((_dst).bytes) {                                     \
-               case 2:                                                     \
-                       __asm__ __volatile__ (                              \
-                               _PRE_EFLAGS("0", "4", "2")                  \
-                               _op"w %"_wx"3,%1; "                         \
-                               _POST_EFLAGS("0", "4", "2")                 \
-                               : "=m" (_eflags), "=m" ((_dst).val),        \
-                                 "=&r" (_tmp)                              \
-                               : _wy ((_src).val), "i" (EFLAGS_MASK));     \
-                       break;                                              \
-               case 4:                                                     \
-                       __asm__ __volatile__ (                              \
-                               _PRE_EFLAGS("0", "4", "2")                  \
-                               _op"l %"_lx"3,%1; "                         \
-                               _POST_EFLAGS("0", "4", "2")                 \
-                               : "=m" (_eflags), "=m" ((_dst).val),        \
-                                 "=&r" (_tmp)                              \
-                               : _ly ((_src).val), "i" (EFLAGS_MASK));     \
-                       break;                                              \
-               case 8:                                                     \
-                       __emulate_2op_8byte(_op, _src, _dst,                \
-                                           _eflags, _qx, _qy);             \
-                       break;                                              \
-               }                                                           \
+       do {                                                            \
+               unsigned long _tmp;                                     \
+                                                                       \
+               switch ((_dst).bytes) {                                 \
+               case 2:                                                 \
+                       ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
+                       break;                                          \
+               case 4:                                                 \
+                       ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
+                       break;                                          \
+               case 8:                                                 \
+                       ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
+                       break;                                          \
+               }                                                       \
        } while (0)
 
 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
        do {                                                                 \
-               unsigned long __tmp;                                         \
+               unsigned long _tmp;                                          \
                switch ((_dst).bytes) {                                      \
                case 1:                                                      \
-                       __asm__ __volatile__ (                               \
-                               _PRE_EFLAGS("0", "4", "2")                   \
-                               _op"b %"_bx"3,%1; "                          \
-                               _POST_EFLAGS("0", "4", "2")                  \
-                               : "=m" (_eflags), "=m" ((_dst).val),         \
-                                 "=&r" (__tmp)                              \
-                               : _by ((_src).val), "i" (EFLAGS_MASK));      \
+                       ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
                        break;                                               \
                default:                                                     \
                        __emulate_2op_nobyte(_op, _src, _dst, _eflags,       \
@@ -411,71 +436,68 @@ static u16 group2_table[] = {
        __emulate_2op_nobyte(_op, _src, _dst, _eflags,                  \
                             "w", "r", _LO32, "r", "", "r")
 
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags)                                    \
-       do {                                                            \
-               unsigned long _tmp;                                     \
-                                                                       \
-               switch ((_dst).bytes) {                                 \
-               case 1:                                                 \
-                       __asm__ __volatile__ (                          \
-                               _PRE_EFLAGS("0", "3", "2")              \
-                               _op"b %1; "                             \
-                               _POST_EFLAGS("0", "3", "2")             \
-                               : "=m" (_eflags), "=m" ((_dst).val),    \
-                                 "=&r" (_tmp)                          \
-                               : "i" (EFLAGS_MASK));                   \
-                       break;                                          \
-               case 2:                                                 \
-                       __asm__ __volatile__ (                          \
-                               _PRE_EFLAGS("0", "3", "2")              \
-                               _op"w %1; "                             \
-                               _POST_EFLAGS("0", "3", "2")             \
-                               : "=m" (_eflags), "=m" ((_dst).val),    \
-                                 "=&r" (_tmp)                          \
-                               : "i" (EFLAGS_MASK));                   \
-                       break;                                          \
-               case 4:                                                 \
-                       __asm__ __volatile__ (                          \
-                               _PRE_EFLAGS("0", "3", "2")              \
-                               _op"l %1; "                             \
-                               _POST_EFLAGS("0", "3", "2")             \
-                               : "=m" (_eflags), "=m" ((_dst).val),    \
-                                 "=&r" (_tmp)                          \
-                               : "i" (EFLAGS_MASK));                   \
-                       break;                                          \
-               case 8:                                                 \
-                       __emulate_1op_8byte(_op, _dst, _eflags);        \
-                       break;                                          \
-               }                                                       \
+/* Three-operand instruction: _cl is passed in the C register, _src in a register, _dst is read-write. */
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type)        \
+       do {                                                                    \
+               unsigned long _tmp;                                             \
+               _type _clv  = (_cl).val;   /* copies converted to _type */      \
+               _type _srcv = (_src).val;                                       \
+               _type _dstv = (_dst).val;                                       \
+                                                                               \
+               __asm__ __volatile__ (                                          \
+                       _PRE_EFLAGS("0", "5", "2")                              \
+                       _op _suffix " %4,%1 \n" /* %4 = _srcv, %1 = _dstv */    \
+                       _POST_EFLAGS("0", "5", "2")                             \
+                       : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)            \
+                       : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)           \
+                       );                                                      \
+                                                                               \
+               (_cl).val  = (unsigned long) _clv;  /* store all three back */  \
+               (_src).val = (unsigned long) _srcv;                             \
+               (_dst).val = (unsigned long) _dstv;                             \
        } while (0)
 
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(CONFIG_X86_64)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
-       do {                                                              \
-               __asm__ __volatile__ (                                    \
-                       _PRE_EFLAGS("0", "4", "2")                        \
-                       _op"q %"_qx"3,%1; "                               \
-                       _POST_EFLAGS("0", "4", "2")                       \
-                       : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-                       : _qy ((_src).val), "i" (EFLAGS_MASK));         \
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)                          \
+       do { /* Pick suffix and C type from (_dst).bytes; no 1-byte form. */    \
+               switch ((_dst).bytes) {                                         \
+               case 2:                                                         \
+                       __emulate_2op_cl(_op, _cl, _src, _dst, _eflags,         \
+                                               "w", unsigned short);           \
+                       break;                                                  \
+               case 4:                                                         \
+                       __emulate_2op_cl(_op, _cl, _src, _dst, _eflags,         \
+                                               "l", unsigned int);             \
+                       break;                                                  \
+               case 8: /* expands to nothing unless CONFIG_X86_64 */           \
+                       ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,    \
+                                               "q", unsigned long));           \
+                       break;                                                  \
+               }                                                               \
        } while (0)
 
-#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
-       do {                                                              \
-               __asm__ __volatile__ (                                    \
-                       _PRE_EFLAGS("0", "3", "2")                        \
-                       _op"q %1; "                                       \
-                       _POST_EFLAGS("0", "3", "2")                       \
-                       : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-                       : "i" (EFLAGS_MASK));                             \
+#define __emulate_1op(_op, _dst, _eflags, _suffix)                     \
+       do { /* Run "_op _suffix" on (_dst).val for one operand size. */ \
+               unsigned long _tmp;                                     \
+                                                                       \
+               __asm__ __volatile__ (                                  \
+                       _PRE_EFLAGS("0", "3", "2")                      \
+                       _op _suffix " %1; "                             \
+                       _POST_EFLAGS("0", "3", "2")                     \
+                       : "=m" (_eflags), "+m" ((_dst).val), /* dst is read and written */ \
+                         "=&r" (_tmp)                                  \
+                       : "i" (EFLAGS_MASK));                           \
        } while (0)
 
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
-#endif                         /* __i386__ */
+/* Instruction has only one explicit operand (no source operand); dispatches on (_dst).bytes. */
+#define emulate_1op(_op, _dst, _eflags)                                    \
+       do {                                                            \
+               switch ((_dst).bytes) {                                 \
+               case 1: __emulate_1op(_op, _dst, _eflags, "b"); break;  \
+               case 2: __emulate_1op(_op, _dst, _eflags, "w"); break;  \
+               case 4: __emulate_1op(_op, _dst, _eflags, "l"); break;  \
+               case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; /* empty unless CONFIG_X86_64 */ \
+               }                                                       \
+       } while (0)
 
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip)                                  \
@@ -522,6 +544,39 @@ static inline void jmp_rel(struct decode_cache *c, int rel)
        register_address_increment(c, &c->eip, rel);
 }
 
+static void set_seg_override(struct decode_cache *c, int seg)
+{      /* Record 'seg' as the segment override in the decode cache. */
+       c->has_seg_override = true;
+       c->seg_override = seg;
+}
+
+static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
+{      /* Return the base address to use for segment register 'seg'. */
+       if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)    /* 64-bit mode: segments numbered below FS use base 0 */
+               return 0;
+
+       return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);  /* otherwise query the vcpu through kvm_x86_ops */
+}
+
+static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
+                                      struct decode_cache *c)
+{      /* Base of the override segment recorded in 'c', or 0 if none was recorded. */
+       if (!c->has_seg_override)
+               return 0;
+
+       return seg_base(ctxt, c->seg_override);
+}
+
+static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
+{      /* Shorthand for seg_base() on VCPU_SREG_ES. */
+       return seg_base(ctxt, VCPU_SREG_ES);
+}
+
+static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
+{      /* Shorthand for seg_base() on VCPU_SREG_SS. */
+       return seg_base(ctxt, VCPU_SREG_SS);
+}
+
 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
                              struct x86_emulate_ops *ops,
                              unsigned long linear, u8 *dest)
@@ -664,7 +719,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 {
        struct decode_cache *c = &ctxt->decode;
        u8 sib;
-       int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+       int index_reg = 0, base_reg = 0, scale;
        int rc = 0;
 
        if (c->rex_prefix) {
@@ -735,14 +790,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                }
                if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
                    (c->modrm_rm == 6 && c->modrm_mod != 0))
-                       if (!c->override_base)
-                               c->override_base = &ctxt->ss_base;
+                       if (!c->has_seg_override)
+                               set_seg_override(c, VCPU_SREG_SS);
                c->modrm_ea = (u16)c->modrm_ea;
        } else {
                /* 32/64-bit ModR/M decode. */
-               switch (c->modrm_rm) {
-               case 4:
-               case 12:
+               if ((c->modrm_rm & 7) == 4) {
                        sib = insn_fetch(u8, 1, c->eip);
                        index_reg |= (sib >> 3) & 7;
                        base_reg |= sib & 7;
@@ -754,18 +807,11 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                                c->modrm_ea += c->regs[base_reg];
                        if (index_reg != 4)
                                c->modrm_ea += c->regs[index_reg] << scale;
-                       break;
-               case 5:
-               case 13:
-                       if (c->modrm_mod != 0)
-                               c->modrm_ea += c->regs[c->modrm_rm];
-                       else if (ctxt->mode == X86EMUL_MODE_PROT64)
-                               rip_relative = 1;
-                       break;
-               default:
+               } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
+                       if (ctxt->mode == X86EMUL_MODE_PROT64)
+                               c->rip_relative = 1;
+               } else
                        c->modrm_ea += c->regs[c->modrm_rm];
-                       break;
-               }
                switch (c->modrm_mod) {
                case 0:
                        if (c->modrm_rm == 5)
@@ -779,22 +825,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        break;
                }
        }
-       if (rip_relative) {
-               c->modrm_ea += c->eip;
-               switch (c->d & SrcMask) {
-               case SrcImmByte:
-                       c->modrm_ea += 1;
-                       break;
-               case SrcImm:
-                       if (c->d & ByteOp)
-                               c->modrm_ea += 1;
-                       else
-                               if (c->op_bytes == 8)
-                                       c->modrm_ea += 4;
-                               else
-                                       c->modrm_ea += c->op_bytes;
-               }
-       }
 done:
        return rc;
 }
@@ -831,7 +861,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        /* Shadow copy of register state. Committed on successful emulation. */
 
        memset(c, 0, sizeof(struct decode_cache));
-       c->eip = ctxt->vcpu->arch.rip;
+       c->eip = kvm_rip_read(ctxt->vcpu);
+       ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
        memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
 
        switch (mode) {
@@ -870,23 +901,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                                /* switch between 2/4 bytes */
                                c->ad_bytes = def_ad_bytes ^ 6;
                        break;
+               case 0x26:      /* ES override */
                case 0x2e:      /* CS override */
-                       c->override_base = &ctxt->cs_base;
-                       break;
+               case 0x36:      /* SS override */
                case 0x3e:      /* DS override */
-                       c->override_base = &ctxt->ds_base;
-                       break;
-               case 0x26:      /* ES override */
-                       c->override_base = &ctxt->es_base;
+                       set_seg_override(c, (c->b >> 3) & 3);
                        break;
                case 0x64:      /* FS override */
-                       c->override_base = &ctxt->fs_base;
-                       break;
                case 0x65:      /* GS override */
-                       c->override_base = &ctxt->gs_base;
-                       break;
-               case 0x36:      /* SS override */
-                       c->override_base = &ctxt->ss_base;
+                       set_seg_override(c, c->b & 7);
                        break;
                case 0x40 ... 0x4f: /* REX */
                        if (mode != X86EMUL_MODE_PROT64)
@@ -958,15 +981,11 @@ done_prefixes:
        if (rc)
                goto done;
 
-       if (!c->override_base)
-               c->override_base = &ctxt->ds_base;
-       if (mode == X86EMUL_MODE_PROT64 &&
-           c->override_base != &ctxt->fs_base &&
-           c->override_base != &ctxt->gs_base)
-               c->override_base = NULL;
+       if (!c->has_seg_override)
+               set_seg_override(c, VCPU_SREG_DS);
 
-       if (c->override_base)
-               c->modrm_ea += *c->override_base;
+       if (!(!c->twobyte && c->b == 0x8d))
+               c->modrm_ea += seg_override_base(ctxt, c);
 
        if (c->ad_bytes != 8)
                c->modrm_ea = (u32)c->modrm_ea;
@@ -1030,6 +1049,33 @@ done_prefixes:
                c->src.bytes = 1;
                c->src.val = insn_fetch(s8, 1, c->eip);
                break;
+       case SrcOne:
+               c->src.bytes = 1;
+               c->src.val = 1;
+               break;
+       }
+
+       /*
+        * Decode and fetch the second source operand: the count in CL,
+        * an 8-bit immediate, or an implied 1.
+        */
+       switch (c->d & Src2Mask) {
+       case Src2None:
+               break;
+       case Src2CL:
+               c->src2.bytes = 1;
+               c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;    /* CL is the low byte of RCX; 0x8 kept only bit 3 */
+               break;
+       case Src2ImmByte:
+               c->src2.type = OP_IMM;
+               c->src2.ptr = (unsigned long *)c->eip;
+               c->src2.bytes = 1;
+               c->src2.val = insn_fetch(u8, 1, c->eip);        /* fetch one immediate byte at c->eip */
+               break;
+       case Src2One:
+               c->src2.bytes = 1;
+               c->src2.val = 1;
+               break;
        }
 
        /* Decode and fetch the destination operand: register or memory. */
@@ -1051,8 +1097,31 @@ done_prefixes:
                }
                c->dst.type = OP_MEM;
                break;
+       case DstAcc:    /* destination is the accumulator (RAX slot of c->regs) */
+               c->dst.type = OP_REG;
+               c->dst.bytes = c->op_bytes;
+               c->dst.ptr = &c->regs[VCPU_REGS_RAX];
+               switch (c->op_bytes) {  /* read the current value at operand width */
+                       case 1:
+                               c->dst.val = *(u8 *)c->dst.ptr;
+                               break;
+                       case 2:
+                               c->dst.val = *(u16 *)c->dst.ptr;
+                               break;
+                       case 4:
+                               c->dst.val = *(u32 *)c->dst.ptr;
+                               break;
+                       case 8: /* 64-bit operand size: previously left dst.val at 0 */
+                               c->dst.val = *(u64 *)c->dst.ptr;
+                               break;
+               }
+               c->dst.orig_val = c->dst.val;
+               break;
        }
 
+       if (c->rip_relative)    /* added last, once c->eip is past all fetched bytes */
+               c->modrm_ea += c->eip;
+
 done:
        return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
 }
@@ -1065,24 +1131,37 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
        c->dst.bytes = c->op_bytes;
        c->dst.val = c->src.val;
        register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
-       c->dst.ptr = (void *) register_address(c, ctxt->ss_base,
+       c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
                                               c->regs[VCPU_REGS_RSP]);
 }
 
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
-                               struct x86_emulate_ops *ops)
+static int emulate_pop(struct x86_emulate_ctxt *ctxt,
+                      struct x86_emulate_ops *ops)
 {
        struct decode_cache *c = &ctxt->decode;
        int rc;
 
-       rc = ops->read_std(register_address(c, ctxt->ss_base,
-                                           c->regs[VCPU_REGS_RSP]),
-                          &c->dst.val, c->dst.bytes, ctxt->vcpu);
+       rc = ops->read_emulated(register_address(c, ss_base(ctxt),
+                                                c->regs[VCPU_REGS_RSP]),
+                               &c->src.val, c->src.bytes, ctxt->vcpu);
        if (rc != 0)
                return rc;
 
-       register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
+       register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+       return rc;
+}
 
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
+                               struct x86_emulate_ops *ops)
+{
+       struct decode_cache *c = &ctxt->decode;
+       int rc;
+
+       c->src.bytes = c->dst.bytes;
+       rc = emulate_pop(ctxt, ops);
+       if (rc != 0)
+               return rc;
+       c->dst.val = c->src.val;
        return 0;
 }
 
@@ -1151,6 +1230,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
        case 1: /* dec */
                emulate_1op("dec", c->dst, ctxt->eflags);
                break;
+       case 2: /* call near abs */ {
+               long int old_eip;
+               old_eip = c->eip;
+               c->eip = c->src.val;
+               c->src.val = old_eip;
+               emulate_push(ctxt);
+               break;
+       }
        case 4: /* jmp abs */
                c->eip = c->src.val;
                break;
@@ -1251,6 +1338,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        u64 msr_data;
        unsigned long saved_eip = 0;
        struct decode_cache *c = &ctxt->decode;
+       unsigned int port;
+       int io_dir_in;
        int rc = 0;
 
        /* Shadow copy of register state. Committed on successful emulation.
@@ -1267,7 +1356,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        if (c->rep_prefix && (c->d & String)) {
                /* All REP prefixes have the same first termination condition */
                if (c->regs[VCPU_REGS_RCX] == 0) {
-                       ctxt->vcpu->arch.rip = c->eip;
+                       kvm_rip_write(ctxt->vcpu, c->eip);
                        goto done;
                }
                /* The second termination condition only applies for REPE
@@ -1281,17 +1370,17 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                                (c->b == 0xae) || (c->b == 0xaf)) {
                        if ((c->rep_prefix == REPE_PREFIX) &&
                                ((ctxt->eflags & EFLG_ZF) == 0)) {
-                                       ctxt->vcpu->arch.rip = c->eip;
+                                       kvm_rip_write(ctxt->vcpu, c->eip);
                                        goto done;
                        }
                        if ((c->rep_prefix == REPNE_PREFIX) &&
                                ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
-                               ctxt->vcpu->arch.rip = c->eip;
+                               kvm_rip_write(ctxt->vcpu, c->eip);
                                goto done;
                        }
                }
                c->regs[VCPU_REGS_RCX]--;
-               c->eip = ctxt->vcpu->arch.rip;
+               c->eip = kvm_rip_read(ctxt->vcpu);
        }
 
        if (c->src.type == OP_MEM) {
@@ -1351,27 +1440,10 @@ special_insn:
              sbb:              /* sbb */
                emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
                break;
-       case 0x20 ... 0x23:
+       case 0x20 ... 0x25:
              and:              /* and */
                emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
                break;
-       case 0x24:              /* and al imm8 */
-               c->dst.type = OP_REG;
-               c->dst.ptr = &c->regs[VCPU_REGS_RAX];
-               c->dst.val = *(u8 *)c->dst.ptr;
-               c->dst.bytes = 1;
-               c->dst.orig_val = c->dst.val;
-               goto and;
-       case 0x25:              /* and ax imm16, or eax imm32 */
-               c->dst.type = OP_REG;
-               c->dst.bytes = c->op_bytes;
-               c->dst.ptr = &c->regs[VCPU_REGS_RAX];
-               if (c->op_bytes == 2)
-                       c->dst.val = *(u16 *)c->dst.ptr;
-               else
-                       c->dst.val = *(u32 *)c->dst.ptr;
-               c->dst.orig_val = c->dst.val;
-               goto and;
        case 0x28 ... 0x2d:
              sub:              /* sub */
                emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
@@ -1391,24 +1463,15 @@ special_insn:
                emulate_1op("dec", c->dst, ctxt->eflags);
                break;
        case 0x50 ... 0x57:  /* push reg */
-               c->dst.type  = OP_MEM;
-               c->dst.bytes = c->op_bytes;
-               c->dst.val = c->src.val;
-               register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-                                          -c->op_bytes);
-               c->dst.ptr = (void *) register_address(
-                       c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
+               emulate_push(ctxt);
                break;
        case 0x58 ... 0x5f: /* pop reg */
        pop_instruction:
-               if ((rc = ops->read_std(register_address(c, ctxt->ss_base,
-                       c->regs[VCPU_REGS_RSP]), c->dst.ptr,
-                       c->op_bytes, ctxt->vcpu)) != 0)
+               c->src.bytes = c->op_bytes;
+               rc = emulate_pop(ctxt, ops);
+               if (rc != 0)
                        goto done;
-
-               register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-                                          c->op_bytes);
-               c->dst.type = OP_NONE;  /* Disable writeback. */
+               c->dst.val = c->src.val;
                break;
        case 0x63:              /* movsxd */
                if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1427,7 +1490,7 @@ special_insn:
                                c->rep_prefix ?
                                address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
                                (ctxt->eflags & EFLG_DF),
-                               register_address(c, ctxt->es_base,
+                               register_address(c, es_base(ctxt),
                                                 c->regs[VCPU_REGS_RDI]),
                                c->rep_prefix,
                                c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1443,9 +1506,8 @@ special_insn:
                                c->rep_prefix ?
                                address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
                                (ctxt->eflags & EFLG_DF),
-                               register_address(c, c->override_base ?
-                                                       *c->override_base :
-                                                       ctxt->ds_base,
+                                        register_address(c,
+                                         seg_override_base(ctxt, c),
                                                 c->regs[VCPU_REGS_RSI]),
                                c->rep_prefix,
                                c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1568,7 +1630,9 @@ special_insn:
                emulate_push(ctxt);
                break;
        case 0x9d: /* popf */
+               c->dst.type = OP_REG;
                c->dst.ptr = (unsigned long *) &ctxt->eflags;
+               c->dst.bytes = c->op_bytes;
                goto pop_instruction;
        case 0xa0 ... 0xa1:     /* mov */
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1581,11 +1645,10 @@ special_insn:
                c->dst.type = OP_MEM;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)register_address(c,
-                                                  ctxt->es_base,
+                                                  es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
                if ((rc = ops->read_emulated(register_address(c,
-                     c->override_base ? *c->override_base :
-                                       ctxt->ds_base,
+                                          seg_override_base(ctxt, c),
                                        c->regs[VCPU_REGS_RSI]),
                                        &c->dst.val,
                                        c->dst.bytes, ctxt->vcpu)) != 0)
@@ -1601,8 +1664,7 @@ special_insn:
                c->src.type = OP_NONE; /* Disable writeback. */
                c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->src.ptr = (unsigned long *)register_address(c,
-                               c->override_base ? *c->override_base :
-                                                  ctxt->ds_base,
+                                      seg_override_base(ctxt, c),
                                                   c->regs[VCPU_REGS_RSI]);
                if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
                                                &c->src.val,
@@ -1613,7 +1675,7 @@ special_insn:
                c->dst.type = OP_NONE; /* Disable writeback. */
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)register_address(c,
-                                                  ctxt->es_base,
+                                                  es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
                if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
                                                &c->dst.val,
@@ -1637,7 +1699,7 @@ special_insn:
                c->dst.type = OP_MEM;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)register_address(c,
-                                                  ctxt->es_base,
+                                                  es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
                c->dst.val = c->regs[VCPU_REGS_RAX];
                register_address_increment(c, &c->regs[VCPU_REGS_RDI],
@@ -1649,8 +1711,7 @@ special_insn:
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
                if ((rc = ops->read_emulated(register_address(c,
-                               c->override_base ? *c->override_base :
-                                                  ctxt->ds_base,
+                                                seg_override_base(ctxt, c),
                                                 c->regs[VCPU_REGS_RSI]),
                                                 &c->dst.val,
                                                 c->dst.bytes,
@@ -1663,13 +1724,15 @@ special_insn:
        case 0xae ... 0xaf:     /* scas */
                DPRINTF("Urk! I don't handle SCAS.\n");
                goto cannot_emulate;
-       case 0xb8: /* mov r, imm */
+       case 0xb0 ... 0xbf: /* mov r, imm */
                goto mov;
        case 0xc0 ... 0xc1:
                emulate_grp2(ctxt);
                break;
        case 0xc3: /* ret */
+               c->dst.type = OP_REG;
                c->dst.ptr = &c->eip;
+               c->dst.bytes = c->op_bytes;
                goto pop_instruction;
        case 0xc6 ... 0xc7:     /* mov (sole member of Grp11) */
        mov:
@@ -1683,6 +1746,16 @@ special_insn:
                c->src.val = c->regs[VCPU_REGS_RCX];
                emulate_grp2(ctxt);
                break;
+       case 0xe4:      /* inb */
+       case 0xe5:      /* in */
+               port = insn_fetch(u8, 1, c->eip);
+               io_dir_in = 1;
+               goto do_io;
+       case 0xe6: /* outb */
+       case 0xe7: /* out */
+               port = insn_fetch(u8, 1, c->eip);
+               io_dir_in = 0;
+               goto do_io;
        case 0xe8: /* call (near) */ {
                long int rel;
                switch (c->op_bytes) {
@@ -1733,9 +1806,25 @@ special_insn:
                jmp_rel(c, c->src.val);
                c->dst.type = OP_NONE; /* Disable writeback. */
                break;
+       case 0xec: /* in al,dx */
+       case 0xed: /* in (e/r)ax,dx */
+               port = c->regs[VCPU_REGS_RDX] & 0xffff; /* port is DX only */
+               io_dir_in = 1;
+               goto do_io;
+       case 0xee: /* out al,dx */
+       case 0xef: /* out (e/r)ax,dx */
+               port = c->regs[VCPU_REGS_RDX] & 0xffff; /* port is DX only */
+               io_dir_in = 0;
+       do_io:  if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
+                                  (c->d & ByteOp) ? 1 : c->op_bytes,
+                                  port) != 0) {
+                       c->eip = saved_eip;
+                       goto cannot_emulate;
+               }
+               break;
        case 0xf4:              /* hlt */
                ctxt->vcpu->arch.halt_request = 1;
-               goto done;
+               break;
        case 0xf5:      /* cmc */
                /* complement carry flag from eflags reg */
                ctxt->eflags ^= EFLG_CF;
@@ -1758,6 +1847,14 @@ special_insn:
                ctxt->eflags |= X86_EFLAGS_IF;
                c->dst.type = OP_NONE;  /* Disable writeback. */
                break;
+       case 0xfc: /* cld */
+               ctxt->eflags &= ~EFLG_DF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
+       case 0xfd: /* std */
+               ctxt->eflags |= EFLG_DF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
        case 0xfe ... 0xff:     /* Grp4/Grp5 */
                rc = emulate_grp45(ctxt, ops);
                if (rc != 0)
@@ -1772,7 +1869,7 @@ writeback:
 
        /* Commit shadow register state. */
        memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
-       ctxt->vcpu->arch.rip = c->eip;
+       kvm_rip_write(ctxt->vcpu, c->eip);
 
 done:
        if (rc == X86EMUL_UNHANDLEABLE) {
@@ -1797,7 +1894,7 @@ twobyte_insn:
                                goto done;
 
                        /* Let the processor re-execute the fixed hypercall */
-                       c->eip = ctxt->vcpu->arch.rip;
+                       c->eip = kvm_rip_read(ctxt->vcpu);
                        /* Disable writeback. */
                        c->dst.type = OP_NONE;
                        break;
@@ -1893,7 +1990,7 @@ twobyte_insn:
                rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
                if (rc) {
                        kvm_inject_gp(ctxt->vcpu, 0);
-                       c->eip = ctxt->vcpu->arch.rip;
+                       c->eip = kvm_rip_read(ctxt->vcpu);
                }
                rc = X86EMUL_CONTINUE;
                c->dst.type = OP_NONE;
@@ -1903,7 +2000,7 @@ twobyte_insn:
                rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
                if (rc) {
                        kvm_inject_gp(ctxt->vcpu, 0);
-                       c->eip = ctxt->vcpu->arch.rip;
+                       c->eip = kvm_rip_read(ctxt->vcpu);
                } else {
                        c->regs[VCPU_REGS_RAX] = (u32)msr_data;
                        c->regs[VCPU_REGS_RDX] = msr_data >> 32;
@@ -1945,12 +2042,22 @@ twobyte_insn:
                c->src.val &= (c->dst.bytes << 3) - 1;
                emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
                break;
+       case 0xa4: /* shld imm8, r, r/m */
+       case 0xa5: /* shld cl, r, r/m */
+               emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
+               break;
        case 0xab:
              bts:              /* bts */
                /* only subword offset */
                c->src.val &= (c->dst.bytes << 3) - 1;
                emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
                break;
+       case 0xac: /* shrd imm8, r, r/m */
+       case 0xad: /* shrd cl, r, r/m */
+               emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
+               break;
+       case 0xae:              /* clflush */
+               break;
        case 0xb0 ... 0xb1:     /* cmpxchg */
                /*
                 * Save real source value, then compare EAX against