/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */
#ifndef __KERNEL__
#include <stdio.h>
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * never be emulated.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)	/* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)	/* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)	/* Memory operand is absolute displacement */
#define String      (1<<12)	/* String instruction (rep capable) */
#define Stack       (1<<13)	/* Stack instruction (push/pop) */
#define Group       (1<<14)	/* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)	/* Alternate decoding of mod == 3 */
#define GroupMask   0xff	/* Group number stored in bits 0:7 */
/* Misc flags */
#define Lock        (1<<26)	/* lock prefix is allowed for the instruction */
#define Priv        (1<<27)	/* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm16   (4<<29)
#define Src2Mem16   (5<<29)	/* Used for Ep encoding. First argument has to be
				   in memory and second argument is located
				   immediately after the first one in memory. */
#define Src2Mask    (7<<29)
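
/*
 * Illustrative sketch (not part of the original source): each table entry
 * below packs the fields above into a single u32, which the decoder later
 * unpacks with the *Mask constants.  For example:
 *
 *	u32 d = ByteOp | DstMem | SrcReg | ModRM;	// e.g. 0x88 mov r/m8,r8
 *	int byteop = d & ByteOp;	// 8-bit operands
 *	int dst    = d & DstMask;	// == DstMem
 *	int src    = d & SrcMask;	// == SrcReg
 *	int modrm  = d & ModRM;		// a ModRM byte follows the opcode
 */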
enum {
	Group1_80, Group1_81, Group1_82, Group1_83,
	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
	Group8, Group9,
};
static u32 opcode_table[256] = {
	/* 0x00 - 0x07 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x08 - 0x0F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, 0,
	/* 0x10 - 0x17 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x18 - 0x1F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x20 - 0x27 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
	/* 0x28 - 0x2F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	0, 0, 0, 0,
	/* 0x30 - 0x37 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	0, 0, 0, 0,
	/* 0x38 - 0x3F */
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	0, 0,
	/* 0x40 - 0x47 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x48 - 0x4F */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x50 - 0x57 */
	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
	/* 0x58 - 0x5F */
	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
	/* 0x60 - 0x67 */
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
	0, 0, 0, 0,
	/* 0x68 - 0x6F */
	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
	/* 0x70 - 0x77 */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x78 - 0x7F */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x80 - 0x87 */
	Group | Group1_80, Group | Group1_81,
	Group | Group1_82, Group | Group1_83,
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	/* 0x88 - 0x8F */
	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
	/* 0x90 - 0x97 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x98 - 0x9F */
	0, 0, SrcImm | Src2Imm16 | No64, 0,
	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
	/* 0xA0 - 0xA7 */
	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
	ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
	ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
	/* 0xA8 - 0xAF */
	0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
	ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
	ByteOp | DstDI | String, DstDI | String,
	/* 0xB0 - 0xB7 */
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	/* 0xB8 - 0xBF */
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	/* 0xC0 - 0xC7 */
	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
	0, ImplicitOps | Stack, 0, 0,
	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
	/* 0xC8 - 0xCF */
	0, 0, 0, ImplicitOps | Stack,
	ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
	/* 0xD0 - 0xD7 */
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	0, 0, 0, 0,
	/* 0xD8 - 0xDF */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xE7 */
	0, 0, 0, 0,
	ByteOp | SrcImmUByte, SrcImmUByte,
	ByteOp | SrcImmUByte, SrcImmUByte,
	/* 0xE8 - 0xEF */
	SrcImm | Stack, SrcImm | ImplicitOps,
	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	/* 0xF0 - 0xF7 */
	0, 0, 0, 0,
	ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
	/* 0xF8 - 0xFF */
	ImplicitOps, 0, ImplicitOps, ImplicitOps,
	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
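
/*
 * Reading the table (illustrative, not from the original source): the entry
 * for opcode 0x89 (mov r/m16/32, r16/32) above is
 * DstMem | SrcReg | ModRM | Mov, i.e. a ModRM-encoded store of a register
 * source into a memory (or, with mod == 3, register) destination:
 *
 *	u32 d = opcode_table[0x89];	// DstMem | SrcReg | ModRM | Mov
 */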
static u32 twobyte_table[256] = {
	/* 0x00 - 0x0F */
	0, Group | GroupDual | Group7, 0, 0,
	0, ImplicitOps, ImplicitOps | Priv, 0,
	ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
	0, ImplicitOps | ModRM, 0, 0,
	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F */
	ModRM | ImplicitOps | Priv, ModRM | Priv,
	ModRM | ImplicitOps | Priv, ModRM | Priv,
	0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 - 0x3F */
	ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
	ImplicitOps, ImplicitOps | Priv, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x47 */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	/* 0x48 - 0x4F */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	/* 0x50 - 0x5F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 - 0x6F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 - 0x7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	/* 0x90 - 0x9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xA7 */
	ImplicitOps | Stack, ImplicitOps | Stack,
	0, DstMem | SrcReg | ModRM | BitOp,
	DstMem | SrcReg | Src2ImmByte | ModRM,
	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
	/* 0xA8 - 0xAF */
	ImplicitOps | Stack, ImplicitOps | Stack,
	0, DstMem | SrcReg | ModRM | BitOp | Lock,
	DstMem | SrcReg | Src2ImmByte | ModRM,
	DstMem | SrcReg | Src2CL | ModRM,
	ModRM, 0,
	/* 0xB0 - 0xB7 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	0, DstMem | SrcReg | ModRM | BitOp | Lock,
	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem16 | ModRM | Mov,
	/* 0xB8 - 0xBF */
	0, 0,
	Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem16 | ModRM | Mov,
	/* 0xC0 - 0xCF */
	0, 0, 0, DstMem | SrcReg | ModRM | Mov,
	0, 0, 0, Group | GroupDual | Group9,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static u32 group_table[] = {
	[Group1_80*8] =
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM,
	[Group1_81*8] =
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM,
	[Group1_82*8] =
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64,
	[Group1_83*8] =
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM,
	[Group1A*8] =
	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
	[Group3_Byte*8] =
	ByteOp | SrcImm | DstMem | ModRM, 0,
	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
	0, 0, 0, 0,
	[Group3*8] =
	DstMem | SrcImm | ModRM, 0,
	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
	0, 0, 0, 0,
	[Group4*8] =
	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
	0, 0, 0, 0, 0, 0,
	[Group5*8] =
	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
	SrcMem | ModRM | Stack, 0,
	SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
	SrcMem | ModRM | Stack, 0,
	[Group7*8] =
	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
	SrcNone | ModRM | DstMem | Mov, 0,
	SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
	[Group8*8] =
	0, 0, 0, 0,
	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
	[Group9*8] =
	0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
};
static u32 group2_table[] = {
	[Group7*8] =
	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
	SrcNone | ModRM | DstMem | Mov, 0,
	SrcMem16 | ModRM | Mov | Priv, 0,
	[Group9*8] =
	0, 0, 0, 0, 0, 0, 0, 0,
};
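
/*
 * Group decode, illustrated (sketch, not from the original source): for
 * opcode 0xF7 the main table holds Group | Group3, so the decoder peeks at
 * the ModRM byte and re-indexes:
 *
 *	group = (Group3 << 3) + ((modrm >> 3) & 7);
 *	d = group_table[group];		// or group2_table[] when mod == 3
 *					// and the GroupDual flag is set
 */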
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
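
/*
 * Worked value (illustrative): EFLAGS_MASK is the set of arithmetic flags,
 * OF|SF|ZF|AF|PF|CF = 0x800|0x80|0x40|0x10|0x4|0x1 = 0x8d5.  Only these
 * bits are shuttled between the guest's saved EFLAGS and the host EFLAGS
 * around the emulated instruction.
 */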
/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "					\
	"push %"_tmp"; "						\
	"push %"_tmp"; "						\
	"movl %"_msk",%"_LO32 _tmp"; "					\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"pushf; "							\
	"notl %"_LO32 _tmp"; "						\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "						\
	"orl  %"_LO32 _tmp",("_STK"); "					\
	"popf; "							\
	"pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp)		\
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop %"_LO32 _tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "=m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)

/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
			break;						\
		}							\
	} while (0)

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
		switch ((_dst).bytes) {					\
		case 1:							\
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
			break;						\
		default:						\
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	\
					     _wx, _wy, _lx, _ly, _qx, _qy); \
			break;						\
		}							\
	} while (0)

/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)			\
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)			\
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)		\
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")
/* Instruction has three operands and one operand is stored in the ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
	do {								\
		unsigned long _tmp;					\
		_type _clv  = (_cl).val;				\
		_type _srcv = (_src).val;				\
		_type _dstv = (_dst).val;				\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "2")			\
			_op _suffix " %4,%1 \n"				\
			_POST_EFLAGS("0", "5", "2")			\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)	\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\
			);						\
									\
		(_cl).val  = (unsigned long) _clv;			\
		(_src).val = (unsigned long) _srcv;			\
		(_dst).val = (unsigned long) _dstv;			\
	} while (0)

#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)			\
	do {								\
		switch ((_dst).bytes) {					\
		case 2:							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
					 "w", unsigned short);		\
			break;						\
		case 4:							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
					 "l", unsigned int);		\
			break;						\
		case 8:							\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
					      "q", unsigned long));	\
			break;						\
		}							\
	} while (0)
#define __emulate_1op(_op, _dst, _eflags, _suffix)	\
	do {						\
		unsigned long _tmp;			\
							\
		__asm__ __volatile__ (			\
			_PRE_EFLAGS("0", "3", "2")	\
			_op _suffix " %1; "		\
			_POST_EFLAGS("0", "3", "2")	\
			: "=m" (_eflags), "+m" ((_dst).val), \
			  "=&r" (_tmp)			\
			: "i" (EFLAGS_MASK));		\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags)				\
	do {							\
		switch ((_dst).bytes) {				\
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break; \
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break; \
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break; \
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}						\
	} while (0)
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip)					\
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
	(_type)(_x);							\
})
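
/*
 * Illustrative note: insn_fetch() is a statement expression whose value is
 * the fetched datum, so callers can write e.g.
 *
 *	c->modrm = insn_fetch(u8, 1, c->eip);
 *
 * On a failed fetch it stores the error in the caller's local 'rc' and
 * jumps to the caller's 'done' label, so it is only usable inside
 * functions that provide both.
 */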
static inline unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
{
	if (c->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(c);
}

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}
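
/*
 * Worked example (illustrative): with a 16-bit address size, ad_bytes == 2,
 * so ad_mask() yields (1UL << 16) - 1 = 0xffff and only the low 16 bits of
 * the register participate in effective addresses:
 *
 *	register_address(c, es_base, 0x12345678)  ==  es_base + 0x5678
 */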
static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}

static inline void jmp_rel(struct decode_cache *c, int rel)
{
	register_address_increment(c, &c->eip, rel);
}
static void set_seg_override(struct decode_cache *c, int seg)
{
	c->has_seg_override = true;
	c->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
}

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
				       struct decode_cache *c)
{
	if (!c->has_seg_override)
		return 0;

	return seg_base(ctxt, c->seg_override);
}

static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
{
	return seg_base(ctxt, VCPU_SREG_ES);
}

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
{
	return seg_base(ctxt, VCPU_SREG_SS);
}
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops,
			      unsigned long linear, u8 *dest)
{
	struct fetch_cache *fc = &ctxt->decode.fetch;
	int rc;
	int size;

	if (linear < fc->start || linear >= fc->end) {
		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
		rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		fc->start = linear;
		fc->end = linear + size;
	}
	*dest = fc->data[linear - fc->start];
	return X86EMUL_CONTINUE;
}

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long eip, void *dest, unsigned size)
{
	int rc;

	/* x86 instructions are limited to 15 bytes. */
	if (eip + size - ctxt->eip > 15)
		return X86EMUL_UNHANDLEABLE;
	eip += ctxt->cs_base;
	while (size--) {
		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
	return X86EMUL_CONTINUE;
}
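
/*
 * Illustrative note: the 15-byte check is against the architectural maximum
 * instruction length, measured from the start of decode (ctxt->eip), not
 * from the current fetch pointer.  A decode that tried to consume a 16th
 * byte, e.g. via a long run of prefixes, fails with X86EMUL_UNHANDLEABLE
 * rather than fetching past the limit.
 */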
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
			     int highbyte_regs)
{
	void *p;

	p = &regs[modrm_reg];
	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}
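
/*
 * Worked example (illustrative): without a REX prefix, byte registers 4-7
 * are the legacy high-byte registers.  modrm_reg == 5 with highbyte_regs
 * therefore maps to &regs[5 & 3] + 1, i.e. byte 1 of the RCX slot, which
 * is CH; with highbyte_regs == 0 the same encoding selects the RBP slot.
 */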
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   void *ptr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
			   ctxt->vcpu, NULL);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
			   ctxt->vcpu, NULL);
	return rc;
}
static int test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
}
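
/*
 * Worked example (illustrative): for JNE/JNZ the condition code is 0x5.
 * (0x5 & 15) >> 1 == 2, so rc = flags & EFLG_ZF; the odd identifier then
 * inverts the sense, and test_cc() returns true exactly when ZF is clear.
 */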
static void decode_register_operand(struct operand *op,
				    struct decode_cache *c,
				    int inhibit_bytereg)
{
	unsigned reg = c->modrm_reg;
	int highbyte_regs = c->rex_prefix == 0;

	if (!(c->d & ModRM))
		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
	op->type = OP_REG;
	if ((c->d & ByteOp) && !inhibit_bytereg) {
		op->ptr = decode_register(reg, c->regs, highbyte_regs);
		op->val = *(u8 *)op->ptr;
		op->bytes = 1;
	} else {
		op->ptr = decode_register(reg, c->regs, 0);
		op->bytes = c->op_bytes;
		switch (op->bytes) {
		case 2:
			op->val = *(u16 *)op->ptr;
			break;
		case 4:
			op->val = *(u32 *)op->ptr;
			break;
		case 8:
			op->val = *(u64 *)op->ptr;
			break;
		}
	}
	op->orig_val = op->val;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u8 sib;
	int index_reg = 0, base_reg = 0, scale;
	int rc = X86EMUL_CONTINUE;

	if (c->rex_prefix) {
		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (c->rex_prefix & 2) << 2;		/* REX.X */
		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
	}

	c->modrm = insn_fetch(u8, 1, c->eip);
	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
	c->modrm_reg |= (c->modrm & 0x38) >> 3;
	c->modrm_rm |= (c->modrm & 0x07);
	c->modrm_ea = 0;

	if (c->modrm_mod == 3) {
		c->modrm_ptr = decode_register(c->modrm_rm,
					       c->regs, c->d & ByteOp);
		c->modrm_val = *(unsigned long *)c->modrm_ptr;
		return rc;
	}

	if (c->ad_bytes == 2) {
		unsigned bx = c->regs[VCPU_REGS_RBX];
		unsigned bp = c->regs[VCPU_REGS_RBP];
		unsigned si = c->regs[VCPU_REGS_RSI];
		unsigned di = c->regs[VCPU_REGS_RDI];

		/* 16-bit ModR/M decode. */
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 6)
				c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		}
		switch (c->modrm_rm) {
		case 0:
			c->modrm_ea += bx + si;
			break;
		case 1:
			c->modrm_ea += bx + di;
			break;
		case 2:
			c->modrm_ea += bp + si;
			break;
		case 3:
			c->modrm_ea += bp + di;
			break;
		case 4:
			c->modrm_ea += si;
			break;
		case 5:
			c->modrm_ea += di;
			break;
		case 6:
			if (c->modrm_mod != 0)
				c->modrm_ea += bp;
			break;
		case 7:
			c->modrm_ea += bx;
			break;
		}
		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
		    (c->modrm_rm == 6 && c->modrm_mod != 0))
			if (!c->has_seg_override)
				set_seg_override(c, VCPU_SREG_SS);
		c->modrm_ea = (u16)c->modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((c->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, 1, c->eip);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			else
				c->modrm_ea += c->regs[base_reg];
			if (index_reg != 4)
				c->modrm_ea += c->regs[index_reg] << scale;
		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				c->rip_relative = 1;
		} else
			c->modrm_ea += c->regs[c->modrm_rm];
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 5)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		}
	}
done:
	return rc;
}
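
/*
 * Worked example (illustrative, 16-bit ModR/M): modrm == 0x47 splits into
 * mod = 01, reg = 000, rm = 111.  mod 01 adds a sign-extended 8-bit
 * displacement and rm 111 selects BX, so the effective address is
 * (u16)(bx + disp8) relative to the selected segment.
 */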
static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;

	switch (c->ad_bytes) {
	case 2:
		c->modrm_ea = insn_fetch(u16, 2, c->eip);
		break;
	case 4:
		c->modrm_ea = insn_fetch(u32, 4, c->eip);
		break;
	case 8:
		c->modrm_ea = insn_fetch(u64, 8, c->eip);
		break;
	}
done:
	return rc;
}
int
x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, group;

	/* Shadow copy of register state. Committed on successful emulation. */

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = ctxt->eip;
	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return -1;
	}

	c->op_bytes = def_op_bytes;
	c->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (c->b = insn_fetch(u8, 1, c->eip)) {
		case 0x66:	/* operand-size override */
			/* switch between 2/4 bytes */
			c->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				c->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				c->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			set_seg_override(c, (c->b >> 3) & 3);
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			set_seg_override(c, c->b & 7);
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			c->rex_prefix = c->b;
			continue;
		case 0xf0:	/* LOCK */
			c->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
			c->rep_prefix = REPNE_PREFIX;
			break;
		case 0xf3:	/* REP/REPE/REPZ */
			c->rep_prefix = REPE_PREFIX;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		c->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (c->rex_prefix)
		if (c->rex_prefix & 8)
			c->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	c->d = opcode_table[c->b];
	if (c->d == 0) {
		/* Two-byte opcode? */
		if (c->b == 0x0f) {
			c->twobyte = 1;
			c->b = insn_fetch(u8, 1, c->eip);
			c->d = twobyte_table[c->b];
		}
	}

	if (c->d & Group) {
		group = c->d & GroupMask;
		c->modrm = insn_fetch(u8, 1, c->eip);
		--c->eip;

		group = (group << 3) + ((c->modrm >> 3) & 7);
		if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
			c->d = group2_table[group];
		else
			c->d = group_table[group];
	}

	/* Unrecognised? */
	if (c->d == 0) {
		DPRINTF("Cannot emulate %02x\n", c->b);
		return -1;
	}

	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
		c->op_bytes = 8;

	/* ModRM and SIB bytes. */
	if (c->d & ModRM)
		rc = decode_modrm(ctxt, ops);
	else if (c->d & MemAbs)
		rc = decode_abs(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!c->has_seg_override)
		set_seg_override(c, VCPU_SREG_DS);

	if (!(!c->twobyte && c->b == 0x8d))
		c->modrm_ea += seg_override_base(ctxt, c);

	if (c->ad_bytes != 8)
		c->modrm_ea = (u32)c->modrm_ea;

	if (c->rip_relative)
		c->modrm_ea += c->eip;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & SrcMask) {
	case SrcNone:
		break;
	case SrcReg:
		decode_register_operand(&c->src, c, 0);
		break;
	case SrcMem16:
		c->src.bytes = 2;
		goto srcmem_common;
	case SrcMem32:
		c->src.bytes = 4;
		goto srcmem_common;
	case SrcMem:
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		/* Don't fetch the address for invlpg: it could be unmapped. */
		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
			break;
	srcmem_common:
		/*
		 * For instructions with a ModR/M byte, switch to register
		 * access if Mod = 3.
		 */
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->src.type = OP_REG;
			c->src.val = c->modrm_val;
			c->src.ptr = c->modrm_ptr;
			break;
		}
		c->src.type = OP_MEM;
		c->src.ptr = (unsigned long *)c->modrm_ea;
		c->src.val = 0;
		break;
	case SrcImm:
	case SrcImmU:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		if (c->src.bytes == 8)
			c->src.bytes = 4;
		/* NB. Immediates are sign-extended as necessary. */
		switch (c->src.bytes) {
		case 1:
			c->src.val = insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->src.val = insn_fetch(s16, 2, c->eip);
			break;
		case 4:
			c->src.val = insn_fetch(s32, 4, c->eip);
			break;
		}
		if ((c->d & SrcMask) == SrcImmU) {
			switch (c->src.bytes) {
			case 1:
				c->src.val &= 0xff;
				break;
			case 2:
				c->src.val &= 0xffff;
				break;
			case 4:
				c->src.val &= 0xffffffff;
				break;
			}
		}
		break;
	case SrcImmByte:
	case SrcImmUByte:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = 1;
		if ((c->d & SrcMask) == SrcImmByte)
			c->src.val = insn_fetch(s8, 1, c->eip);
		else
			c->src.val = insn_fetch(u8, 1, c->eip);
		break;
	case SrcOne:
		c->src.bytes = 1;
		c->src.val = 1;
		break;
	case SrcSI:
		c->src.type = OP_MEM;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->src.ptr = (unsigned long *)
			register_address(c, seg_override_base(ctxt, c),
					 c->regs[VCPU_REGS_RSI]);
		c->src.val = 0;
		break;
	}

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & Src2Mask) {
	case Src2None:
		break;
	case Src2CL:
		c->src2.bytes = 1;
		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
		break;
	case Src2ImmByte:
		c->src2.type = OP_IMM;
		c->src2.ptr = (unsigned long *)c->eip;
		c->src2.bytes = 1;
		c->src2.val = insn_fetch(u8, 1, c->eip);
		break;
	case Src2Imm16:
		c->src2.type = OP_IMM;
		c->src2.ptr = (unsigned long *)c->eip;
		c->src2.bytes = 2;
		c->src2.val = insn_fetch(u16, 2, c->eip);
		break;
	case Src2One:
		c->src2.bytes = 1;
		c->src2.val = 1;
		break;
	case Src2Mem16:
		c->src2.type = OP_MEM;
		c->src2.bytes = 2;
		c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes);
		c->src2.val = 0;
		break;
	}

	/* Decode and fetch the destination operand: register or memory. */
	switch (c->d & DstMask) {
	case ImplicitOps:
		/* Special instructions do their own operand decoding. */
		return 0;
	case DstReg:
		decode_register_operand(&c->dst, c,
			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
		break;
	case DstMem:
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
			c->dst.type = OP_REG;
			c->dst.val = c->dst.orig_val = c->modrm_val;
			c->dst.ptr = c->modrm_ptr;
			break;
		}
		c->dst.type = OP_MEM;
		c->dst.ptr = (unsigned long *)c->modrm_ea;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.val = 0;
		if (c->d & BitOp) {
			unsigned long mask = ~(c->dst.bytes * 8 - 1);

			c->dst.ptr = (void *)c->dst.ptr +
					(c->src.val & mask) / 8;
		}
		break;
	case DstAcc:
		c->dst.type = OP_REG;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
		switch (c->dst.bytes) {
		case 1:
			c->dst.val = *(u8 *)c->dst.ptr;
			break;
		case 2:
			c->dst.val = *(u16 *)c->dst.ptr;
			break;
		case 4:
			c->dst.val = *(u32 *)c->dst.ptr;
			break;
		case 8:
			c->dst.val = *(u64 *)c->dst.ptr;
			break;
		}
		c->dst.orig_val = c->dst.val;
		break;
	case DstDI:
		c->dst.type = OP_MEM;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = (unsigned long *)
			register_address(c, es_base(ctxt),
					 c->regs[VCPU_REGS_RDI]);
		c->dst.val = 0;
		break;
	}

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}
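
/*
 * Decode walk-through (illustrative): in 16-bit mode the three bytes
 * 0x89 0x47 0x12 ("mov %ax,0x12(%bx)") decode as opcode 0x89 ->
 * DstMem | SrcReg | ModRM | Mov; decode_modrm() then computes
 * modrm_ea = bx + 0x12, the source becomes AX (reg field 0) and the
 * destination an OP_MEM operand of op_bytes size at DS:modrm_ea.
 */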
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
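
/*
 * Worked example (illustrative): with the granularity bit set, a raw limit
 * of 0xf scales to (0xf << 12) | 0xfff = 0xffff, i.e. page-granular
 * descriptors express their limit in 4KiB units.
 */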
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     struct x86_emulate_ops *ops,
				     u16 selector, struct desc_ptr *dt)
{
	if (selector & 1 << 2) {
		struct desc_struct desc;
		memset(dt, 0, sizeof *dt);
		if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc);
	} else
		ops->get_gdt(dt, ctxt->vcpu);
}
/* allowed only for 8-byte segment descriptors */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	int ret;
	u32 err;
	ulong addr;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}
	addr = dt.address + index * 8;
	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		kvm_inject_page_fault(ctxt->vcpu, addr, err);

	return ret;
}
/* allowed only for 8-byte segment descriptors */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    struct x86_emulate_ops *ops,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	u32 err;
	ulong addr;
	int ret;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	addr = dt.address + index * 8;
	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		kvm_inject_page_fault(ctxt->vcpu, addr, err);

	return ret;
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	int ret;

	memset(&seg_desc, 0, sizeof seg_desc);

	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
	    || ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		goto load;
	}

	/* NULL selector is not valid for TR, CS and SS */
	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load a system descriptor into a segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	rpl = selector & 3;
	dpl = seg_desc.dpl;
	cpl = ops->cpl(ctxt->vcpu);

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment, or the selector's
		 * RPL != CPL, or the descriptor's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /* DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ops->set_segment_selector(selector, seg, ctxt->vcpu);
	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
	return X86EMUL_CONTINUE;
exception:
	kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
	return X86EMUL_PROPAGATE_FAULT;
}
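
/*
 * Illustrative note on the type checks above: in a descriptor's type field
 * bit 3 distinguishes code (set) from data (clear), bit 1 is W for data and
 * R for code.  So "(type & 0xa) != 0x2" rejects anything that is not
 * writable data for SS, and "(type & 0xa) == 0x8" rejects execute-only
 * code segments for the plain data-segment registers.
 */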
static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;

	c->dst.type  = OP_MEM;
	c->dst.bytes = c->op_bytes;
	c->dst.val = c->src.val;
	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
	c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
					       c->regs[VCPU_REGS_RSP]);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
						 c->regs[VCPU_REGS_RSP]),
				dest, len, ctxt->vcpu);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
	return rc;
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	int cpl = ops->cpl(ctxt->vcpu);

	rc = emulate_pop(ctxt, ops, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}
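
/*
 * Worked example (illustrative): popf at CPL 3 with IOPL 0 in protected
 * mode takes the first case: cpl != 0 keeps EFLG_IOPL out of change_mask
 * and cpl > iopl keeps EFLG_IF out, so the popped value can change the
 * arithmetic flags but silently leaves IF and IOPL untouched.
 */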
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment segment;

	kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);

	c->src.val = segment.selector;
	emulate_push(ctxt);
}

static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
	return rc;
}
static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);

		emulate_push(ctxt);
		++reg;
	}
}

static int emulate_popa(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
						   c->op_bytes);
			--reg;
			continue;
		}

		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
	}
	return rc;
}
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
}

static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	switch (c->modrm_reg) {
	case 0:	/* rol */
		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
		break;
	case 1:	/* ror */
		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* rcl */
		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
		break;
	case 3:	/* rcr */
		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
		break;
	case 5:	/* shr */
		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
		break;
	case 7:	/* sar */
		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
		break;
	}
}
static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0 ... 1:	/* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* not */
		c->dst.val = ~c->dst.val;
		break;
	case 3:	/* neg */
		emulate_1op("neg", c->dst, ctxt->eflags);
		break;
	default:
		return 0;
	}
	return 1;
}

static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0:	/* inc */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 1:	/* dec */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = c->eip;
		c->eip = c->src.val;
		c->src.val = old_eip;
		emulate_push(ctxt);
		break;
	}
	case 4: /* jmp abs */
		c->eip = c->src.val;
		break;
	case 6:	/* push */
		emulate_push(ctxt);
		break;
	}
	return X86EMUL_CONTINUE;
}
static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u64 old, new;
	int rc;

	rc = ops->read_emulated(c->modrm_ea, &old, 8, ctxt->vcpu);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {

		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
		ctxt->eflags &= ~EFLG_ZF;

	} else {
		new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
		       (u32) c->regs[VCPU_REGS_RBX];

		rc = ops->cmpxchg_emulated(c->modrm_ea, &old, &new, 8, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		ctxt->eflags |= EFLG_ZF;
	}
	return X86EMUL_CONTINUE;
}
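
/*
 * Illustrative note: emulate_grp9() implements cmpxchg8b - it compares
 * EDX:EAX with the 64-bit memory operand, storing the old value into
 * EDX:EAX and clearing ZF on mismatch, or writing ECX:EBX and setting ZF
 * on match.
 */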
static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
	unsigned long cs;

	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (c->op_bytes == 4)
		c->eip = (u32)c->eip;
	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
	return rc;
}
static inline int writeback(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops)
{
	int rc;
	struct decode_cache *c = &ctxt->decode;

	switch (c->dst.type) {
	case OP_REG:
		/* The 4-byte case *is* correct:
		 * in 64-bit mode we zero-extend.
		 */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *)c->dst.ptr = (u8)c->dst.val;
			break;
		case 2:
			*(u16 *)c->dst.ptr = (u16)c->dst.val;
			break;
		case 4:
			*c->dst.ptr = (u32)c->dst.val;
			break;	/* 64b: zero-ext */
		case 8:
			*c->dst.ptr = c->dst.val;
			break;
		}
		break;
	case OP_MEM:
		if (c->lock_prefix)
			rc = ops->cmpxchg_emulated(
					(unsigned long)c->dst.ptr,
					&c->dst.orig_val,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		else
			rc = ops->write_emulated(
					(unsigned long)c->dst.ptr,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}
static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
	/*
	 * An sti; sti; sequence only disables interrupts for the first
	 * instruction. So, if the last instruction, be it emulated or
	 * not, left the system with the INT_STI flag enabled, it
	 * means that the last instruction was an sti. We should not
	 * leave the flag on in this case. The same goes for mov ss.
	 */
	if (!(int_shadow & mask))
		ctxt->interruptibility = mask;
}
static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct kvm_segment *cs, struct kvm_segment *ss)
{
	memset(cs, 0, sizeof(struct kvm_segment));
	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
	memset(ss, 0, sizeof(struct kvm_segment));

	cs->l = 0;		/* will be adjusted later */
	cs->base = 0;		/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	cs->limit = 0xffffffff;	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->present = 1;
	cs->db = 1;

	ss->unusable = 0;
	ss->base = 0;		/* flat segment */
	ss->limit = 0xffffffff;	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->db = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->present = 1;
}
static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs.selector = (u16)(msr_data & 0xfffc);
	ss.selector = (u16)(msr_data + 8);

	if (is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}
	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->regs[VCPU_REGS_RCX] = c->eip;
	if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		kvm_x86_ops->get_msr(ctxt->vcpu,
				     ctxt->mode == X86EMUL_MODE_PROT64 ?
				     MSR_LSTAR : MSR_CSTAR, &msr_data);
		c->eip = msr_data;

		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
		c->eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}
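
/*
 * MSR layout note (illustrative): the MSR_STAR value is consumed as
 * msr_data >>= 32, CS.sel = msr_data & 0xfffc and SS.sel = msr_data + 8,
 * i.e. bits 47:32 of STAR hold the syscall CS with SS assumed to follow
 * it in the GDT.
 */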
static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	/* XXX sysenter/sysexit have not been tested in 64bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs.selector = (u16)msr_data;
	cs.selector &= ~SELECTOR_RPL_MASK;
	ss.selector = cs.selector + 8;
	ss.selector &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64
	    || is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
	c->eip = msr_data;

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
	c->regs[VCPU_REGS_RSP] = msr_data;

	return X86EMUL_CONTINUE;
}
static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;
	int usermode;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	if ((c->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs.selector = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss.selector = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs.selector = (u16)(msr_data + 32);
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss.selector = cs.selector + 8;
		cs.db = 0;
		cs.l = 1;
		break;
	}
	cs.selector |= SELECTOR_RPL_MASK;
	ss.selector |= SELECTOR_RPL_MASK;

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];

	return X86EMUL_CONTINUE;
}
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	return ops->cpl(ctxt->vcpu) > iopl;
}
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    struct x86_emulate_ops *ops,
					    u16 port, u16 len)
{
	struct kvm_segment tr_seg;
	int r;
	u16 io_bitmap_ptr;
	u8 perm, bit_idx = port & 0x7;
	unsigned mask = (1 << len) - 1;

	kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
	if (tr_seg.unusable)
		return false;
	if (tr_seg.limit < 103)
		return false;
	r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
			  NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > tr_seg.limit)
		return false;
	r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
			  ctxt->vcpu, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}
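
/*
 * Worked example (illustrative): for a 1-byte access to port 0x3f8,
 * port/8 == 127 indexes the permission byte, bit_idx == 0 and mask == 1,
 * so the access is allowed only if bit 0 of byte io_bitmap_ptr + 127 in
 * the TSS is clear.
 */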
static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 u16 port, u16 len)
{
	if (emulator_bad_iopl(ctxt, ops))
		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
			return false;
	return true;
}
static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
				      struct x86_emulate_ops *ops,
				      int seg)
{
	struct desc_struct desc;
	if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
		return get_desc_base(&desc);
	else
		return ~0;
}
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops,
				struct tss_segment_16 *tss)
{
	struct decode_cache *c = &ctxt->decode;

	tss->ip = c->eip;
	tss->flag = ctxt->eflags;
	tss->ax = c->regs[VCPU_REGS_RAX];
	tss->cx = c->regs[VCPU_REGS_RCX];
	tss->dx = c->regs[VCPU_REGS_RDX];
	tss->bx = c->regs[VCPU_REGS_RBX];
	tss->sp = c->regs[VCPU_REGS_RSP];
	tss->bp = c->regs[VCPU_REGS_RBP];
	tss->si = c->regs[VCPU_REGS_RSI];
	tss->di = c->regs[VCPU_REGS_RDI];

	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 struct tss_segment_16 *tss)
{
	struct decode_cache *c = &ctxt->decode;
	int ret;

	c->eip = tss->ip;
	ctxt->eflags = tss->flag | 2;
	c->regs[VCPU_REGS_RAX] = tss->ax;
	c->regs[VCPU_REGS_RCX] = tss->cx;
	c->regs[VCPU_REGS_RDX] = tss->dx;
	c->regs[VCPU_REGS_RBX] = tss->bx;
	c->regs[VCPU_REGS_RSP] = tss->sp;
	c->regs[VCPU_REGS_RBP] = tss->bp;
	c->regs[VCPU_REGS_RSI] = tss->si;
	c->regs[VCPU_REGS_RDI] = tss->di;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);

	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task.
	 */
	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}
static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_16 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	save_state_to_tss16(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss16(ctxt, ops, &tss_seg);
}
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops,
				struct tss_segment_32 *tss)
{
	struct decode_cache *c = &ctxt->decode;

	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
	tss->eip = c->eip;
	tss->eflags = ctxt->eflags;
	tss->eax = c->regs[VCPU_REGS_RAX];
	tss->ecx = c->regs[VCPU_REGS_RCX];
	tss->edx = c->regs[VCPU_REGS_RDX];
	tss->ebx = c->regs[VCPU_REGS_RBX];
	tss->esp = c->regs[VCPU_REGS_RSP];
	tss->ebp = c->regs[VCPU_REGS_RBP];
	tss->esi = c->regs[VCPU_REGS_RSI];
	tss->edi = c->regs[VCPU_REGS_RDI];

	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 struct tss_segment_32 *tss)
{
	struct decode_cache *c = &ctxt->decode;
	int ret;

	ops->set_cr(3, tss->cr3, ctxt->vcpu);
	c->eip = tss->eip;
	ctxt->eflags = tss->eflags | 2;
	c->regs[VCPU_REGS_RAX] = tss->eax;
	c->regs[VCPU_REGS_RCX] = tss->ecx;
	c->regs[VCPU_REGS_RDX] = tss->edx;
	c->regs[VCPU_REGS_RBX] = tss->ebx;
	c->regs[VCPU_REGS_RSP] = tss->esp;
	c->regs[VCPU_REGS_RBP] = tss->ebp;
	c->regs[VCPU_REGS_RSI] = tss->esi;
	c->regs[VCPU_REGS_RDI] = tss->edi;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);

	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task.
	 */
	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	save_state_to_tss32(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss32(ctxt, ops, &tss_seg);
}
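/*
 * The descriptor-limit checks below follow the SDM: a 32-bit TSS
 * occupies 104 bytes (so its limit must be at least 0x67) and a 16-bit
 * TSS occupies 44 bytes (limit at least 0x2b); a non-present or
 * undersized TSS yields #TS with the new selector as the error code.
 */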
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 tss_selector, int reason)
{
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
	ulong old_tss_base =
		get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
	u32 desc_limit;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */
	if (reason != TASK_SWITCH_IRET) {
		if ((tss_selector & 3) > next_tss_desc.dpl ||
		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
	}

	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
				      tss_selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}
	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, ops, old_tss_sel,
					 &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/*
	 * Set the back link to the previous task only if the NT bit is
	 * set in EFLAGS; note that old_tss_sel is not used after this
	 * point.
	 */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff; /* no back link */

	if (next_tss_desc.type & 8)	/* 32-bit TSS */
		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, ops, tss_selector,
					 &next_tss_desc);
	}

	ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);

	return ret;
}
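/*
 * Summary of the busy-flag and NT bookkeeping above, per switch reason
 * (SDM vol. 3a, "Task Management"):
 *
 *	reason		old TSS busy	new TSS busy	EFLAGS.NT	back link
 *	JMP		cleared		set		unchanged	not written
 *	CALL/gate	left set	set		set		written
 *	IRET		cleared		already set	cleared		not written
 */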
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 u16 tss_selector, int reason)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = ctxt->eip;
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason);

	if (rc == X86EMUL_CONTINUE) {
		memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
		kvm_rip_write(ctxt->vcpu, c->eip);
	}

	return rc;
}
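/*
 * Note the commit discipline above: emulator_do_task_switch() works on
 * the decode-cache copy of the register file, and the result is written
 * back to the vcpu only on success, so a task switch that faults part
 * way through leaves the architectural register state untouched.
 */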
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
			    int reg, struct operand *op)
{
	struct decode_cache *c = &ctxt->decode;
	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;

	register_address_increment(c, &c->regs[reg], df * op->bytes);
	op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
}
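/*
 * Example of the stepping above: for MOVSD (op->bytes == 4) with DF
 * clear, RSI and RDI each advance by +4 per iteration; after STD sets
 * DF, they step by -4 instead, walking the string backwards.
 *
 * Main emulation entry point follows. The flow is roughly:
 *
 *	guard checks (No64, LOCK prefix validity, CPL for Priv insns)
 *	-> REP termination tests for string instructions
 *	-> fetch of memory operands (src, src2, dst)
 *	-> opcode-specific execution (the big switch)
 *	-> writeback, RSI/RDI/RCX update for string instructions
 *	-> commit of shadow registers and RIP
 */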
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	u64 msr_data;
	unsigned long saved_eip = 0;
	struct decode_cache *c = &ctxt->decode;
	unsigned int port;
	int io_dir_in;
	int rc = X86EMUL_CONTINUE;

	ctxt->interruptibility = 0;

	/*
	 * Shadow copy of register state. Committed on successful emulation.
	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
	 * modify them.
	 */
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
	saved_eip = c->eip;
	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* The LOCK prefix is allowed only with certain instructions. */
	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* Privileged instructions can be executed only at CPL 0. */
	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		goto done;
	}
	if (c->rep_prefix && (c->d & String)) {
		/* All REP prefixes share the same first termination condition. */
		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
			kvm_rip_write(ctxt->vcpu, c->eip);
			goto done;
		}
		/*
		 * The second termination condition applies only to REPE
		 * and REPNE: if the repeat prefix is REPE/REPZ or
		 * REPNE/REPNZ, test the corresponding condition:
		 * - if REPE/REPZ and ZF = 0 then done
		 * - if REPNE/REPNZ and ZF = 1 then done
		 */
		if ((c->b == 0xa6) || (c->b == 0xa7) ||
		    (c->b == 0xae) || (c->b == 0xaf)) {
			if ((c->rep_prefix == REPE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == 0)) {
				kvm_rip_write(ctxt->vcpu, c->eip);
				goto done;
			}
			if ((c->rep_prefix == REPNE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
				kvm_rip_write(ctxt->vcpu, c->eip);
				goto done;
			}
		}
		c->eip = ctxt->eip;
	}
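	/*
	 * Example: "repe cmpsb" with RCX == 3 comparing "abc" with "abd"
	 * leaves ZF set for the first two bytes; the third byte clears
	 * ZF, so the REPE test above terminates the loop even though
	 * RCX has not yet reached zero.
	 */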
	if (c->src.type == OP_MEM) {
		rc = ops->read_emulated((unsigned long)c->src.ptr,
					&c->src.val,
					c->src.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->src.orig_val = c->src.val;
	}

	if (c->src2.type == OP_MEM) {
		rc = ops->read_emulated((unsigned long)c->src2.ptr,
					&c->src2.val,
					c->src2.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((c->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val,
					c->dst.bytes, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	c->dst.orig_val = c->dst.val;
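	/*
	 * c->dst.orig_val is kept so that a locked memory destination can
	 * later be committed by writeback() with a compare-and-exchange
	 * against the value originally read, rather than a plain store.
	 */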
special_insn:

	if (c->twobyte)
		goto twobyte_insn;

	switch (c->b) {
	case 0x00 ... 0x05:
	add:			/* add */
		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
		break;
	case 0x06:		/* push es */
		emulate_push_sreg(ctxt, VCPU_SREG_ES);
		break;
	case 0x07:		/* pop es */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x08 ... 0x0d:
	or:			/* or */
		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
		break;
	case 0x0e:		/* push cs */
		emulate_push_sreg(ctxt, VCPU_SREG_CS);
		break;
	case 0x10 ... 0x15:
	adc:			/* adc */
		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
		break;
	case 0x16:		/* push ss */
		emulate_push_sreg(ctxt, VCPU_SREG_SS);
		break;
	case 0x17:		/* pop ss */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x18 ... 0x1d:
	sbb:			/* sbb */
		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
		break;
	case 0x1e:		/* push ds */
		emulate_push_sreg(ctxt, VCPU_SREG_DS);
		break;
	case 0x1f:		/* pop ds */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x20 ... 0x25:
	and:			/* and */
		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
		break;
	case 0x28 ... 0x2d:
	sub:			/* sub */
		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
		break;
	case 0x30 ... 0x35:
	xor:			/* xor */
		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
		break;
	case 0x38 ... 0x3d:
	cmp:			/* cmp */
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		break;
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 0x50 ... 0x57: /* push reg */
		emulate_push(ctxt);
		break;
	case 0x58 ... 0x5f: /* pop reg */
	pop_instruction:
		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x60:	/* pusha */
		emulate_pusha(ctxt);
		break;
	case 0x61:	/* popa */
		rc = emulate_popa(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x63:	/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		c->dst.val = (s32) c->src.val;
		break;
	case 0x68: /* push imm */
	case 0x6a: /* push imm8 */
		emulate_push(ctxt);
		break;
	case 0x6c:		/* insb */
	case 0x6d:		/* insw/insd */
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (kvm_emulate_pio_string(ctxt->vcpu,
				1,
				(c->d & ByteOp) ? 1 : c->op_bytes,
				c->rep_prefix ?
				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
				(ctxt->eflags & EFLG_DF),
				register_address(c, es_base(ctxt),
						 c->regs[VCPU_REGS_RDI]),
				c->rep_prefix,
				c->regs[VCPU_REGS_RDX]) == 0) {
			c->eip = saved_eip;
			return -1;
		}
		return 0;
	case 0x6e:		/* outsb */
	case 0x6f:		/* outsw/outsd */
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (kvm_emulate_pio_string(ctxt->vcpu,
				0,
				(c->d & ByteOp) ? 1 : c->op_bytes,
				c->rep_prefix ?
				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
				(ctxt->eflags & EFLG_DF),
				register_address(c,
						 seg_override_base(ctxt, c),
						 c->regs[VCPU_REGS_RSI]),
				c->rep_prefix,
				c->regs[VCPU_REGS_RDX]) == 0) {
			c->eip = saved_eip;
			return -1;
		}
		return 0;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		break;
	case 0x80 ... 0x83: /* Grp1 */
		switch (c->modrm_reg) {
		case 0: goto add;
		case 1: goto or;
		case 2: goto adc;
		case 3: goto sbb;
		case 4: goto and;
		case 5: goto sub;
		case 6: goto xor;
		case 7: goto cmp;
		}
		break;
	case 0x84 ... 0x85: /* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
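	/*
	 * XCHG with a memory operand is locked even without an explicit
	 * LOCK prefix, so the case below forces c->lock_prefix before
	 * writeback commits the memory half.
	 */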
	case 0x86 ... 0x87: /* xchg */
	xchg:
		/* Write back the register source. */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *) c->src.ptr = (u8) c->dst.val;
			break;
		case 2:
			*(u16 *) c->src.ptr = (u16) c->dst.val;
			break;
		case 4:
			*c->src.ptr = (u32) c->dst.val;
			break;	/* 64b reg: zero-extend */
		case 8:
			*c->src.ptr = c->dst.val;
			break;
		}
		/*
		 * Write back the memory destination with implicit LOCK
		 * prefix.
		 */
		c->dst.val = c->src.val;
		c->lock_prefix = 1;
		break;
	case 0x88 ... 0x8b: /* mov */
		goto mov;
	case 0x8c: { /* mov r/m, sreg */
		struct kvm_segment segreg;

		if (c->modrm_reg <= VCPU_SREG_GS)
			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
		else {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->dst.val = segreg.selector;
		break;
	}
	case 0x8d: /* lea r16/r32, m */
		c->dst.val = c->modrm_ea;
		break;
	case 0x8e: { /* mov seg, r/m16 */
		uint16_t sel;

		sel = c->src.val;

		if (c->modrm_reg == VCPU_SREG_CS ||
		    c->modrm_reg > VCPU_SREG_GS) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}

		if (c->modrm_reg == VCPU_SREG_SS)
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);

		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);

		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	}
	case 0x8f: /* pop (sole member of Grp1a) */
		rc = emulate_grp1a(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x90: /* nop / xchg r8,rax */
		if (!(c->rex_prefix & 1)) { /* nop */
			c->dst.type = OP_NONE;
			break;
		}
	case 0x91 ... 0x97: /* xchg reg,rax */
		c->src.type = c->dst.type = OP_REG;
		c->src.bytes = c->dst.bytes = c->op_bytes;
		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
		c->src.val = *(c->src.ptr);
		goto xchg;
	case 0x9c: /* pushf */
		c->src.val = (unsigned long) ctxt->eflags;
		emulate_push(ctxt);
		break;
	case 0x9d: /* popf */
		c->dst.type = OP_REG;
		c->dst.ptr = (unsigned long *) &ctxt->eflags;
		c->dst.bytes = c->op_bytes;
		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa0 ... 0xa1: /* mov */
		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		c->dst.val = c->src.val;
		break;
	case 0xa2 ... 0xa3: /* mov */
		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
		break;
	case 0xa4 ... 0xa5: /* movs */
		goto mov;
	case 0xa6 ... 0xa7: /* cmps */
		c->dst.type = OP_NONE; /* Disable writeback. */
		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
		break;
	case 0xaa ... 0xab: /* stos */
		c->dst.val = c->regs[VCPU_REGS_RAX];
		break;
	case 0xac ... 0xad: /* lods */
		goto mov;
	case 0xae ... 0xaf: /* scas */
		DPRINTF("Urk! I don't handle SCAS.\n");
		goto cannot_emulate;
	case 0xb0 ... 0xbf: /* mov r, imm */
		goto mov;
	case 0xc0 ... 0xc1: /* shift Grp2 */
		emulate_grp2(ctxt);
		break;
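	/*
	 * Near RET below is modeled as a pop into c->eip: the destination
	 * is pointed at the instruction pointer and control reuses the
	 * pop_instruction code shared with the 0x58-0x5f pop cases.
	 */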
	case 0xc3: /* ret */
		c->dst.type = OP_REG;
		c->dst.ptr = &c->eip;
		c->dst.bytes = c->op_bytes;
		goto pop_instruction;
	case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
	mov:
		c->dst.val = c->src.val;
		break;
	case 0xcb: /* ret far */
		rc = emulate_ret_far(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xd0 ... 0xd1: /* Grp2 */
		c->src.val = 1;
		emulate_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3: /* Grp2 */
		c->src.val = c->regs[VCPU_REGS_RCX];
		emulate_grp2(ctxt);
		break;
	case 0xe4: /* inb */
	case 0xe5: /* in */
		port = c->src.val;
		io_dir_in = 1;
		goto do_io;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		port = c->src.val;
		io_dir_in = 0;
		goto do_io;
	case 0xe8: /* call (near) */ {
		long int rel = c->src.val;
		c->src.val = (unsigned long) c->eip;
		jmp_rel(c, rel);
		emulate_push(ctxt);
		break;
	}
	case 0xe9: /* jmp rel */
		goto jmp;
	case 0xea: /* jmp far */
	jump_far:
		if (load_segment_descriptor(ctxt, ops, c->src2.val,
					    VCPU_SREG_CS))
			goto done;

		c->eip = c->src.val;
		break;
	case 0xeb:
	jmp:	/* jmp rel short */
		jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xec: /* in al,dx */
	case 0xed: /* in (e/r)ax,dx */
		port = c->regs[VCPU_REGS_RDX];
		io_dir_in = 1;
		goto do_io;
	case 0xee: /* out al,dx */
	case 0xef: /* out (e/r)ax,dx */
		port = c->regs[VCPU_REGS_RDX];
		io_dir_in = 0;
	do_io:
		if (!emulator_io_permited(ctxt, ops, port,
					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
				    (c->d & ByteOp) ? 1 : c->op_bytes,
				    port) != 0) {
			c->eip = saved_eip;
			goto cannot_emulate;
		}
		break;
	case 0xf4: /* hlt */
		ctxt->vcpu->arch.halt_request = 1;
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf6 ... 0xf7: /* Grp3 */
		if (!emulate_grp3(ctxt, ops))
			goto cannot_emulate;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
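	/*
	 * CLI/STI fault with #GP(0) when CPL > IOPL. STI must additionally
	 * keep interrupts blocked until the following instruction retires,
	 * so in the classic "sti; hlt" sequence no interrupt can slip in
	 * between the two instructions; the KVM_X86_SHADOW_INT_STI
	 * interruptibility flag records that one-instruction shadow.
	 */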
	case 0xfa: /* cli */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			ctxt->eflags &= ~X86_EFLAGS_IF;
			c->dst.type = OP_NONE; /* Disable writeback. */
		}
		break;
	case 0xfb: /* sti */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
			ctxt->eflags |= X86_EFLAGS_IF;
			c->dst.type = OP_NONE; /* Disable writeback. */
		}
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xfe: /* Grp4 */
	grp45:
		rc = emulate_grp45(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xff: /* Grp5 */
		if (c->modrm_reg == 5)
			goto jump_far;
		goto grp45;
	}
writeback:
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if ((c->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
				&c->src);

	if ((c->d & DstMask) == DstDI)
		string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);

	if (c->rep_prefix && (c->d & String))
		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);

	/* Commit shadow register state. */
	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
	kvm_rip_write(ctxt->vcpu, c->eip);

done:
	if (rc == X86EMUL_UNHANDLEABLE) {
		c->eip = saved_eip;
		return -1;
	}
	return 0;
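/*
 * Two-byte (0x0f-escaped) opcodes are handled below; the decoder sets
 * c->twobyte after consuming the 0x0f escape byte.
 */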
twobyte_insn:
	switch (c->b) {
	case 0x01: /* lgdt, lidt, lmsw */
		switch (c->modrm_reg) {
			u16 size;
			unsigned long address;

		case 0: /* vmcall */
			if (c->modrm_mod != 3 || c->modrm_rm != 1)
				goto cannot_emulate;

			rc = kvm_fix_hypercall(ctxt->vcpu);
			if (rc != X86EMUL_CONTINUE)
				goto done;

			/* Let the processor re-execute the fixed hypercall */
			c->eip = ctxt->eip;
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 2: /* lgdt */
			rc = read_descriptor(ctxt, ops, c->src.ptr,
					     &size, &address, c->op_bytes);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			realmode_lgdt(ctxt->vcpu, size, address);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 3: /* lidt/vmmcall */
			if (c->modrm_mod == 3) {
				switch (c->modrm_rm) {
				case 1:
					rc = kvm_fix_hypercall(ctxt->vcpu);
					if (rc != X86EMUL_CONTINUE)
						goto done;
					break;
				default:
					goto cannot_emulate;
				}
			} else {
				rc = read_descriptor(ctxt, ops, c->src.ptr,
						     &size, &address,
						     c->op_bytes);
				if (rc != X86EMUL_CONTINUE)
					goto done;
				realmode_lidt(ctxt->vcpu, size, address);
			}
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 4: /* smsw */
			c->dst.val = ops->get_cr(0, ctxt->vcpu);
			break;
		case 6: /* lmsw */
			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
				    (c->src.val & 0x0f), ctxt->vcpu);
			c->dst.type = OP_NONE;
			break;
		case 5: /* not defined */
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		case 7: /* invlpg */
			emulate_invlpg(ctxt->vcpu, c->modrm_ea);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		default:
			goto cannot_emulate;
		}
		break;
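	/*
	 * The SYSCALL/SYSENTER/SYSEXIT family is emulated largely because
	 * the two x86 vendors disagree about these instructions outside
	 * 64-bit mode (SYSCALL, for instance, is undefined on Intel CPUs
	 * in legacy and compatibility modes), so a guest migrated between
	 * vendors may trap here and need software emulation.
	 */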
	case 0x05: /* syscall */
		rc = emulate_syscall(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x06: /* clts */
		emulate_clts(ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x08: /* invd */
	case 0x09: /* wbinvd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
		c->dst.type = OP_NONE;
		break;
	case 0x20: /* mov cr, reg */
		switch (c->modrm_reg) {
		case 1:
		case 5 ... 7:
		case 9 ... 15:
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
		c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x21: /* mov from dr to reg */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x22: /* mov reg, cr */
		ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x23: /* mov from reg to dr */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE; /* no writeback */
		break;
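	/*
	 * WRMSR and RDMSR transfer MSR[ECX] through the EDX:EAX pair, so
	 * the two 32-bit halves are spliced into (or split out of) one
	 * 64-bit msr_data value below; a rejected access injects #GP(0)
	 * into the guest instead of completing the move.
	 */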
	case 0x30:
		/* wrmsr */
		msr_data = (u32)c->regs[VCPU_REGS_RAX]
			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
		if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			c->eip = ctxt->eip;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x32:
		/* rdmsr */
		if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			c->eip = ctxt->eip;
		} else {
			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x34: /* sysenter */
		rc = emulate_sysenter(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x35: /* sysexit */
		rc = emulate_sysexit(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x40 ... 0x4f: /* cmov */
		c->dst.val = c->dst.orig_val = c->src.val;
		if (!test_cc(c->b, ctxt->eflags))
			c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE;
		break;
	case 0xa0: /* push fs */
		emulate_push_sreg(ctxt, VCPU_SREG_FS);
		break;
	case 0xa1: /* pop fs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa3:
	bt:			/* bt */
		c->dst.type = OP_NONE;
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xa8: /* push gs */
		emulate_push_sreg(ctxt, VCPU_SREG_GS);
		break;
	case 0xa9: /* pop gs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xab:
	bts:			/* bts */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xae: /* clflush */
		break;
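	/*
	 * CMPXCHG compares the accumulator with the destination: if they
	 * are equal, ZF is set and the source is stored to the destination;
	 * otherwise ZF is cleared and the destination is loaded into the
	 * accumulator. E.g. EAX=1, [mem]=1, src=2 gives ZF=1 and [mem]=2,
	 * while EAX=1, [mem]=3 gives ZF=0 and EAX=3 with [mem] unchanged.
	 */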
	case 0xb0 ... 0xb1: /* cmpxchg */
		/*
		 * Save real source value, then compare EAX against
		 * destination.
		 */
		c->src.orig_val = c->src.val;
		c->src.val = c->regs[VCPU_REGS_RAX];
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		if (ctxt->eflags & EFLG_ZF) {
			/* Success: write back to memory. */
			c->dst.val = c->src.orig_val;
		} else {
			/* Failure: write the value we saw to EAX. */
			c->dst.type = OP_REG;
			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		}
		break;
	case 0xb3:
	btr:			/* btr */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
					     : (u16) c->src.val;
		break;
	case 0xba: /* Grp8 */
		switch (c->modrm_reg & 3) {
		case 0: goto bt;
		case 1: goto bts;
		case 2: goto btr;
		case 3: goto btc;
		}
		break;
	case 0xbb:
	btc:			/* btc */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
		break;
	case 0xbe ... 0xbf: /* movsx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
					       (s16) c->src.val;
		break;
	case 0xc3: /* movnti */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
						  (u64) c->src.val;
		break;
	case 0xc7: /* Grp9 (cmpxchg8b) */
		rc = emulate_grp9(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->dst.type = OP_NONE;
		break;
	}
	goto writeback;

cannot_emulate:
	DPRINTF("Cannot emulate %02x\n", c->b);
	return -1;
}