/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */
#ifndef __KERNEL__
#include <stdio.h>
#include <stdint.h>
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)	/* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)	/* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)	/* Memory operand is absolute displacement */
#define String      (1<<12)	/* String instruction (rep capable) */
#define Stack       (1<<13)	/* Stack instruction (push/pop) */
#define Group       (1<<14)	/* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)	/* Alternate decoding of mod == 3 */
#define GroupMask   0xff	/* Group number stored in bits 0:7 */
#define Lock        (1<<26)	/* lock prefix is allowed for the instruction */
#define Priv        (1<<27)	/* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)	/* instruction is invalid in 64-bit mode */
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm16   (4<<29)
#define Src2Mem16   (5<<29) /* Used for Ep encoding. First argument has to be
			       in memory and second argument is located
			       immediately after the first one in memory. */
#define Src2Mask    (7<<29)
/* Indexes into group_table[]/group2_table[]; each group is 8 entries wide. */
enum {
	Group1_80, Group1_81, Group1_82, Group1_83,
	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
	Group8, Group9,
};
102 static u32 opcode_table[256] = {
104 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
105 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
106 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
107 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
109 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
110 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
111 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
112 ImplicitOps | Stack | No64, 0,
114 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
115 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
116 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
117 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
119 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
120 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
121 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
122 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
124 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
125 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
126 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
128 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
129 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
132 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
133 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
136 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
137 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
138 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
141 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
143 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
145 SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
146 SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
148 DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
149 DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
151 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
152 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
155 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
156 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
157 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
159 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
160 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
162 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
163 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
165 Group | Group1_80, Group | Group1_81,
166 Group | Group1_82, Group | Group1_83,
167 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
168 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
170 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
171 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
172 DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
173 DstReg | SrcMem | ModRM | Mov, Group | Group1A,
175 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
177 0, 0, SrcImm | Src2Imm16 | No64, 0,
178 ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
180 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
181 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
182 ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
183 ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
185 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
186 ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
187 ByteOp | DstDI | String, DstDI | String,
189 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
190 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
191 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
192 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
194 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
195 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
196 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
197 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
199 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
200 0, ImplicitOps | Stack, 0, 0,
201 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
203 0, 0, 0, ImplicitOps | Stack,
204 ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
206 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
207 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
210 0, 0, 0, 0, 0, 0, 0, 0,
213 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
214 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
216 SrcImm | Stack, SrcImm | ImplicitOps,
217 SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
218 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
219 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
222 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
224 ImplicitOps, 0, ImplicitOps, ImplicitOps,
225 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
228 static u32 twobyte_table[256] = {
230 0, Group | GroupDual | Group7, 0, 0,
231 0, ImplicitOps, ImplicitOps | Priv, 0,
232 ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
233 0, ImplicitOps | ModRM, 0, 0,
235 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
237 ModRM | ImplicitOps | Priv, ModRM | Priv,
238 ModRM | ImplicitOps | Priv, ModRM | Priv,
240 0, 0, 0, 0, 0, 0, 0, 0,
242 ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
243 ImplicitOps, ImplicitOps | Priv, 0, 0,
244 0, 0, 0, 0, 0, 0, 0, 0,
246 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
247 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
248 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
249 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
251 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
252 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
253 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
254 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
258 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
260 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
262 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
263 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267 ImplicitOps | Stack, ImplicitOps | Stack,
268 0, DstMem | SrcReg | ModRM | BitOp,
269 DstMem | SrcReg | Src2ImmByte | ModRM,
270 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
272 ImplicitOps | Stack, ImplicitOps | Stack,
273 0, DstMem | SrcReg | ModRM | BitOp | Lock,
274 DstMem | SrcReg | Src2ImmByte | ModRM,
275 DstMem | SrcReg | Src2CL | ModRM,
278 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
279 0, DstMem | SrcReg | ModRM | BitOp | Lock,
280 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
281 DstReg | SrcMem16 | ModRM | Mov,
284 Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
285 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
286 DstReg | SrcMem16 | ModRM | Mov,
288 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
289 0, 0, 0, Group | GroupDual | Group9,
290 0, 0, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
294 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
296 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
299 static u32 group_table[] = {
301 ByteOp | DstMem | SrcImm | ModRM | Lock,
302 ByteOp | DstMem | SrcImm | ModRM | Lock,
303 ByteOp | DstMem | SrcImm | ModRM | Lock,
304 ByteOp | DstMem | SrcImm | ModRM | Lock,
305 ByteOp | DstMem | SrcImm | ModRM | Lock,
306 ByteOp | DstMem | SrcImm | ModRM | Lock,
307 ByteOp | DstMem | SrcImm | ModRM | Lock,
308 ByteOp | DstMem | SrcImm | ModRM,
310 DstMem | SrcImm | ModRM | Lock,
311 DstMem | SrcImm | ModRM | Lock,
312 DstMem | SrcImm | ModRM | Lock,
313 DstMem | SrcImm | ModRM | Lock,
314 DstMem | SrcImm | ModRM | Lock,
315 DstMem | SrcImm | ModRM | Lock,
316 DstMem | SrcImm | ModRM | Lock,
317 DstMem | SrcImm | ModRM,
319 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
320 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
321 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
322 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
323 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
324 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
325 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
326 ByteOp | DstMem | SrcImm | ModRM | No64,
328 DstMem | SrcImmByte | ModRM | Lock,
329 DstMem | SrcImmByte | ModRM | Lock,
330 DstMem | SrcImmByte | ModRM | Lock,
331 DstMem | SrcImmByte | ModRM | Lock,
332 DstMem | SrcImmByte | ModRM | Lock,
333 DstMem | SrcImmByte | ModRM | Lock,
334 DstMem | SrcImmByte | ModRM | Lock,
335 DstMem | SrcImmByte | ModRM,
337 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
339 ByteOp | SrcImm | DstMem | ModRM, 0,
340 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
343 DstMem | SrcImm | ModRM, 0,
344 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
347 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
350 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
351 SrcMem | ModRM | Stack, 0,
352 SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
353 SrcMem | ModRM | Stack, 0,
355 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
356 SrcNone | ModRM | DstMem | Mov, 0,
357 SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
360 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
361 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
363 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
366 static u32 group2_table[] = {
368 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
369 SrcNone | ModRM | DstMem | Mov, 0,
370 SrcMem16 | ModRM | Mov | Priv, 0,
372 0, 0, 0, 0, 0, 0, 0, 0,
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)	/* two-bit I/O privilege level field */
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */
#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */	\
	"movl %"_sav",%"_LO32 _tmp"; "					\
	"push %"_tmp"; "						\
	"push %"_tmp"; "						\
	"movl %"_msk",%"_LO32 _tmp"; "					\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"pushf; "							\
	"notl %"_LO32 _tmp"; "						\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "						\
	"orl  %"_LO32 _tmp",("_STK"); "					\
	"popf; "							\
	"pop  %"_sav"; "
/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp)		\
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_LO32 _tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "
/* Execute one two-operand ALU instruction of the given width (_suffix). */
#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "=m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)
/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
			break;						\
		}							\
	} while (0)
/* As __emulate_2op_nobyte, but also handles byte-sized destinations. */
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								     \
		unsigned long _tmp;					     \
		switch ((_dst).bytes) {					     \
		case 1:							     \
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
			break;						     \
		default:						     \
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
					     _wx, _wy, _lx, _ly, _qx, _qy);  \
			break;						     \
		}							     \
	} while (0)
/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")
/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")
/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")
/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
	do {									\
		unsigned long _tmp;						\
		_type _clv  = (_cl).val;					\
		_type _srcv = (_src).val;					\
		_type _dstv = (_dst).val;					\
										\
		__asm__ __volatile__ (						\
			_PRE_EFLAGS("0", "5", "2")				\
			_op _suffix " %4,%1 \n"					\
			_POST_EFLAGS("0", "5", "2")				\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
			);							\
										\
		(_cl).val  = (unsigned long) _clv;				\
		(_src).val = (unsigned long) _srcv;				\
		(_dst).val = (unsigned long) _dstv;				\
	} while (0)
/* Width dispatcher for the three-operand (shld/shrd-style) emulation. */
#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)			\
	do {								\
		switch ((_dst).bytes) {					\
		case 2:							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
					 "w", unsigned short);		\
			break;						\
		case 4:							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
					 "l", unsigned int);		\
			break;						\
		case 8:							\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
					      "q", unsigned long));	\
			break;						\
		}							\
	} while (0)
/* Execute one single-operand instruction of the given width (_suffix). */
#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
			: "=m" (_eflags), "+m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)
/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags)					\
	do {								\
		switch ((_dst).bytes) {					\
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}							\
	} while (0)
/*
 * Fetch next part of the instruction being emulated.  Evaluates to the
 * fetched value cast to _type; on fetch failure it sets 'rc' and jumps to
 * the enclosing function's 'done' label (so 'rc', 'ctxt', 'ops' and a
 * 'done' label must be in scope at every use site).
 */
#define insn_fetch(_type, _size, _eip)                                  \
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
	(_type) _x;							\
})
578 static inline unsigned long ad_mask(struct decode_cache *c)
580 return (1UL << (c->ad_bytes << 3)) - 1;
583 /* Access/update address held in a register, based on addressing mode. */
584 static inline unsigned long
585 address_mask(struct decode_cache *c, unsigned long reg)
587 if (c->ad_bytes == sizeof(unsigned long))
590 return reg & ad_mask(c);
/* Form a linear address from a segment base and an address-masked register. */
static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}
600 register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
602 if (c->ad_bytes == sizeof(unsigned long))
605 *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
608 static inline void jmp_rel(struct decode_cache *c, int rel)
610 register_address_increment(c, &c->eip, rel);
613 static void set_seg_override(struct decode_cache *c, int seg)
615 c->has_seg_override = true;
616 c->seg_override = seg;
619 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
621 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
624 return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
627 static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
628 struct decode_cache *c)
630 if (!c->has_seg_override)
633 return seg_base(ctxt, c->seg_override);
636 static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
638 return seg_base(ctxt, VCPU_SREG_ES);
641 static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
643 return seg_base(ctxt, VCPU_SREG_SS);
646 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
647 struct x86_emulate_ops *ops,
648 unsigned long linear, u8 *dest)
650 struct fetch_cache *fc = &ctxt->decode.fetch;
654 if (linear < fc->start || linear >= fc->end) {
655 size = min(15UL, PAGE_SIZE - offset_in_page(linear));
656 rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
657 if (rc != X86EMUL_CONTINUE)
660 fc->end = linear + size;
662 *dest = fc->data[linear - fc->start];
663 return X86EMUL_CONTINUE;
666 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
667 struct x86_emulate_ops *ops,
668 unsigned long eip, void *dest, unsigned size)
672 /* x86 instructions are limited to 15 bytes. */
673 if (eip + size - ctxt->eip > 15)
674 return X86EMUL_UNHANDLEABLE;
675 eip += ctxt->cs_base;
677 rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
678 if (rc != X86EMUL_CONTINUE)
681 return X86EMUL_CONTINUE;
685 * Given the 'reg' portion of a ModRM byte, and a register block, return a
686 * pointer into the block that addresses the relevant register.
687 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
689 static void *decode_register(u8 modrm_reg, unsigned long *regs,
694 p = ®s[modrm_reg];
695 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
696 p = (unsigned char *)®s[modrm_reg & 3] + 1;
700 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
701 struct x86_emulate_ops *ops,
703 u16 *size, unsigned long *address, int op_bytes)
710 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
712 if (rc != X86EMUL_CONTINUE)
714 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
719 static int test_cc(unsigned int condition, unsigned int flags)
723 switch ((condition & 15) >> 1) {
725 rc |= (flags & EFLG_OF);
727 case 1: /* b/c/nae */
728 rc |= (flags & EFLG_CF);
731 rc |= (flags & EFLG_ZF);
734 rc |= (flags & (EFLG_CF|EFLG_ZF));
737 rc |= (flags & EFLG_SF);
740 rc |= (flags & EFLG_PF);
743 rc |= (flags & EFLG_ZF);
746 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
750 /* Odd condition identifiers (lsb == 1) have inverted sense. */
751 return (!!rc ^ (condition & 1));
754 static void decode_register_operand(struct operand *op,
755 struct decode_cache *c,
758 unsigned reg = c->modrm_reg;
759 int highbyte_regs = c->rex_prefix == 0;
762 reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
764 if ((c->d & ByteOp) && !inhibit_bytereg) {
765 op->ptr = decode_register(reg, c->regs, highbyte_regs);
766 op->val = *(u8 *)op->ptr;
769 op->ptr = decode_register(reg, c->regs, 0);
770 op->bytes = c->op_bytes;
773 op->val = *(u16 *)op->ptr;
776 op->val = *(u32 *)op->ptr;
779 op->val = *(u64 *) op->ptr;
783 op->orig_val = op->val;
786 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
787 struct x86_emulate_ops *ops)
789 struct decode_cache *c = &ctxt->decode;
791 int index_reg = 0, base_reg = 0, scale;
792 int rc = X86EMUL_CONTINUE;
795 c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */
796 index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
797 c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REG.B */
800 c->modrm = insn_fetch(u8, 1, c->eip);
801 c->modrm_mod |= (c->modrm & 0xc0) >> 6;
802 c->modrm_reg |= (c->modrm & 0x38) >> 3;
803 c->modrm_rm |= (c->modrm & 0x07);
807 if (c->modrm_mod == 3) {
808 c->modrm_ptr = decode_register(c->modrm_rm,
809 c->regs, c->d & ByteOp);
810 c->modrm_val = *(unsigned long *)c->modrm_ptr;
814 if (c->ad_bytes == 2) {
815 unsigned bx = c->regs[VCPU_REGS_RBX];
816 unsigned bp = c->regs[VCPU_REGS_RBP];
817 unsigned si = c->regs[VCPU_REGS_RSI];
818 unsigned di = c->regs[VCPU_REGS_RDI];
820 /* 16-bit ModR/M decode. */
821 switch (c->modrm_mod) {
823 if (c->modrm_rm == 6)
824 c->modrm_ea += insn_fetch(u16, 2, c->eip);
827 c->modrm_ea += insn_fetch(s8, 1, c->eip);
830 c->modrm_ea += insn_fetch(u16, 2, c->eip);
833 switch (c->modrm_rm) {
835 c->modrm_ea += bx + si;
838 c->modrm_ea += bx + di;
841 c->modrm_ea += bp + si;
844 c->modrm_ea += bp + di;
853 if (c->modrm_mod != 0)
860 if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
861 (c->modrm_rm == 6 && c->modrm_mod != 0))
862 if (!c->has_seg_override)
863 set_seg_override(c, VCPU_SREG_SS);
864 c->modrm_ea = (u16)c->modrm_ea;
866 /* 32/64-bit ModR/M decode. */
867 if ((c->modrm_rm & 7) == 4) {
868 sib = insn_fetch(u8, 1, c->eip);
869 index_reg |= (sib >> 3) & 7;
873 if ((base_reg & 7) == 5 && c->modrm_mod == 0)
874 c->modrm_ea += insn_fetch(s32, 4, c->eip);
876 c->modrm_ea += c->regs[base_reg];
878 c->modrm_ea += c->regs[index_reg] << scale;
879 } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
880 if (ctxt->mode == X86EMUL_MODE_PROT64)
883 c->modrm_ea += c->regs[c->modrm_rm];
884 switch (c->modrm_mod) {
886 if (c->modrm_rm == 5)
887 c->modrm_ea += insn_fetch(s32, 4, c->eip);
890 c->modrm_ea += insn_fetch(s8, 1, c->eip);
893 c->modrm_ea += insn_fetch(s32, 4, c->eip);
901 static int decode_abs(struct x86_emulate_ctxt *ctxt,
902 struct x86_emulate_ops *ops)
904 struct decode_cache *c = &ctxt->decode;
905 int rc = X86EMUL_CONTINUE;
907 switch (c->ad_bytes) {
909 c->modrm_ea = insn_fetch(u16, 2, c->eip);
912 c->modrm_ea = insn_fetch(u32, 4, c->eip);
915 c->modrm_ea = insn_fetch(u64, 8, c->eip);
923 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
925 struct decode_cache *c = &ctxt->decode;
926 int rc = X86EMUL_CONTINUE;
927 int mode = ctxt->mode;
928 int def_op_bytes, def_ad_bytes, group;
930 /* Shadow copy of register state. Committed on successful emulation. */
932 memset(c, 0, sizeof(struct decode_cache));
934 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
935 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
938 case X86EMUL_MODE_REAL:
939 case X86EMUL_MODE_VM86:
940 case X86EMUL_MODE_PROT16:
941 def_op_bytes = def_ad_bytes = 2;
943 case X86EMUL_MODE_PROT32:
944 def_op_bytes = def_ad_bytes = 4;
947 case X86EMUL_MODE_PROT64:
956 c->op_bytes = def_op_bytes;
957 c->ad_bytes = def_ad_bytes;
959 /* Legacy prefixes. */
961 switch (c->b = insn_fetch(u8, 1, c->eip)) {
962 case 0x66: /* operand-size override */
963 /* switch between 2/4 bytes */
964 c->op_bytes = def_op_bytes ^ 6;
966 case 0x67: /* address-size override */
967 if (mode == X86EMUL_MODE_PROT64)
968 /* switch between 4/8 bytes */
969 c->ad_bytes = def_ad_bytes ^ 12;
971 /* switch between 2/4 bytes */
972 c->ad_bytes = def_ad_bytes ^ 6;
974 case 0x26: /* ES override */
975 case 0x2e: /* CS override */
976 case 0x36: /* SS override */
977 case 0x3e: /* DS override */
978 set_seg_override(c, (c->b >> 3) & 3);
980 case 0x64: /* FS override */
981 case 0x65: /* GS override */
982 set_seg_override(c, c->b & 7);
984 case 0x40 ... 0x4f: /* REX */
985 if (mode != X86EMUL_MODE_PROT64)
987 c->rex_prefix = c->b;
989 case 0xf0: /* LOCK */
992 case 0xf2: /* REPNE/REPNZ */
993 c->rep_prefix = REPNE_PREFIX;
995 case 0xf3: /* REP/REPE/REPZ */
996 c->rep_prefix = REPE_PREFIX;
1002 /* Any legacy prefix after a REX prefix nullifies its effect. */
1011 if (c->rex_prefix & 8)
1012 c->op_bytes = 8; /* REX.W */
1014 /* Opcode byte(s). */
1015 c->d = opcode_table[c->b];
1017 /* Two-byte opcode? */
1020 c->b = insn_fetch(u8, 1, c->eip);
1021 c->d = twobyte_table[c->b];
1026 group = c->d & GroupMask;
1027 c->modrm = insn_fetch(u8, 1, c->eip);
1030 group = (group << 3) + ((c->modrm >> 3) & 7);
1031 if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
1032 c->d = group2_table[group];
1034 c->d = group_table[group];
1039 DPRINTF("Cannot emulate %02x\n", c->b);
1043 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
1046 /* ModRM and SIB bytes. */
1048 rc = decode_modrm(ctxt, ops);
1049 else if (c->d & MemAbs)
1050 rc = decode_abs(ctxt, ops);
1051 if (rc != X86EMUL_CONTINUE)
1054 if (!c->has_seg_override)
1055 set_seg_override(c, VCPU_SREG_DS);
1057 if (!(!c->twobyte && c->b == 0x8d))
1058 c->modrm_ea += seg_override_base(ctxt, c);
1060 if (c->ad_bytes != 8)
1061 c->modrm_ea = (u32)c->modrm_ea;
1063 if (c->rip_relative)
1064 c->modrm_ea += c->eip;
1067 * Decode and fetch the source operand: register, memory
1070 switch (c->d & SrcMask) {
1074 decode_register_operand(&c->src, c, 0);
1083 c->src.bytes = (c->d & ByteOp) ? 1 :
1085 /* Don't fetch the address for invlpg: it could be unmapped. */
1086 if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
1090 * For instructions with a ModR/M byte, switch to register
1091 * access if Mod = 3.
1093 if ((c->d & ModRM) && c->modrm_mod == 3) {
1094 c->src.type = OP_REG;
1095 c->src.val = c->modrm_val;
1096 c->src.ptr = c->modrm_ptr;
1099 c->src.type = OP_MEM;
1100 c->src.ptr = (unsigned long *)c->modrm_ea;
1105 c->src.type = OP_IMM;
1106 c->src.ptr = (unsigned long *)c->eip;
1107 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1108 if (c->src.bytes == 8)
1110 /* NB. Immediates are sign-extended as necessary. */
1111 switch (c->src.bytes) {
1113 c->src.val = insn_fetch(s8, 1, c->eip);
1116 c->src.val = insn_fetch(s16, 2, c->eip);
1119 c->src.val = insn_fetch(s32, 4, c->eip);
1122 if ((c->d & SrcMask) == SrcImmU) {
1123 switch (c->src.bytes) {
1128 c->src.val &= 0xffff;
1131 c->src.val &= 0xffffffff;
1138 c->src.type = OP_IMM;
1139 c->src.ptr = (unsigned long *)c->eip;
1141 if ((c->d & SrcMask) == SrcImmByte)
1142 c->src.val = insn_fetch(s8, 1, c->eip);
1144 c->src.val = insn_fetch(u8, 1, c->eip);
1151 c->src.type = OP_MEM;
1152 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1153 c->src.ptr = (unsigned long *)
1154 register_address(c, seg_override_base(ctxt, c),
1155 c->regs[VCPU_REGS_RSI]);
1161 * Decode and fetch the second source operand: register, memory
1164 switch (c->d & Src2Mask) {
1169 c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
1172 c->src2.type = OP_IMM;
1173 c->src2.ptr = (unsigned long *)c->eip;
1175 c->src2.val = insn_fetch(u8, 1, c->eip);
1178 c->src2.type = OP_IMM;
1179 c->src2.ptr = (unsigned long *)c->eip;
1181 c->src2.val = insn_fetch(u16, 2, c->eip);
1188 c->src2.type = OP_MEM;
1190 c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes);
1195 /* Decode and fetch the destination operand: register or memory. */
1196 switch (c->d & DstMask) {
1198 /* Special instructions do their own operand decoding. */
1201 decode_register_operand(&c->dst, c,
1202 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
1205 if ((c->d & ModRM) && c->modrm_mod == 3) {
1206 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1207 c->dst.type = OP_REG;
1208 c->dst.val = c->dst.orig_val = c->modrm_val;
1209 c->dst.ptr = c->modrm_ptr;
1212 c->dst.type = OP_MEM;
1213 c->dst.ptr = (unsigned long *)c->modrm_ea;
1214 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1217 unsigned long mask = ~(c->dst.bytes * 8 - 1);
1219 c->dst.ptr = (void *)c->dst.ptr +
1220 (c->src.val & mask) / 8;
1224 c->dst.type = OP_REG;
1225 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1226 c->dst.ptr = &c->regs[VCPU_REGS_RAX];
1227 switch (c->dst.bytes) {
1229 c->dst.val = *(u8 *)c->dst.ptr;
1232 c->dst.val = *(u16 *)c->dst.ptr;
1235 c->dst.val = *(u32 *)c->dst.ptr;
1238 c->dst.val = *(u64 *)c->dst.ptr;
1241 c->dst.orig_val = c->dst.val;
1244 c->dst.type = OP_MEM;
1245 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1246 c->dst.ptr = (unsigned long *)
1247 register_address(c, es_base(ctxt),
1248 c->regs[VCPU_REGS_RDI]);
1254 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
1257 static u32 desc_limit_scaled(struct desc_struct *desc)
1259 u32 limit = get_desc_limit(desc);
1261 return desc->g ? (limit << 12) | 0xfff : limit;
/*
 * Fill *dt with the base/limit of the descriptor table that @selector
 * refers to: the cached LDT descriptor when the selector's TI bit
 * (bit 2) is set, otherwise the GDT.
 */
1264 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1265 struct x86_emulate_ops *ops,
1266 u16 selector, struct desc_ptr *dt)
/* TI bit set: selector indexes the LDT */
1268 if (selector & 1 << 2) {
1269 struct desc_struct desc;
/* default to an empty table if the cached LDTR lookup fails */
1270 memset (dt, 0, sizeof *dt);
1271 if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
1274 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1275 dt->address = get_desc_base(&desc);
/* TI clear: use the guest's GDT */
1277 ops->get_gdt(dt, ctxt->vcpu);
1280 /* allowed just for 8 bytes segments */
/*
 * Read the 8-byte descriptor selected by @selector into *desc.
 * Injects #GP (with the selector as error code) on a limit violation and
 * propagates a page fault from the table read.
 */
1281 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1282 struct x86_emulate_ops *ops,
1283 u16 selector, struct desc_struct *desc)
/* strip RPL/TI: the top 13 bits index the table */
1286 u16 index = selector >> 3;
1291 get_descriptor_table_ptr(ctxt, ops, selector, &dt)
/* the whole 8-byte entry must lie inside the table limit */
1293 if (dt.size < index * 8 + 7) {
1294 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1295 return X86EMUL_PROPAGATE_FAULT;
1297 addr = dt.address + index * 8;
1298 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1299 if (ret == X86EMUL_PROPAGATE_FAULT)
1300 kvm_inject_page_fault(ctxt->vcpu, addr, err);
1305 /* allowed just for 8 bytes segments */
/*
 * Write *desc back into the descriptor table slot selected by @selector.
 * Mirrors read_segment_descriptor(): #GP on limit violation, page fault
 * propagation on the table write.
 */
1306 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1307 struct x86_emulate_ops *ops,
1308 u16 selector, struct desc_struct *desc)
1311 u16 index = selector >> 3;
1316 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1318 if (dt.size < index * 8 + 7) {
1319 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1320 return X86EMUL_PROPAGATE_FAULT;
1323 addr = dt.address + index * 8;
1324 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1325 if (ret == X86EMUL_PROPAGATE_FAULT)
1326 kvm_inject_page_fault(ctxt->vcpu, addr, err);
/*
 * Load @selector into segment register @seg, performing the per-register
 * protected-mode permission checks (SS/CS/TR/LDTR/data each have their
 * own rules).  In real and VM86 mode a flat real-mode descriptor is
 * synthesized instead.  On a failed check, queues the recorded exception
 * (err_vec/err_code) and returns X86EMUL_PROPAGATE_FAULT.
 */
1331 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1332 struct x86_emulate_ops *ops,
1333 u16 selector, int seg)
1335 struct desc_struct seg_desc;
1337 unsigned err_vec = GP_VECTOR;
1339 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1342 memset(&seg_desc, 0, sizeof seg_desc);
1344 if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
1345 || ctxt->mode == X86EMUL_MODE_REAL) {
1346 /* set real mode segment descriptor: base = selector << 4 */
1347 set_desc_base(&seg_desc, selector << 4);
1348 set_desc_limit(&seg_desc, 0xffff);
1355 /* NULL selector is not valid for TR, CS and SS */
1356 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
1360 /* TR should be in GDT only */
1361 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1364 if (null_selector) /* for NULL selector skip all following checks */
1367 ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
1368 if (ret != X86EMUL_CONTINUE)
/* default error for the checks below: #GP(selector) */
1371 err_code = selector & 0xfffc;
1372 err_vec = GP_VECTOR;
1374 /* can't load a system descriptor into an ordinary segment register */
1375 if (seg <= VCPU_SREG_GS && !seg_desc.s)
/* not-present segment: SS gets #SS, everything else #NP */
1379 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1385 cpl = ops->cpl(ctxt->vcpu);
1390 * segment is not a writable data segment or segment
1391 * selector's RPL != CPL or segment descriptor's DPL != CPL
1393 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
/* CS must be a code segment (type bit 3 set) */
1397 if (!(seg_desc.type & 8))
/* conforming code segment */
1400 if (seg_desc.type & 4) {
/* non-conforming: RPL <= CPL and DPL == CPL */
1406 if (rpl > cpl || dpl != cpl)
1409 /* CS(RPL) <- CPL */
1410 selector = (selector & 0xfffc) | cpl;
/* TR must be a system descriptor of type available TSS (1 or 9) */
1413 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1416 case VCPU_SREG_LDTR:
1417 if (seg_desc.s || seg_desc.type != 2)
1420 default: /* DS, ES, FS, or GS */
1422 * segment is not a data or readable code segment or
1423 * ((segment is a data or nonconforming code segment)
1424 * and (both RPL and CPL > DPL))
1426 if ((seg_desc.type & 0xa) == 0x8 ||
1427 (((seg_desc.type & 0xc) != 0xc) &&
1428 (rpl > dpl && cpl > dpl)))
1434 /* mark segment as accessed and write the descriptor back */
1436 ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
1437 if (ret != X86EMUL_CONTINUE)
/* commit both the visible selector and the cached descriptor */
1441 ops->set_segment_selector(selector, seg, ctxt->vcpu);
1442 ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
1443 return X86EMUL_CONTINUE;
1445 kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
1446 return X86EMUL_PROPAGATE_FAULT;
1449 static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1451 struct decode_cache *c = &ctxt->decode;
1453 c->dst.type = OP_MEM;
1454 c->dst.bytes = c->op_bytes;
1455 c->dst.val = c->src.val;
1456 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1457 c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
1458 c->regs[VCPU_REGS_RSP]);
/*
 * Pop @len bytes from the guest stack into *dest, then bump RSP.
 * RSP is only advanced after a successful emulated read.
 */
1461 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1462 struct x86_emulate_ops *ops,
1463 void *dest, int len)
1465 struct decode_cache *c = &ctxt->decode;
1468 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1469 c->regs[VCPU_REGS_RSP]),
1470 dest, len, ctxt->vcpu);
1471 if (rc != X86EMUL_CONTINUE)
/* post-increment RSP only once the read has succeeded */
1474 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
/*
 * Emulate POPF: pop a value from the stack and merge it into EFLAGS.
 * Only bits in change_mask are taken from the popped value; which bits
 * are writable (IF, IOPL) depends on the CPU mode and privilege level.
 */
1478 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1479 struct x86_emulate_ops *ops,
1480 void *dest, int len)
1483 unsigned long val, change_mask;
1484 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1485 int cpl = ops->cpl(ctxt->vcpu);
1487 rc = emulate_pop(ctxt, ops, &val, len);
1488 if (rc != X86EMUL_CONTINUE)
/* arithmetic/control flags that POPF may always rewrite */
1491 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1492 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1494 switch(ctxt->mode) {
1495 case X86EMUL_MODE_PROT64:
1496 case X86EMUL_MODE_PROT32:
1497 case X86EMUL_MODE_PROT16:
/* privilege-dependent: IOPL and IF writability in protected mode */
1499 change_mask |= EFLG_IOPL;
1501 change_mask |= EFLG_IF;
1503 case X86EMUL_MODE_VM86:
/* insufficient IOPL in VM86 mode: POPF raises #GP(0) */
1505 kvm_inject_gp(ctxt->vcpu, 0);
1506 return X86EMUL_PROPAGATE_FAULT;
1508 change_mask |= EFLG_IF;
1510 default: /* real mode */
1511 change_mask |= (EFLG_IOPL | EFLG_IF);
/* merge: keep protected bits of EFLAGS, take writable bits from val */
1515 *(unsigned long *)dest =
1516 (ctxt->eflags & ~change_mask) | (val & change_mask);
/*
 * Emulate "push <sreg>": fetch the selector of segment register @seg and
 * stage it as the push source operand.
 */
1521 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
1523 struct decode_cache *c = &ctxt->decode;
1524 struct kvm_segment segment;
1526 kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
/* the selector becomes the value to be pushed */
1528 c->src.val = segment.selector;
/*
 * Emulate "pop <sreg>": pop a selector off the stack and load it into
 * segment register @seg via the full descriptor-loading checks.
 */
1532 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1533 struct x86_emulate_ops *ops, int seg)
1535 struct decode_cache *c = &ctxt->decode;
1536 unsigned long selector;
1539 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1540 if (rc != X86EMUL_CONTINUE)
/* truncate to 16 bits: only the selector part is meaningful */
1543 rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
/*
 * Emulate PUSHA: push RAX..RDI in register order.  The RSP slot is the
 * value RSP had before the first push (old_esp), per the ISA.
 */
1547 static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
1549 struct decode_cache *c = &ctxt->decode;
1550 unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1551 int reg = VCPU_REGS_RAX;
1553 while (reg <= VCPU_REGS_RDI) {
/* push the original RSP, not the running one */
1554 (reg == VCPU_REGS_RSP) ?
1555 (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
/*
 * Emulate POPA: pop RDI..RAX in reverse push order.  The RSP slot is
 * discarded (stack pointer just skips over it).
 */
1562 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1563 struct x86_emulate_ops *ops)
1565 struct decode_cache *c = &ctxt->decode;
1566 int rc = X86EMUL_CONTINUE;
1567 int reg = VCPU_REGS_RDI;
1569 while (reg >= VCPU_REGS_RAX) {
1570 if (reg == VCPU_REGS_RSP) {
/* skip the saved RSP value instead of loading it */
1571 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1576 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1577 if (rc != X86EMUL_CONTINUE)
1584 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1585 struct x86_emulate_ops *ops)
1587 struct decode_cache *c = &ctxt->decode;
1589 return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
/*
 * Group 2 (rotate/shift family): dispatch on the ModRM reg field to the
 * matching rotate/shift helper; src holds the count, dst the operand.
 */
1592 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1594 struct decode_cache *c = &ctxt->decode;
1595 switch (c->modrm_reg) {
1597 emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1600 emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1603 emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1606 emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1608 case 4: /* sal/shl */
1609 case 6: /* sal/shl (undocumented alias of /4) */
1610 emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1613 emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1616 emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
/*
 * Group 3 (0xf6/0xf7): test/not/neg family, dispatched on the ModRM
 * reg field.
 */
1621 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1622 struct x86_emulate_ops *ops)
1624 struct decode_cache *c = &ctxt->decode;
1626 switch (c->modrm_reg) {
1627 case 0 ... 1: /* test */
1628 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
/* not: bitwise complement, flags untouched */
1631 c->dst.val = ~c->dst.val;
1634 emulate_1op("neg", c->dst, ctxt->eflags);
/*
 * Groups 4/5 (0xfe/0xff): inc/dec/call/jmp family, dispatched on the
 * ModRM reg field.
 */
1642 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1643 struct x86_emulate_ops *ops)
1645 struct decode_cache *c = &ctxt->decode;
1647 switch (c->modrm_reg) {
1649 emulate_1op("inc", c->dst, ctxt->eflags);
1652 emulate_1op("dec", c->dst, ctxt->eflags);
1654 case 2: /* call near abs */ {
/* jump to target, then stage the old EIP to be pushed */
1657 c->eip = c->src.val;
1658 c->src.val = old_eip;
1662 case 4: /* jmp abs */
1663 c->eip = c->src.val;
1669 return X86EMUL_CONTINUE;
/*
 * Group 9: CMPXCHG8B.  Compare EDX:EAX with the 64-bit memory operand;
 * on mismatch load the old value into EDX:EAX and clear ZF, on match
 * store ECX:EBX via an atomic emulated cmpxchg and set ZF.
 */
1672 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1673 struct x86_emulate_ops *ops)
1675 struct decode_cache *c = &ctxt->decode;
1679 rc = ops->read_emulated(c->modrm_ea, &old, 8, ctxt->vcpu);
1680 if (rc != X86EMUL_CONTINUE)
/* compare EDX:EAX against the current memory value */
1683 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1684 ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
1686 c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1687 c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1688 ctxt->eflags &= ~EFLG_ZF;
/* match: attempt to store ECX:EBX */
1691 new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1692 (u32) c->regs[VCPU_REGS_RBX];
1694 rc = ops->cmpxchg_emulated(c->modrm_ea, &old, &new, 8, ctxt->vcpu);
1695 if (rc != X86EMUL_CONTINUE)
1697 ctxt->eflags |= EFLG_ZF;
1699 return X86EMUL_CONTINUE;
/*
 * Emulate far RET: pop the return EIP, then the CS selector, and reload
 * CS through the full descriptor checks.
 */
1702 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1703 struct x86_emulate_ops *ops)
1705 struct decode_cache *c = &ctxt->decode;
1709 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1710 if (rc != X86EMUL_CONTINUE)
/* 32-bit operand size: truncate the popped EIP to 32 bits */
1712 if (c->op_bytes == 4)
1713 c->eip = (u32)c->eip;
1714 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1715 if (rc != X86EMUL_CONTINUE)
1717 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
/*
 * Commit the decoded destination operand: direct store for registers,
 * emulated write (or cmpxchg for locked ops) for memory destinations.
 */
1721 static inline int writeback(struct x86_emulate_ctxt *ctxt,
1722 struct x86_emulate_ops *ops)
1725 struct decode_cache *c = &ctxt->decode;
1727 switch (c->dst.type) {
1729 /* The 4-byte case *is* correct:
1730 * in 64-bit mode we zero-extend.
1732 switch (c->dst.bytes) {
1734 *(u8 *)c->dst.ptr = (u8)c->dst.val;
1737 *(u16 *)c->dst.ptr = (u16)c->dst.val;
1740 *c->dst.ptr = (u32)c->dst.val;
1741 break; /* 64b: zero-ext */
1743 *c->dst.ptr = c->dst.val;
/* locked instructions go through an atomic compare-exchange */
1749 rc = ops->cmpxchg_emulated(
1750 (unsigned long)c->dst.ptr,
1756 rc = ops->write_emulated(
1757 (unsigned long)c->dst.ptr,
1761 if (rc != X86EMUL_CONTINUE)
1770 return X86EMUL_CONTINUE;
/*
 * Update the emulator's interrupt-shadow state for STI / MOV SS style
 * instructions; only set the shadow if it was not already pending.
 */
1773 static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
1775 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
1777 * an sti; sti; sequence only disables interrupts for the first
1778 * instruction. So, if the last instruction, be it emulated or
1779 * not, left the system with the INT_STI flag enabled, it
1780 * means that the last instruction is an sti. We should not
1781 * leave the flag on in this case. The same goes for mov ss
1783 if (!(int_shadow & mask))
1784 ctxt->interruptibility = mask;
/*
 * Initialize the flat CS/SS segment templates used by the syscall,
 * sysenter and sysexit emulation paths; selector, DPL and the CS long
 * bit are filled in by the callers.
 */
1788 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1789 struct kvm_segment *cs, struct kvm_segment *ss)
1791 memset(cs, 0, sizeof(struct kvm_segment));
1792 kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
1793 memset(ss, 0, sizeof(struct kvm_segment));
1795 cs->l = 0; /* will be adjusted later */
1796 cs->base = 0; /* flat segment */
1797 cs->g = 1; /* 4kb granularity */
1798 cs->limit = 0xffffffff; /* 4GB limit */
1799 cs->type = 0x0b; /* Read, Execute, Accessed */
1801 cs->dpl = 0; /* will be adjusted later */
1806 ss->base = 0; /* flat segment */
1807 ss->limit = 0xffffffff; /* 4GB limit */
1808 ss->g = 1; /* 4kb granularity */
1810 ss->type = 0x03; /* Read/Write, Accessed */
1811 ss->db = 1; /* 32bit stack segment */
/*
 * Emulate SYSCALL: #UD in real/VM86 mode, otherwise load flat CS/SS from
 * MSR_STAR, save the return RIP in RCX (and flags in R11 in long mode),
 * and jump to the MSR_LSTAR/MSR_CSTAR/MSR_STAR entry point.
 */
1817 emulate_syscall(struct x86_emulate_ctxt *ctxt)
1819 struct decode_cache *c = &ctxt->decode;
1820 struct kvm_segment cs, ss;
1823 /* syscall is not available in real mode */
1824 if (ctxt->mode == X86EMUL_MODE_REAL ||
1825 ctxt->mode == X86EMUL_MODE_VM86) {
1826 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1827 return X86EMUL_PROPAGATE_FAULT;
1830 setup_syscalls_segments(ctxt, &cs, &ss);
/* STAR[47:32] = CS selector; SS = CS + 8 per the SYSCALL convention */
1832 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1834 cs.selector = (u16)(msr_data & 0xfffc);
1835 ss.selector = (u16)(msr_data + 8);
1837 if (is_long_mode(ctxt->vcpu)) {
1841 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1842 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
/* SYSCALL saves the return address in RCX */
1844 c->regs[VCPU_REGS_RCX] = c->eip;
1845 if (is_long_mode(ctxt->vcpu)) {
1846 #ifdef CONFIG_X86_64
/* long mode also preserves RFLAGS (sans RF) in R11 */
1847 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1849 kvm_x86_ops->get_msr(ctxt->vcpu,
1850 ctxt->mode == X86EMUL_MODE_PROT64 ?
1851 MSR_LSTAR : MSR_CSTAR, &msr_data);
/* mask flags per MSR_SYSCALL_MASK (SFMASK) */
1854 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1855 ctxt->eflags &= ~(msr_data | EFLG_RF);
/* legacy mode: entry point comes from STAR[31:0] */
1859 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1860 c->eip = (u32)msr_data;
1862 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1865 return X86EMUL_CONTINUE;
/*
 * Emulate SYSENTER: #GP in real mode, #UD in 64-bit mode (untested
 * there), otherwise load CS/SS from MSR_IA32_SYSENTER_CS and jump to
 * the SYSENTER_EIP/ESP entry point with VM/IF/RF cleared.
 */
1869 emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1871 struct decode_cache *c = &ctxt->decode;
1872 struct kvm_segment cs, ss;
1875 /* inject #GP if in real mode */
1876 if (ctxt->mode == X86EMUL_MODE_REAL) {
1877 kvm_inject_gp(ctxt->vcpu, 0);
1878 return X86EMUL_PROPAGATE_FAULT;
1881 /* XXX sysenter/sysexit have not been tested in 64bit mode.
1882 * Therefore, we inject an #UD.
1884 if (ctxt->mode == X86EMUL_MODE_PROT64) {
1885 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1886 return X86EMUL_PROPAGATE_FAULT;
1889 setup_syscalls_segments(ctxt, &cs, &ss);
1891 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
/* a null SYSENTER_CS selector is a #GP, mode-dependent width check */
1892 switch (ctxt->mode) {
1893 case X86EMUL_MODE_PROT32:
1894 if ((msr_data & 0xfffc) == 0x0) {
1895 kvm_inject_gp(ctxt->vcpu, 0);
1896 return X86EMUL_PROPAGATE_FAULT;
1899 case X86EMUL_MODE_PROT64:
1900 if (msr_data == 0x0) {
1901 kvm_inject_gp(ctxt->vcpu, 0);
1902 return X86EMUL_PROPAGATE_FAULT;
1907 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
/* CS from SYSENTER_CS with RPL forced to 0; SS = CS + 8 */
1908 cs.selector = (u16)msr_data;
1909 cs.selector &= ~SELECTOR_RPL_MASK;
1910 ss.selector = cs.selector + 8;
1911 ss.selector &= ~SELECTOR_RPL_MASK;
1912 if (ctxt->mode == X86EMUL_MODE_PROT64
1913 || is_long_mode(ctxt->vcpu)) {
1918 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1919 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1921 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
1924 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1925 c->regs[VCPU_REGS_RSP] = msr_data;
1927 return X86EMUL_CONTINUE;
/*
 * Emulate SYSEXIT: #GP in real/VM86 mode.  Target user mode is chosen by
 * the REX.W prefix; CS/SS selectors are derived from SYSENTER_CS at
 * fixed offsets and forced to RPL 3, and RIP/RSP come from RDX/RCX.
 */
1931 emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1933 struct decode_cache *c = &ctxt->decode;
1934 struct kvm_segment cs, ss;
1938 /* inject #GP if in real mode or Virtual 8086 mode */
1939 if (ctxt->mode == X86EMUL_MODE_REAL ||
1940 ctxt->mode == X86EMUL_MODE_VM86) {
1941 kvm_inject_gp(ctxt->vcpu, 0);
1942 return X86EMUL_PROPAGATE_FAULT;
1945 setup_syscalls_segments(ctxt, &cs, &ss);
/* REX.W selects a 64-bit return to user mode */
1947 if ((c->rex_prefix & 0x8) != 0x0)
1948 usermode = X86EMUL_MODE_PROT64;
1950 usermode = X86EMUL_MODE_PROT32;
1954 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1956 case X86EMUL_MODE_PROT32:
/* 32-bit exit: CS = SYSENTER_CS + 16, SS = +24 */
1957 cs.selector = (u16)(msr_data + 16);
1958 if ((msr_data & 0xfffc) == 0x0) {
1959 kvm_inject_gp(ctxt->vcpu, 0);
1960 return X86EMUL_PROPAGATE_FAULT;
1962 ss.selector = (u16)(msr_data + 24);
1964 case X86EMUL_MODE_PROT64:
/* 64-bit exit: CS = SYSENTER_CS + 32, SS = CS + 8 */
1965 cs.selector = (u16)(msr_data + 32);
1966 if (msr_data == 0x0) {
1967 kvm_inject_gp(ctxt->vcpu, 0);
1968 return X86EMUL_PROPAGATE_FAULT;
1970 ss.selector = cs.selector + 8;
/* returning to user mode: force RPL = 3 */
1975 cs.selector |= SELECTOR_RPL_MASK;
1976 ss.selector |= SELECTOR_RPL_MASK;
1978 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1979 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
/* SYSEXIT takes RIP from RDX and RSP from RCX */
1981 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
1982 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
1984 return X86EMUL_CONTINUE;
/*
 * Return true when the current privilege level is insufficient for
 * IOPL-sensitive instructions.  Mode-specific early exits for real and
 * VM86 mode, then a plain CPL > IOPL comparison.
 */
1987 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
1988 struct x86_emulate_ops *ops)
1991 if (ctxt->mode == X86EMUL_MODE_REAL)
1993 if (ctxt->mode == X86EMUL_MODE_VM86)
1995 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1996 return ops->cpl(ctxt->vcpu) > iopl;
/*
 * Consult the TSS I/O permission bitmap for @port/@len.  The bitmap
 * offset lives at byte 102 of the TSS; each bit denies one port, so the
 * access is allowed only when all @len bits starting at @port are clear.
 */
1999 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2000 struct x86_emulate_ops *ops,
2003 struct kvm_segment tr_seg;
2006 u8 perm, bit_idx = port & 0x7;
2007 unsigned mask = (1 << len) - 1;
2009 kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
2010 if (tr_seg.unusable)
/* TSS must be large enough to hold the bitmap offset field */
2012 if (tr_seg.limit < 103)
2014 r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
2016 if (r != X86EMUL_CONTINUE)
2018 if (io_bitmap_ptr + port/8 > tr_seg.limit)
2020 r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
2022 if (r != X86EMUL_CONTINUE)
/* any set bit in the range denies the access */
2024 if ((perm >> bit_idx) & mask)
/*
 * Combined I/O permission check: when IOPL forbids direct access, fall
 * back to the TSS I/O permission bitmap.
 */
2029 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2030 struct x86_emulate_ops *ops,
2033 if (emulator_bad_iopl(ctxt, ops))
2034 if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
/*
 * Return the base address of the cached descriptor for segment @seg,
 * taken from the vcpu's descriptor cache.
 */
2039 static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
2040 struct x86_emulate_ops *ops,
2043 struct desc_struct desc;
2044 if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
2045 return get_desc_base(&desc);
/*
 * Snapshot the current (16-bit) task state into *tss: flags, the eight
 * general registers, and the segment/LDT selectors.
 */
2050 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2051 struct x86_emulate_ops *ops,
2052 struct tss_segment_16 *tss)
2054 struct decode_cache *c = &ctxt->decode;
2057 tss->flag = ctxt->eflags;
2058 tss->ax = c->regs[VCPU_REGS_RAX];
2059 tss->cx = c->regs[VCPU_REGS_RCX];
2060 tss->dx = c->regs[VCPU_REGS_RDX];
2061 tss->bx = c->regs[VCPU_REGS_RBX];
2062 tss->sp = c->regs[VCPU_REGS_RSP];
2063 tss->bp = c->regs[VCPU_REGS_RBP];
2064 tss->si = c->regs[VCPU_REGS_RSI];
2065 tss->di = c->regs[VCPU_REGS_RDI];
2067 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2068 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2069 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2070 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2071 tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
/*
 * Restore guest state from a 16-bit TSS: flags, general registers, then
 * selectors, then the full segment-descriptor loads (which can fault in
 * the context of the new task).
 */
2074 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2075 struct x86_emulate_ops *ops,
2076 struct tss_segment_16 *tss)
2078 struct decode_cache *c = &ctxt->decode;
/* bit 1 of EFLAGS is architecturally always set */
2082 ctxt->eflags = tss->flag | 2;
2083 c->regs[VCPU_REGS_RAX] = tss->ax;
2084 c->regs[VCPU_REGS_RCX] = tss->cx;
2085 c->regs[VCPU_REGS_RDX] = tss->dx;
2086 c->regs[VCPU_REGS_RBX] = tss->bx;
2087 c->regs[VCPU_REGS_RSP] = tss->sp;
2088 c->regs[VCPU_REGS_RBP] = tss->bp;
2089 c->regs[VCPU_REGS_RSI] = tss->si;
2090 c->regs[VCPU_REGS_RDI] = tss->di;
2093 * SDM says that segment selectors are loaded before segment
2096 ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
2097 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2098 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2099 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2100 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2103 * Now load segment descriptors. If fault happenes at this stage
2104 * it is handled in a context of new task
2106 ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
2107 if (ret != X86EMUL_CONTINUE)
2109 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2110 if (ret != X86EMUL_CONTINUE)
2112 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2113 if (ret != X86EMUL_CONTINUE)
2115 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2116 if (ret != X86EMUL_CONTINUE)
2118 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2119 if (ret != X86EMUL_CONTINUE)
2122 return X86EMUL_CONTINUE;
/*
 * Perform a 16-bit task switch: save current state into the old TSS,
 * read the new TSS, optionally write the back-link to the previous
 * task, and load the new state.  Page faults from the TSS accesses are
 * injected (with an imprecise address, see FIXMEs).
 */
2125 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2126 struct x86_emulate_ops *ops,
2127 u16 tss_selector, u16 old_tss_sel,
2128 ulong old_tss_base, struct desc_struct *new_desc)
2130 struct tss_segment_16 tss_seg;
2132 u32 err, new_tss_base = get_desc_base(new_desc);
2134 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2136 if (ret == X86EMUL_PROPAGATE_FAULT) {
2137 /* FIXME: need to provide precise fault address */
2138 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2142 save_state_to_tss16(ctxt, ops, &tss_seg);
2144 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2146 if (ret == X86EMUL_PROPAGATE_FAULT) {
2147 /* FIXME: need to provide precise fault address */
2148 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2152 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2154 if (ret == X86EMUL_PROPAGATE_FAULT) {
2155 /* FIXME: need to provide precise fault address */
2156 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
/* 0xffff means "no back link": skip writing prev_task_link */
2160 if (old_tss_sel != 0xffff) {
2161 tss_seg.prev_task_link = old_tss_sel;
2163 ret = ops->write_std(new_tss_base,
2164 &tss_seg.prev_task_link,
2165 sizeof tss_seg.prev_task_link,
2167 if (ret == X86EMUL_PROPAGATE_FAULT) {
2168 /* FIXME: need to provide precise fault address */
2169 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2174 return load_state_from_tss16(ctxt, ops, &tss_seg);
/*
 * Snapshot the current (32-bit) task state into *tss: CR3, EFLAGS, the
 * eight general registers, and all segment/LDT selectors.
 */
2177 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2178 struct x86_emulate_ops *ops,
2179 struct tss_segment_32 *tss)
2181 struct decode_cache *c = &ctxt->decode;
2183 tss->cr3 = ops->get_cr(3, ctxt->vcpu);
2185 tss->eflags = ctxt->eflags;
2186 tss->eax = c->regs[VCPU_REGS_RAX];
2187 tss->ecx = c->regs[VCPU_REGS_RCX];
2188 tss->edx = c->regs[VCPU_REGS_RDX];
2189 tss->ebx = c->regs[VCPU_REGS_RBX];
2190 tss->esp = c->regs[VCPU_REGS_RSP];
2191 tss->ebp = c->regs[VCPU_REGS_RBP];
2192 tss->esi = c->regs[VCPU_REGS_RSI];
2193 tss->edi = c->regs[VCPU_REGS_RDI];
2195 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2196 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2197 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2198 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2199 tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
2200 tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
2201 tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
/*
 * Restore guest state from a 32-bit TSS: CR3, flags, general registers,
 * then selectors, then the full segment-descriptor loads (faults at
 * that point are handled in the context of the new task).
 */
2204 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2205 struct x86_emulate_ops *ops,
2206 struct tss_segment_32 *tss)
2208 struct decode_cache *c = &ctxt->decode;
2211 ops->set_cr(3, tss->cr3, ctxt->vcpu);
/* bit 1 of EFLAGS is architecturally always set */
2213 ctxt->eflags = tss->eflags | 2;
2214 c->regs[VCPU_REGS_RAX] = tss->eax;
2215 c->regs[VCPU_REGS_RCX] = tss->ecx;
2216 c->regs[VCPU_REGS_RDX] = tss->edx;
2217 c->regs[VCPU_REGS_RBX] = tss->ebx;
2218 c->regs[VCPU_REGS_RSP] = tss->esp;
2219 c->regs[VCPU_REGS_RBP] = tss->ebp;
2220 c->regs[VCPU_REGS_RSI] = tss->esi;
2221 c->regs[VCPU_REGS_RDI] = tss->edi;
2224 * SDM says that segment selectors are loaded before segment
2227 ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
2228 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2229 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2230 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2231 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2232 ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
2233 ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
2236 * Now load segment descriptors. If fault happenes at this stage
2237 * it is handled in a context of new task
2239 ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
2240 if (ret != X86EMUL_CONTINUE)
2242 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2243 if (ret != X86EMUL_CONTINUE)
2245 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2246 if (ret != X86EMUL_CONTINUE)
2248 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2249 if (ret != X86EMUL_CONTINUE)
2251 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2252 if (ret != X86EMUL_CONTINUE)
2254 ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2255 if (ret != X86EMUL_CONTINUE)
2257 ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2258 if (ret != X86EMUL_CONTINUE)
2261 return X86EMUL_CONTINUE;
/*
 * Perform a 32-bit task switch; identical flow to task_switch_16() but
 * with the 32-bit TSS layout: save old state, read the new TSS, write
 * the back-link if requested, then load the new state.
 */
2264 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2265 struct x86_emulate_ops *ops,
2266 u16 tss_selector, u16 old_tss_sel,
2267 ulong old_tss_base, struct desc_struct *new_desc)
2269 struct tss_segment_32 tss_seg;
2271 u32 err, new_tss_base = get_desc_base(new_desc);
2273 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2275 if (ret == X86EMUL_PROPAGATE_FAULT) {
2276 /* FIXME: need to provide precise fault address */
2277 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2281 save_state_to_tss32(ctxt, ops, &tss_seg);
2283 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2285 if (ret == X86EMUL_PROPAGATE_FAULT) {
2286 /* FIXME: need to provide precise fault address */
2287 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2291 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2293 if (ret == X86EMUL_PROPAGATE_FAULT) {
2294 /* FIXME: need to provide precise fault address */
2295 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
/* 0xffff means "no back link": skip writing prev_task_link */
2299 if (old_tss_sel != 0xffff) {
2300 tss_seg.prev_task_link = old_tss_sel;
2302 ret = ops->write_std(new_tss_base,
2303 &tss_seg.prev_task_link,
2304 sizeof tss_seg.prev_task_link,
2306 if (ret == X86EMUL_PROPAGATE_FAULT) {
2307 /* FIXME: need to provide precise fault address */
2308 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2313 return load_state_from_tss32(ctxt, ops, &tss_seg);
/*
 * Core task-switch logic shared by int/iret/call/jmp-initiated
 * switches: privilege and TSS-limit checks, busy-flag bookkeeping on
 * the old and new TSS descriptors, NT/back-link handling, and dispatch
 * to the 16- or 32-bit TSS format based on the new descriptor type.
 */
2316 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2317 struct x86_emulate_ops *ops,
2318 u16 tss_selector, int reason)
2320 struct desc_struct curr_tss_desc, next_tss_desc;
2322 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2323 ulong old_tss_base =
2324 get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
2327 /* FIXME: old_tss_base == ~0 ? */
2329 ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2330 if (ret != X86EMUL_CONTINUE)
2332 ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2333 if (ret != X86EMUL_CONTINUE)
2336 /* FIXME: check that next_tss_desc is tss */
/* privilege check does not apply to IRET-initiated switches */
2338 if (reason != TASK_SWITCH_IRET) {
2339 if ((tss_selector & 3) > next_tss_desc.dpl ||
2340 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
2341 kvm_inject_gp(ctxt->vcpu, 0);
2342 return X86EMUL_PROPAGATE_FAULT;
/* new TSS must be present and large enough for its format */
2346 desc_limit = desc_limit_scaled(&next_tss_desc);
2347 if (!next_tss_desc.p ||
2348 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2349 desc_limit < 0x2b)) {
2350 kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
2351 tss_selector & 0xfffc);
2352 return X86EMUL_PROPAGATE_FAULT;
2355 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2356 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2357 write_segment_descriptor(ctxt, ops, old_tss_sel,
2361 if (reason == TASK_SWITCH_IRET)
2362 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2364 /* set back link to prev task only if NT bit is set in eflags
2365 note that old_tss_sel is not used after this point */
2366 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2367 old_tss_sel = 0xffff;
/* descriptor type bit 3 distinguishes 32-bit from 16-bit TSS */
2369 if (next_tss_desc.type & 8)
2370 ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2371 old_tss_base, &next_tss_desc);
2373 ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2374 old_tss_base, &next_tss_desc);
2376 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2377 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2379 if (reason != TASK_SWITCH_IRET) {
2380 next_tss_desc.type |= (1 << 1); /* set busy flag */
2381 write_segment_descriptor(ctxt, ops, tss_selector,
/* task switches always set CR0.TS */
2385 ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2386 ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2387 ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
/*
 * Public entry point for task-switch emulation: work on a shadow copy
 * of the guest registers and commit them (plus RIP) only if the switch
 * completed successfully.
 */
2392 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2393 struct x86_emulate_ops *ops,
2394 u16 tss_selector, int reason)
2396 struct decode_cache *c = &ctxt->decode;
2399 memset(c, 0, sizeof(struct decode_cache));
2401 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2403 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason);
2405 if (rc == X86EMUL_CONTINUE) {
/* commit the shadow register state back to the vcpu */
2406 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2407 kvm_rip_write(ctxt->vcpu, c->eip);
2413 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
2414 int reg, struct operand *op)
2416 struct decode_cache *c = &ctxt->decode;
2417 int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2419 register_address_increment(c, &c->regs[reg], df * op->bytes);
2420 op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
2424 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
2427 unsigned long saved_eip = 0;
2428 struct decode_cache *c = &ctxt->decode;
2429 int rc = X86EMUL_CONTINUE;
2431 ctxt->interruptibility = 0;
2433 /* Shadow copy of register state. Committed on successful emulation.
2434 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
2438 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2441 if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2442 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2446 /* LOCK prefix is allowed only with some instructions */
2447 if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
2448 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2452 /* Privileged instruction can be executed only in CPL=0 */
2453 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
2454 kvm_inject_gp(ctxt->vcpu, 0);
2458 if (c->rep_prefix && (c->d & String)) {
2459 /* All REP prefixes have the same first termination condition */
2460 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2461 kvm_rip_write(ctxt->vcpu, c->eip);
2464 /* The second termination condition only applies for REPE
2465 * and REPNE. Test if the repeat string operation prefix is
2466 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
2467 * corresponding termination condition according to:
2468 * - if REPE/REPZ and ZF = 0 then done
2469 * - if REPNE/REPNZ and ZF = 1 then done
2471 if ((c->b == 0xa6) || (c->b == 0xa7) ||
2472 (c->b == 0xae) || (c->b == 0xaf)) {
2473 if ((c->rep_prefix == REPE_PREFIX) &&
2474 ((ctxt->eflags & EFLG_ZF) == 0)) {
2475 kvm_rip_write(ctxt->vcpu, c->eip);
2478 if ((c->rep_prefix == REPNE_PREFIX) &&
2479 ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
2480 kvm_rip_write(ctxt->vcpu, c->eip);
2487 if (c->src.type == OP_MEM) {
2488 rc = ops->read_emulated((unsigned long)c->src.ptr,
2492 if (rc != X86EMUL_CONTINUE)
2494 c->src.orig_val = c->src.val;
2497 if (c->src2.type == OP_MEM) {
2498 rc = ops->read_emulated((unsigned long)c->src2.ptr,
2502 if (rc != X86EMUL_CONTINUE)
2506 if ((c->d & DstMask) == ImplicitOps)
2510 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
2511 /* optimisation - avoid slow emulated read if Mov */
2512 rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val,
2513 c->dst.bytes, ctxt->vcpu);
2514 if (rc != X86EMUL_CONTINUE)
2517 c->dst.orig_val = c->dst.val;
2527 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
2529 case 0x06: /* push es */
2530 emulate_push_sreg(ctxt, VCPU_SREG_ES);
2532 case 0x07: /* pop es */
2533 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
2534 if (rc != X86EMUL_CONTINUE)
2539 emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2541 case 0x0e: /* push cs */
2542 emulate_push_sreg(ctxt, VCPU_SREG_CS);
2546 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
2548 case 0x16: /* push ss */
2549 emulate_push_sreg(ctxt, VCPU_SREG_SS);
2551 case 0x17: /* pop ss */
2552 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
2553 if (rc != X86EMUL_CONTINUE)
2558 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
2560 case 0x1e: /* push ds */
2561 emulate_push_sreg(ctxt, VCPU_SREG_DS);
2563 case 0x1f: /* pop ds */
2564 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
2565 if (rc != X86EMUL_CONTINUE)
2570 emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
2574 emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
2578 emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
2582 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2584 case 0x40 ... 0x47: /* inc r16/r32 */
2585 emulate_1op("inc", c->dst, ctxt->eflags);
2587 case 0x48 ... 0x4f: /* dec r16/r32 */
2588 emulate_1op("dec", c->dst, ctxt->eflags);
2590 case 0x50 ... 0x57: /* push reg */
2593 case 0x58 ... 0x5f: /* pop reg */
2595 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
2596 if (rc != X86EMUL_CONTINUE)
2599 case 0x60: /* pusha */
2600 emulate_pusha(ctxt);
2602 case 0x61: /* popa */
2603 rc = emulate_popa(ctxt, ops);
2604 if (rc != X86EMUL_CONTINUE)
2607 case 0x63: /* movsxd */
2608 if (ctxt->mode != X86EMUL_MODE_PROT64)
2609 goto cannot_emulate;
2610 c->dst.val = (s32) c->src.val;
2612 case 0x68: /* push imm */
2613 case 0x6a: /* push imm8 */
2616 case 0x6c: /* insb */
2617 case 0x6d: /* insw/insd */
2618 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2619 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2620 kvm_inject_gp(ctxt->vcpu, 0);
2623 if (kvm_emulate_pio_string(ctxt->vcpu,
2625 (c->d & ByteOp) ? 1 : c->op_bytes,
2627 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
2628 (ctxt->eflags & EFLG_DF),
2629 register_address(c, es_base(ctxt),
2630 c->regs[VCPU_REGS_RDI]),
2632 c->regs[VCPU_REGS_RDX]) == 0) {
2637 case 0x6e: /* outsb */
2638 case 0x6f: /* outsw/outsd */
2639 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2640 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2641 kvm_inject_gp(ctxt->vcpu, 0);
2644 if (kvm_emulate_pio_string(ctxt->vcpu,
2646 (c->d & ByteOp) ? 1 : c->op_bytes,
2648 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
2649 (ctxt->eflags & EFLG_DF),
2651 seg_override_base(ctxt, c),
2652 c->regs[VCPU_REGS_RSI]),
2654 c->regs[VCPU_REGS_RDX]) == 0) {
2659 case 0x70 ... 0x7f: /* jcc (short) */
2660 if (test_cc(c->b, ctxt->eflags))
2661 jmp_rel(c, c->src.val);
2663 case 0x80 ... 0x83: /* Grp1 */
2664 switch (c->modrm_reg) {
2684 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
2686 case 0x86 ... 0x87: /* xchg */
2688 /* Write back the register source. */
2689 switch (c->dst.bytes) {
2691 *(u8 *) c->src.ptr = (u8) c->dst.val;
2694 *(u16 *) c->src.ptr = (u16) c->dst.val;
2697 *c->src.ptr = (u32) c->dst.val;
2698 break; /* 64b reg: zero-extend */
2700 *c->src.ptr = c->dst.val;
2704 * Write back the memory destination with implicit LOCK
2707 c->dst.val = c->src.val;
2710 case 0x88 ... 0x8b: /* mov */
2712 case 0x8c: { /* mov r/m, sreg */
2713 struct kvm_segment segreg;
2715 if (c->modrm_reg <= VCPU_SREG_GS)
2716 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
2718 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2721 c->dst.val = segreg.selector;
2724 case 0x8d: /* lea r16/r32, m */
2725 c->dst.val = c->modrm_ea;
2727 case 0x8e: { /* mov seg, r/m16 */
2732 if (c->modrm_reg == VCPU_SREG_CS ||
2733 c->modrm_reg > VCPU_SREG_GS) {
2734 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2738 if (c->modrm_reg == VCPU_SREG_SS)
2739 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);
2741 rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
2743 c->dst.type = OP_NONE; /* Disable writeback. */
2746 case 0x8f: /* pop (sole member of Grp1a) */
2747 rc = emulate_grp1a(ctxt, ops);
2748 if (rc != X86EMUL_CONTINUE)
2751 case 0x90: /* nop / xchg r8,rax */
2752 if (!(c->rex_prefix & 1)) { /* nop */
2753 c->dst.type = OP_NONE;
2756 case 0x91 ... 0x97: /* xchg reg,rax */
2757 c->src.type = c->dst.type = OP_REG;
2758 c->src.bytes = c->dst.bytes = c->op_bytes;
2759 c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
2760 c->src.val = *(c->src.ptr);
2762 case 0x9c: /* pushf */
2763 c->src.val = (unsigned long) ctxt->eflags;
2766 case 0x9d: /* popf */
2767 c->dst.type = OP_REG;
2768 c->dst.ptr = (unsigned long *) &ctxt->eflags;
2769 c->dst.bytes = c->op_bytes;
2770 rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2771 if (rc != X86EMUL_CONTINUE)
2774 case 0xa0 ... 0xa1: /* mov */
2775 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2776 c->dst.val = c->src.val;
2778 case 0xa2 ... 0xa3: /* mov */
2779 c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
2781 case 0xa4 ... 0xa5: /* movs */
2783 case 0xa6 ... 0xa7: /* cmps */
2784 c->dst.type = OP_NONE; /* Disable writeback. */
2785 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2787 case 0xaa ... 0xab: /* stos */
2788 c->dst.val = c->regs[VCPU_REGS_RAX];
2790 case 0xac ... 0xad: /* lods */
2792 case 0xae ... 0xaf: /* scas */
2793 DPRINTF("Urk! I don't handle SCAS.\n");
2794 goto cannot_emulate;
2795 case 0xb0 ... 0xbf: /* mov r, imm */
2800 case 0xc3: /* ret */
2801 c->dst.type = OP_REG;
2802 c->dst.ptr = &c->eip;
2803 c->dst.bytes = c->op_bytes;
2804 goto pop_instruction;
2805 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
2807 c->dst.val = c->src.val;
2809 case 0xcb: /* ret far */
2810 rc = emulate_ret_far(ctxt, ops);
2811 if (rc != X86EMUL_CONTINUE)
2814 case 0xd0 ... 0xd1: /* Grp2 */
2818 case 0xd2 ... 0xd3: /* Grp2 */
2819 c->src.val = c->regs[VCPU_REGS_RCX];
2822 case 0xe4: /* inb */
2825 case 0xe6: /* outb */
2826 case 0xe7: /* out */
2828 case 0xe8: /* call (near) */ {
2829 long int rel = c->src.val;
2830 c->src.val = (unsigned long) c->eip;
2835 case 0xe9: /* jmp rel */
2837 case 0xea: /* jmp far */
2839 if (load_segment_descriptor(ctxt, ops, c->src2.val,
2843 c->eip = c->src.val;
2846 jmp: /* jmp rel short */
2847 jmp_rel(c, c->src.val);
2848 c->dst.type = OP_NONE; /* Disable writeback. */
2850 case 0xec: /* in al,dx */
2851 case 0xed: /* in (e/r)ax,dx */
2852 c->src.val = c->regs[VCPU_REGS_RDX];
2854 c->dst.bytes = min(c->dst.bytes, 4u);
2855 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2856 kvm_inject_gp(ctxt->vcpu, 0);
2859 if (!ops->pio_in_emulated(c->dst.bytes, c->src.val,
2860 &c->dst.val, 1, ctxt->vcpu))
2861 goto done; /* IO is needed */
2863 case 0xee: /* out al,dx */
2864 case 0xef: /* out (e/r)ax,dx */
2865 c->src.val = c->regs[VCPU_REGS_RDX];
2867 c->dst.bytes = min(c->dst.bytes, 4u);
2868 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2869 kvm_inject_gp(ctxt->vcpu, 0);
2872 ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
2874 c->dst.type = OP_NONE; /* Disable writeback. */
2876 case 0xf4: /* hlt */
2877 ctxt->vcpu->arch.halt_request = 1;
2879 case 0xf5: /* cmc */
2880 /* complement carry flag from eflags reg */
2881 ctxt->eflags ^= EFLG_CF;
2882 c->dst.type = OP_NONE; /* Disable writeback. */
2884 case 0xf6 ... 0xf7: /* Grp3 */
2885 if (!emulate_grp3(ctxt, ops))
2886 goto cannot_emulate;
2888 case 0xf8: /* clc */
2889 ctxt->eflags &= ~EFLG_CF;
2890 c->dst.type = OP_NONE; /* Disable writeback. */
2892 case 0xfa: /* cli */
2893 if (emulator_bad_iopl(ctxt, ops))
2894 kvm_inject_gp(ctxt->vcpu, 0);
2896 ctxt->eflags &= ~X86_EFLAGS_IF;
2897 c->dst.type = OP_NONE; /* Disable writeback. */
2900 case 0xfb: /* sti */
2901 if (emulator_bad_iopl(ctxt, ops))
2902 kvm_inject_gp(ctxt->vcpu, 0);
2904 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
2905 ctxt->eflags |= X86_EFLAGS_IF;
2906 c->dst.type = OP_NONE; /* Disable writeback. */
2909 case 0xfc: /* cld */
2910 ctxt->eflags &= ~EFLG_DF;
2911 c->dst.type = OP_NONE; /* Disable writeback. */
2913 case 0xfd: /* std */
2914 ctxt->eflags |= EFLG_DF;
2915 c->dst.type = OP_NONE; /* Disable writeback. */
2917 case 0xfe: /* Grp4 */
2919 rc = emulate_grp45(ctxt, ops);
2920 if (rc != X86EMUL_CONTINUE)
2923 case 0xff: /* Grp5 */
2924 if (c->modrm_reg == 5)
2930 rc = writeback(ctxt, ops);
2931 if (rc != X86EMUL_CONTINUE)
2934 if ((c->d & SrcMask) == SrcSI)
2935 string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
2938 if ((c->d & DstMask) == DstDI)
2939 string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);
2941 if (c->rep_prefix && (c->d & String))
2942 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
2944 /* Commit shadow register state. */
2945 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2946 kvm_rip_write(ctxt->vcpu, c->eip);
2949 if (rc == X86EMUL_UNHANDLEABLE) {
2957 case 0x01: /* lgdt, lidt, lmsw */
2958 switch (c->modrm_reg) {
2960 unsigned long address;
2962 case 0: /* vmcall */
2963 if (c->modrm_mod != 3 || c->modrm_rm != 1)
2964 goto cannot_emulate;
2966 rc = kvm_fix_hypercall(ctxt->vcpu);
2967 if (rc != X86EMUL_CONTINUE)
2970 /* Let the processor re-execute the fixed hypercall */
2972 /* Disable writeback. */
2973 c->dst.type = OP_NONE;
2976 rc = read_descriptor(ctxt, ops, c->src.ptr,
2977 &size, &address, c->op_bytes);
2978 if (rc != X86EMUL_CONTINUE)
2980 realmode_lgdt(ctxt->vcpu, size, address);
2981 /* Disable writeback. */
2982 c->dst.type = OP_NONE;
2984 case 3: /* lidt/vmmcall */
2985 if (c->modrm_mod == 3) {
2986 switch (c->modrm_rm) {
2988 rc = kvm_fix_hypercall(ctxt->vcpu);
2989 if (rc != X86EMUL_CONTINUE)
2993 goto cannot_emulate;
2996 rc = read_descriptor(ctxt, ops, c->src.ptr,
2999 if (rc != X86EMUL_CONTINUE)
3001 realmode_lidt(ctxt->vcpu, size, address);
3003 /* Disable writeback. */
3004 c->dst.type = OP_NONE;
3008 c->dst.val = ops->get_cr(0, ctxt->vcpu);
3011 ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
3012 (c->src.val & 0x0f), ctxt->vcpu);
3013 c->dst.type = OP_NONE;
3015 case 5: /* not defined */
3016 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3019 emulate_invlpg(ctxt->vcpu, c->modrm_ea);
3020 /* Disable writeback. */
3021 c->dst.type = OP_NONE;
3024 goto cannot_emulate;
3027 case 0x05: /* syscall */
3028 rc = emulate_syscall(ctxt);
3029 if (rc != X86EMUL_CONTINUE)
3035 emulate_clts(ctxt->vcpu);
3036 c->dst.type = OP_NONE;
3038 case 0x08: /* invd */
3039 case 0x09: /* wbinvd */
3040 case 0x0d: /* GrpP (prefetch) */
3041 case 0x18: /* Grp16 (prefetch/nop) */
3042 c->dst.type = OP_NONE;
3044 case 0x20: /* mov cr, reg */
3045 switch (c->modrm_reg) {
3049 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3052 c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3053 c->dst.type = OP_NONE; /* no writeback */
3055 case 0x21: /* mov from dr to reg */
3056 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3057 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3058 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3061 emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
3062 c->dst.type = OP_NONE; /* no writeback */
3064 case 0x22: /* mov reg, cr */
3065 ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
3066 c->dst.type = OP_NONE;
3068 case 0x23: /* mov from reg to dr */
3069 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3070 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3071 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3074 emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
3075 c->dst.type = OP_NONE; /* no writeback */
3079 msr_data = (u32)c->regs[VCPU_REGS_RAX]
3080 | ((u64)c->regs[VCPU_REGS_RDX] << 32);
3081 if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3082 kvm_inject_gp(ctxt->vcpu, 0);
3085 rc = X86EMUL_CONTINUE;
3086 c->dst.type = OP_NONE;
3090 if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3091 kvm_inject_gp(ctxt->vcpu, 0);
3094 c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3095 c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3097 rc = X86EMUL_CONTINUE;
3098 c->dst.type = OP_NONE;
3100 case 0x34: /* sysenter */
3101 rc = emulate_sysenter(ctxt);
3102 if (rc != X86EMUL_CONTINUE)
3107 case 0x35: /* sysexit */
3108 rc = emulate_sysexit(ctxt);
3109 if (rc != X86EMUL_CONTINUE)
3114 case 0x40 ... 0x4f: /* cmov */
3115 c->dst.val = c->dst.orig_val = c->src.val;
3116 if (!test_cc(c->b, ctxt->eflags))
3117 c->dst.type = OP_NONE; /* no writeback */
3119 case 0x80 ... 0x8f: /* jnz rel, etc*/
3120 if (test_cc(c->b, ctxt->eflags))
3121 jmp_rel(c, c->src.val);
3122 c->dst.type = OP_NONE;
3124 case 0xa0: /* push fs */
3125 emulate_push_sreg(ctxt, VCPU_SREG_FS);
3127 case 0xa1: /* pop fs */
3128 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3129 if (rc != X86EMUL_CONTINUE)
3134 c->dst.type = OP_NONE;
3135 /* only subword offset */
3136 c->src.val &= (c->dst.bytes << 3) - 1;
3137 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3139 case 0xa4: /* shld imm8, r, r/m */
3140 case 0xa5: /* shld cl, r, r/m */
3141 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3143 case 0xa8: /* push gs */
3144 emulate_push_sreg(ctxt, VCPU_SREG_GS);
3146 case 0xa9: /* pop gs */
3147 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3148 if (rc != X86EMUL_CONTINUE)
3153 /* only subword offset */
3154 c->src.val &= (c->dst.bytes << 3) - 1;
3155 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3157 case 0xac: /* shrd imm8, r, r/m */
3158 case 0xad: /* shrd cl, r, r/m */
3159 emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3161 case 0xae: /* clflush */
3163 case 0xb0 ... 0xb1: /* cmpxchg */
3165 * Save real source value, then compare EAX against
3168 c->src.orig_val = c->src.val;
3169 c->src.val = c->regs[VCPU_REGS_RAX];
3170 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3171 if (ctxt->eflags & EFLG_ZF) {
3172 /* Success: write back to memory. */
3173 c->dst.val = c->src.orig_val;
3175 /* Failure: write the value we saw to EAX. */
3176 c->dst.type = OP_REG;
3177 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3182 /* only subword offset */
3183 c->src.val &= (c->dst.bytes << 3) - 1;
3184 emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3186 case 0xb6 ... 0xb7: /* movzx */
3187 c->dst.bytes = c->op_bytes;
3188 c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3191 case 0xba: /* Grp8 */
3192 switch (c->modrm_reg & 3) {
3205 /* only subword offset */
3206 c->src.val &= (c->dst.bytes << 3) - 1;
3207 emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3209 case 0xbe ... 0xbf: /* movsx */
3210 c->dst.bytes = c->op_bytes;
3211 c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3214 case 0xc3: /* movnti */
3215 c->dst.bytes = c->op_bytes;
3216 c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3219 case 0xc7: /* Grp9 (cmpxchg8b) */
3220 rc = emulate_grp9(ctxt, ops);
3221 if (rc != X86EMUL_CONTINUE)
3223 c->dst.type = OP_NONE;
3229 DPRINTF("Cannot emulate %02x\n", c->b);