/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */
#ifndef __KERNEL__
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * never be emulated.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)      /* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)      /* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)      /* Register operand. */
#define DstMem      (3<<1)      /* Memory operand. */
#define DstAcc      (4<<1)      /* Destination Accumulator */
#define DstDI       (5<<1)      /* Destination is in ES:(E)DI */
#define DstMem64    (6<<1)      /* 64bit memory operand */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)      /* No source operand. */
#define SrcImplicit (0<<4)      /* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)      /* Register operand. */
#define SrcMem      (2<<4)      /* Memory operand. */
#define SrcMem16    (3<<4)      /* Memory operand (16-bit). */
#define SrcMem32    (4<<4)      /* Memory operand (32-bit). */
#define SrcImm      (5<<4)      /* Immediate operand. */
#define SrcImmByte  (6<<4)      /* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)      /* Implied '1' */
#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)    /* Source is in the DS:RSI */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
/* Destination is only written; never read. */
#define MemAbs      (1<<11)     /* Memory operand is absolute displacement */
#define String      (1<<12)     /* String instruction (rep capable) */
#define Stack       (1<<13)     /* Stack instruction (push/pop) */
#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
#define GroupMask   0xff        /* Group number stored in bits 0:7 */
#define Lock        (1<<26)     /* lock prefix is allowed for the instruction */
#define Priv        (1<<27)     /* instruction generates #GP if current CPL != 0 */
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm16   (4<<29)
#define Src2Mem16   (5<<29)     /* Used for Ep encoding. First argument has to be
                                   in memory and second argument is located
                                   immediately after the first one in memory. */
#define Src2Mask    (7<<29)
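
/*
 * Illustrative sketch, not part of the emulator: pulling one decode word
 * apart with the masks above.  For opcode 0x01 (add r/m,r) the table
 * entry below is DstMem | SrcReg | ModRM | Lock, so dst decodes as
 * memory, src as a register, and a ModRM byte must be fetched.
 */
static inline void example_unpack_decode_flags(u32 d)
{
        unsigned int dst = d & DstMask;         /* destination operand kind */
        unsigned int src = d & SrcMask;         /* source operand kind */
        bool byteop = d & ByteOp;               /* 8-bit operand size */
        (void)dst; (void)src; (void)byteop;
}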
        Group1_80, Group1_81, Group1_82, Group1_83,
        Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
static u32 opcode_table[256] = {
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, 0,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
        SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
        DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
        DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
        SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
        DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
        SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        Group | Group1_80, Group | Group1_81,
        Group | Group1_82, Group | Group1_83,
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
        ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
        DstReg | SrcMem | ModRM | Mov, Group | Group1A,
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        0, 0, SrcImm | Src2Imm16 | No64, 0,
        ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
        ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
        ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
        ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
        ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
        0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
        ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
        ByteOp | DstDI | String, DstDI | String,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
        0, ImplicitOps | Stack, 0, 0,
        ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
        0, 0, 0, ImplicitOps | Stack,
        ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
        ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
        ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
        0, 0, 0, 0, 0, 0, 0, 0,
        ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
        ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
        SrcImm | Stack, SrcImm | ImplicitOps,
        SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
        SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
        SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
        ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
        ImplicitOps, 0, ImplicitOps, ImplicitOps,
        ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
static u32 twobyte_table[256] = {
        0, Group | GroupDual | Group7, 0, 0,
        0, ImplicitOps, ImplicitOps | Priv, 0,
        ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
        0, ImplicitOps | ModRM, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
        ModRM | ImplicitOps | Priv, ModRM | Priv,
        ModRM | ImplicitOps | Priv, ModRM | Priv,
        0, 0, 0, 0, 0, 0, 0, 0,
        ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
        ImplicitOps, ImplicitOps | Priv, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
        SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ImplicitOps | Stack, ImplicitOps | Stack,
        0, DstMem | SrcReg | ModRM | BitOp,
        DstMem | SrcReg | Src2ImmByte | ModRM,
        DstMem | SrcReg | Src2CL | ModRM, 0, 0,
        ImplicitOps | Stack, ImplicitOps | Stack,
        0, DstMem | SrcReg | ModRM | BitOp | Lock,
        DstMem | SrcReg | Src2ImmByte | ModRM,
        DstMem | SrcReg | Src2CL | ModRM,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        0, DstMem | SrcReg | ModRM | BitOp | Lock,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem16 | ModRM | Mov,
        Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem16 | ModRM | Mov,
        0, 0, 0, DstMem | SrcReg | ModRM | Mov,
        0, 0, 0, Group | GroupDual | Group9,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
static u32 group_table[] = {
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM,
        DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
        ByteOp | SrcImm | DstMem | ModRM, 0,
        ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
        DstMem | SrcImm | ModRM, 0,
        DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
        ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
        DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
        SrcMem | ModRM | Stack, 0,
        SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
        SrcMem | ModRM | Stack, 0,
        0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
        DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
        0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,

static u32 group2_table[] = {
        SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov | Priv, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */
#if defined(CONFIG_X86_64)
#define _LO32 "k"               /* force 32-bit operand */
#define _STK  "%%rsp"           /* stack pointer */
#elif defined(__i386__)
#define _LO32 ""                /* force 32-bit operand */
#define _STK  "%%esp"           /* stack pointer */
#endif
/*
 * These EFLAGS bits are restored from the saved value during emulation,
 * and any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp) \
        /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
        "movl %"_sav",%"_LO32 _tmp"; " \
        "movl %"_msk",%"_LO32 _tmp"; " \
        "andl %"_LO32 _tmp",("_STK"); " \
        "notl %"_LO32 _tmp"; " \
        "andl %"_LO32 _tmp",("_STK"); " \
        "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
        "orl %"_LO32 _tmp",("_STK"); "

/* After executing instruction: write back the necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
        /* _sav |= EFLAGS & _msk; */ \
        "andl %"_msk",%"_LO32 _tmp"; " \
        "orl %"_LO32 _tmp",%"_sav"; "
#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
        __asm__ __volatile__ ( \
                _PRE_EFLAGS("0", "4", "2") \
                _op _suffix " %"_x"3,%1; " \
                _POST_EFLAGS("0", "4", "2") \
                : "=m" (_eflags), "=m" ((_dst).val), \
                : _y ((_src).val), "i" (EFLAGS_MASK));

/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
        unsigned long _tmp; \
        switch ((_dst).bytes) { \
        ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
        ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
        ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q"));

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
        unsigned long _tmp; \
        switch ((_dst).bytes) { \
        ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
        __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
                             _wx, _wy, _lx, _ly, _qx, _qy);

/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
        __emulate_2op(_op, _src, _dst, _eflags, \
                      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
        __emulate_2op(_op, _src, _dst, _eflags, \
                      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
        __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
                             "w", "r", _LO32, "r", "", "r")
/* Instruction has three operands and one operand is stored in the ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
        unsigned long _tmp; \
        _type _clv = (_cl).val; \
        _type _srcv = (_src).val; \
        _type _dstv = (_dst).val; \
        __asm__ __volatile__ ( \
                _PRE_EFLAGS("0", "5", "2") \
                _op _suffix " %4,%1 \n" \
                _POST_EFLAGS("0", "5", "2") \
                : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
                : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
        (_cl).val = (unsigned long) _clv; \
        (_src).val = (unsigned long) _srcv; \
        (_dst).val = (unsigned long) _dstv;

#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
        switch ((_dst).bytes) { \
        __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
                         "w", unsigned short); \
        __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
                         "l", unsigned int); \
        ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
                              "q", unsigned long));
#define __emulate_1op(_op, _dst, _eflags, _suffix) \
        unsigned long _tmp; \
        __asm__ __volatile__ ( \
                _PRE_EFLAGS("0", "3", "2") \
                _op _suffix " %1; " \
                _POST_EFLAGS("0", "3", "2") \
                : "=m" (_eflags), "+m" ((_dst).val), \
                : "i" (EFLAGS_MASK));

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags) \
        switch ((_dst).bytes) { \
        case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
        case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
        case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
        case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break;

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip) \
({      unsigned long _x; \
        rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \
        if (rc != X86EMUL_CONTINUE)
static inline unsigned long ad_mask(struct decode_cache *c)
        return (1UL << (c->ad_bytes << 3)) - 1;

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
        if (c->ad_bytes == sizeof(unsigned long))
        return reg & ad_mask(c);

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
        return base + address_mask(c, reg);

static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
        if (c->ad_bytes == sizeof(unsigned long))
        *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));

static inline void jmp_rel(struct decode_cache *c, int rel)
        register_address_increment(c, &c->eip, rel);
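
/*
 * Illustrative, hypothetical values: with ad_bytes == 2 only the low 16
 * bits of a register take part in address arithmetic, so an increment
 * wraps at 64K exactly like %si/%di during real-mode string ops.
 */
static inline void example_16bit_wrap(struct decode_cache *c)
{
        unsigned long reg = 0xffff;     /* 16-bit register at its maximum */
        c->ad_bytes = 2;
        register_address_increment(c, &reg, 1);
        /* reg is now 0; bits above ad_mask(c) are left untouched */
}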
static void set_seg_override(struct decode_cache *c, int seg)
        c->has_seg_override = true;
        c->seg_override = seg;

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
        if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
        return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
                                       struct decode_cache *c)
        if (!c->has_seg_override)
        return seg_base(ctxt, c->seg_override);

static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
        return seg_base(ctxt, VCPU_SREG_ES);

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
        return seg_base(ctxt, VCPU_SREG_SS);

static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
                              struct x86_emulate_ops *ops,
                              unsigned long linear, u8 *dest)
        struct fetch_cache *fc = &ctxt->decode.fetch;

        if (linear < fc->start || linear >= fc->end) {
                size = min(15UL, PAGE_SIZE - offset_in_page(linear));
                rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
                if (rc != X86EMUL_CONTINUE)
                fc->end = linear + size;
        *dest = fc->data[linear - fc->start];
        return X86EMUL_CONTINUE;

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
                         struct x86_emulate_ops *ops,
                         unsigned long eip, void *dest, unsigned size)
        /* x86 instructions are limited to 15 bytes. */
        if (eip + size - ctxt->eip > 15)
                return X86EMUL_UNHANDLEABLE;
        eip += ctxt->cs_base;
        rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
        if (rc != X86EMUL_CONTINUE)
        return X86EMUL_CONTINUE;
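
/*
 * Illustrative sketch of the limit check above (helper is hypothetical):
 * the 15-byte budget is measured from the start of the instruction
 * (ctxt->eip), not per fetch, so prefixes, opcode, ModRM/SIB,
 * displacement and immediate must all fit together.
 */
static inline bool example_fetch_exceeds_limit(struct x86_emulate_ctxt *ctxt,
                                               unsigned long eip, unsigned size)
{
        return eip + size - ctxt->eip > 15;
}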
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
                             int highbyte_regs)
        p = &regs[modrm_reg];
        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
                p = (unsigned char *)&regs[modrm_reg & 3] + 1;

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops,
                           void *ptr,
                           u16 *size, unsigned long *address, int op_bytes)
        rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
                           ctxt->vcpu, NULL);
        if (rc != X86EMUL_CONTINUE)
        rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
                           ctxt->vcpu, NULL);

static int test_cc(unsigned int condition, unsigned int flags)
        switch ((condition & 15) >> 1) {
                rc |= (flags & EFLG_OF);
        case 1: /* b/c/nae */
                rc |= (flags & EFLG_CF);
                rc |= (flags & EFLG_ZF);
                rc |= (flags & (EFLG_CF|EFLG_ZF));
                rc |= (flags & EFLG_SF);
                rc |= (flags & EFLG_PF);
                rc |= (flags & EFLG_ZF);
                rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));

        /* Odd condition identifiers (lsb == 1) have inverted sense. */
        return (!!rc ^ (condition & 1));
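
/*
 * Illustrative, not used by the emulator: evaluating "jz" (opcode 0x74,
 * condition nibble 0x4) against a flags image.  The matching "jnz"
 * (0x75) is the same nibble with the low bit set, which inverts the
 * sense in test_cc().
 */
static inline int example_jz_taken(unsigned int eflags)
{
        return test_cc(0x4, eflags);    /* nonzero iff ZF is set */
}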
static void decode_register_operand(struct operand *op,
                                    struct decode_cache *c,
                                    int inhibit_bytereg)
        unsigned reg = c->modrm_reg;
        int highbyte_regs = c->rex_prefix == 0;

        reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
        if ((c->d & ByteOp) && !inhibit_bytereg) {
                op->ptr = decode_register(reg, c->regs, highbyte_regs);
                op->val = *(u8 *)op->ptr;
        op->ptr = decode_register(reg, c->regs, 0);
        op->bytes = c->op_bytes;
        op->val = *(u16 *)op->ptr;
        op->val = *(u32 *)op->ptr;
        op->val = *(u64 *) op->ptr;
        op->orig_val = op->val;
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;
        int index_reg = 0, base_reg = 0, scale;
        int rc = X86EMUL_CONTINUE;

        c->modrm_reg = (c->rex_prefix & 4) << 1;        /* REX.R */
        index_reg = (c->rex_prefix & 2) << 2;           /* REX.X */
        c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */

        c->modrm = insn_fetch(u8, 1, c->eip);
        c->modrm_mod |= (c->modrm & 0xc0) >> 6;
        c->modrm_reg |= (c->modrm & 0x38) >> 3;
        c->modrm_rm |= (c->modrm & 0x07);
        if (c->modrm_mod == 3) {
                c->modrm_ptr = decode_register(c->modrm_rm,
                                               c->regs, c->d & ByteOp);
                c->modrm_val = *(unsigned long *)c->modrm_ptr;

        if (c->ad_bytes == 2) {
                unsigned bx = c->regs[VCPU_REGS_RBX];
                unsigned bp = c->regs[VCPU_REGS_RBP];
                unsigned si = c->regs[VCPU_REGS_RSI];
                unsigned di = c->regs[VCPU_REGS_RDI];

                /* 16-bit ModR/M decode. */
                switch (c->modrm_mod) {
                        if (c->modrm_rm == 6)
                                c->modrm_ea += insn_fetch(u16, 2, c->eip);
                        c->modrm_ea += insn_fetch(s8, 1, c->eip);
                        c->modrm_ea += insn_fetch(u16, 2, c->eip);
                switch (c->modrm_rm) {
                        c->modrm_ea += bx + si;
                        c->modrm_ea += bx + di;
                        c->modrm_ea += bp + si;
                        c->modrm_ea += bp + di;
                        if (c->modrm_mod != 0)
                if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
                    (c->modrm_rm == 6 && c->modrm_mod != 0))
                        if (!c->has_seg_override)
                                set_seg_override(c, VCPU_SREG_SS);
                c->modrm_ea = (u16)c->modrm_ea;
                /* 32/64-bit ModR/M decode. */
                if ((c->modrm_rm & 7) == 4) {
                        sib = insn_fetch(u8, 1, c->eip);
                        index_reg |= (sib >> 3) & 7;
                        if ((base_reg & 7) == 5 && c->modrm_mod == 0)
                                c->modrm_ea += insn_fetch(s32, 4, c->eip);
                        c->modrm_ea += c->regs[base_reg];
                        c->modrm_ea += c->regs[index_reg] << scale;
                } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                        c->modrm_ea += c->regs[c->modrm_rm];
                switch (c->modrm_mod) {
                        if (c->modrm_rm == 5)
                                c->modrm_ea += insn_fetch(s32, 4, c->eip);
                        c->modrm_ea += insn_fetch(s8, 1, c->eip);
                        c->modrm_ea += insn_fetch(s32, 4, c->eip);
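
/*
 * Illustrative, not part of the decoder: the field split performed above
 * on a raw ModRM byte.  E.g. 0x45 is mod=01, reg=000, rm=101, giving
 * [di+disp8] with 16-bit addressing and [ebp/rbp+disp8] with 32/64-bit
 * addressing.
 */
static inline void example_split_modrm(u8 modrm)
{
        u8 mod = (modrm & 0xc0) >> 6;   /* 0-2: memory forms, 3: register */
        u8 reg = (modrm & 0x38) >> 3;   /* register, or opcode extension */
        u8 rm  = modrm & 0x07;          /* effective-address base */
        (void)mod; (void)reg; (void)rm;
}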
static int decode_abs(struct x86_emulate_ctxt *ctxt,
                      struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;
        int rc = X86EMUL_CONTINUE;

        switch (c->ad_bytes) {
                c->modrm_ea = insn_fetch(u16, 2, c->eip);
                c->modrm_ea = insn_fetch(u32, 4, c->eip);
                c->modrm_ea = insn_fetch(u64, 8, c->eip);
int
x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;
        int rc = X86EMUL_CONTINUE;
        int mode = ctxt->mode;
        int def_op_bytes, def_ad_bytes, group;

        /* we cannot decode an insn before we complete the previous rep insn */
        WARN_ON(ctxt->restart);

        /* Shadow copy of register state. Committed on successful emulation. */
        memset(c, 0, sizeof(struct decode_cache));
        ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
        memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

        case X86EMUL_MODE_REAL:
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
                def_op_bytes = def_ad_bytes = 2;
        case X86EMUL_MODE_PROT32:
                def_op_bytes = def_ad_bytes = 4;
        case X86EMUL_MODE_PROT64:

        c->op_bytes = def_op_bytes;
        c->ad_bytes = def_ad_bytes;

        /* Legacy prefixes. */
        switch (c->b = insn_fetch(u8, 1, c->eip)) {
        case 0x66:      /* operand-size override */
                /* switch between 2/4 bytes */
                c->op_bytes = def_op_bytes ^ 6;
        case 0x67:      /* address-size override */
                if (mode == X86EMUL_MODE_PROT64)
                        /* switch between 4/8 bytes */
                        c->ad_bytes = def_ad_bytes ^ 12;
                        /* switch between 2/4 bytes */
                        c->ad_bytes = def_ad_bytes ^ 6;
        case 0x26:      /* ES override */
        case 0x2e:      /* CS override */
        case 0x36:      /* SS override */
        case 0x3e:      /* DS override */
                set_seg_override(c, (c->b >> 3) & 3);
        case 0x64:      /* FS override */
        case 0x65:      /* GS override */
                set_seg_override(c, c->b & 7);
        case 0x40 ... 0x4f: /* REX */
                if (mode != X86EMUL_MODE_PROT64)
                c->rex_prefix = c->b;
        case 0xf0:      /* LOCK */
        case 0xf2:      /* REPNE/REPNZ */
                c->rep_prefix = REPNE_PREFIX;
        case 0xf3:      /* REP/REPE/REPZ */
                c->rep_prefix = REPE_PREFIX;

        /* Any legacy prefix after a REX prefix nullifies its effect. */

        if (c->rex_prefix & 8)
                c->op_bytes = 8;        /* REX.W */

        /* Opcode byte(s). */
        c->d = opcode_table[c->b];

        /* Two-byte opcode? */
        c->b = insn_fetch(u8, 1, c->eip);
        c->d = twobyte_table[c->b];

        group = c->d & GroupMask;
        c->modrm = insn_fetch(u8, 1, c->eip);

        group = (group << 3) + ((c->modrm >> 3) & 7);
        if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
                c->d = group2_table[group];
                c->d = group_table[group];

        DPRINTF("Cannot emulate %02x\n", c->b);

        if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))

        /* ModRM and SIB bytes. */
        rc = decode_modrm(ctxt, ops);
        else if (c->d & MemAbs)
                rc = decode_abs(ctxt, ops);
        if (rc != X86EMUL_CONTINUE)

        if (!c->has_seg_override)
                set_seg_override(c, VCPU_SREG_DS);

        if (!(!c->twobyte && c->b == 0x8d))
                c->modrm_ea += seg_override_base(ctxt, c);

        if (c->ad_bytes != 8)
                c->modrm_ea = (u32)c->modrm_ea;

        if (c->rip_relative)
                c->modrm_ea += c->eip;
        /*
         * Decode and fetch the source operand: register, memory
         * or immediate.
         */
        switch (c->d & SrcMask) {
                decode_register_operand(&c->src, c, 0);
                c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                /* Don't fetch the address for invlpg: it could be unmapped. */
                if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
                /*
                 * For instructions with a ModR/M byte, switch to register
                 * access if Mod = 3.
                 */
                if ((c->d & ModRM) && c->modrm_mod == 3) {
                        c->src.type = OP_REG;
                        c->src.val = c->modrm_val;
                        c->src.ptr = c->modrm_ptr;
                c->src.type = OP_MEM;
                c->src.ptr = (unsigned long *)c->modrm_ea;
                c->src.type = OP_IMM;
                c->src.ptr = (unsigned long *)c->eip;
                c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                if (c->src.bytes == 8)
                /* NB. Immediates are sign-extended as necessary. */
                switch (c->src.bytes) {
                        c->src.val = insn_fetch(s8, 1, c->eip);
                        c->src.val = insn_fetch(s16, 2, c->eip);
                        c->src.val = insn_fetch(s32, 4, c->eip);
                if ((c->d & SrcMask) == SrcImmU) {
                        switch (c->src.bytes) {
                                c->src.val &= 0xffff;
                                c->src.val &= 0xffffffff;
                c->src.type = OP_IMM;
                c->src.ptr = (unsigned long *)c->eip;
                if ((c->d & SrcMask) == SrcImmByte)
                        c->src.val = insn_fetch(s8, 1, c->eip);
                        c->src.val = insn_fetch(u8, 1, c->eip);
                c->src.type = OP_MEM;
                c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->src.ptr = (unsigned long *)
                        register_address(c, seg_override_base(ctxt, c),
                                         c->regs[VCPU_REGS_RSI]);
        /*
         * Decode and fetch the second source operand: register, memory
         * or immediate.
         */
        switch (c->d & Src2Mask) {
                c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
                c->src2.type = OP_IMM;
                c->src2.ptr = (unsigned long *)c->eip;
                c->src2.val = insn_fetch(u8, 1, c->eip);
                c->src2.type = OP_IMM;
                c->src2.ptr = (unsigned long *)c->eip;
                c->src2.val = insn_fetch(u16, 2, c->eip);
                c->src2.type = OP_MEM;
                c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes);

        /* Decode and fetch the destination operand: register or memory. */
        switch (c->d & DstMask) {
                /* Special instructions do their own operand decoding. */
                decode_register_operand(&c->dst, c,
                        c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
                if ((c->d & ModRM) && c->modrm_mod == 3) {
                        c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                        c->dst.type = OP_REG;
                        c->dst.val = c->dst.orig_val = c->modrm_val;
                        c->dst.ptr = c->modrm_ptr;
                c->dst.type = OP_MEM;
                c->dst.ptr = (unsigned long *)c->modrm_ea;
                if ((c->d & DstMask) == DstMem64)
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                        unsigned long mask = ~(c->dst.bytes * 8 - 1);

                        c->dst.ptr = (void *)c->dst.ptr +
                                     (c->src.val & mask) / 8;
                c->dst.type = OP_REG;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = &c->regs[VCPU_REGS_RAX];
                switch (c->dst.bytes) {
                        c->dst.val = *(u8 *)c->dst.ptr;
                        c->dst.val = *(u16 *)c->dst.ptr;
                        c->dst.val = *(u32 *)c->dst.ptr;
                        c->dst.val = *(u64 *)c->dst.ptr;
                c->dst.orig_val = c->dst.val;
                c->dst.type = OP_MEM;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)
                        register_address(c, es_base(ctxt),
                                         c->regs[VCPU_REGS_RDI]);

        return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops,
                           unsigned int size, unsigned short port,
                           void *dest)
        struct read_cache *rc = &ctxt->decode.io_read;

        if (rc->pos == rc->end) { /* refill pio read ahead */
                struct decode_cache *c = &ctxt->decode;
                unsigned int in_page, n;
                unsigned int count = c->rep_prefix ?
                        address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
                in_page = (ctxt->eflags & EFLG_DF) ?
                        offset_in_page(c->regs[VCPU_REGS_RDI]) :
                        PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
                n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
                        count);
                rc->pos = rc->end = 0;
                if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))

        memcpy(dest, rc->data + rc->pos, size);
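
/*
 * Illustrative sketch of the read-ahead sizing above (helper is
 * hypothetical): one batch never crosses the current page, never
 * overflows the read-ahead buffer, and never exceeds the rep count.
 */
static inline unsigned int example_pio_batch(unsigned int in_page,
                                             unsigned int size,
                                             unsigned int count,
                                             unsigned int bufsz)
{
        return min(min(in_page, bufsz) / size, count);
}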
static u32 desc_limit_scaled(struct desc_struct *desc)
        u32 limit = get_desc_limit(desc);

        return desc->g ? (limit << 12) | 0xfff : limit;

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                     struct x86_emulate_ops *ops,
                                     u16 selector, struct desc_ptr *dt)
        if (selector & 1 << 2) {
                struct desc_struct desc;
                memset(dt, 0, sizeof *dt);
                if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))

                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
                dt->address = get_desc_base(&desc);
        ops->get_gdt(dt, ctxt->vcpu);
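
/*
 * Illustrative: the selector layout assumed by the test above.  Bit 2 is
 * the table indicator (1 = LDT, 0 = GDT), bits 0-1 are the RPL, and bits
 * 3-15 index the table, hence the "selector >> 3" below.
 */
static inline bool example_selector_in_ldt(u16 selector)
{
        return selector & (1 << 2);
}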
/* allowed just for 8-byte segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   struct x86_emulate_ops *ops,
                                   u16 selector, struct desc_struct *desc)
        u16 index = selector >> 3;

        get_descriptor_table_ptr(ctxt, ops, selector, &dt);

        if (dt.size < index * 8 + 7) {
                kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
                return X86EMUL_PROPAGATE_FAULT;
        addr = dt.address + index * 8;
        ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
        if (ret == X86EMUL_PROPAGATE_FAULT)
                kvm_inject_page_fault(ctxt->vcpu, addr, err);
/* allowed just for 8-byte segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    struct x86_emulate_ops *ops,
                                    u16 selector, struct desc_struct *desc)
        u16 index = selector >> 3;

        get_descriptor_table_ptr(ctxt, ops, selector, &dt);

        if (dt.size < index * 8 + 7) {
                kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
                return X86EMUL_PROPAGATE_FAULT;
        addr = dt.address + index * 8;
        ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
        if (ret == X86EMUL_PROPAGATE_FAULT)
                kvm_inject_page_fault(ctxt->vcpu, addr, err);
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   struct x86_emulate_ops *ops,
                                   u16 selector, int seg)
        struct desc_struct seg_desc;
        unsigned err_vec = GP_VECTOR;
        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */

        memset(&seg_desc, 0, sizeof seg_desc);

        if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
            || ctxt->mode == X86EMUL_MODE_REAL) {
                /* set real mode segment descriptor */
                set_desc_base(&seg_desc, selector << 4);
                set_desc_limit(&seg_desc, 0xffff);

        /* NULL selector is not valid for TR, CS and SS */
        if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
            && null_selector)

        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))

        if (null_selector) /* for NULL selector skip all following checks */

        ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
        if (ret != X86EMUL_CONTINUE)

        err_code = selector & 0xfffc;
        err_vec = GP_VECTOR;

        /* can't load a system descriptor into a segment selector */
        if (seg <= VCPU_SREG_GS && !seg_desc.s)

        err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;

        cpl = ops->cpl(ctxt->vcpu);

        /*
         * segment is not a writable data segment, or the segment
         * selector's RPL != CPL, or the descriptor's DPL != CPL
         */
        if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)

        if (!(seg_desc.type & 8))

        if (seg_desc.type & 4) {

        if (rpl > cpl || dpl != cpl)

        /* CS(RPL) <- CPL */
        selector = (selector & 0xfffc) | cpl;

        if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
        case VCPU_SREG_LDTR:
                if (seg_desc.s || seg_desc.type != 2)
        default: /* DS, ES, FS, or GS */
                /*
                 * segment is not a data or readable code segment or
                 * ((segment is a data or nonconforming code segment)
                 * and (both RPL and CPL > DPL))
                 */
                if ((seg_desc.type & 0xa) == 0x8 ||
                    (((seg_desc.type & 0xc) != 0xc) &&
                     (rpl > dpl && cpl > dpl)))

        /* mark segment as accessed */
        ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
        if (ret != X86EMUL_CONTINUE)

        ops->set_segment_selector(selector, seg, ctxt->vcpu);
        ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
        return X86EMUL_CONTINUE;

        kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
        return X86EMUL_PROPAGATE_FAULT;
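
/*
 * Illustrative: the real-mode branch above synthesizes a descriptor
 * whose base is simply selector << 4, so e.g. selector 0xb800 maps to
 * base 0xb8000 (the classic VGA text-mode segment).
 */
static inline unsigned long example_real_mode_base(u16 selector)
{
        return (unsigned long)selector << 4;
}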
static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
        struct decode_cache *c = &ctxt->decode;

        c->dst.type = OP_MEM;
        c->dst.bytes = c->op_bytes;
        c->dst.val = c->src.val;
        register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
        c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
                                               c->regs[VCPU_REGS_RSP]);

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
                       struct x86_emulate_ops *ops,
                       void *dest, int len)
        struct decode_cache *c = &ctxt->decode;

        rc = ops->read_emulated(register_address(c, ss_base(ctxt),
                                                 c->regs[VCPU_REGS_RSP]),
                                dest, len, ctxt->vcpu);
        if (rc != X86EMUL_CONTINUE)

        register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);

static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                        struct x86_emulate_ops *ops,
                        void *dest, int len)
        unsigned long val, change_mask;
        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
        int cpl = ops->cpl(ctxt->vcpu);

        rc = emulate_pop(ctxt, ops, &val, len);
        if (rc != X86EMUL_CONTINUE)

        change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
                | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

        switch (ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                change_mask |= EFLG_IOPL;
                change_mask |= EFLG_IF;
        case X86EMUL_MODE_VM86:
                kvm_inject_gp(ctxt->vcpu, 0);
                return X86EMUL_PROPAGATE_FAULT;
                change_mask |= EFLG_IF;
        default: /* real mode */
                change_mask |= (EFLG_IOPL | EFLG_IF);

        *(unsigned long *)dest =
                (ctxt->eflags & ~change_mask) | (val & change_mask);
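
/*
 * Illustrative, hypothetical helper: the merge computed above.  Bits
 * outside change_mask keep their previous values, which is how IOPL and
 * IF stay protected from an unprivileged popf.
 */
static inline unsigned long example_popf_merge(unsigned long eflags,
                                               unsigned long val,
                                               unsigned long change_mask)
{
        return (eflags & ~change_mask) | (val & change_mask);
}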
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment segment;

        kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);

        c->src.val = segment.selector;

static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
                            struct x86_emulate_ops *ops, int seg)
        struct decode_cache *c = &ctxt->decode;
        unsigned long selector;

        rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
        if (rc != X86EMUL_CONTINUE)

        rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);

static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
        struct decode_cache *c = &ctxt->decode;
        unsigned long old_esp = c->regs[VCPU_REGS_RSP];
        int reg = VCPU_REGS_RAX;

        while (reg <= VCPU_REGS_RDI) {
                (reg == VCPU_REGS_RSP) ?
                (c->src.val = old_esp) : (c->src.val = c->regs[reg]);

static int emulate_popa(struct x86_emulate_ctxt *ctxt,
                        struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;

        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
                        register_address_increment(c, &c->regs[VCPU_REGS_RSP],
                                                   c->op_bytes);

                rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
                if (rc != X86EMUL_CONTINUE)
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;

        return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);

static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
        struct decode_cache *c = &ctxt->decode;
        switch (c->modrm_reg) {
                emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
                emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
                emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
                emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
        case 4: /* sal/shl */
        case 6: /* sal/shl */
                emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
                emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
                emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);

static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
                               struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;

        switch (c->modrm_reg) {
        case 0 ... 1:   /* test */
                emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
                c->dst.val = ~c->dst.val;
                emulate_1op("neg", c->dst, ctxt->eflags);

static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;

        switch (c->modrm_reg) {
                emulate_1op("inc", c->dst, ctxt->eflags);
                emulate_1op("dec", c->dst, ctxt->eflags);
        case 2: /* call near abs */ {
                c->eip = c->src.val;
                c->src.val = old_eip;
        case 4: /* jmp abs */
                c->eip = c->src.val;

        return X86EMUL_CONTINUE;

static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
                               struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;
        u64 old = c->dst.orig_val;

        if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
            ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {

                c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
                c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
                ctxt->eflags &= ~EFLG_ZF;

                c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
                             (u32) c->regs[VCPU_REGS_RBX];

                ctxt->eflags |= EFLG_ZF;

        return X86EMUL_CONTINUE;
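
/*
 * Illustrative sketch of the cmpxchg8b comparison above (helper is
 * hypothetical): the 64-bit destination is matched against EDX:EAX; on
 * a match ZF is set and ECX:EBX becomes the new value, otherwise the
 * old value is loaded back into EDX:EAX and ZF is cleared.
 */
static inline bool example_cmpxchg8b_match(u64 old, u32 eax, u32 edx)
{
        return (u32)old == eax && (u32)(old >> 32) == edx;
}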
static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;

        rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
        if (rc != X86EMUL_CONTINUE)
        if (c->op_bytes == 4)
                c->eip = (u32)c->eip;
        rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
        if (rc != X86EMUL_CONTINUE)
        rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);

static inline int writeback(struct x86_emulate_ctxt *ctxt,
                            struct x86_emulate_ops *ops)
        struct decode_cache *c = &ctxt->decode;

        switch (c->dst.type) {
                /* The 4-byte case *is* correct:
                 * in 64-bit mode we zero-extend.
                 */
                switch (c->dst.bytes) {
                        *(u8 *)c->dst.ptr = (u8)c->dst.val;
                        *(u16 *)c->dst.ptr = (u16)c->dst.val;
                        *c->dst.ptr = (u32)c->dst.val;
                        break;  /* 64b: zero-ext */
                        *c->dst.ptr = c->dst.val;
                rc = ops->cmpxchg_emulated(
                        (unsigned long)c->dst.ptr,
                rc = ops->write_emulated(
                        (unsigned long)c->dst.ptr,
                if (rc != X86EMUL_CONTINUE)

        return X86EMUL_CONTINUE;
static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
        u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
        /*
         * an "sti; sti" sequence only disables interrupts for the first
         * instruction. So, if the last instruction, be it emulated or
         * not, left the system with the INT_STI flag enabled, it
         * means that the last instruction is an sti. We should not
         * leave the flag on in this case. The same goes for mov ss.
         */
        if (!(int_shadow & mask))
                ctxt->interruptibility = mask;
static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
                        struct kvm_segment *cs, struct kvm_segment *ss)
        memset(cs, 0, sizeof(struct kvm_segment));
        kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
        memset(ss, 0, sizeof(struct kvm_segment));

        cs->l = 0;              /* will be adjusted later */
        cs->base = 0;           /* flat segment */
        cs->g = 1;              /* 4KB granularity */
        cs->limit = 0xffffffff; /* 4GB limit */
        cs->type = 0x0b;        /* Read, Execute, Accessed */
        cs->dpl = 0;            /* will be adjusted later */

        ss->base = 0;           /* flat segment */
        ss->limit = 0xffffffff; /* 4GB limit */
        ss->g = 1;              /* 4KB granularity */
        ss->type = 0x03;        /* Read/Write, Accessed */
        ss->db = 1;             /* 32bit stack segment */
static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment cs, ss;

        /* syscall is not available in real mode */
        if (ctxt->mode == X86EMUL_MODE_REAL ||
            ctxt->mode == X86EMUL_MODE_VM86) {
                kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
                return X86EMUL_PROPAGATE_FAULT;

        setup_syscalls_segments(ctxt, &cs, &ss);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);

        cs.selector = (u16)(msr_data & 0xfffc);
        ss.selector = (u16)(msr_data + 8);
        if (is_long_mode(ctxt->vcpu)) {

        kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
        kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

        c->regs[VCPU_REGS_RCX] = c->eip;
        if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
                c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

                kvm_x86_ops->get_msr(ctxt->vcpu,
                                     ctxt->mode == X86EMUL_MODE_PROT64 ?
                                     MSR_LSTAR : MSR_CSTAR, &msr_data);

                kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
                ctxt->eflags &= ~(msr_data | EFLG_RF);

                kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
                c->eip = (u32)msr_data;

                ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);

        return X86EMUL_CONTINUE;
static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment cs, ss;

        /* inject #GP if in real mode */
        if (ctxt->mode == X86EMUL_MODE_REAL) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return X86EMUL_PROPAGATE_FAULT;

        /* XXX sysenter/sysexit have not been tested in 64bit mode.
         * Therefore, we inject an #UD.
         */
        if (ctxt->mode == X86EMUL_MODE_PROT64) {
                kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
                return X86EMUL_PROPAGATE_FAULT;

        setup_syscalls_segments(ctxt, &cs, &ss);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
        switch (ctxt->mode) {
        case X86EMUL_MODE_PROT32:
                if ((msr_data & 0xfffc) == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
        case X86EMUL_MODE_PROT64:
                if (msr_data == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;

        ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
        cs.selector = (u16)msr_data;
        cs.selector &= ~SELECTOR_RPL_MASK;
        ss.selector = cs.selector + 8;
        ss.selector &= ~SELECTOR_RPL_MASK;
        if (ctxt->mode == X86EMUL_MODE_PROT64
            || is_long_mode(ctxt->vcpu)) {

        kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
        kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
        c->regs[VCPU_REGS_RSP] = msr_data;

        return X86EMUL_CONTINUE;
static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment cs, ss;

        /* inject #GP if in real mode or Virtual 8086 mode */
        if (ctxt->mode == X86EMUL_MODE_REAL ||
            ctxt->mode == X86EMUL_MODE_VM86) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return X86EMUL_PROPAGATE_FAULT;

        setup_syscalls_segments(ctxt, &cs, &ss);

        if ((c->rex_prefix & 0x8) != 0x0)
                usermode = X86EMUL_MODE_PROT64;
                usermode = X86EMUL_MODE_PROT32;

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
        case X86EMUL_MODE_PROT32:
                cs.selector = (u16)(msr_data + 16);
                if ((msr_data & 0xfffc) == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                ss.selector = (u16)(msr_data + 24);
        case X86EMUL_MODE_PROT64:
                cs.selector = (u16)(msr_data + 32);
                if (msr_data == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                ss.selector = cs.selector + 8;

        cs.selector |= SELECTOR_RPL_MASK;
        ss.selector |= SELECTOR_RPL_MASK;

        kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
        kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

        c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
        c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];

        return X86EMUL_CONTINUE;
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
                              struct x86_emulate_ops *ops)
        if (ctxt->mode == X86EMUL_MODE_REAL)
        if (ctxt->mode == X86EMUL_MODE_VM86)
        iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
        return ops->cpl(ctxt->vcpu) > iopl;
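
/*
 * Illustrative: with the common protected-mode setting EFLAGS.IOPL == 0,
 * any CPL > 0 fails the check above, so a ring-3 IN/OUT must be vetted
 * against the TSS I/O permission bitmap instead.
 */
static inline bool example_cpl3_needs_bitmap(unsigned long eflags)
{
        int iopl = (eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
        return 3 > iopl;
}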
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
                                            struct x86_emulate_ops *ops,
                                            u16 port, u16 len)
        struct kvm_segment tr_seg;
        u8 perm, bit_idx = port & 0x7;
        unsigned mask = (1 << len) - 1;

        kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
        if (tr_seg.unusable)
        if (tr_seg.limit < 103)
        r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
                          NULL);
        if (r != X86EMUL_CONTINUE)
        if (io_bitmap_ptr + port/8 > tr_seg.limit)
        r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
                          ctxt->vcpu, NULL);
        if (r != X86EMUL_CONTINUE)
        if ((perm >> bit_idx) & mask)

static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
                                 struct x86_emulate_ops *ops,
                                 u16 port, u16 len)
        if (emulator_bad_iopl(ctxt, ops))
                if (!emulator_io_port_access_allowed(ctxt, ops, port, len))

static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
                                      struct x86_emulate_ops *ops,
                                      int seg)
        struct desc_struct desc;
        if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
                return get_desc_base(&desc);
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops,
                                struct tss_segment_16 *tss)
        struct decode_cache *c = &ctxt->decode;

        tss->flag = ctxt->eflags;
        tss->ax = c->regs[VCPU_REGS_RAX];
        tss->cx = c->regs[VCPU_REGS_RCX];
        tss->dx = c->regs[VCPU_REGS_RDX];
        tss->bx = c->regs[VCPU_REGS_RBX];
        tss->sp = c->regs[VCPU_REGS_RSP];
        tss->bp = c->regs[VCPU_REGS_RBP];
        tss->si = c->regs[VCPU_REGS_RSI];
        tss->di = c->regs[VCPU_REGS_RDI];

        tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
        tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
        tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
        tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
        tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
                                 struct x86_emulate_ops *ops,
                                 struct tss_segment_16 *tss)
        struct decode_cache *c = &ctxt->decode;

        ctxt->eflags = tss->flag | 2;
        c->regs[VCPU_REGS_RAX] = tss->ax;
        c->regs[VCPU_REGS_RCX] = tss->cx;
        c->regs[VCPU_REGS_RDX] = tss->dx;
        c->regs[VCPU_REGS_RBX] = tss->bx;
        c->regs[VCPU_REGS_RSP] = tss->sp;
        c->regs[VCPU_REGS_RBP] = tss->bp;
        c->regs[VCPU_REGS_RSI] = tss->si;
        c->regs[VCPU_REGS_RDI] = tss->di;

        /*
         * SDM says that segment selectors are loaded before segment
         * descriptors.
         */
        ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
        ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
        ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
        ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
        ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);

        /*
         * Now load the segment descriptors. If a fault happens at this
         * stage, it is handled in the context of the new task.
         */
        ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
        if (ret != X86EMUL_CONTINUE)

        return X86EMUL_CONTINUE;
static int task_switch_16(struct x86_emulate_ctxt *ctxt,
                          struct x86_emulate_ops *ops,
                          u16 tss_selector, u16 old_tss_sel,
                          ulong old_tss_base, struct desc_struct *new_desc)
        struct tss_segment_16 tss_seg;
        u32 err, new_tss_base = get_desc_base(new_desc);

        ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
                            &err);
        if (ret == X86EMUL_PROPAGATE_FAULT) {
                /* FIXME: need to provide precise fault address */
                kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);

        save_state_to_tss16(ctxt, ops, &tss_seg);

        ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
                             &err);
        if (ret == X86EMUL_PROPAGATE_FAULT) {
                /* FIXME: need to provide precise fault address */
                kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);

        ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
                            &err);
        if (ret == X86EMUL_PROPAGATE_FAULT) {
                /* FIXME: need to provide precise fault address */
                kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);

        if (old_tss_sel != 0xffff) {
                tss_seg.prev_task_link = old_tss_sel;

                ret = ops->write_std(new_tss_base,
                                     &tss_seg.prev_task_link,
                                     sizeof tss_seg.prev_task_link,
                                     ctxt->vcpu, &err);
                if (ret == X86EMUL_PROPAGATE_FAULT) {
                        /* FIXME: need to provide precise fault address */
                        kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);

        return load_state_from_tss16(ctxt, ops, &tss_seg);
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops,
                                struct tss_segment_32 *tss)
        struct decode_cache *c = &ctxt->decode;

        tss->cr3 = ops->get_cr(3, ctxt->vcpu);
        tss->eflags = ctxt->eflags;
        tss->eax = c->regs[VCPU_REGS_RAX];
        tss->ecx = c->regs[VCPU_REGS_RCX];
        tss->edx = c->regs[VCPU_REGS_RDX];
        tss->ebx = c->regs[VCPU_REGS_RBX];
        tss->esp = c->regs[VCPU_REGS_RSP];
        tss->ebp = c->regs[VCPU_REGS_RBP];
        tss->esi = c->regs[VCPU_REGS_RSI];
        tss->edi = c->regs[VCPU_REGS_RDI];

        tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
        tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
        tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
        tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
        tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
        tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
        tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
                                 struct x86_emulate_ops *ops,
                                 struct tss_segment_32 *tss)
        struct decode_cache *c = &ctxt->decode;

        ops->set_cr(3, tss->cr3, ctxt->vcpu);
        ctxt->eflags = tss->eflags | 2;
        c->regs[VCPU_REGS_RAX] = tss->eax;
        c->regs[VCPU_REGS_RCX] = tss->ecx;
        c->regs[VCPU_REGS_RDX] = tss->edx;
        c->regs[VCPU_REGS_RBX] = tss->ebx;
        c->regs[VCPU_REGS_RSP] = tss->esp;
        c->regs[VCPU_REGS_RBP] = tss->ebp;
        c->regs[VCPU_REGS_RSI] = tss->esi;
        c->regs[VCPU_REGS_RDI] = tss->edi;

        /*
         * SDM says that segment selectors are loaded before segment
         * descriptors.
         */
        ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
        ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
        ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
        ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
        ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
        ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
        ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);

        /*
         * Now load the segment descriptors. If a fault happens at this
         * stage, it is handled in the context of the new task.
         */
        ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
        if (ret != X86EMUL_CONTINUE)
        ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
        if (ret != X86EMUL_CONTINUE)

        return X86EMUL_CONTINUE;
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	save_state_to_tss32(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss32(ctxt, ops, &tss_seg);
}
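
/*
 * emulator_do_task_switch() follows the SDM task-switch sequence:
 * descriptor checks on the new TSS, busy-flag bookkeeping in the GDT,
 * NT/back-link handling for CALL vs. IRET, the actual state save and
 * restore via task_switch_16/32(), and finally the CR0.TS and TR update.
 */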
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 tss_selector, int reason)
{
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
	ulong old_tss_base =
		get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
	u32 desc_limit;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	if (reason != TASK_SWITCH_IRET) {
		if ((tss_selector & 3) > next_tss_desc.dpl ||
		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
	}

	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
				      tss_selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, ops, old_tss_sel,
					 &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags;
	   note that old_tss_sel is not used after this point */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, ops, tss_selector,
					 &next_tss_desc);
	}

	ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);

	return ret;
}
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 u16 tss_selector, int reason)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = ctxt->eip;
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason);

	if (rc == X86EMUL_CONTINUE) {
		memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
		kvm_rip_write(ctxt->vcpu, c->eip);
	}

	return rc;
}
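
/*
 * String instructions walk RSI/RDI forward or backward depending on
 * EFLAGS.DF; string_addr_inc() advances the index register by the
 * operand size and recomputes the operand pointer for the next round.
 */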
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
			    int reg, struct operand *op)
{
	struct decode_cache *c = &ctxt->decode;
	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;

	register_address_increment(c, &c->regs[reg], df * op->bytes);
	op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
}
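
/*
 * Execute phase.  x86_decode_insn() has already run; this routine
 * validates mode/LOCK/CPL constraints, fetches memory operands, runs
 * the opcode switch below and then commits results via writeback().
 */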
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	u64 msr_data;
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = c->dst.type;

	ctxt->interruptibility = 0;

	/* Shadow copy of register state. Committed on successful emulation.
	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
	 * modify them.
	 */

	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* LOCK prefix is allowed only with some instructions */
	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* Privileged instructions can be executed only at CPL 0 */
	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		goto done;
	}

	if (c->rep_prefix && (c->d & String)) {
		ctxt->restart = true;
		/* All REP prefixes have the same first termination condition */
		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
		string_done:
			ctxt->restart = false;
			kvm_rip_write(ctxt->vcpu, c->eip);
			goto done;
		}
		/* The second termination condition only applies to REPE
		 * and REPNE. Test if the repeat string operation prefix is
		 * REPE/REPZ or REPNE/REPNZ, and if so test the
		 * corresponding termination condition according to:
		 *	- if REPE/REPZ and ZF = 0 then done
		 *	- if REPNE/REPNZ and ZF = 1 then done
		 */
		if ((c->b == 0xa6) || (c->b == 0xa7) ||
		    (c->b == 0xae) || (c->b == 0xaf)) {
			if ((c->rep_prefix == REPE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == 0))
				goto string_done;
			if ((c->rep_prefix == REPNE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
				goto string_done;
		}
		c->eip = ctxt->eip;
	}
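
	/*
	 * Memory operands are fetched up front so the opcode handlers
	 * below can work purely on c->src/c->dst values.
	 */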
	if (c->src.type == OP_MEM) {
		rc = ops->read_emulated((unsigned long)c->src.ptr,
					&c->src.val,
					c->src.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->src.orig_val = c->src.val;
	}

	if (c->src2.type == OP_MEM) {
		rc = ops->read_emulated((unsigned long)c->src2.ptr,
					&c->src2.val,
					c->src2.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((c->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val,
					c->dst.bytes, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	c->dst.orig_val = c->dst.val;
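
	/*
	 * Dispatch: one-byte opcodes are handled in the switch below;
	 * 0x0f-prefixed opcodes jump to twobyte_insn further down.
	 */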
special_insn:

	if (c->twobyte)
		goto twobyte_insn;

	switch (c->b) {
	case 0x00 ... 0x05:
	      add:		/* add */
		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
		break;
	case 0x06:		/* push es */
		emulate_push_sreg(ctxt, VCPU_SREG_ES);
		break;
	case 0x07:		/* pop es */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x08 ... 0x0d:
	      or:		/* or */
		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
		break;
	case 0x0e:		/* push cs */
		emulate_push_sreg(ctxt, VCPU_SREG_CS);
		break;
	case 0x10 ... 0x15:
	      adc:		/* adc */
		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
		break;
	case 0x16:		/* push ss */
		emulate_push_sreg(ctxt, VCPU_SREG_SS);
		break;
	case 0x17:		/* pop ss */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x18 ... 0x1d:
	      sbb:		/* sbb */
		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
		break;
	case 0x1e:		/* push ds */
		emulate_push_sreg(ctxt, VCPU_SREG_DS);
		break;
	case 0x1f:		/* pop ds */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x20 ... 0x25:
	      and:		/* and */
		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
		break;
	case 0x28 ... 0x2d:
	      sub:		/* sub */
		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
		break;
	case 0x30 ... 0x35:
	      xor:		/* xor */
		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
		break;
	case 0x38 ... 0x3d:
	      cmp:		/* cmp */
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		break;
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 0x50 ... 0x57:  /* push reg */
		emulate_push(ctxt);
		break;
	case 0x58 ... 0x5f: /* pop reg */
	pop_instruction:
		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x60:	/* pusha */
		emulate_pusha(ctxt);
		break;
	case 0x61:	/* popa */
		rc = emulate_popa(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		c->dst.val = (s32) c->src.val;
		break;
	case 0x68: /* push imm */
	case 0x6a: /* push imm8 */
		emulate_push(ctxt);
		break;
	case 0x6c:		/* insb */
	case 0x6d:		/* insw/insd */
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  c->dst.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
				     c->regs[VCPU_REGS_RDX], &c->dst.val))
			goto done; /* IO is needed, skip writeback */
		break;
	case 0x6e:		/* outsb */
	case 0x6f:		/* outsw/outsd */
		c->src.bytes = min(c->src.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  c->src.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
				      &c->src.val, 1, ctxt->vcpu);

		c->dst.type = OP_NONE; /* nothing to writeback */
		break;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		break;
	case 0x80 ... 0x83:	/* Grp1 */
		switch (c->modrm_reg) {
		case 0:
			goto add;
		case 1:
			goto or;
		case 2:
			goto adc;
		case 3:
			goto sbb;
		case 4:
			goto and;
		case 5:
			goto sub;
		case 6:
			goto xor;
		case 7:
			goto cmp;
		}
		break;
	case 0x84 ... 0x85:
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
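	/*
	 * XCHG with a memory operand carries an implicit LOCK prefix on
	 * real hardware; the register side is written directly here and
	 * the memory side goes through the normal (locked) writeback.
	 */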
	case 0x86 ... 0x87:	/* xchg */
	xchg:
		/* Write back the register source. */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *) c->src.ptr = (u8) c->dst.val;
			break;
		case 2:
			*(u16 *) c->src.ptr = (u16) c->dst.val;
			break;
		case 4:
			*c->src.ptr = (u32) c->dst.val;
			break;	/* 64b reg: zero-extend */
		case 8:
			*c->src.ptr = c->dst.val;
			break;
		}
		/*
		 * Write back the memory destination with implicit LOCK
		 * prefix.
		 */
		c->dst.val = c->src.val;
		c->lock_prefix = 1;
		break;
	case 0x88 ... 0x8b:	/* mov */
		goto mov;
	case 0x8c: { /* mov r/m, sreg */
		struct kvm_segment segreg;

		if (c->modrm_reg <= VCPU_SREG_GS)
			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
		else {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->dst.val = segreg.selector;
		break;
	}
	case 0x8d: /* lea r16/r32, m */
		c->dst.val = c->modrm_ea;
		break;
	case 0x8e: { /* mov seg, r/m16 */
		uint16_t sel;

		sel = c->src.val;

		if (c->modrm_reg == VCPU_SREG_CS ||
		    c->modrm_reg > VCPU_SREG_GS) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}

		if (c->modrm_reg == VCPU_SREG_SS)
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);

		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);

		c->dst.type = OP_NONE;  /* Disable writeback. */
		break;
	}
	case 0x8f:		/* pop (sole member of Grp1a) */
		rc = emulate_grp1a(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x90: /* nop / xchg r8,rax */
		if (!(c->rex_prefix & 1)) { /* nop */
			c->dst.type = OP_NONE;
			break;
		}
	case 0x91 ... 0x97: /* xchg reg,rax */
		c->src.type = c->dst.type = OP_REG;
		c->src.bytes = c->dst.bytes = c->op_bytes;
		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
		c->src.val = *(c->src.ptr);
		goto xchg;
	case 0x9c: /* pushf */
		c->src.val = (unsigned long) ctxt->eflags;
		emulate_push(ctxt);
		break;
	case 0x9d: /* popf */
		c->dst.type = OP_REG;
		c->dst.ptr = (unsigned long *) &ctxt->eflags;
		c->dst.bytes = c->op_bytes;
		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa0 ... 0xa1:	/* mov */
		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		c->dst.val = c->src.val;
		break;
	case 0xa2 ... 0xa3:	/* mov */
		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
		break;
	case 0xa4 ... 0xa5:	/* movs */
		goto mov;
	case 0xa6 ... 0xa7:	/* cmps */
		c->dst.type = OP_NONE; /* Disable writeback. */
		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
		goto cmp;
	case 0xaa ... 0xab:	/* stos */
		c->dst.val = c->regs[VCPU_REGS_RAX];
		break;
	case 0xac ... 0xad:	/* lods */
		goto mov;
	case 0xae ... 0xaf:	/* scas */
		DPRINTF("Urk! I don't handle SCAS.\n");
		goto cannot_emulate;
	case 0xb0 ... 0xbf: /* mov r, imm */
		goto mov;
	case 0xc0 ... 0xc1:
		emulate_grp2(ctxt);
		break;
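	/*
	 * Near RET is modelled as "pop into EIP": the destination is
	 * pointed at c->eip and control jumps to the shared
	 * pop_instruction path used by the 0x58-0x5f pop handlers.
	 */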
	case 0xc3: /* ret */
		c->dst.type = OP_REG;
		c->dst.ptr = &c->eip;
		c->dst.bytes = c->op_bytes;
		goto pop_instruction;
	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
	mov:
		c->dst.val = c->src.val;
		break;
	case 0xcb:		/* ret far */
		rc = emulate_ret_far(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xd0 ... 0xd1:	/* Grp2 */
		c->src.val = 1;
		emulate_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3:	/* Grp2 */
		c->src.val = c->regs[VCPU_REGS_RCX];
		emulate_grp2(ctxt);
		break;
	case 0xe4:	/* inb */
	case 0xe5:	/* in */
		goto do_io_in;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		goto do_io_out;
	case 0xe8: /* call (near) */ {
		long int rel = c->src.val;
		c->src.val = (unsigned long) c->eip;
		jmp_rel(c, rel);
		emulate_push(ctxt);
		break;
	}
	case 0xe9: /* jmp rel */
		goto jmp;
	case 0xea: /* jmp far */
	jump_far:
		if (load_segment_descriptor(ctxt, ops, c->src2.val,
					    VCPU_SREG_CS))
			goto done;

		c->eip = c->src.val;
		break;
	case 0xeb:
	      jmp:		/* jmp rel short */
		jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xec: /* in al,dx */
	case 0xed: /* in (e/r)ax,dx */
		c->src.val = c->regs[VCPU_REGS_RDX];
	do_io_in:
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
				     &c->dst.val))
			goto done; /* IO is needed */
		break;
	case 0xee: /* out al,dx */
	case 0xef: /* out (e/r)ax,dx */
		c->src.val = c->regs[VCPU_REGS_RDX];
	do_io_out:
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
				      ctxt->vcpu);
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->vcpu->arch.halt_request = 1;
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf6 ... 0xf7:	/* Grp3 */
		if (!emulate_grp3(ctxt, ops))
			goto cannot_emulate;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfa: /* cli */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			ctxt->eflags &= ~X86_EFLAGS_IF;
			c->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
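	/*
	 * STI additionally arms an interrupt shadow: interrupts stay
	 * blocked for one more instruction, which
	 * toggle_interruptibility() records for the guest.
	 */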
	case 0xfb: /* sti */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
			ctxt->eflags |= X86_EFLAGS_IF;
			c->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfe: /* Grp4 */
	grp45:
		rc = emulate_grp45(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xff: /* Grp5 */
		if (c->modrm_reg == 5)
			goto jump_far;
		goto grp45;
	}

writeback:
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	c->dst.type = saved_dst_type;

	if ((c->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
				&c->src);

	if ((c->d & DstMask) == DstDI)
		string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);

	if (c->rep_prefix && (c->d & String)) {
		struct read_cache *rc = &ctxt->decode.io_read;
		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
		/*
		 * Re-enter guest when pio read ahead buffer is empty or,
		 * if it is not used, after every 1024 iterations.
		 */
		if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
		    (rc->end != 0 && rc->end == rc->pos))
			ctxt->restart = false;
	}

	/* Commit shadow register state. */
	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
	kvm_rip_write(ctxt->vcpu, c->eip);
	ops->set_rflags(ctxt->vcpu, ctxt->eflags);

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
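
	/*
	 * Two-byte (0x0f xx) opcode map: system instructions, MSR access,
	 * conditional moves/jumps and the bit-test family.
	 */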
twobyte_insn:
	switch (c->b) {
	case 0x01: /* lgdt, lidt, lmsw */
		switch (c->modrm_reg) {
			u16 size;
			unsigned long address;

		case 0: /* vmcall */
			if (c->modrm_mod != 3 || c->modrm_rm != 1)
				goto cannot_emulate;

			rc = kvm_fix_hypercall(ctxt->vcpu);
			if (rc != X86EMUL_CONTINUE)
				goto done;

			/* Let the processor re-execute the fixed hypercall */
			c->eip = ctxt->eip;
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 2: /* lgdt */
			rc = read_descriptor(ctxt, ops, c->src.ptr,
					     &size, &address, c->op_bytes);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			realmode_lgdt(ctxt->vcpu, size, address);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 3: /* lidt/vmmcall */
			if (c->modrm_mod == 3) {
				switch (c->modrm_rm) {
				case 1:
					rc = kvm_fix_hypercall(ctxt->vcpu);
					if (rc != X86EMUL_CONTINUE)
						goto done;
					break;
				default:
					goto cannot_emulate;
				}
			} else {
				rc = read_descriptor(ctxt, ops, c->src.ptr,
						     &size, &address,
						     c->op_bytes);
				if (rc != X86EMUL_CONTINUE)
					goto done;
				realmode_lidt(ctxt->vcpu, size, address);
			}
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 4: /* smsw */
			c->dst.bytes = 2;
			c->dst.val = ops->get_cr(0, ctxt->vcpu);
			break;
		case 6: /* lmsw */
			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
				    (c->src.val & 0x0f), ctxt->vcpu);
			c->dst.type = OP_NONE;
			break;
		case 5: /* not defined */
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		case 7: /* invlpg */
			emulate_invlpg(ctxt->vcpu, c->modrm_ea);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		default:
			goto cannot_emulate;
		}
		break;
	case 0x05:	/* syscall */
		rc = emulate_syscall(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x06:
		emulate_clts(ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x08:		/* invd */
	case 0x09:		/* wbinvd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
		c->dst.type = OP_NONE;
		break;
	case 0x20: /* mov cr, reg */
		switch (c->modrm_reg) {
		case 1:
		case 5 ... 7:
		case 9 ... 15:
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x21: /* mov from dr to reg */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x22: /* mov reg, cr */
		ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x23: /* mov from reg to dr */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x30:
		/* wrmsr */
		msr_data = (u32)c->regs[VCPU_REGS_RAX]
			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
		if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x32:
		/* rdmsr */
		if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		} else {
			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x34:		/* sysenter */
		rc = emulate_sysenter(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x35:		/* sysexit */
		rc = emulate_sysexit(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x40 ... 0x4f:	/* cmov */
		c->dst.val = c->dst.orig_val = c->src.val;
		if (!test_cc(c->b, ctxt->eflags))
			c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc. */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE;
		break;
	case 0xa0:	/* push fs */
		emulate_push_sreg(ctxt, VCPU_SREG_FS);
		break;
	case 0xa1:	/* pop fs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa3:
	      bt:		/* bt */
		c->dst.type = OP_NONE;
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xa8:	/* push gs */
		emulate_push_sreg(ctxt, VCPU_SREG_GS);
		break;
	case 0xa9:	/* pop gs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xab:
	      bts:		/* bts */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xae:              /* clflush */
		break;
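	/*
	 * CMPXCHG is emulated as a compare plus a conditional store; the
	 * locked variant relies on writeback() taking the cmpxchg path
	 * when a LOCK prefix is present.
	 */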
	case 0xb0 ... 0xb1:	/* cmpxchg */
		/*
		 * Save real source value, then compare EAX against
		 * destination.
		 */
		c->src.orig_val = c->src.val;
		c->src.val = c->regs[VCPU_REGS_RAX];
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		if (ctxt->eflags & EFLG_ZF) {
			/* Success: write back to memory. */
			c->dst.val = c->src.orig_val;
		} else {
			/* Failure: write the value we saw to EAX. */
			c->dst.type = OP_REG;
			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		}
		break;
	case 0xb3:
	      btr:		/* btr */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
						       : (u16) c->src.val;
		break;
	case 0xba:		/* Grp8 */
		switch (c->modrm_reg & 3) {
		case 0:
			goto bt;
		case 1:
			goto bts;
		case 2:
			goto btr;
		case 3:
			goto btc;
		}
		break;
	case 0xbb:
	      btc:		/* btc */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
		break;
	case 0xbe ... 0xbf:	/* movsx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
							(s16) c->src.val;
		break;
	case 0xc3:		/* movnti */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
							(u64) c->src.val;
		break;
	case 0xc7:		/* Grp9 (cmpxchg8b) */
		rc = emulate_grp9(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	}
	goto writeback;

cannot_emulate:
	DPRINTF("Cannot emulate %02x\n", c->b);
	return -1;
}