1/******************************************************************************
2 * emulate.c
3 *
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
5 *
6 * Copyright (c) 2005 Keir Fraser
7 *
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privileged instructions:
10 *
11 * Copyright (C) 2006 Qumranet
12 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
13 *
14 *   Avi Kivity <avi@qumranet.com>
15 *   Yaniv Kamay <yaniv@qumranet.com>
16 *
17 * This work is licensed under the terms of the GNU GPL, version 2.  See
18 * the COPYING file in the top-level directory.
19 *
20 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
21 */
22
23#ifndef __KERNEL__
24#include <stdio.h>
25#include <stdint.h>
26#include <public/xen.h>
27#define DPRINTF(_f, _a ...) printf(_f , ## _a)
28#else
29#include <linux/kvm_host.h>
30#include "kvm_cache_regs.h"
31#define DPRINTF(x...) do {} while (0)
32#endif
33#include <linux/module.h>
34#include <asm/kvm_emulate.h>
35
36#include "x86.h"
37#include "tss.h"
38
39/*
40 * Opcode effective-address decode tables.
41 * Note that we only emulate instructions that have at least one memory
42 * operand (excluding implicit stack references). We assume that stack
43 * references and instruction fetches will never occur in special memory
44 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
45 * not be handled.
46 */
47
48/* Operand sizes: 8-bit operands or specified/overridden size. */
49#define ByteOp      (1<<0)	/* 8-bit operands. */
50/* Destination operand type. */
51#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
52#define DstReg      (2<<1)	/* Register operand. */
53#define DstMem      (3<<1)	/* Memory operand. */
54#define DstAcc      (4<<1)      /* Destination Accumulator */
55#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
56#define DstMem64    (6<<1)	/* 64bit memory operand */
57#define DstMask     (7<<1)
58/* Source operand type. */
59#define SrcNone     (0<<4)	/* No source operand. */
60#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
61#define SrcReg      (1<<4)	/* Register operand. */
62#define SrcMem      (2<<4)	/* Memory operand. */
63#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
64#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
65#define SrcImm      (5<<4)	/* Immediate operand. */
66#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
67#define SrcOne      (7<<4)	/* Implied '1' */
68#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
69#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
70#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
71#define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */
72#define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */
73#define SrcAcc      (0xd<<4)	/* Source Accumulator */
74#define SrcMask     (0xf<<4)
75/* Generic ModRM decode. */
76#define ModRM       (1<<8)
77/* Destination is only written; never read. */
78#define Mov         (1<<9)
79#define BitOp       (1<<10)
80#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
81#define String      (1<<12)     /* String instruction (rep capable) */
82#define Stack       (1<<13)     /* Stack instruction (push/pop) */
83#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
84#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
85#define GroupMask   0xff        /* Group number stored in bits 0:7 */
86/* Misc flags */
87#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
88#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
89#define No64	    (1<<28)
90/* Source 2 operand type */
91#define Src2None    (0<<29)
92#define Src2CL      (1<<29)
93#define Src2ImmByte (2<<29)
94#define Src2One     (3<<29)
95#define Src2Mask    (7<<29)
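/*
 * Example decomposition (illustrative): the opcode_table entry for 0x00
 * (add r/m8, r8) below is ByteOp | DstMem | SrcReg | ModRM | Lock, i.e.
 * byte-sized operands, destination decoded from the ModRM r/m field,
 * source taken from the ModRM reg field, a ModRM byte follows the opcode,
 * and a lock prefix is permitted.
 */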
96
97enum {
98	Group1_80, Group1_81, Group1_82, Group1_83,
99	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
100	Group8, Group9,
101};
102
103static u32 opcode_table[256] = {
104	/* 0x00 - 0x07 */
105	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
106	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
107	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
108	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
109	/* 0x08 - 0x0F */
110	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
111	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
112	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
113	ImplicitOps | Stack | No64, 0,
114	/* 0x10 - 0x17 */
115	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
116	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
117	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
118	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
119	/* 0x18 - 0x1F */
120	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
121	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
122	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
123	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
124	/* 0x20 - 0x27 */
125	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
126	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
127	ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
128	/* 0x28 - 0x2F */
129	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
130	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
131	ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
132	/* 0x30 - 0x37 */
133	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
134	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
135	ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
136	/* 0x38 - 0x3F */
137	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
138	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
139	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
140	0, 0,
141	/* 0x40 - 0x47 */
142	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
143	/* 0x48 - 0x4F */
144	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
145	/* 0x50 - 0x57 */
146	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
147	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
148	/* 0x58 - 0x5F */
149	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
150	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
151	/* 0x60 - 0x67 */
152	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
153	0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
154	0, 0, 0, 0,
155	/* 0x68 - 0x6F */
156	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
157	DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
158	SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
159	/* 0x70 - 0x77 */
160	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
161	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
162	/* 0x78 - 0x7F */
163	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
164	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
165	/* 0x80 - 0x87 */
166	Group | Group1_80, Group | Group1_81,
167	Group | Group1_82, Group | Group1_83,
168	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
169	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
170	/* 0x88 - 0x8F */
171	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
172	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
173	DstMem | SrcNone | ModRM | Mov, ModRM | DstReg,
174	ImplicitOps | SrcMem16 | ModRM, Group | Group1A,
175	/* 0x90 - 0x97 */
176	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
177	/* 0x98 - 0x9F */
178	0, 0, SrcImmFAddr | No64, 0,
179	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
180	/* 0xA0 - 0xA7 */
181	ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs,
182	ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs,
183	ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
184	ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
185	/* 0xA8 - 0xAF */
186	DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
187	ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
188	ByteOp | DstDI | String, DstDI | String,
189	/* 0xB0 - 0xB7 */
190	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
191	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
192	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
193	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
194	/* 0xB8 - 0xBF */
195	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
196	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
197	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
198	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
199	/* 0xC0 - 0xC7 */
200	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
201	0, ImplicitOps | Stack, 0, 0,
202	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
203	/* 0xC8 - 0xCF */
204	0, 0, 0, ImplicitOps | Stack,
205	ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
206	/* 0xD0 - 0xD7 */
207	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
208	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
209	0, 0, 0, 0,
210	/* 0xD8 - 0xDF */
211	0, 0, 0, 0, 0, 0, 0, 0,
212	/* 0xE0 - 0xE7 */
213	0, 0, 0, 0,
214	ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
215	ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
216	/* 0xE8 - 0xEF */
217	SrcImm | Stack, SrcImm | ImplicitOps,
218	SrcImmFAddr | No64, SrcImmByte | ImplicitOps,
219	SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
220	SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
221	/* 0xF0 - 0xF7 */
222	0, 0, 0, 0,
223	ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
224	/* 0xF8 - 0xFF */
225	ImplicitOps, 0, ImplicitOps, ImplicitOps,
226	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
227};
228
229static u32 twobyte_table[256] = {
230	/* 0x00 - 0x0F */
231	0, Group | GroupDual | Group7, 0, 0,
232	0, ImplicitOps, ImplicitOps | Priv, 0,
233	ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
234	0, ImplicitOps | ModRM, 0, 0,
235	/* 0x10 - 0x1F */
236	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
237	/* 0x20 - 0x2F */
238	ModRM | ImplicitOps | Priv, ModRM | Priv,
239	ModRM | ImplicitOps | Priv, ModRM | Priv,
240	0, 0, 0, 0,
241	0, 0, 0, 0, 0, 0, 0, 0,
242	/* 0x30 - 0x3F */
243	ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
244	ImplicitOps, ImplicitOps | Priv, 0, 0,
245	0, 0, 0, 0, 0, 0, 0, 0,
246	/* 0x40 - 0x47 */
247	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
248	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
249	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
250	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
251	/* 0x48 - 0x4F */
252	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
253	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
254	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
255	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
256	/* 0x50 - 0x5F */
257	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
258	/* 0x60 - 0x6F */
259	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
260	/* 0x70 - 0x7F */
261	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
262	/* 0x80 - 0x8F */
263	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
264	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
265	/* 0x90 - 0x9F */
266	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267	/* 0xA0 - 0xA7 */
268	ImplicitOps | Stack, ImplicitOps | Stack,
269	0, DstMem | SrcReg | ModRM | BitOp,
270	DstMem | SrcReg | Src2ImmByte | ModRM,
271	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
272	/* 0xA8 - 0xAF */
273	ImplicitOps | Stack, ImplicitOps | Stack,
274	0, DstMem | SrcReg | ModRM | BitOp | Lock,
275	DstMem | SrcReg | Src2ImmByte | ModRM,
276	DstMem | SrcReg | Src2CL | ModRM,
277	ModRM, 0,
278	/* 0xB0 - 0xB7 */
279	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
280	0, DstMem | SrcReg | ModRM | BitOp | Lock,
281	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
282	    DstReg | SrcMem16 | ModRM | Mov,
283	/* 0xB8 - 0xBF */
284	0, 0,
285	Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
286	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
287	    DstReg | SrcMem16 | ModRM | Mov,
288	/* 0xC0 - 0xCF */
289	0, 0, 0, DstMem | SrcReg | ModRM | Mov,
290	0, 0, 0, Group | GroupDual | Group9,
291	0, 0, 0, 0, 0, 0, 0, 0,
292	/* 0xD0 - 0xDF */
293	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
294	/* 0xE0 - 0xEF */
295	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
296	/* 0xF0 - 0xFF */
297	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
298};
299
300static u32 group_table[] = {
301	[Group1_80*8] =
302	ByteOp | DstMem | SrcImm | ModRM | Lock,
303	ByteOp | DstMem | SrcImm | ModRM | Lock,
304	ByteOp | DstMem | SrcImm | ModRM | Lock,
305	ByteOp | DstMem | SrcImm | ModRM | Lock,
306	ByteOp | DstMem | SrcImm | ModRM | Lock,
307	ByteOp | DstMem | SrcImm | ModRM | Lock,
308	ByteOp | DstMem | SrcImm | ModRM | Lock,
309	ByteOp | DstMem | SrcImm | ModRM,
310	[Group1_81*8] =
311	DstMem | SrcImm | ModRM | Lock,
312	DstMem | SrcImm | ModRM | Lock,
313	DstMem | SrcImm | ModRM | Lock,
314	DstMem | SrcImm | ModRM | Lock,
315	DstMem | SrcImm | ModRM | Lock,
316	DstMem | SrcImm | ModRM | Lock,
317	DstMem | SrcImm | ModRM | Lock,
318	DstMem | SrcImm | ModRM,
319	[Group1_82*8] =
320	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
321	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
322	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
323	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
324	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
325	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
326	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
327	ByteOp | DstMem | SrcImm | ModRM | No64,
328	[Group1_83*8] =
329	DstMem | SrcImmByte | ModRM | Lock,
330	DstMem | SrcImmByte | ModRM | Lock,
331	DstMem | SrcImmByte | ModRM | Lock,
332	DstMem | SrcImmByte | ModRM | Lock,
333	DstMem | SrcImmByte | ModRM | Lock,
334	DstMem | SrcImmByte | ModRM | Lock,
335	DstMem | SrcImmByte | ModRM | Lock,
336	DstMem | SrcImmByte | ModRM,
337	[Group1A*8] =
338	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
339	[Group3_Byte*8] =
340	ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM,
341	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
342	0, 0, 0, 0,
343	[Group3*8] =
344	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
345	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
346	0, 0, 0, 0,
347	[Group4*8] =
348	ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock,
349	0, 0, 0, 0, 0, 0,
350	[Group5*8] =
351	DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock,
352	SrcMem | ModRM | Stack, 0,
353	SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps,
354	SrcMem | ModRM | Stack, 0,
355	[Group7*8] =
356	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
357	SrcNone | ModRM | DstMem | Mov, 0,
358	SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
359	[Group8*8] =
360	0, 0, 0, 0,
361	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
362	DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
363	[Group9*8] =
364	0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
365};
366
367static u32 group2_table[] = {
368	[Group7*8] =
369	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
370	SrcNone | ModRM | DstMem | Mov, 0,
371	SrcMem16 | ModRM | Mov | Priv, 0,
372	[Group9*8] =
373	0, 0, 0, 0, 0, 0, 0, 0,
374};
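/*
 * Each group in group_table and group2_table occupies eight consecutive
 * slots indexed by the ModRM reg field: x86_decode_insn() looks up
 * (group << 3) + ((modrm >> 3) & 7), so e.g. the "not" form of Group3
 * (F7 /2) resolves to group_table[Group3*8 + 2].
 */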
375
376/* EFLAGS bit definitions. */
377#define EFLG_ID (1<<21)
378#define EFLG_VIP (1<<20)
379#define EFLG_VIF (1<<19)
380#define EFLG_AC (1<<18)
381#define EFLG_VM (1<<17)
382#define EFLG_RF (1<<16)
383#define EFLG_IOPL (3<<12)
384#define EFLG_NT (1<<14)
385#define EFLG_OF (1<<11)
386#define EFLG_DF (1<<10)
387#define EFLG_IF (1<<9)
388#define EFLG_TF (1<<8)
389#define EFLG_SF (1<<7)
390#define EFLG_ZF (1<<6)
391#define EFLG_AF (1<<4)
392#define EFLG_PF (1<<2)
393#define EFLG_CF (1<<0)
394
395/*
396 * Instruction emulation:
397 * Most instructions are emulated directly via a fragment of inline assembly
398 * code. This allows us to save/restore EFLAGS and thus very easily pick up
399 * any modified flags.
400 */
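/*
 * For example, emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags)
 * (used by the Group3 handler below) picks an operand-size suffix from
 * the destination width, executes a native "test" on the cached operand
 * values, and relies on _PRE_EFLAGS/_POST_EFLAGS to load the guest's
 * arithmetic flags into host EFLAGS beforehand and harvest them afterwards.
 */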
401
402#if defined(CONFIG_X86_64)
403#define _LO32 "k"		/* force 32-bit operand */
404#define _STK  "%%rsp"		/* stack pointer */
405#elif defined(__i386__)
406#define _LO32 ""		/* force 32-bit operand */
407#define _STK  "%%esp"		/* stack pointer */
408#endif
409
410/*
411 * These EFLAGS bits are restored from saved value during emulation, and
412 * any changes are written back to the saved value after emulation.
413 */
414#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
415
416/* Before executing instruction: restore necessary bits in EFLAGS. */
417#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
418	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
419	"movl %"_sav",%"_LO32 _tmp"; "                                  \
420	"push %"_tmp"; "                                                \
421	"push %"_tmp"; "                                                \
422	"movl %"_msk",%"_LO32 _tmp"; "                                  \
423	"andl %"_LO32 _tmp",("_STK"); "                                 \
424	"pushf; "                                                       \
425	"notl %"_LO32 _tmp"; "                                          \
426	"andl %"_LO32 _tmp",("_STK"); "                                 \
427	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
428	"pop  %"_tmp"; "                                                \
429	"orl  %"_LO32 _tmp",("_STK"); "                                 \
430	"popf; "                                                        \
431	"pop  %"_sav"; "
432
433/* After executing instruction: write-back necessary bits in EFLAGS. */
434#define _POST_EFLAGS(_sav, _msk, _tmp) \
435	/* _sav |= EFLAGS & _msk; */		\
436	"pushf; "				\
437	"pop  %"_tmp"; "			\
438	"andl %"_msk",%"_LO32 _tmp"; "		\
439	"orl  %"_LO32 _tmp",%"_sav"; "
440
441#ifdef CONFIG_X86_64
442#define ON64(x) x
443#else
444#define ON64(x)
445#endif
446
447#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
448	do {								\
449		__asm__ __volatile__ (					\
450			_PRE_EFLAGS("0", "4", "2")			\
451			_op _suffix " %"_x"3,%1; "			\
452			_POST_EFLAGS("0", "4", "2")			\
453			: "=m" (_eflags), "=m" ((_dst).val),		\
454			  "=&r" (_tmp)					\
455			: _y ((_src).val), "i" (EFLAGS_MASK));		\
456	} while (0)
457
458
459/* Raw emulation: instruction has two explicit operands. */
460#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
461	do {								\
462		unsigned long _tmp;					\
463									\
464		switch ((_dst).bytes) {					\
465		case 2:							\
466			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
467			break;						\
468		case 4:							\
469			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
470			break;						\
471		case 8:							\
472			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
473			break;						\
474		}							\
475	} while (0)
476
477#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
478	do {								     \
479		unsigned long _tmp;					     \
480		switch ((_dst).bytes) {				             \
481		case 1:							     \
482			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
483			break;						     \
484		default:						     \
485			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
486					     _wx, _wy, _lx, _ly, _qx, _qy);  \
487			break;						     \
488		}							     \
489	} while (0)
490
491/* Source operand is byte-sized and may be restricted to just %cl. */
492#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
493	__emulate_2op(_op, _src, _dst, _eflags,				\
494		      "b", "c", "b", "c", "b", "c", "b", "c")
495
496/* Source operand is byte, word, long or quad sized. */
497#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
498	__emulate_2op(_op, _src, _dst, _eflags,				\
499		      "b", "q", "w", "r", _LO32, "r", "", "r")
500
501/* Source operand is word, long or quad sized. */
502#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
503	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
504			     "w", "r", _LO32, "r", "", "r")
505
506/* Instruction has three operands and one operand is stored in the ECX register */
507#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
508	do {									\
509		unsigned long _tmp;						\
510		_type _clv  = (_cl).val;  					\
511		_type _srcv = (_src).val;    					\
512		_type _dstv = (_dst).val;					\
513										\
514		__asm__ __volatile__ (						\
515			_PRE_EFLAGS("0", "5", "2")				\
516			_op _suffix " %4,%1 \n"					\
517			_POST_EFLAGS("0", "5", "2")				\
518			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
519			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
520			); 							\
521										\
522		(_cl).val  = (unsigned long) _clv;				\
523		(_src).val = (unsigned long) _srcv;				\
524		(_dst).val = (unsigned long) _dstv;				\
525	} while (0)
526
527#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
528	do {									\
529		switch ((_dst).bytes) {						\
530		case 2:								\
531			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
532						"w", unsigned short);         	\
533			break;							\
534		case 4: 							\
535			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
536						"l", unsigned int);           	\
537			break;							\
538		case 8:								\
539			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
540						"q", unsigned long));  		\
541			break;							\
542		}								\
543	} while (0)
544
545#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
546	do {								\
547		unsigned long _tmp;					\
548									\
549		__asm__ __volatile__ (					\
550			_PRE_EFLAGS("0", "3", "2")			\
551			_op _suffix " %1; "				\
552			_POST_EFLAGS("0", "3", "2")			\
553			: "=m" (_eflags), "+m" ((_dst).val),		\
554			  "=&r" (_tmp)					\
555			: "i" (EFLAGS_MASK));				\
556	} while (0)
557
558/* Instruction has only one explicit operand (no source operand). */
559#define emulate_1op(_op, _dst, _eflags)                                    \
560	do {								\
561		switch ((_dst).bytes) {				        \
562		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
563		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
564		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
565		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
566		}							\
567	} while (0)
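/*
 * e.g. emulate_1op("inc", c->dst, ctxt->eflags), as the Group4/Group5
 * handlers below use it, increments the cached destination value in place
 * and updates the emulated arithmetic flags.
 */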
568
569/* Fetch next part of the instruction being emulated. */
570#define insn_fetch(_type, _size, _eip)                                  \
571({	unsigned long _x;						\
572	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
573	if (rc != X86EMUL_CONTINUE)					\
574		goto done;						\
575	(_eip) += (_size);						\
576	(_type)_x;							\
577})
578
579#define insn_fetch_arr(_arr, _size, _eip)                                \
580({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\
581	if (rc != X86EMUL_CONTINUE)					\
582		goto done;						\
583	(_eip) += (_size);						\
584})
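/*
 * Both fetch macros rely on the caller providing local 'rc', 'ctxt' and
 * 'ops' variables plus a 'done:' label; a failed fetch jumps straight to
 * 'done'. Typical usage: c->modrm = insn_fetch(u8, 1, c->eip); as in
 * decode_modrm() below.
 */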
585
586static inline unsigned long ad_mask(struct decode_cache *c)
587{
588	return (1UL << (c->ad_bytes << 3)) - 1;
589}
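/* e.g. ad_bytes == 2 yields 0xffff, masking effective addresses to 16 bits. */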
590
591/* Access/update address held in a register, based on addressing mode. */
592static inline unsigned long
593address_mask(struct decode_cache *c, unsigned long reg)
594{
595	if (c->ad_bytes == sizeof(unsigned long))
596		return reg;
597	else
598		return reg & ad_mask(c);
599}
600
601static inline unsigned long
602register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
603{
604	return base + address_mask(c, reg);
605}
606
607static inline void
608register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
609{
610	if (c->ad_bytes == sizeof(unsigned long))
611		*reg += inc;
612	else
613		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
614}
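/*
 * e.g. with ad_bytes == 2 only the low 16 bits of the register are
 * advanced, so 0xffff + 1 wraps to 0 while the upper bits are preserved,
 * as 16-bit address-size string and stack operations require.
 */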
615
616static inline void jmp_rel(struct decode_cache *c, int rel)
617{
618	register_address_increment(c, &c->eip, rel);
619}
620
621static void set_seg_override(struct decode_cache *c, int seg)
622{
623	c->has_seg_override = true;
624	c->seg_override = seg;
625}
626
627static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
628			      struct x86_emulate_ops *ops, int seg)
629{
630	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
631		return 0;
632
633	return ops->get_cached_segment_base(seg, ctxt->vcpu);
634}
635
636static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
637				       struct x86_emulate_ops *ops,
638				       struct decode_cache *c)
639{
640	if (!c->has_seg_override)
641		return 0;
642
643	return seg_base(ctxt, ops, c->seg_override);
644}
645
646static unsigned long es_base(struct x86_emulate_ctxt *ctxt,
647			     struct x86_emulate_ops *ops)
648{
649	return seg_base(ctxt, ops, VCPU_SREG_ES);
650}
651
652static unsigned long ss_base(struct x86_emulate_ctxt *ctxt,
653			     struct x86_emulate_ops *ops)
654{
655	return seg_base(ctxt, ops, VCPU_SREG_SS);
656}
657
658static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
659				      u32 error, bool valid)
660{
661	ctxt->exception = vec;
662	ctxt->error_code = error;
663	ctxt->error_code_valid = valid;
664	ctxt->restart = false;
665}
666
667static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
668{
669	emulate_exception(ctxt, GP_VECTOR, err, true);
670}
671
672static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr,
673		       int err)
674{
675	ctxt->cr2 = addr;
676	emulate_exception(ctxt, PF_VECTOR, err, true);
677}
678
679static void emulate_ud(struct x86_emulate_ctxt *ctxt)
680{
681	emulate_exception(ctxt, UD_VECTOR, 0, false);
682}
683
684static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
685{
686	emulate_exception(ctxt, TS_VECTOR, err, true);
687}
688
689static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
690			      struct x86_emulate_ops *ops,
691			      unsigned long eip, u8 *dest)
692{
693	struct fetch_cache *fc = &ctxt->decode.fetch;
694	int rc;
695	int size, cur_size;
696
697	if (eip == fc->end) {
698		cur_size = fc->end - fc->start;
699		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
700		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
701				size, ctxt->vcpu, NULL);
702		if (rc != X86EMUL_CONTINUE)
703			return rc;
704		fc->end += size;
705	}
706	*dest = fc->data[eip - fc->start];
707	return X86EMUL_CONTINUE;
708}
709
710static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
711			 struct x86_emulate_ops *ops,
712			 unsigned long eip, void *dest, unsigned size)
713{
714	int rc;
715
716	/* x86 instructions are limited to 15 bytes. */
717	if (eip + size - ctxt->eip > 15)
718		return X86EMUL_UNHANDLEABLE;
719	while (size--) {
720		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
721		if (rc != X86EMUL_CONTINUE)
722			return rc;
723	}
724	return X86EMUL_CONTINUE;
725}
726
727/*
728 * Given the 'reg' portion of a ModRM byte, and a register block, return a
729 * pointer into the block that addresses the relevant register.
730 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
731 */
732static void *decode_register(u8 modrm_reg, unsigned long *regs,
733			     int highbyte_regs)
734{
735	void *p;
736
737	p = &regs[modrm_reg];
738	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
739		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
740	return p;
741}
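/*
 * e.g. with highbyte_regs set, modrm_reg == 4 selects AH and resolves to
 * byte 1 of regs[VCPU_REGS_RAX]; with it clear the same encoding selects
 * the whole RSP/ESP/SP register.
 */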
742
743static int read_descriptor(struct x86_emulate_ctxt *ctxt,
744			   struct x86_emulate_ops *ops,
745			   void *ptr,
746			   u16 *size, unsigned long *address, int op_bytes)
747{
748	int rc;
749
750	if (op_bytes == 2)
751		op_bytes = 3;
752	*address = 0;
753	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
754			   ctxt->vcpu, NULL);
755	if (rc != X86EMUL_CONTINUE)
756		return rc;
757	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
758			   ctxt->vcpu, NULL);
759	return rc;
760}
761
762static int test_cc(unsigned int condition, unsigned int flags)
763{
764	int rc = 0;
765
766	switch ((condition & 15) >> 1) {
767	case 0: /* o */
768		rc |= (flags & EFLG_OF);
769		break;
770	case 1: /* b/c/nae */
771		rc |= (flags & EFLG_CF);
772		break;
773	case 2: /* z/e */
774		rc |= (flags & EFLG_ZF);
775		break;
776	case 3: /* be/na */
777		rc |= (flags & (EFLG_CF|EFLG_ZF));
778		break;
779	case 4: /* s */
780		rc |= (flags & EFLG_SF);
781		break;
782	case 5: /* p/pe */
783		rc |= (flags & EFLG_PF);
784		break;
785	case 7: /* le/ng */
786		rc |= (flags & EFLG_ZF);
787		/* fall through */
788	case 6: /* l/nge */
789		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
790		break;
791	}
792
793	/* Odd condition identifiers (lsb == 1) have inverted sense. */
794	return (!!rc ^ (condition & 1));
795}
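/*
 * e.g. condition 0x4 ("e"/"z") reports true when ZF is set; the odd
 * encoding 0x5 ("ne"/"nz") performs the same test with the sense inverted.
 */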
796
797static void decode_register_operand(struct operand *op,
798				    struct decode_cache *c,
799				    int inhibit_bytereg)
800{
801	unsigned reg = c->modrm_reg;
802	int highbyte_regs = c->rex_prefix == 0;
803
804	if (!(c->d & ModRM))
805		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
806	op->type = OP_REG;
807	if ((c->d & ByteOp) && !inhibit_bytereg) {
808		op->ptr = decode_register(reg, c->regs, highbyte_regs);
809		op->val = *(u8 *)op->ptr;
810		op->bytes = 1;
811	} else {
812		op->ptr = decode_register(reg, c->regs, 0);
813		op->bytes = c->op_bytes;
814		switch (op->bytes) {
815		case 2:
816			op->val = *(u16 *)op->ptr;
817			break;
818		case 4:
819			op->val = *(u32 *)op->ptr;
820			break;
821		case 8:
822			op->val = *(u64 *) op->ptr;
823			break;
824		}
825	}
826	op->orig_val = op->val;
827}
828
829static int decode_modrm(struct x86_emulate_ctxt *ctxt,
830			struct x86_emulate_ops *ops)
831{
832	struct decode_cache *c = &ctxt->decode;
833	u8 sib;
834	int index_reg = 0, base_reg = 0, scale;
835	int rc = X86EMUL_CONTINUE;
836
837	if (c->rex_prefix) {
838		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
839		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
840		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
841	}
842
843	c->modrm = insn_fetch(u8, 1, c->eip);
844	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
845	c->modrm_reg |= (c->modrm & 0x38) >> 3;
846	c->modrm_rm |= (c->modrm & 0x07);
847	c->modrm_ea = 0;
848	c->use_modrm_ea = 1;
849
850	if (c->modrm_mod == 3) {
851		c->modrm_ptr = decode_register(c->modrm_rm,
852					       c->regs, c->d & ByteOp);
853		c->modrm_val = *(unsigned long *)c->modrm_ptr;
854		return rc;
855	}
856
857	if (c->ad_bytes == 2) {
858		unsigned bx = c->regs[VCPU_REGS_RBX];
859		unsigned bp = c->regs[VCPU_REGS_RBP];
860		unsigned si = c->regs[VCPU_REGS_RSI];
861		unsigned di = c->regs[VCPU_REGS_RDI];
862
863		/* 16-bit ModR/M decode. */
864		switch (c->modrm_mod) {
865		case 0:
866			if (c->modrm_rm == 6)
867				c->modrm_ea += insn_fetch(u16, 2, c->eip);
868			break;
869		case 1:
870			c->modrm_ea += insn_fetch(s8, 1, c->eip);
871			break;
872		case 2:
873			c->modrm_ea += insn_fetch(u16, 2, c->eip);
874			break;
875		}
876		switch (c->modrm_rm) {
877		case 0:
878			c->modrm_ea += bx + si;
879			break;
880		case 1:
881			c->modrm_ea += bx + di;
882			break;
883		case 2:
884			c->modrm_ea += bp + si;
885			break;
886		case 3:
887			c->modrm_ea += bp + di;
888			break;
889		case 4:
890			c->modrm_ea += si;
891			break;
892		case 5:
893			c->modrm_ea += di;
894			break;
895		case 6:
896			if (c->modrm_mod != 0)
897				c->modrm_ea += bp;
898			break;
899		case 7:
900			c->modrm_ea += bx;
901			break;
902		}
903		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
904		    (c->modrm_rm == 6 && c->modrm_mod != 0))
905			if (!c->has_seg_override)
906				set_seg_override(c, VCPU_SREG_SS);
907		c->modrm_ea = (u16)c->modrm_ea;
908	} else {
909		/* 32/64-bit ModR/M decode. */
910		if ((c->modrm_rm & 7) == 4) {
911			sib = insn_fetch(u8, 1, c->eip);
912			index_reg |= (sib >> 3) & 7;
913			base_reg |= sib & 7;
914			scale = sib >> 6;
915
916			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
917				c->modrm_ea += insn_fetch(s32, 4, c->eip);
918			else
919				c->modrm_ea += c->regs[base_reg];
920			if (index_reg != 4)
921				c->modrm_ea += c->regs[index_reg] << scale;
922		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
923			if (ctxt->mode == X86EMUL_MODE_PROT64)
924				c->rip_relative = 1;
925		} else
926			c->modrm_ea += c->regs[c->modrm_rm];
927		switch (c->modrm_mod) {
928		case 0:
929			if (c->modrm_rm == 5)
930				c->modrm_ea += insn_fetch(s32, 4, c->eip);
931			break;
932		case 1:
933			c->modrm_ea += insn_fetch(s8, 1, c->eip);
934			break;
935		case 2:
936			c->modrm_ea += insn_fetch(s32, 4, c->eip);
937			break;
938		}
939	}
940done:
941	return rc;
942}
943
944static int decode_abs(struct x86_emulate_ctxt *ctxt,
945		      struct x86_emulate_ops *ops)
946{
947	struct decode_cache *c = &ctxt->decode;
948	int rc = X86EMUL_CONTINUE;
949
950	switch (c->ad_bytes) {
951	case 2:
952		c->modrm_ea = insn_fetch(u16, 2, c->eip);
953		break;
954	case 4:
955		c->modrm_ea = insn_fetch(u32, 4, c->eip);
956		break;
957	case 8:
958		c->modrm_ea = insn_fetch(u64, 8, c->eip);
959		break;
960	}
961done:
962	return rc;
963}
964
965int
966x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
967{
968	struct decode_cache *c = &ctxt->decode;
969	int rc = X86EMUL_CONTINUE;
970	int mode = ctxt->mode;
971	int def_op_bytes, def_ad_bytes, group;
972
973
974	/* we cannot decode a new insn before completing the previous rep insn */
975	WARN_ON(ctxt->restart);
976
977	c->eip = ctxt->eip;
978	c->fetch.start = c->fetch.end = c->eip;
979	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
980
981	switch (mode) {
982	case X86EMUL_MODE_REAL:
983	case X86EMUL_MODE_VM86:
984	case X86EMUL_MODE_PROT16:
985		def_op_bytes = def_ad_bytes = 2;
986		break;
987	case X86EMUL_MODE_PROT32:
988		def_op_bytes = def_ad_bytes = 4;
989		break;
990#ifdef CONFIG_X86_64
991	case X86EMUL_MODE_PROT64:
992		def_op_bytes = 4;
993		def_ad_bytes = 8;
994		break;
995#endif
996	default:
997		return -1;
998	}
999
1000	c->op_bytes = def_op_bytes;
1001	c->ad_bytes = def_ad_bytes;
1002
1003	/* Legacy prefixes. */
1004	for (;;) {
1005		switch (c->b = insn_fetch(u8, 1, c->eip)) {
1006		case 0x66:	/* operand-size override */
1007			/* switch between 2/4 bytes */
1008			c->op_bytes = def_op_bytes ^ 6;
1009			break;
1010		case 0x67:	/* address-size override */
1011			if (mode == X86EMUL_MODE_PROT64)
1012				/* switch between 4/8 bytes */
1013				c->ad_bytes = def_ad_bytes ^ 12;
1014			else
1015				/* switch between 2/4 bytes */
1016				c->ad_bytes = def_ad_bytes ^ 6;
1017			break;
1018		case 0x26:	/* ES override */
1019		case 0x2e:	/* CS override */
1020		case 0x36:	/* SS override */
1021		case 0x3e:	/* DS override */
1022			set_seg_override(c, (c->b >> 3) & 3);
1023			break;
1024		case 0x64:	/* FS override */
1025		case 0x65:	/* GS override */
1026			set_seg_override(c, c->b & 7);
1027			break;
1028		case 0x40 ... 0x4f: /* REX */
1029			if (mode != X86EMUL_MODE_PROT64)
1030				goto done_prefixes;
1031			c->rex_prefix = c->b;
1032			continue;
1033		case 0xf0:	/* LOCK */
1034			c->lock_prefix = 1;
1035			break;
1036		case 0xf2:	/* REPNE/REPNZ */
1037			c->rep_prefix = REPNE_PREFIX;
1038			break;
1039		case 0xf3:	/* REP/REPE/REPZ */
1040			c->rep_prefix = REPE_PREFIX;
1041			break;
1042		default:
1043			goto done_prefixes;
1044		}
1045
1046		/* Any legacy prefix after a REX prefix nullifies its effect. */
1047
1048		c->rex_prefix = 0;
1049	}
1050
1051done_prefixes:
1052
1053	/* REX prefix. */
1054	if (c->rex_prefix)
1055		if (c->rex_prefix & 8)
1056			c->op_bytes = 8;	/* REX.W */
1057
1058	/* Opcode byte(s). */
1059	c->d = opcode_table[c->b];
1060	if (c->d == 0) {
1061		/* Two-byte opcode? */
1062		if (c->b == 0x0f) {
1063			c->twobyte = 1;
1064			c->b = insn_fetch(u8, 1, c->eip);
1065			c->d = twobyte_table[c->b];
1066		}
1067	}
1068
1069	if (c->d & Group) {
1070		group = c->d & GroupMask;
1071		c->modrm = insn_fetch(u8, 1, c->eip);
1072		--c->eip;
1073
1074		group = (group << 3) + ((c->modrm >> 3) & 7);
1075		if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
1076			c->d = group2_table[group];
1077		else
1078			c->d = group_table[group];
1079	}
1080
1081	/* Unrecognised? */
1082	if (c->d == 0) {
1083		DPRINTF("Cannot emulate %02x\n", c->b);
1084		return -1;
1085	}
1086
1087	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
1088		c->op_bytes = 8;
1089
1090	/* ModRM and SIB bytes. */
1091	if (c->d & ModRM)
1092		rc = decode_modrm(ctxt, ops);
1093	else if (c->d & MemAbs)
1094		rc = decode_abs(ctxt, ops);
1095	if (rc != X86EMUL_CONTINUE)
1096		goto done;
1097
1098	if (!c->has_seg_override)
1099		set_seg_override(c, VCPU_SREG_DS);
1100
1101	if (!(!c->twobyte && c->b == 0x8d))
1102		c->modrm_ea += seg_override_base(ctxt, ops, c);
1103
1104	if (c->ad_bytes != 8)
1105		c->modrm_ea = (u32)c->modrm_ea;
1106
1107	if (c->rip_relative)
1108		c->modrm_ea += c->eip;
1109
1110	/*
1111	 * Decode and fetch the source operand: register, memory
1112	 * or immediate.
1113	 */
1114	switch (c->d & SrcMask) {
1115	case SrcNone:
1116		break;
1117	case SrcReg:
1118		decode_register_operand(&c->src, c, 0);
1119		break;
1120	case SrcMem16:
1121		c->src.bytes = 2;
1122		goto srcmem_common;
1123	case SrcMem32:
1124		c->src.bytes = 4;
1125		goto srcmem_common;
1126	case SrcMem:
1127		c->src.bytes = (c->d & ByteOp) ? 1 :
1128							   c->op_bytes;
1129		/* Don't fetch the address for invlpg: it could be unmapped. */
1130		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
1131			break;
1132	srcmem_common:
1133		/*
1134		 * For instructions with a ModR/M byte, switch to register
1135		 * access if Mod = 3.
1136		 */
1137		if ((c->d & ModRM) && c->modrm_mod == 3) {
1138			c->src.type = OP_REG;
1139			c->src.val = c->modrm_val;
1140			c->src.ptr = c->modrm_ptr;
1141			break;
1142		}
1143		c->src.type = OP_MEM;
1144		c->src.ptr = (unsigned long *)c->modrm_ea;
1145		c->src.val = 0;
1146		break;
1147	case SrcImm:
1148	case SrcImmU:
1149		c->src.type = OP_IMM;
1150		c->src.ptr = (unsigned long *)c->eip;
1151		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1152		if (c->src.bytes == 8)
1153			c->src.bytes = 4;
1154		/* NB. Immediates are sign-extended as necessary. */
1155		switch (c->src.bytes) {
1156		case 1:
1157			c->src.val = insn_fetch(s8, 1, c->eip);
1158			break;
1159		case 2:
1160			c->src.val = insn_fetch(s16, 2, c->eip);
1161			break;
1162		case 4:
1163			c->src.val = insn_fetch(s32, 4, c->eip);
1164			break;
1165		}
1166		if ((c->d & SrcMask) == SrcImmU) {
1167			switch (c->src.bytes) {
1168			case 1:
1169				c->src.val &= 0xff;
1170				break;
1171			case 2:
1172				c->src.val &= 0xffff;
1173				break;
1174			case 4:
1175				c->src.val &= 0xffffffff;
1176				break;
1177			}
1178		}
1179		break;
1180	case SrcImmByte:
1181	case SrcImmUByte:
1182		c->src.type = OP_IMM;
1183		c->src.ptr = (unsigned long *)c->eip;
1184		c->src.bytes = 1;
1185		if ((c->d & SrcMask) == SrcImmByte)
1186			c->src.val = insn_fetch(s8, 1, c->eip);
1187		else
1188			c->src.val = insn_fetch(u8, 1, c->eip);
1189		break;
1190	case SrcAcc:
1191		c->src.type = OP_REG;
1192		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1193		c->src.ptr = &c->regs[VCPU_REGS_RAX];
1194		switch (c->src.bytes) {
1195			case 1:
1196				c->src.val = *(u8 *)c->src.ptr;
1197				break;
1198			case 2:
1199				c->src.val = *(u16 *)c->src.ptr;
1200				break;
1201			case 4:
1202				c->src.val = *(u32 *)c->src.ptr;
1203				break;
1204			case 8:
1205				c->src.val = *(u64 *)c->src.ptr;
1206				break;
1207		}
1208		break;
1209	case SrcOne:
1210		c->src.bytes = 1;
1211		c->src.val = 1;
1212		break;
1213	case SrcSI:
1214		c->src.type = OP_MEM;
1215		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1216		c->src.ptr = (unsigned long *)
1217			register_address(c,  seg_override_base(ctxt, ops, c),
1218					 c->regs[VCPU_REGS_RSI]);
1219		c->src.val = 0;
1220		break;
1221	case SrcImmFAddr:
1222		c->src.type = OP_IMM;
1223		c->src.ptr = (unsigned long *)c->eip;
1224		c->src.bytes = c->op_bytes + 2;
1225		insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
1226		break;
1227	case SrcMemFAddr:
1228		c->src.type = OP_MEM;
1229		c->src.ptr = (unsigned long *)c->modrm_ea;
1230		c->src.bytes = c->op_bytes + 2;
1231		break;
1232	}
1233
1234	/*
1235	 * Decode and fetch the second source operand: register, memory
1236	 * or immediate.
1237	 */
1238	switch (c->d & Src2Mask) {
1239	case Src2None:
1240		break;
1241	case Src2CL:
1242		c->src2.bytes = 1;
1243		c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;	/* shift count is taken from CL */
1244		break;
1245	case Src2ImmByte:
1246		c->src2.type = OP_IMM;
1247		c->src2.ptr = (unsigned long *)c->eip;
1248		c->src2.bytes = 1;
1249		c->src2.val = insn_fetch(u8, 1, c->eip);
1250		break;
1251	case Src2One:
1252		c->src2.bytes = 1;
1253		c->src2.val = 1;
1254		break;
1255	}
1256
1257	/* Decode and fetch the destination operand: register or memory. */
1258	switch (c->d & DstMask) {
1259	case ImplicitOps:
1260		/* Special instructions do their own operand decoding. */
1261		return 0;
1262	case DstReg:
1263		decode_register_operand(&c->dst, c,
1264			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
1265		break;
1266	case DstMem:
1267	case DstMem64:
1268		if ((c->d & ModRM) && c->modrm_mod == 3) {
1269			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1270			c->dst.type = OP_REG;
1271			c->dst.val = c->dst.orig_val = c->modrm_val;
1272			c->dst.ptr = c->modrm_ptr;
1273			break;
1274		}
1275		c->dst.type = OP_MEM;
1276		c->dst.ptr = (unsigned long *)c->modrm_ea;
1277		if ((c->d & DstMask) == DstMem64)
1278			c->dst.bytes = 8;
1279		else
1280			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1281		c->dst.val = 0;
1282		if (c->d & BitOp) {
1283			unsigned long mask = ~(c->dst.bytes * 8 - 1);
1284
1285			c->dst.ptr = (void *)c->dst.ptr +
1286						   (c->src.val & mask) / 8;
1287		}
1288		break;
1289	case DstAcc:
1290		c->dst.type = OP_REG;
1291		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1292		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
1293		switch (c->dst.bytes) {
1294			case 1:
1295				c->dst.val = *(u8 *)c->dst.ptr;
1296				break;
1297			case 2:
1298				c->dst.val = *(u16 *)c->dst.ptr;
1299				break;
1300			case 4:
1301				c->dst.val = *(u32 *)c->dst.ptr;
1302				break;
1303			case 8:
1304				c->dst.val = *(u64 *)c->dst.ptr;
1305				break;
1306		}
1307		c->dst.orig_val = c->dst.val;
1308		break;
1309	case DstDI:
1310		c->dst.type = OP_MEM;
1311		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1312		c->dst.ptr = (unsigned long *)
1313			register_address(c, es_base(ctxt, ops),
1314					 c->regs[VCPU_REGS_RDI]);
1315		c->dst.val = 0;
1316		break;
1317	}
1318
1319done:
1320	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
1321}
1322
1323static int read_emulated(struct x86_emulate_ctxt *ctxt,
1324			 struct x86_emulate_ops *ops,
1325			 unsigned long addr, void *dest, unsigned size)
1326{
1327	int rc;
1328	struct read_cache *mc = &ctxt->decode.mem_read;
1329	u32 err;
1330
1331	while (size) {
1332		int n = min(size, 8u);
1333		size -= n;
1334		if (mc->pos < mc->end)
1335			goto read_cached;
1336
1337		rc = ops->read_emulated(addr, mc->data + mc->end, n, &err,
1338					ctxt->vcpu);
1339		if (rc == X86EMUL_PROPAGATE_FAULT)
1340			emulate_pf(ctxt, addr, err);
1341		if (rc != X86EMUL_CONTINUE)
1342			return rc;
1343		mc->end += n;
1344
1345	read_cached:
1346		memcpy(dest, mc->data + mc->pos, n);
1347		mc->pos += n;
1348		dest += n;
1349		addr += n;
1350	}
1351	return X86EMUL_CONTINUE;
1352}
1353
1354static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1355			   struct x86_emulate_ops *ops,
1356			   unsigned int size, unsigned short port,
1357			   void *dest)
1358{
1359	struct read_cache *rc = &ctxt->decode.io_read;
1360
1361	if (rc->pos == rc->end) { /* refill pio read ahead */
1362		struct decode_cache *c = &ctxt->decode;
1363		unsigned int in_page, n;
1364		unsigned int count = c->rep_prefix ?
1365			address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
1366		in_page = (ctxt->eflags & EFLG_DF) ?
1367			offset_in_page(c->regs[VCPU_REGS_RDI]) :
1368			PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
1369		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
1370			count);
1371		if (n == 0)
1372			n = 1;
1373		rc->pos = rc->end = 0;
1374		if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
1375			return 0;
1376		rc->end = n * size;
1377	}
1378
1379	memcpy(dest, rc->data + rc->pos, size);
1380	rc->pos += size;
1381	return 1;
1382}
1383
1384static u32 desc_limit_scaled(struct desc_struct *desc)
1385{
1386	u32 limit = get_desc_limit(desc);
1387
1388	return desc->g ? (limit << 12) | 0xfff : limit;
1389}
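/* e.g. g == 1 with a raw limit of 0xfffff scales to 0xffffffff (4 GiB). */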
1390
1391static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1392				     struct x86_emulate_ops *ops,
1393				     u16 selector, struct desc_ptr *dt)
1394{
1395	if (selector & 1 << 2) {
1396		struct desc_struct desc;
1397		memset (dt, 0, sizeof *dt);
1398		if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
1399			return;
1400
1401		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1402		dt->address = get_desc_base(&desc);
1403	} else
1404		ops->get_gdt(dt, ctxt->vcpu);
1405}
1406
1407/* allowed just for 8-byte segment descriptors */
1408static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1409				   struct x86_emulate_ops *ops,
1410				   u16 selector, struct desc_struct *desc)
1411{
1412	struct desc_ptr dt;
1413	u16 index = selector >> 3;
1414	int ret;
1415	u32 err;
1416	ulong addr;
1417
1418	get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1419
1420	if (dt.size < index * 8 + 7) {
1421		emulate_gp(ctxt, selector & 0xfffc);
1422		return X86EMUL_PROPAGATE_FAULT;
1423	}
1424	addr = dt.address + index * 8;
1425	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,  &err);
1426	if (ret == X86EMUL_PROPAGATE_FAULT)
1427		emulate_pf(ctxt, addr, err);
1428
1429	return ret;
1430}
1431
1432/* allowed just for 8-byte segment descriptors */
1433static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1434				    struct x86_emulate_ops *ops,
1435				    u16 selector, struct desc_struct *desc)
1436{
1437	struct desc_ptr dt;
1438	u16 index = selector >> 3;
1439	u32 err;
1440	ulong addr;
1441	int ret;
1442
1443	get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1444
1445	if (dt.size < index * 8 + 7) {
1446		emulate_gp(ctxt, selector & 0xfffc);
1447		return X86EMUL_PROPAGATE_FAULT;
1448	}
1449
1450	addr = dt.address + index * 8;
1451	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1452	if (ret == X86EMUL_PROPAGATE_FAULT)
1453		emulate_pf(ctxt, addr, err);
1454
1455	return ret;
1456}
1457
1458static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1459				   struct x86_emulate_ops *ops,
1460				   u16 selector, int seg)
1461{
1462	struct desc_struct seg_desc;
1463	u8 dpl, rpl, cpl;
1464	unsigned err_vec = GP_VECTOR;
1465	u32 err_code = 0;
1466	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1467	int ret;
1468
1469	memset(&seg_desc, 0, sizeof seg_desc);
1470
1471	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
1472	    || ctxt->mode == X86EMUL_MODE_REAL) {
1473		/* set real mode segment descriptor */
1474		set_desc_base(&seg_desc, selector << 4);
1475		set_desc_limit(&seg_desc, 0xffff);
1476		seg_desc.type = 3;
1477		seg_desc.p = 1;
1478		seg_desc.s = 1;
1479		goto load;
1480	}
1481
1482	/* NULL selector is not valid for TR, CS and SS */
1483	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
1484	    && null_selector)
1485		goto exception;
1486
1487	/* TR should be in GDT only */
1488	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1489		goto exception;
1490
1491	if (null_selector) /* for NULL selector skip all following checks */
1492		goto load;
1493
1494	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
1495	if (ret != X86EMUL_CONTINUE)
1496		return ret;
1497
1498	err_code = selector & 0xfffc;
1499	err_vec = GP_VECTOR;
1500
1501	/* can't load system descriptor into segment selector */
1502	if (seg <= VCPU_SREG_GS && !seg_desc.s)
1503		goto exception;
1504
1505	if (!seg_desc.p) {
1506		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1507		goto exception;
1508	}
1509
1510	rpl = selector & 3;
1511	dpl = seg_desc.dpl;
1512	cpl = ops->cpl(ctxt->vcpu);
1513
1514	switch (seg) {
1515	case VCPU_SREG_SS:
1516		/*
1517		 * segment is not a writable data segment or segment
1518		 * selector's RPL != CPL or segment descriptor's DPL != CPL
1519		 */
1520		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1521			goto exception;
1522		break;
1523	case VCPU_SREG_CS:
1524		if (!(seg_desc.type & 8))
1525			goto exception;
1526
1527		if (seg_desc.type & 4) {
1528			/* conforming */
1529			if (dpl > cpl)
1530				goto exception;
1531		} else {
1532			/* nonconforming */
1533			if (rpl > cpl || dpl != cpl)
1534				goto exception;
1535		}
1536		/* CS(RPL) <- CPL */
1537		selector = (selector & 0xfffc) | cpl;
1538		break;
1539	case VCPU_SREG_TR:
1540		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1541			goto exception;
1542		break;
1543	case VCPU_SREG_LDTR:
1544		if (seg_desc.s || seg_desc.type != 2)
1545			goto exception;
1546		break;
1547	default: /*  DS, ES, FS, or GS */
1548		/*
1549		 * segment is not a data or readable code segment or
1550		 * ((segment is a data or nonconforming code segment)
1551		 * and (both RPL and CPL > DPL))
1552		 */
1553		if ((seg_desc.type & 0xa) == 0x8 ||
1554		    (((seg_desc.type & 0xc) != 0xc) &&
1555		     (rpl > dpl && cpl > dpl)))
1556			goto exception;
1557		break;
1558	}
1559
1560	if (seg_desc.s) {
1561		/* mark segment as accessed */
1562		seg_desc.type |= 1;
1563		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
1564		if (ret != X86EMUL_CONTINUE)
1565			return ret;
1566	}
1567load:
1568	ops->set_segment_selector(selector, seg, ctxt->vcpu);
1569	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
1570	return X86EMUL_CONTINUE;
1571exception:
1572	emulate_exception(ctxt, err_vec, err_code, true);
1573	return X86EMUL_PROPAGATE_FAULT;
1574}
1575
1576static inline int writeback(struct x86_emulate_ctxt *ctxt,
1577			    struct x86_emulate_ops *ops)
1578{
1579	int rc;
1580	struct decode_cache *c = &ctxt->decode;
1581	u32 err;
1582
1583	switch (c->dst.type) {
1584	case OP_REG:
1585		/* The 4-byte case *is* correct:
1586		 * in 64-bit mode we zero-extend.
1587		 */
1588		switch (c->dst.bytes) {
1589		case 1:
1590			*(u8 *)c->dst.ptr = (u8)c->dst.val;
1591			break;
1592		case 2:
1593			*(u16 *)c->dst.ptr = (u16)c->dst.val;
1594			break;
1595		case 4:
1596			*c->dst.ptr = (u32)c->dst.val;
1597			break;	/* 64b: zero-ext */
1598		case 8:
1599			*c->dst.ptr = c->dst.val;
1600			break;
1601		}
1602		break;
1603	case OP_MEM:
1604		if (c->lock_prefix)
1605			rc = ops->cmpxchg_emulated(
1606					(unsigned long)c->dst.ptr,
1607					&c->dst.orig_val,
1608					&c->dst.val,
1609					c->dst.bytes,
1610					&err,
1611					ctxt->vcpu);
1612		else
1613			rc = ops->write_emulated(
1614					(unsigned long)c->dst.ptr,
1615					&c->dst.val,
1616					c->dst.bytes,
1617					&err,
1618					ctxt->vcpu);
1619		if (rc == X86EMUL_PROPAGATE_FAULT)
1620			emulate_pf(ctxt,
1621					      (unsigned long)c->dst.ptr, err);
1622		if (rc != X86EMUL_CONTINUE)
1623			return rc;
1624		break;
1625	case OP_NONE:
1626		/* no writeback */
1627		break;
1628	default:
1629		break;
1630	}
1631	return X86EMUL_CONTINUE;
1632}
1633
1634static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
1635				struct x86_emulate_ops *ops)
1636{
1637	struct decode_cache *c = &ctxt->decode;
1638
1639	c->dst.type  = OP_MEM;
1640	c->dst.bytes = c->op_bytes;
1641	c->dst.val = c->src.val;
1642	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1643	c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops),
1644					       c->regs[VCPU_REGS_RSP]);
1645}
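/*
 * emulate_push() only adjusts RSP and describes the store; the actual
 * memory write is performed later by writeback(), as emulate_pusha()
 * below does explicitly after each push.
 */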
1646
1647static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1648		       struct x86_emulate_ops *ops,
1649		       void *dest, int len)
1650{
1651	struct decode_cache *c = &ctxt->decode;
1652	int rc;
1653
1654	rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops),
1655						       c->regs[VCPU_REGS_RSP]),
1656			   dest, len);
1657	if (rc != X86EMUL_CONTINUE)
1658		return rc;
1659
1660	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1661	return rc;
1662}
1663
1664static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1665		       struct x86_emulate_ops *ops,
1666		       void *dest, int len)
1667{
1668	int rc;
1669	unsigned long val, change_mask;
1670	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1671	int cpl = ops->cpl(ctxt->vcpu);
1672
1673	rc = emulate_pop(ctxt, ops, &val, len);
1674	if (rc != X86EMUL_CONTINUE)
1675		return rc;
1676
1677	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1678		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1679
1680	switch(ctxt->mode) {
1681	case X86EMUL_MODE_PROT64:
1682	case X86EMUL_MODE_PROT32:
1683	case X86EMUL_MODE_PROT16:
1684		if (cpl == 0)
1685			change_mask |= EFLG_IOPL;
1686		if (cpl <= iopl)
1687			change_mask |= EFLG_IF;
1688		break;
1689	case X86EMUL_MODE_VM86:
1690		if (iopl < 3) {
1691			emulate_gp(ctxt, 0);
1692			return X86EMUL_PROPAGATE_FAULT;
1693		}
1694		change_mask |= EFLG_IF;
1695		break;
1696	default: /* real mode */
1697		change_mask |= (EFLG_IOPL | EFLG_IF);
1698		break;
1699	}
1700
1701	*(unsigned long *)dest =
1702		(ctxt->eflags & ~change_mask) | (val & change_mask);
1703
1704	return rc;
1705}
1706
1707static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
1708			      struct x86_emulate_ops *ops, int seg)
1709{
1710	struct decode_cache *c = &ctxt->decode;
1711
1712	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);
1713
1714	emulate_push(ctxt, ops);
1715}
1716
1717static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1718			     struct x86_emulate_ops *ops, int seg)
1719{
1720	struct decode_cache *c = &ctxt->decode;
1721	unsigned long selector;
1722	int rc;
1723
1724	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1725	if (rc != X86EMUL_CONTINUE)
1726		return rc;
1727
1728	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
1729	return rc;
1730}
1731
1732static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
1733			  struct x86_emulate_ops *ops)
1734{
1735	struct decode_cache *c = &ctxt->decode;
1736	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1737	int rc = X86EMUL_CONTINUE;
1738	int reg = VCPU_REGS_RAX;
1739
1740	while (reg <= VCPU_REGS_RDI) {
1741		(reg == VCPU_REGS_RSP) ?
1742		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
1743
1744		emulate_push(ctxt, ops);
1745
1746		rc = writeback(ctxt, ops);
1747		if (rc != X86EMUL_CONTINUE)
1748			return rc;
1749
1750		++reg;
1751	}
1752
1753	/* Disable writeback. */
1754	c->dst.type = OP_NONE;
1755
1756	return rc;
1757}
1758
1759static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1760			struct x86_emulate_ops *ops)
1761{
1762	struct decode_cache *c = &ctxt->decode;
1763	int rc = X86EMUL_CONTINUE;
1764	int reg = VCPU_REGS_RDI;
1765
1766	while (reg >= VCPU_REGS_RAX) {
1767		if (reg == VCPU_REGS_RSP) {
1768			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1769							c->op_bytes);
1770			--reg;
1771		}
1772
1773		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1774		if (rc != X86EMUL_CONTINUE)
1775			break;
1776		--reg;
1777	}
1778	return rc;
1779}
1780
1781static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1782				struct x86_emulate_ops *ops)
1783{
1784	struct decode_cache *c = &ctxt->decode;
1785
1786	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1787}
1788
1789static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1790{
1791	struct decode_cache *c = &ctxt->decode;
1792	switch (c->modrm_reg) {
1793	case 0:	/* rol */
1794		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1795		break;
1796	case 1:	/* ror */
1797		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1798		break;
1799	case 2:	/* rcl */
1800		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1801		break;
1802	case 3:	/* rcr */
1803		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1804		break;
1805	case 4:	/* sal/shl */
1806	case 6:	/* sal/shl */
1807		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1808		break;
1809	case 5:	/* shr */
1810		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1811		break;
1812	case 7:	/* sar */
1813		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
1814		break;
1815	}
1816}
1817
1818static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1819			       struct x86_emulate_ops *ops)
1820{
1821	struct decode_cache *c = &ctxt->decode;
1822
1823	switch (c->modrm_reg) {
1824	case 0 ... 1:	/* test */
1825		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1826		break;
1827	case 2:	/* not */
1828		c->dst.val = ~c->dst.val;
1829		break;
1830	case 3:	/* neg */
1831		emulate_1op("neg", c->dst, ctxt->eflags);
1832		break;
1833	default:
1834		return 0;
1835	}
1836	return 1;
1837}
1838
1839static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1840			       struct x86_emulate_ops *ops)
1841{
1842	struct decode_cache *c = &ctxt->decode;
1843
1844	switch (c->modrm_reg) {
1845	case 0:	/* inc */
1846		emulate_1op("inc", c->dst, ctxt->eflags);
1847		break;
1848	case 1:	/* dec */
1849		emulate_1op("dec", c->dst, ctxt->eflags);
1850		break;
1851	case 2: /* call near abs */ {
1852		long int old_eip;
1853		old_eip = c->eip;
1854		c->eip = c->src.val;
1855		c->src.val = old_eip;
1856		emulate_push(ctxt, ops);
1857		break;
1858	}
1859	case 4: /* jmp abs */
1860		c->eip = c->src.val;
1861		break;
1862	case 6:	/* push */
1863		emulate_push(ctxt, ops);
1864		break;
1865	}
1866	return X86EMUL_CONTINUE;
1867}
1868
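/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit destination; on mismatch
 * load the destination into EDX:EAX and clear ZF, otherwise store
 * ECX:EBX to the destination and set ZF.
 */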
1869static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1870			       struct x86_emulate_ops *ops)
1871{
1872	struct decode_cache *c = &ctxt->decode;
1873	u64 old = c->dst.orig_val64;
1874
1875	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1876	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
1877		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1878		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1879		ctxt->eflags &= ~EFLG_ZF;
1880	} else {
1881		c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1882			(u32) c->regs[VCPU_REGS_RBX];
1883
1884		ctxt->eflags |= EFLG_ZF;
1885	}
1886	return X86EMUL_CONTINUE;
1887}
1888
1889static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1890			   struct x86_emulate_ops *ops)
1891{
1892	struct decode_cache *c = &ctxt->decode;
1893	int rc;
1894	unsigned long cs;
1895
1896	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1897	if (rc != X86EMUL_CONTINUE)
1898		return rc;
1899	if (c->op_bytes == 4)
1900		c->eip = (u32)c->eip;
1901	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1902	if (rc != X86EMUL_CONTINUE)
1903		return rc;
1904	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1905	return rc;
1906}
1907
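/*
 * Build the flat 4GB code and stack segment descriptors used by the
 * SYSCALL/SYSENTER/SYSEXIT fast paths; the selectors, DPL and the L
 * bit are filled in by the callers.
 */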
1908static inline void
1909setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1910			struct x86_emulate_ops *ops, struct desc_struct *cs,
1911			struct desc_struct *ss)
1912{
1913	memset(cs, 0, sizeof(struct desc_struct));
1914	ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu);
1915	memset(ss, 0, sizeof(struct desc_struct));
1916
1917	cs->l = 0;		/* will be adjusted later */
1918	set_desc_base(cs, 0);	/* flat segment */
1919	cs->g = 1;		/* 4kb granularity */
1920	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
1921	cs->type = 0x0b;	/* Read, Execute, Accessed */
1922	cs->s = 1;
1923	cs->dpl = 0;		/* will be adjusted later */
1924	cs->p = 1;
1925	cs->d = 1;
1926
1927	set_desc_base(ss, 0);	/* flat segment */
1928	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
1929	ss->g = 1;		/* 4kb granularity */
1930	ss->s = 1;
1931	ss->type = 0x03;	/* Read/Write, Accessed */
1932	ss->d = 1;		/* 32bit stack segment */
1933	ss->dpl = 0;
1934	ss->p = 1;
1935}
1936
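/*
 * SYSCALL: the kernel CS selector comes from MSR_STAR bits 47:32 and
 * SS is CS + 8.  In long mode RCX receives the return RIP, R11 the
 * saved RFLAGS, RIP is loaded from LSTAR/CSTAR and RFLAGS is masked
 * with MSR_SYSCALL_MASK; legacy mode loads EIP from MSR_STAR[31:0].
 */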
1937static int
1938emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1939{
1940	struct decode_cache *c = &ctxt->decode;
1941	struct desc_struct cs, ss;
1942	u64 msr_data;
1943	u16 cs_sel, ss_sel;
1944
1945	/* syscall is not available in real mode or VM86 mode */
1946	if (ctxt->mode == X86EMUL_MODE_REAL ||
1947	    ctxt->mode == X86EMUL_MODE_VM86) {
1948		emulate_ud(ctxt);
1949		return X86EMUL_PROPAGATE_FAULT;
1950	}
1951
1952	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1953
1954	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1955	msr_data >>= 32;
1956	cs_sel = (u16)(msr_data & 0xfffc);
1957	ss_sel = (u16)(msr_data + 8);
1958
1959	if (is_long_mode(ctxt->vcpu)) {
1960		cs.d = 0;
1961		cs.l = 1;
1962	}
1963	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
1964	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1965	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
1966	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1967
1968	c->regs[VCPU_REGS_RCX] = c->eip;
1969	if (is_long_mode(ctxt->vcpu)) {
1970#ifdef CONFIG_X86_64
1971		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1972
1973		ops->get_msr(ctxt->vcpu,
1974			     ctxt->mode == X86EMUL_MODE_PROT64 ?
1975			     MSR_LSTAR : MSR_CSTAR, &msr_data);
1976		c->eip = msr_data;
1977
1978		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1979		ctxt->eflags &= ~(msr_data | EFLG_RF);
1980#endif
1981	} else {
1982		/* legacy mode */
1983		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1984		c->eip = (u32)msr_data;
1985
1986		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1987	}
1988
1989	return X86EMUL_CONTINUE;
1990}
1991
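/*
 * SYSENTER: CS comes from IA32_SYSENTER_CS with SS = CS + 8, and
 * EIP/ESP are loaded from IA32_SYSENTER_EIP and IA32_SYSENTER_ESP.
 */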
1992static int
1993emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1994{
1995	struct decode_cache *c = &ctxt->decode;
1996	struct desc_struct cs, ss;
1997	u64 msr_data;
1998	u16 cs_sel, ss_sel;
1999
2000	/* inject #GP if in real mode */
2001	if (ctxt->mode == X86EMUL_MODE_REAL) {
2002		emulate_gp(ctxt, 0);
2003		return X86EMUL_PROPAGATE_FAULT;
2004	}
2005
2006	if (ctxt->mode == X86EMUL_MODE_PROT64) {
2007		emulate_ud(ctxt);
2008		return X86EMUL_PROPAGATE_FAULT;
2009	}
2010
2011	setup_syscalls_segments(ctxt, ops, &cs, &ss);
2012
2013	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
2014	switch (ctxt->mode) {
2015	case X86EMUL_MODE_PROT32:
2016		if ((msr_data & 0xfffc) == 0x0) {
2017			emulate_gp(ctxt, 0);
2018			return X86EMUL_PROPAGATE_FAULT;
2019		}
2020		break;
2021	case X86EMUL_MODE_PROT64:
2022		if (msr_data == 0x0) {
2023			emulate_gp(ctxt, 0);
2024			return X86EMUL_PROPAGATE_FAULT;
2025		}
2026		break;
2027	}
2028
2029	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
2030	cs_sel = (u16)msr_data;
2031	cs_sel &= ~SELECTOR_RPL_MASK;
2032	ss_sel = cs_sel + 8;
2033	ss_sel &= ~SELECTOR_RPL_MASK;
2034	if (ctxt->mode == X86EMUL_MODE_PROT64
2035		|| is_long_mode(ctxt->vcpu)) {
2036		cs.d = 0;
2037		cs.l = 1;
2038	}
2039
2040	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
2041	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
2042	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
2043	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
2044
2045	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
2046	c->eip = msr_data;
2047
2048	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
2049	c->regs[VCPU_REGS_RSP] = msr_data;
2050
2051	return X86EMUL_CONTINUE;
2052}
2053
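/*
 * SYSEXIT: return to user mode with CS = IA32_SYSENTER_CS + 16 and
 * SS = CS + 8 for a 32-bit return (+32/+40 for 64-bit), RIP taken
 * from RDX and RSP from RCX; a REX.W prefix selects the 64-bit return.
 */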
2054static int
2055emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
2056{
2057	struct decode_cache *c = &ctxt->decode;
2058	struct desc_struct cs, ss;
2059	u64 msr_data;
2060	int usermode;
2061	u16 cs_sel, ss_sel;
2062
2063	/* inject #GP if in real mode or Virtual 8086 mode */
2064	if (ctxt->mode == X86EMUL_MODE_REAL ||
2065	    ctxt->mode == X86EMUL_MODE_VM86) {
2066		emulate_gp(ctxt, 0);
2067		return X86EMUL_PROPAGATE_FAULT;
2068	}
2069
2070	setup_syscalls_segments(ctxt, ops, &cs, &ss);
2071
2072	if ((c->rex_prefix & 0x8) != 0x0)
2073		usermode = X86EMUL_MODE_PROT64;
2074	else
2075		usermode = X86EMUL_MODE_PROT32;
2076
2077	cs.dpl = 3;
2078	ss.dpl = 3;
2079	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
2080	switch (usermode) {
2081	case X86EMUL_MODE_PROT32:
2082		cs_sel = (u16)(msr_data + 16);
2083		if ((msr_data & 0xfffc) == 0x0) {
2084			emulate_gp(ctxt, 0);
2085			return X86EMUL_PROPAGATE_FAULT;
2086		}
2087		ss_sel = (u16)(msr_data + 24);
2088		break;
2089	case X86EMUL_MODE_PROT64:
2090		cs_sel = (u16)(msr_data + 32);
2091		if (msr_data == 0x0) {
2092			emulate_gp(ctxt, 0);
2093			return X86EMUL_PROPAGATE_FAULT;
2094		}
2095		ss_sel = cs_sel + 8;
2096		cs.d = 0;
2097		cs.l = 1;
2098		break;
2099	}
2100	cs_sel |= SELECTOR_RPL_MASK;
2101	ss_sel |= SELECTOR_RPL_MASK;
2102
2103	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
2104	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
2105	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
2106	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
2107
2108	c->eip = c->regs[VCPU_REGS_RDX];
2109	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];
2110
2111	return X86EMUL_CONTINUE;
2112}
2113
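/*
 * Returns true when CPL does not permit direct port I/O: never in
 * real mode, always in VM86 mode, otherwise when CPL > IOPL.
 */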
2114static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
2115			      struct x86_emulate_ops *ops)
2116{
2117	int iopl;
2118	if (ctxt->mode == X86EMUL_MODE_REAL)
2119		return false;
2120	if (ctxt->mode == X86EMUL_MODE_VM86)
2121		return true;
2122	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
2123	return ops->cpl(ctxt->vcpu) > iopl;
2124}
2125
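/*
 * Consult the I/O permission bitmap of the current TSS: its 16-bit
 * base offset lives at byte 102 of the TSS, and a set bit denies
 * access to the corresponding port.
 */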
2126static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2127					    struct x86_emulate_ops *ops,
2128					    u16 port, u16 len)
2129{
2130	struct desc_struct tr_seg;
2131	int r;
2132	u16 io_bitmap_ptr;
2133	u8 perm, bit_idx = port & 0x7;
2134	unsigned mask = (1 << len) - 1;
2135
2136	ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu);
2137	if (!tr_seg.p)
2138		return false;
2139	if (desc_limit_scaled(&tr_seg) < 103)
2140		return false;
2141	r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2,
2142			  ctxt->vcpu, NULL);
2143	if (r != X86EMUL_CONTINUE)
2144		return false;
2145	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2146		return false;
2147	r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8,
2148			  &perm, 1, ctxt->vcpu, NULL);
2149	if (r != X86EMUL_CONTINUE)
2150		return false;
2151	if ((perm >> bit_idx) & mask)
2152		return false;
2153	return true;
2154}
2155
2156static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2157				 struct x86_emulate_ops *ops,
2158				 u16 port, u16 len)
2159{
2160	if (emulator_bad_iopl(ctxt, ops))
2161		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
2162			return false;
2163	return true;
2164}
2165
2166static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2167				struct x86_emulate_ops *ops,
2168				struct tss_segment_16 *tss)
2169{
2170	struct decode_cache *c = &ctxt->decode;
2171
2172	tss->ip = c->eip;
2173	tss->flag = ctxt->eflags;
2174	tss->ax = c->regs[VCPU_REGS_RAX];
2175	tss->cx = c->regs[VCPU_REGS_RCX];
2176	tss->dx = c->regs[VCPU_REGS_RDX];
2177	tss->bx = c->regs[VCPU_REGS_RBX];
2178	tss->sp = c->regs[VCPU_REGS_RSP];
2179	tss->bp = c->regs[VCPU_REGS_RBP];
2180	tss->si = c->regs[VCPU_REGS_RSI];
2181	tss->di = c->regs[VCPU_REGS_RDI];
2182
2183	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2184	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2185	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2186	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2187	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2188}
2189
2190static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2191				 struct x86_emulate_ops *ops,
2192				 struct tss_segment_16 *tss)
2193{
2194	struct decode_cache *c = &ctxt->decode;
2195	int ret;
2196
2197	c->eip = tss->ip;
2198	ctxt->eflags = tss->flag | 2;
2199	c->regs[VCPU_REGS_RAX] = tss->ax;
2200	c->regs[VCPU_REGS_RCX] = tss->cx;
2201	c->regs[VCPU_REGS_RDX] = tss->dx;
2202	c->regs[VCPU_REGS_RBX] = tss->bx;
2203	c->regs[VCPU_REGS_RSP] = tss->sp;
2204	c->regs[VCPU_REGS_RBP] = tss->bp;
2205	c->regs[VCPU_REGS_RSI] = tss->si;
2206	c->regs[VCPU_REGS_RDI] = tss->di;
2207
2208	/*
2209	 * The SDM says that segment selectors are loaded before segment
2210	 * descriptors.
2211	 */
2212	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
2213	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2214	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2215	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2216	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2217
2218	/*
2219	 * Now load segment descriptors. If a fault happens at this stage,
2220	 * it is handled in the context of the new task.
2221	 */
2222	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
2223	if (ret != X86EMUL_CONTINUE)
2224		return ret;
2225	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2226	if (ret != X86EMUL_CONTINUE)
2227		return ret;
2228	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2229	if (ret != X86EMUL_CONTINUE)
2230		return ret;
2231	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2232	if (ret != X86EMUL_CONTINUE)
2233		return ret;
2234	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2235	if (ret != X86EMUL_CONTINUE)
2236		return ret;
2237
2238	return X86EMUL_CONTINUE;
2239}
2240
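/*
 * 16-bit task switch: save the outgoing state into the old TSS, read
 * the new TSS, optionally record the old TSS selector as its previous
 * task link, and load the incoming state.
 */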
2241static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2242			  struct x86_emulate_ops *ops,
2243			  u16 tss_selector, u16 old_tss_sel,
2244			  ulong old_tss_base, struct desc_struct *new_desc)
2245{
2246	struct tss_segment_16 tss_seg;
2247	int ret;
2248	u32 err, new_tss_base = get_desc_base(new_desc);
2249
2250	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2251			    &err);
2252	if (ret == X86EMUL_PROPAGATE_FAULT) {
2253		emulate_pf(ctxt, old_tss_base, err);
2254		return ret;
2255	}
2256
2257	save_state_to_tss16(ctxt, ops, &tss_seg);
2258
2259	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2260			     &err);
2261	if (ret == X86EMUL_PROPAGATE_FAULT) {
2262		emulate_pf(ctxt, old_tss_base, err);
2263		return ret;
2264	}
2265
2266	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2267			    &err);
2268	if (ret == X86EMUL_PROPAGATE_FAULT) {
2269		emulate_pf(ctxt, new_tss_base, err);
2270		return ret;
2271	}
2272
2273	if (old_tss_sel != 0xffff) {
2274		tss_seg.prev_task_link = old_tss_sel;
2275
2276		ret = ops->write_std(new_tss_base,
2277				     &tss_seg.prev_task_link,
2278				     sizeof tss_seg.prev_task_link,
2279				     ctxt->vcpu, &err);
2280		if (ret == X86EMUL_PROPAGATE_FAULT) {
2281			emulate_pf(ctxt, new_tss_base, err);
2282			return ret;
2283		}
2284	}
2285
2286	return load_state_from_tss16(ctxt, ops, &tss_seg);
2287}
2288
2289static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2290				struct x86_emulate_ops *ops,
2291				struct tss_segment_32 *tss)
2292{
2293	struct decode_cache *c = &ctxt->decode;
2294
2295	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
2296	tss->eip = c->eip;
2297	tss->eflags = ctxt->eflags;
2298	tss->eax = c->regs[VCPU_REGS_RAX];
2299	tss->ecx = c->regs[VCPU_REGS_RCX];
2300	tss->edx = c->regs[VCPU_REGS_RDX];
2301	tss->ebx = c->regs[VCPU_REGS_RBX];
2302	tss->esp = c->regs[VCPU_REGS_RSP];
2303	tss->ebp = c->regs[VCPU_REGS_RBP];
2304	tss->esi = c->regs[VCPU_REGS_RSI];
2305	tss->edi = c->regs[VCPU_REGS_RDI];
2306
2307	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2308	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2309	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2310	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2311	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
2312	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
2313	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2314}
2315
2316static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2317				 struct x86_emulate_ops *ops,
2318				 struct tss_segment_32 *tss)
2319{
2320	struct decode_cache *c = &ctxt->decode;
2321	int ret;
2322
2323	if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) {
2324		emulate_gp(ctxt, 0);
2325		return X86EMUL_PROPAGATE_FAULT;
2326	}
2327	c->eip = tss->eip;
2328	ctxt->eflags = tss->eflags | 2;
2329	c->regs[VCPU_REGS_RAX] = tss->eax;
2330	c->regs[VCPU_REGS_RCX] = tss->ecx;
2331	c->regs[VCPU_REGS_RDX] = tss->edx;
2332	c->regs[VCPU_REGS_RBX] = tss->ebx;
2333	c->regs[VCPU_REGS_RSP] = tss->esp;
2334	c->regs[VCPU_REGS_RBP] = tss->ebp;
2335	c->regs[VCPU_REGS_RSI] = tss->esi;
2336	c->regs[VCPU_REGS_RDI] = tss->edi;
2337
2338	/*
2339	 * The SDM says that segment selectors are loaded before segment
2340	 * descriptors.
2341	 */
2342	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
2343	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2344	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2345	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2346	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2347	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
2348	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
2349
2350	/*
2351	 * Now load segment descriptors. If a fault happens at this stage,
2352	 * it is handled in the context of the new task.
2353	 */
2354	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
2355	if (ret != X86EMUL_CONTINUE)
2356		return ret;
2357	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2358	if (ret != X86EMUL_CONTINUE)
2359		return ret;
2360	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2361	if (ret != X86EMUL_CONTINUE)
2362		return ret;
2363	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2364	if (ret != X86EMUL_CONTINUE)
2365		return ret;
2366	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2367	if (ret != X86EMUL_CONTINUE)
2368		return ret;
2369	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2370	if (ret != X86EMUL_CONTINUE)
2371		return ret;
2372	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2373	if (ret != X86EMUL_CONTINUE)
2374		return ret;
2375
2376	return X86EMUL_CONTINUE;
2377}
2378
2379static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2380			  struct x86_emulate_ops *ops,
2381			  u16 tss_selector, u16 old_tss_sel,
2382			  ulong old_tss_base, struct desc_struct *new_desc)
2383{
2384	struct tss_segment_32 tss_seg;
2385	int ret;
2386	u32 err, new_tss_base = get_desc_base(new_desc);
2387
2388	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2389			    &err);
2390	if (ret == X86EMUL_PROPAGATE_FAULT) {
2391		emulate_pf(ctxt, old_tss_base, err);
2392		return ret;
2393	}
2394
2395	save_state_to_tss32(ctxt, ops, &tss_seg);
2396
2397	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2398			     &err);
2399	if (ret == X86EMUL_PROPAGATE_FAULT) {
2400		emulate_pf(ctxt, old_tss_base, err);
2401		return ret;
2402	}
2403
2404	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2405			    &err);
2406	if (ret == X86EMUL_PROPAGATE_FAULT) {
2407		emulate_pf(ctxt, new_tss_base, err);
2408		return ret;
2409	}
2410
2411	if (old_tss_sel != 0xffff) {
2412		tss_seg.prev_task_link = old_tss_sel;
2413
2414		ret = ops->write_std(new_tss_base,
2415				     &tss_seg.prev_task_link,
2416				     sizeof tss_seg.prev_task_link,
2417				     ctxt->vcpu, &err);
2418		if (ret == X86EMUL_PROPAGATE_FAULT) {
2419			emulate_pf(ctxt, new_tss_base, err);
2420			return ret;
2421		}
2422	}
2423
2424	return load_state_from_tss32(ctxt, ops, &tss_seg);
2425}
2426
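/*
 * Common task-switch path: check the target TSS privilege and limit,
 * clear the old descriptor's busy bit for IRET/JMP, clear NT on IRET,
 * perform the 16- or 32-bit switch according to the descriptor type,
 * set NT and the back link for CALL/gate switches, mark the new TSS
 * busy, set CR0.TS, load TR and finally push any error code.
 */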
2427static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2428				   struct x86_emulate_ops *ops,
2429				   u16 tss_selector, int reason,
2430				   bool has_error_code, u32 error_code)
2431{
2432	struct desc_struct curr_tss_desc, next_tss_desc;
2433	int ret;
2434	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2435	ulong old_tss_base =
2436		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
2437	u32 desc_limit;
2438
2439
2440	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2441	if (ret != X86EMUL_CONTINUE)
2442		return ret;
2443	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2444	if (ret != X86EMUL_CONTINUE)
2445		return ret;
2446
2447
2448	if (reason != TASK_SWITCH_IRET) {
2449		if ((tss_selector & 3) > next_tss_desc.dpl ||
2450		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
2451			emulate_gp(ctxt, 0);
2452			return X86EMUL_PROPAGATE_FAULT;
2453		}
2454	}
2455
2456	desc_limit = desc_limit_scaled(&next_tss_desc);
2457	if (!next_tss_desc.p ||
2458	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2459	     desc_limit < 0x2b)) {
2460		emulate_ts(ctxt, tss_selector & 0xfffc);
2461		return X86EMUL_PROPAGATE_FAULT;
2462	}
2463
2464	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2465		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2466		write_segment_descriptor(ctxt, ops, old_tss_sel,
2467					 &curr_tss_desc);
2468	}
2469
2470	if (reason == TASK_SWITCH_IRET)
2471		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2472
2473	/* Set the back link to the previous task only if the NT bit is set
2474	   in eflags; note that old_tss_sel is not used after this point. */
2475	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2476		old_tss_sel = 0xffff;
2477
2478	if (next_tss_desc.type & 8)
2479		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2480				     old_tss_base, &next_tss_desc);
2481	else
2482		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2483				     old_tss_base, &next_tss_desc);
2484	if (ret != X86EMUL_CONTINUE)
2485		return ret;
2486
2487	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2488		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2489
2490	if (reason != TASK_SWITCH_IRET) {
2491		next_tss_desc.type |= (1 << 1); /* set busy flag */
2492		write_segment_descriptor(ctxt, ops, tss_selector,
2493					 &next_tss_desc);
2494	}
2495
2496	ops->set_cr(0,  ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2497	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2498	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2499
2500	if (has_error_code) {
2501		struct decode_cache *c = &ctxt->decode;
2502
2503		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2504		c->lock_prefix = 0;
2505		c->src.val = (unsigned long) error_code;
2506		emulate_push(ctxt, ops);
2507	}
2508
2509	return ret;
2510}
2511
2512int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2513			 struct x86_emulate_ops *ops,
2514			 u16 tss_selector, int reason,
2515			 bool has_error_code, u32 error_code)
2516{
2517	struct decode_cache *c = &ctxt->decode;
2518	int rc;
2519
2520	c->eip = ctxt->eip;
2521	c->dst.type = OP_NONE;
2522
2523	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2524				     has_error_code, error_code);
2525
2526	if (rc == X86EMUL_CONTINUE) {
2527		rc = writeback(ctxt, ops);
2528		if (rc == X86EMUL_CONTINUE)
2529			ctxt->eip = c->eip;
2530	}
2531
2532	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2533}
2534
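/*
 * Advance the string index register (RSI/RDI) by the operand size,
 * forward or backward according to EFLAGS.DF, and recompute the
 * operand's effective address.
 */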
2535static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
2536			    int reg, struct operand *op)
2537{
2538	struct decode_cache *c = &ctxt->decode;
2539	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2540
2541	register_address_increment(c, &c->regs[reg], df * op->bytes);
2542	op->ptr = (unsigned long *)register_address(c,  base, c->regs[reg]);
2543}
2544
2545int
2546x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
2547{
2548	u64 msr_data;
2549	struct decode_cache *c = &ctxt->decode;
2550	int rc = X86EMUL_CONTINUE;
2551	int saved_dst_type = c->dst.type;
2552
2553	ctxt->decode.mem_read.pos = 0;
2554
2555	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2556		emulate_ud(ctxt);
2557		goto done;
2558	}
2559
2560	/* LOCK prefix is allowed only with some instructions */
2561	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
2562		emulate_ud(ctxt);
2563		goto done;
2564	}
2565
2566	/* Privileged instructions can be executed only at CPL 0 */
2567	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
2568		emulate_gp(ctxt, 0);
2569		goto done;
2570	}
2571
2572	if (c->rep_prefix && (c->d & String)) {
2573		ctxt->restart = true;
2574		/* All REP prefixes have the same first termination condition */
2575		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2576		string_done:
2577			ctxt->restart = false;
2578			ctxt->eip = c->eip;
2579			goto done;
2580		}
2581		/* The second termination condition applies only to REPE
2582		 * and REPNE. If the repeat string operation prefix is
2583		 * REPE/REPZ or REPNE/REPNZ, test the corresponding
2584		 * termination condition:
2585		 * 	- REPE/REPZ: done if ZF = 0
2586		 * 	- REPNE/REPNZ: done if ZF = 1
2587		 */
2588		if ((c->b == 0xa6) || (c->b == 0xa7) ||
2589		    (c->b == 0xae) || (c->b == 0xaf)) {
2590			if ((c->rep_prefix == REPE_PREFIX) &&
2591			    ((ctxt->eflags & EFLG_ZF) == 0))
2592				goto string_done;
2593			if ((c->rep_prefix == REPNE_PREFIX) &&
2594			    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
2595				goto string_done;
2596		}
2597		c->eip = ctxt->eip;
2598	}
2599
2600	if (c->src.type == OP_MEM) {
2601		rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr,
2602					c->src.valptr, c->src.bytes);
2603		if (rc != X86EMUL_CONTINUE)
2604			goto done;
2605		c->src.orig_val64 = c->src.val64;
2606	}
2607
2608	if (c->src2.type == OP_MEM) {
2609		rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr,
2610					&c->src2.val, c->src2.bytes);
2611		if (rc != X86EMUL_CONTINUE)
2612			goto done;
2613	}
2614
2615	if ((c->d & DstMask) == ImplicitOps)
2616		goto special_insn;
2617
2618
2619	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
2620		/* optimisation - skip the slow emulated read when the
2621		 * destination is write-only (Mov) */
2621		rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr,
2622				   &c->dst.val, c->dst.bytes);
2623		if (rc != X86EMUL_CONTINUE)
2624			goto done;
2625	}
2626	c->dst.orig_val = c->dst.val;
2627
2628special_insn:
2629
2630	if (c->twobyte)
2631		goto twobyte_insn;
2632
2633	switch (c->b) {
2634	case 0x00 ... 0x05:
2635	      add:		/* add */
2636		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
2637		break;
2638	case 0x06:		/* push es */
2639		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
2640		break;
2641	case 0x07:		/* pop es */
2642		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
2643		if (rc != X86EMUL_CONTINUE)
2644			goto done;
2645		break;
2646	case 0x08 ... 0x0d:
2647	      or:		/* or */
2648		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2649		break;
2650	case 0x0e:		/* push cs */
2651		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
2652		break;
2653	case 0x10 ... 0x15:
2654	      adc:		/* adc */
2655		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
2656		break;
2657	case 0x16:		/* push ss */
2658		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
2659		break;
2660	case 0x17:		/* pop ss */
2661		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
2662		if (rc != X86EMUL_CONTINUE)
2663			goto done;
2664		break;
2665	case 0x18 ... 0x1d:
2666	      sbb:		/* sbb */
2667		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
2668		break;
2669	case 0x1e:		/* push ds */
2670		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
2671		break;
2672	case 0x1f:		/* pop ds */
2673		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
2674		if (rc != X86EMUL_CONTINUE)
2675			goto done;
2676		break;
2677	case 0x20 ... 0x25:
2678	      and:		/* and */
2679		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
2680		break;
2681	case 0x28 ... 0x2d:
2682	      sub:		/* sub */
2683		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
2684		break;
2685	case 0x30 ... 0x35:
2686	      xor:		/* xor */
2687		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
2688		break;
2689	case 0x38 ... 0x3d:
2690	      cmp:		/* cmp */
2691		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2692		break;
2693	case 0x40 ... 0x47: /* inc r16/r32 */
2694		emulate_1op("inc", c->dst, ctxt->eflags);
2695		break;
2696	case 0x48 ... 0x4f: /* dec r16/r32 */
2697		emulate_1op("dec", c->dst, ctxt->eflags);
2698		break;
2699	case 0x50 ... 0x57:  /* push reg */
2700		emulate_push(ctxt, ops);
2701		break;
2702	case 0x58 ... 0x5f: /* pop reg */
2703	pop_instruction:
2704		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
2705		if (rc != X86EMUL_CONTINUE)
2706			goto done;
2707		break;
2708	case 0x60:	/* pusha */
2709		rc = emulate_pusha(ctxt, ops);
2710		if (rc != X86EMUL_CONTINUE)
2711			goto done;
2712		break;
2713	case 0x61:	/* popa */
2714		rc = emulate_popa(ctxt, ops);
2715		if (rc != X86EMUL_CONTINUE)
2716			goto done;
2717		break;
2718	case 0x63:		/* movsxd */
2719		if (ctxt->mode != X86EMUL_MODE_PROT64)
2720			goto cannot_emulate;
2721		c->dst.val = (s32) c->src.val;
2722		break;
2723	case 0x68: /* push imm */
2724	case 0x6a: /* push imm8 */
2725		emulate_push(ctxt, ops);
2726		break;
2727	case 0x6c:		/* insb */
2728	case 0x6d:		/* insw/insd */
2729		c->dst.bytes = min(c->dst.bytes, 4u);
2730		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2731					  c->dst.bytes)) {
2732			emulate_gp(ctxt, 0);
2733			goto done;
2734		}
2735		if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
2736				     c->regs[VCPU_REGS_RDX], &c->dst.val))
2737			goto done; /* IO is needed, skip writeback */
2738		break;
2739	case 0x6e:		/* outsb */
2740	case 0x6f:		/* outsw/outsd */
2741		c->src.bytes = min(c->src.bytes, 4u);
2742		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2743					  c->src.bytes)) {
2744			emulate_gp(ctxt, 0);
2745			goto done;
2746		}
2747		ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
2748				      &c->src.val, 1, ctxt->vcpu);
2749
2750		c->dst.type = OP_NONE; /* nothing to writeback */
2751		break;
2752	case 0x70 ... 0x7f: /* jcc (short) */
2753		if (test_cc(c->b, ctxt->eflags))
2754			jmp_rel(c, c->src.val);
2755		break;
2756	case 0x80 ... 0x83:	/* Grp1 */
2757		switch (c->modrm_reg) {
2758		case 0:
2759			goto add;
2760		case 1:
2761			goto or;
2762		case 2:
2763			goto adc;
2764		case 3:
2765			goto sbb;
2766		case 4:
2767			goto and;
2768		case 5:
2769			goto sub;
2770		case 6:
2771			goto xor;
2772		case 7:
2773			goto cmp;
2774		}
2775		break;
2776	case 0x84 ... 0x85:
2777	test:
2778		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
2779		break;
2780	case 0x86 ... 0x87:	/* xchg */
2781	xchg:
2782		/* Write back the register source. */
2783		switch (c->dst.bytes) {
2784		case 1:
2785			*(u8 *) c->src.ptr = (u8) c->dst.val;
2786			break;
2787		case 2:
2788			*(u16 *) c->src.ptr = (u16) c->dst.val;
2789			break;
2790		case 4:
2791			*c->src.ptr = (u32) c->dst.val;
2792			break;	/* 64b reg: zero-extend */
2793		case 8:
2794			*c->src.ptr = c->dst.val;
2795			break;
2796		}
2797		/*
2798		 * Write back the memory destination with implicit LOCK
2799		 * prefix.
2800		 */
2801		c->dst.val = c->src.val;
2802		c->lock_prefix = 1;
2803		break;
2804	case 0x88 ... 0x8b:	/* mov */
2805		goto mov;
2806	case 0x8c:  /* mov r/m, sreg */
2807		if (c->modrm_reg > VCPU_SREG_GS) {
2808			emulate_ud(ctxt);
2809			goto done;
2810		}
2811		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
2812		break;
2813	case 0x8d: /* lea r16/r32, m */
2814		c->dst.val = c->modrm_ea;
2815		break;
2816	case 0x8e: { /* mov seg, r/m16 */
2817		uint16_t sel;
2818
2819		sel = c->src.val;
2820
2821		if (c->modrm_reg == VCPU_SREG_CS ||
2822		    c->modrm_reg > VCPU_SREG_GS) {
2823			emulate_ud(ctxt);
2824			goto done;
2825		}
2826
2827		if (c->modrm_reg == VCPU_SREG_SS)
2828			ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
2829
2830		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
2831
2832		c->dst.type = OP_NONE;  /* Disable writeback. */
2833		break;
2834	}
2835	case 0x8f:		/* pop (sole member of Grp1a) */
2836		rc = emulate_grp1a(ctxt, ops);
2837		if (rc != X86EMUL_CONTINUE)
2838			goto done;
2839		break;
2840	case 0x90: /* nop / xchg r8,rax */
2841		if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) {
2842			c->dst.type = OP_NONE;  /* nop */
2843			break;
2844		}
2845	case 0x91 ... 0x97: /* xchg reg,rax */
2846		c->src.type = OP_REG;
2847		c->src.bytes = c->op_bytes;
2848		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
2849		c->src.val = *(c->src.ptr);
2850		goto xchg;
2851	case 0x9c: /* pushf */
2852		c->src.val =  (unsigned long) ctxt->eflags;
2853		emulate_push(ctxt, ops);
2854		break;
2855	case 0x9d: /* popf */
2856		c->dst.type = OP_REG;
2857		c->dst.ptr = (unsigned long *) &ctxt->eflags;
2858		c->dst.bytes = c->op_bytes;
2859		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2860		if (rc != X86EMUL_CONTINUE)
2861			goto done;
2862		break;
2863	case 0xa0 ... 0xa3:	/* mov */
2864	case 0xa4 ... 0xa5:	/* movs */
2865		goto mov;
2866	case 0xa6 ... 0xa7:	/* cmps */
2867		c->dst.type = OP_NONE; /* Disable writeback. */
2868		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2869		goto cmp;
2870	case 0xa8 ... 0xa9:	/* test ax, imm */
2871		goto test;
2872	case 0xaa ... 0xab:	/* stos */
2873		c->dst.val = c->regs[VCPU_REGS_RAX];
2874		break;
2875	case 0xac ... 0xad:	/* lods */
2876		goto mov;
2877	case 0xae ... 0xaf:	/* scas */
2878		DPRINTF("Urk! I don't handle SCAS.\n");
2879		goto cannot_emulate;
2880	case 0xb0 ... 0xbf: /* mov r, imm */
2881		goto mov;
2882	case 0xc0 ... 0xc1:
2883		emulate_grp2(ctxt);
2884		break;
2885	case 0xc3: /* ret */
2886		c->dst.type = OP_REG;
2887		c->dst.ptr = &c->eip;
2888		c->dst.bytes = c->op_bytes;
2889		goto pop_instruction;
2890	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
2891	mov:
2892		c->dst.val = c->src.val;
2893		break;
2894	case 0xcb:		/* ret far */
2895		rc = emulate_ret_far(ctxt, ops);
2896		if (rc != X86EMUL_CONTINUE)
2897			goto done;
2898		break;
2899	case 0xd0 ... 0xd1:	/* Grp2 */
2900		c->src.val = 1;
2901		emulate_grp2(ctxt);
2902		break;
2903	case 0xd2 ... 0xd3:	/* Grp2 */
2904		c->src.val = c->regs[VCPU_REGS_RCX];
2905		emulate_grp2(ctxt);
2906		break;
2907	case 0xe4: 	/* inb */
2908	case 0xe5: 	/* in */
2909		goto do_io_in;
2910	case 0xe6: /* outb */
2911	case 0xe7: /* out */
2912		goto do_io_out;
2913	case 0xe8: /* call (near) */ {
2914		long int rel = c->src.val;
2915		c->src.val = (unsigned long) c->eip;
2916		jmp_rel(c, rel);
2917		emulate_push(ctxt, ops);
2918		break;
2919	}
2920	case 0xe9: /* jmp rel */
2921		goto jmp;
2922	case 0xea: { /* jmp far */
2923		unsigned short sel;
2924	jump_far:
2925		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
2926
2927		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
2928			goto done;
2929
2930		c->eip = 0;
2931		memcpy(&c->eip, c->src.valptr, c->op_bytes);
2932		break;
2933	}
2934	case 0xeb:
2935	      jmp:		/* jmp rel short */
2936		jmp_rel(c, c->src.val);
2937		c->dst.type = OP_NONE; /* Disable writeback. */
2938		break;
2939	case 0xec: /* in al,dx */
2940	case 0xed: /* in (e/r)ax,dx */
2941		c->src.val = c->regs[VCPU_REGS_RDX];
2942	do_io_in:
2943		c->dst.bytes = min(c->dst.bytes, 4u);
2944		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2945			emulate_gp(ctxt, 0);
2946			goto done;
2947		}
2948		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
2949				     &c->dst.val))
2950			goto done; /* IO is needed */
2951		break;
2952	case 0xee: /* out dx,al */
2953	case 0xef: /* out dx,(e/r)ax */
2954		c->src.val = c->regs[VCPU_REGS_RDX];
2955	do_io_out:
2956		c->dst.bytes = min(c->dst.bytes, 4u);
2957		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2958			emulate_gp(ctxt, 0);
2959			goto done;
2960		}
2961		ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
2962				      ctxt->vcpu);
2963		c->dst.type = OP_NONE;	/* Disable writeback. */
2964		break;
2965	case 0xf4:              /* hlt */
2966		ctxt->vcpu->arch.halt_request = 1;
2967		break;
2968	case 0xf5:	/* cmc */
2969		/* complement carry flag from eflags reg */
2970		ctxt->eflags ^= EFLG_CF;
2971		c->dst.type = OP_NONE;	/* Disable writeback. */
2972		break;
2973	case 0xf6 ... 0xf7:	/* Grp3 */
2974		if (!emulate_grp3(ctxt, ops))
2975			goto cannot_emulate;
2976		break;
2977	case 0xf8: /* clc */
2978		ctxt->eflags &= ~EFLG_CF;
2979		c->dst.type = OP_NONE;	/* Disable writeback. */
2980		break;
2981	case 0xfa: /* cli */
2982		if (emulator_bad_iopl(ctxt, ops)) {
2983			emulate_gp(ctxt, 0);
2984			goto done;
2985		} else {
2986			ctxt->eflags &= ~X86_EFLAGS_IF;
2987			c->dst.type = OP_NONE;	/* Disable writeback. */
2988		}
2989		break;
2990	case 0xfb: /* sti */
2991		if (emulator_bad_iopl(ctxt, ops)) {
2992			emulate_gp(ctxt, 0);
2993			goto done;
2994		} else {
2995			ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
2996			ctxt->eflags |= X86_EFLAGS_IF;
2997			c->dst.type = OP_NONE;	/* Disable writeback. */
2998		}
2999		break;
3000	case 0xfc: /* cld */
3001		ctxt->eflags &= ~EFLG_DF;
3002		c->dst.type = OP_NONE;	/* Disable writeback. */
3003		break;
3004	case 0xfd: /* std */
3005		ctxt->eflags |= EFLG_DF;
3006		c->dst.type = OP_NONE;	/* Disable writeback. */
3007		break;
3008	case 0xfe: /* Grp4 */
3009	grp45:
3010		rc = emulate_grp45(ctxt, ops);
3011		if (rc != X86EMUL_CONTINUE)
3012			goto done;
3013		break;
3014	case 0xff: /* Grp5 */
3015		if (c->modrm_reg == 5)
3016			goto jump_far;
3017		goto grp45;
3018	}
3019
3020writeback:
3021	rc = writeback(ctxt, ops);
3022	if (rc != X86EMUL_CONTINUE)
3023		goto done;
3024
3025	/*
3026	 * restore dst type in case the decoding is reused
3027	 * (happens for string instructions)
3028	 */
3029	c->dst.type = saved_dst_type;
3030
3031	if ((c->d & SrcMask) == SrcSI)
3032		string_addr_inc(ctxt, seg_override_base(ctxt, ops, c),
3033				VCPU_REGS_RSI, &c->src);
3034
3035	if ((c->d & DstMask) == DstDI)
3036		string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI,
3037				&c->dst);
3038
3039	if (c->rep_prefix && (c->d & String)) {
3040		struct read_cache *rc = &ctxt->decode.io_read;
3041		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3042		/*
3043		 * Re-enter the guest when the PIO read-ahead buffer is empty
3044		 * or, if it is not used, after every 1024 iterations.
3045		 */
3046		if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
3047		    (rc->end != 0 && rc->end == rc->pos))
3048			ctxt->restart = false;
3049	}
3050	/*
3051	 * reset the read cache here in case the string instruction is
3052	 * restarted without being decoded again
3053	 */
3054	ctxt->decode.mem_read.end = 0;
3055	ctxt->eip = c->eip;
3056
3057done:
3058	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
3059
3060twobyte_insn:
3061	switch (c->b) {
3062	case 0x01: /* lgdt, lidt, lmsw */
3063		switch (c->modrm_reg) {
3064			u16 size;
3065			unsigned long address;
3066
3067		case 0: /* vmcall */
3068			if (c->modrm_mod != 3 || c->modrm_rm != 1)
3069				goto cannot_emulate;
3070
3071			rc = kvm_fix_hypercall(ctxt->vcpu);
3072			if (rc != X86EMUL_CONTINUE)
3073				goto done;
3074
3075			/* Let the processor re-execute the fixed hypercall */
3076			c->eip = ctxt->eip;
3077			/* Disable writeback. */
3078			c->dst.type = OP_NONE;
3079			break;
3080		case 2: /* lgdt */
3081			rc = read_descriptor(ctxt, ops, c->src.ptr,
3082					     &size, &address, c->op_bytes);
3083			if (rc != X86EMUL_CONTINUE)
3084				goto done;
3085			realmode_lgdt(ctxt->vcpu, size, address);
3086			/* Disable writeback. */
3087			c->dst.type = OP_NONE;
3088			break;
3089		case 3: /* lidt/vmmcall */
3090			if (c->modrm_mod == 3) {
3091				switch (c->modrm_rm) {
3092				case 1:
3093					rc = kvm_fix_hypercall(ctxt->vcpu);
3094					if (rc != X86EMUL_CONTINUE)
3095						goto done;
3096					break;
3097				default:
3098					goto cannot_emulate;
3099				}
3100			} else {
3101				rc = read_descriptor(ctxt, ops, c->src.ptr,
3102						     &size, &address,
3103						     c->op_bytes);
3104				if (rc != X86EMUL_CONTINUE)
3105					goto done;
3106				realmode_lidt(ctxt->vcpu, size, address);
3107			}
3108			/* Disable writeback. */
3109			c->dst.type = OP_NONE;
3110			break;
3111		case 4: /* smsw */
3112			c->dst.bytes = 2;
3113			c->dst.val = ops->get_cr(0, ctxt->vcpu);
3114			break;
3115		case 6: /* lmsw */
3116			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
3117				    (c->src.val & 0x0f), ctxt->vcpu);
3118			c->dst.type = OP_NONE;
3119			break;
3120		case 5: /* not defined */
3121			emulate_ud(ctxt);
3122			goto done;
3123		case 7: /* invlpg*/
3124			emulate_invlpg(ctxt->vcpu, c->modrm_ea);
3125			/* Disable writeback. */
3126			c->dst.type = OP_NONE;
3127			break;
3128		default:
3129			goto cannot_emulate;
3130		}
3131		break;
3132	case 0x05: 		/* syscall */
3133		rc = emulate_syscall(ctxt, ops);
3134		if (rc != X86EMUL_CONTINUE)
3135			goto done;
3136		else
3137			goto writeback;
3138		break;
3139	case 0x06:
3140		emulate_clts(ctxt->vcpu);
3141		c->dst.type = OP_NONE;
3142		break;
3143	case 0x09:		/* wbinvd */
3144		kvm_emulate_wbinvd(ctxt->vcpu);
3145		c->dst.type = OP_NONE;
3146		break;
3147	case 0x08:		/* invd */
3148	case 0x0d:		/* GrpP (prefetch) */
3149	case 0x18:		/* Grp16 (prefetch/nop) */
3150		c->dst.type = OP_NONE;
3151		break;
3152	case 0x20: /* mov cr, reg */
3153		switch (c->modrm_reg) {
3154		case 1:
3155		case 5 ... 7:
3156		case 9 ... 15:
3157			emulate_ud(ctxt);
3158			goto done;
3159		}
3160		c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3161		c->dst.type = OP_NONE;	/* no writeback */
3162		break;
3163	case 0x21: /* mov from dr to reg */
3164		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3165		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3166			emulate_ud(ctxt);
3167			goto done;
3168		}
3169		ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu);
3170		c->dst.type = OP_NONE;	/* no writeback */
3171		break;
3172	case 0x22: /* mov reg, cr */
3173		if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) {
3174			emulate_gp(ctxt, 0);
3175			goto done;
3176		}
3177		c->dst.type = OP_NONE;
3178		break;
3179	case 0x23: /* mov from reg to dr */
3180		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3181		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3182			emulate_ud(ctxt);
3183			goto done;
3184		}
3185
3186		if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] &
3187				((ctxt->mode == X86EMUL_MODE_PROT64) ?
3188				 ~0ULL : ~0U), ctxt->vcpu) < 0) {
3189			/* #UD condition is already handled by the code above */
3190			emulate_gp(ctxt, 0);
3191			goto done;
3192		}
3193
3194		c->dst.type = OP_NONE;	/* no writeback */
3195		break;
3196	case 0x30:
3197		/* wrmsr */
3198		msr_data = (u32)c->regs[VCPU_REGS_RAX]
3199			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
3200		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3201			emulate_gp(ctxt, 0);
3202			goto done;
3203		}
3204		rc = X86EMUL_CONTINUE;
3205		c->dst.type = OP_NONE;
3206		break;
3207	case 0x32:
3208		/* rdmsr */
3209		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3210			emulate_gp(ctxt, 0);
3211			goto done;
3212		} else {
3213			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3214			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3215		}
3216		rc = X86EMUL_CONTINUE;
3217		c->dst.type = OP_NONE;
3218		break;
3219	case 0x34:		/* sysenter */
3220		rc = emulate_sysenter(ctxt, ops);
3221		if (rc != X86EMUL_CONTINUE)
3222			goto done;
3223		else
3224			goto writeback;
3225		break;
3226	case 0x35:		/* sysexit */
3227		rc = emulate_sysexit(ctxt, ops);
3228		if (rc != X86EMUL_CONTINUE)
3229			goto done;
3230		else
3231			goto writeback;
3232		break;
3233	case 0x40 ... 0x4f:	/* cmov */
3234		c->dst.val = c->dst.orig_val = c->src.val;
3235		if (!test_cc(c->b, ctxt->eflags))
3236			c->dst.type = OP_NONE; /* no writeback */
3237		break;
3238	case 0x80 ... 0x8f: /* jnz rel, etc*/
3239		if (test_cc(c->b, ctxt->eflags))
3240			jmp_rel(c, c->src.val);
3241		c->dst.type = OP_NONE;
3242		break;
3243	case 0xa0:	  /* push fs */
3244		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
3245		break;
3246	case 0xa1:	 /* pop fs */
3247		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3248		if (rc != X86EMUL_CONTINUE)
3249			goto done;
3250		break;
3251	case 0xa3:
3252	      bt:		/* bt */
3253		c->dst.type = OP_NONE;
3254		/* only subword offset */
3255		c->src.val &= (c->dst.bytes << 3) - 1;
3256		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3257		break;
3258	case 0xa4: /* shld imm8, r, r/m */
3259	case 0xa5: /* shld cl, r, r/m */
3260		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3261		break;
3262	case 0xa8:	/* push gs */
3263		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
3264		break;
3265	case 0xa9:	/* pop gs */
3266		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3267		if (rc != X86EMUL_CONTINUE)
3268			goto done;
3269		break;
3270	case 0xab:
3271	      bts:		/* bts */
3272		/* only subword offset */
3273		c->src.val &= (c->dst.bytes << 3) - 1;
3274		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3275		break;
3276	case 0xac: /* shrd imm8, r, r/m */
3277	case 0xad: /* shrd cl, r, r/m */
3278		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3279		break;
3280	case 0xae:              /* clflush */
3281		break;
3282	case 0xb0 ... 0xb1:	/* cmpxchg */
3283		/*
3284		 * Save real source value, then compare EAX against
3285		 * destination.
3286		 */
3287		c->src.orig_val = c->src.val;
3288		c->src.val = c->regs[VCPU_REGS_RAX];
3289		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3290		if (ctxt->eflags & EFLG_ZF) {
3291			/* Success: write back to memory. */
3292			c->dst.val = c->src.orig_val;
3293		} else {
3294			/* Failure: write the value we saw to EAX. */
3295			c->dst.type = OP_REG;
3296			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3297		}
3298		break;
3299	case 0xb3:
3300	      btr:		/* btr */
3301		/* only subword offset */
3302		c->src.val &= (c->dst.bytes << 3) - 1;
3303		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3304		break;
3305	case 0xb6 ... 0xb7:	/* movzx */
3306		c->dst.bytes = c->op_bytes;
3307		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3308						       : (u16) c->src.val;
3309		break;
3310	case 0xba:		/* Grp8 */
3311		switch (c->modrm_reg & 3) {
3312		case 0:
3313			goto bt;
3314		case 1:
3315			goto bts;
3316		case 2:
3317			goto btr;
3318		case 3:
3319			goto btc;
3320		}
3321		break;
3322	case 0xbb:
3323	      btc:		/* btc */
3324		/* only subword offset */
3325		c->src.val &= (c->dst.bytes << 3) - 1;
3326		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3327		break;
3328	case 0xbe ... 0xbf:	/* movsx */
3329		c->dst.bytes = c->op_bytes;
3330		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3331							(s16) c->src.val;
3332		break;
3333	case 0xc3:		/* movnti */
3334		c->dst.bytes = c->op_bytes;
3335		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3336							(u64) c->src.val;
3337		break;
3338	case 0xc7:		/* Grp9 (cmpxchg8b) */
3339		rc = emulate_grp9(ctxt, ops);
3340		if (rc != X86EMUL_CONTINUE)
3341			goto done;
3342		break;
3343	}
3344	goto writeback;
3345
3346cannot_emulate:
3347	DPRINTF("Cannot emulate %02x\n", c->b);
3348	return -1;
3349}
3350