/*	$OpenBSD: mmio.c,v 1.3 2024/02/10 12:31:16 dv Exp $	*/

/*
 * Copyright (c) 2022 Dave Voutila <dv@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <errno.h>
#include <string.h>

#include <sys/types.h>
#include <machine/specialreg.h>

#include "vmd.h"
#include "mmio.h"

#define MMIO_DEBUG 0

extern char *__progname;

struct x86_decode_state {
	uint8_t	s_bytes[15];
	size_t	s_len;
	size_t	s_idx;
};

enum decode_result {
	DECODE_ERROR = 0,	/* Something went wrong. */
	DECODE_DONE,		/* Decode success and no more work needed. */
	DECODE_MORE,		/* Decode success and more work required. */
};

static const char *str_cpu_mode(int);
static const char *str_decode_res(enum decode_result);
static const char *str_opcode(struct x86_opcode *);
static const char *str_operand_enc(struct x86_opcode *);
static const char *str_reg(int);
static const char *str_sreg(int);
static int detect_cpu_mode(struct vcpu_reg_state *);

static enum decode_result decode_prefix(struct x86_decode_state *,
    struct x86_insn *);
static enum decode_result decode_opcode(struct x86_decode_state *,
    struct x86_insn *);
static enum decode_result decode_modrm(struct x86_decode_state *,
    struct x86_insn *);
static int get_modrm_reg(struct x86_insn *);
static int get_modrm_addr(struct x86_insn *, struct vcpu_reg_state *vrs);
static enum decode_result decode_disp(struct x86_decode_state *,
    struct x86_insn *);
static enum decode_result decode_sib(struct x86_decode_state *,
    struct x86_insn *);
static enum decode_result decode_imm(struct x86_decode_state *,
    struct x86_insn *);

static enum decode_result peek_byte(struct x86_decode_state *, uint8_t *);
static enum decode_result next_byte(struct x86_decode_state *, uint8_t *);
static enum decode_result next_value(struct x86_decode_state *, size_t,
    uint64_t *);
static int is_valid_state(struct x86_decode_state *, const char *);

static int emulate_mov(struct x86_insn *, struct vm_exit *);
static int emulate_movzx(struct x86_insn *, struct vm_exit *);

/*
 * Lookup table for 1-byte opcodes, indexed by opcode byte and grouped by
 * mnemonic in alphabetical order. The table must cover every possible byte
 * value (0x00-0xFF).
 */
const enum x86_opcode_type x86_1byte_opcode_tbl[256] = {
	/* MOV */
	[0x88] = OP_MOV,
	[0x89] = OP_MOV,
	[0x8A] = OP_MOV,
	[0x8B] = OP_MOV,
	[0x8C] = OP_MOV,
	[0xA0] = OP_MOV,
	[0xA1] = OP_MOV,
	[0xA2] = OP_MOV,
	[0xA3] = OP_MOV,

	/* MOVS */
	[0xA4] = OP_UNSUPPORTED,
	[0xA5] = OP_UNSUPPORTED,

	[ESCAPE] = OP_TWO_BYTE,
};

/*
 * Lookup table for 1-byte operand encodings, indexed by opcode byte and
 * grouped by mnemonic in alphabetical order.
 */
const enum x86_operand_enc x86_1byte_operand_enc_tbl[256] = {
	/* MOV */
	[0x88] = OP_ENC_MR,
	[0x89] = OP_ENC_MR,
	[0x8A] = OP_ENC_RM,
	[0x8B] = OP_ENC_RM,
	[0x8C] = OP_ENC_MR,
	[0xA0] = OP_ENC_FD,
	[0xA1] = OP_ENC_FD,
	[0xA2] = OP_ENC_TD,
	[0xA3] = OP_ENC_TD,

	/* MOVS */
	[0xA4] = OP_ENC_ZO,
	[0xA5] = OP_ENC_ZO,
};

const enum x86_opcode_type x86_2byte_opcode_tbl[256] = {
	/* MOVZX */
	[0xB6] = OP_MOVZX,
	[0xB7] = OP_MOVZX,
};

const enum x86_operand_enc x86_2byte_operand_enc_table[256] = {
	/* MOVZX */
	[0xB6] = OP_ENC_RM,
	[0xB7] = OP_ENC_RM,
};
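
/*
 * Example lookup (for illustration only): byte 0x8B indexes
 * x86_1byte_opcode_tbl[0x8B] == OP_MOV and
 * x86_1byte_operand_enc_tbl[0x8B] == OP_ENC_RM, i.e. a MOV whose register
 * operand comes from the ModR/M reg field and whose memory operand is
 * described by ModR/M (plus SIB/displacement when present). Bytes not
 * listed above fall back to the zero-initialized entry (OP_UNKNOWN /
 * OP_ENC_UNKNOWN, assuming those are the first enumerators in mmio.h),
 * which decode_opcode() rejects.
 */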

/*
 * peek_byte
 *
 * Fetch the next byte from the instruction bytes without advancing the
 * position in the stream.
 *
 * Return values:
 *  DECODE_DONE: byte was found and is the last in the stream
 *  DECODE_MORE: byte was found and there are more remaining to be read
 *  DECODE_ERROR: state is invalid and no byte was found, *byte left unchanged
 */
static enum decode_result
peek_byte(struct x86_decode_state *state, uint8_t *byte)
{
	enum decode_result res;

	if (state == NULL)
		return (DECODE_ERROR);

	if (state->s_idx == state->s_len)
		return (DECODE_ERROR);

	if (state->s_idx + 1 == state->s_len)
		res = DECODE_DONE;
	else
		res = DECODE_MORE;

	if (byte != NULL)
		*byte = state->s_bytes[state->s_idx];
	return (res);
}

/*
 * next_byte
 *
 * Fetch the next byte from the instruction bytes, advancing the position in
 * the stream and mutating the decode state.
 *
 * Return values:
 *  DECODE_DONE: byte was found and is the last in the stream
 *  DECODE_MORE: byte was found and there are more remaining to be read
 *  DECODE_ERROR: state is invalid and no byte was found, *byte left unchanged
 */
static enum decode_result
next_byte(struct x86_decode_state *state, uint8_t *byte)
{
	uint8_t next;

	/* Cheat and see if we're going to fail. */
	if (peek_byte(state, &next) == DECODE_ERROR)
		return (DECODE_ERROR);

	if (byte != NULL)
		*byte = next;
	state->s_idx++;

	return (state->s_idx < state->s_len ? DECODE_MORE : DECODE_DONE);
}

/*
 * Fetch the next `n' bytes as a single uint64_t value.
 */
static enum decode_result
next_value(struct x86_decode_state *state, size_t n, uint64_t *value)
{
	uint8_t bytes[8];
	size_t i;
	enum decode_result res;

	if (value == NULL)
		return (DECODE_ERROR);

	if (n == 0 || n > sizeof(bytes))
		return (DECODE_ERROR);

	memset(bytes, 0, sizeof(bytes));
	for (i = 0; i < n; i++)
		if ((res = next_byte(state, &bytes[i])) == DECODE_ERROR)
			return (DECODE_ERROR);

	memcpy(value, bytes, sizeof(*value));

	return (res);
}
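
/*
 * For illustration: next_value(state, 2, &v) over the byte stream
 * { 0x34, 0x12, ... } copies the bytes in stream order and reads them back
 * as a uint64_t, yielding v == 0x1234 on a little-endian host. That matches
 * the x86 guest's little-endian encoding of displacement and immediate
 * operands.
 */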

/*
 * is_valid_state
 *
 * Validate that the decode state looks viable.
 *
 * Returns:
 *  1: if the state is valid
 *  0: if an invariant violation is detected
 */
static int
is_valid_state(struct x86_decode_state *state, const char *fn_name)
{
	const char *s = (fn_name != NULL) ? fn_name : __func__;

	if (state == NULL) {
		log_warnx("%s: null state", s);
		return (0);
	}
	if (state->s_len > sizeof(state->s_bytes)) {
		log_warnx("%s: invalid length", s);
		return (0);
	}
	if (state->s_idx + 1 > state->s_len) {
		log_warnx("%s: invalid index", s);
		return (0);
	}

	return (1);
}

#ifdef MMIO_DEBUG
static void
dump_regs(struct vcpu_reg_state *vrs)
{
	size_t i;
	struct vcpu_segment_info *vsi;

	for (i = 0; i < VCPU_REGS_NGPRS; i++)
		log_info("%s: %s 0x%llx", __progname, str_reg(i),
		    vrs->vrs_gprs[i]);

	for (i = 0; i < VCPU_REGS_NSREGS; i++) {
		vsi = &vrs->vrs_sregs[i];
		log_info("%s: %s { sel: 0x%04x, lim: 0x%08x, ar: 0x%08x, "
		    "base: 0x%llx }", __progname, str_sreg(i),
		    vsi->vsi_sel, vsi->vsi_limit, vsi->vsi_ar, vsi->vsi_base);
	}
}

static void
dump_insn(struct x86_insn *insn)
{
	log_info("instruction { %s, enc=%s, len=%d, mod=0x%02x, ("
	    "reg=%s, addr=0x%lx) sib=0x%02x }",
	    str_opcode(&insn->insn_opcode),
	    str_operand_enc(&insn->insn_opcode), insn->insn_bytes_len,
	    insn->insn_modrm, str_reg(insn->insn_reg),
	    insn->insn_gva, insn->insn_sib);
}
#endif /* MMIO_DEBUG */

static const char *
str_cpu_mode(int mode)
{
	switch (mode) {
	case VMM_CPU_MODE_REAL: return "REAL";
	case VMM_CPU_MODE_PROT: return "PROT";
	case VMM_CPU_MODE_PROT32: return "PROT32";
	case VMM_CPU_MODE_COMPAT: return "COMPAT";
	case VMM_CPU_MODE_LONG: return "LONG";
	default: return "UNKNOWN";
	}
}

__unused static const char *
str_decode_res(enum decode_result res)
{
	switch (res) {
	case DECODE_DONE: return "DONE";
	case DECODE_MORE: return "MORE";
	case DECODE_ERROR: return "ERROR";
	default: return "UNKNOWN";
	}
}

static const char *
str_opcode(struct x86_opcode *opcode)
{
	switch (opcode->op_type) {
	case OP_IN: return "IN";
	case OP_INS: return "INS";
	case OP_MOV: return "MOV";
	case OP_MOVZX: return "MOVZX";
	case OP_OUT: return "OUT";
	case OP_OUTS: return "OUTS";
	case OP_UNSUPPORTED: return "UNSUPPORTED";
	default: return "UNKNOWN";
	}
}

static const char *
str_operand_enc(struct x86_opcode *opcode)
{
	switch (opcode->op_encoding) {
	case OP_ENC_I: return "I";
	case OP_ENC_MI: return "MI";
	case OP_ENC_MR: return "MR";
	case OP_ENC_RM: return "RM";
	case OP_ENC_FD: return "FD";
	case OP_ENC_TD: return "TD";
	case OP_ENC_OI: return "OI";
	case OP_ENC_ZO: return "ZO";
	default: return "UNKNOWN";
	}
}

static const char *
str_reg(int reg)
{
	switch (reg) {
	case VCPU_REGS_RAX: return "RAX";
	case VCPU_REGS_RCX: return "RCX";
	case VCPU_REGS_RDX: return "RDX";
	case VCPU_REGS_RBX: return "RBX";
	case VCPU_REGS_RSI: return "RSI";
	case VCPU_REGS_RDI: return "RDI";
	case VCPU_REGS_R8:  return " R8";
	case VCPU_REGS_R9:  return " R9";
	case VCPU_REGS_R10: return "R10";
	case VCPU_REGS_R11: return "R11";
	case VCPU_REGS_R12: return "R12";
	case VCPU_REGS_R13: return "R13";
	case VCPU_REGS_R14: return "R14";
	case VCPU_REGS_R15: return "R15";
	case VCPU_REGS_RSP: return "RSP";
	case VCPU_REGS_RBP: return "RBP";
	case VCPU_REGS_RIP: return "RIP";
	case VCPU_REGS_RFLAGS: return "RFLAGS";
	default: return "UNKNOWN";
	}
}

static const char *
str_sreg(int sreg)
{
	switch (sreg) {
	case VCPU_REGS_CS: return "CS";
	case VCPU_REGS_DS: return "DS";
	case VCPU_REGS_ES: return "ES";
	case VCPU_REGS_FS: return "FS";
	case VCPU_REGS_GS: return "GS";
	case VCPU_REGS_SS: return "SS";
	case VCPU_REGS_LDTR: return "LDTR";
	case VCPU_REGS_TR: return "TR";
	default: return "UNKNOWN";
	}
}

static int
detect_cpu_mode(struct vcpu_reg_state *vrs)
{
	uint64_t cr0, cr4, cs, efer, rflags;

	/* Is protected mode enabled? */
	cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
	if (!(cr0 & CR0_PE))
		return (VMM_CPU_MODE_REAL);

	cr4 = vrs->vrs_crs[VCPU_REGS_CR4];
	cs = vrs->vrs_sregs[VCPU_REGS_CS].vsi_ar;
	efer = vrs->vrs_msrs[VCPU_REGS_EFER];
	rflags = vrs->vrs_gprs[VCPU_REGS_RFLAGS];

	/* Check for Long modes. */
	if ((efer & EFER_LME) && (cr4 & CR4_PAE) && (cr0 & CR0_PG)) {
		if (cs & CS_L) {
			/* Long Modes */
			if (!(cs & CS_D))
				return (VMM_CPU_MODE_LONG);
			log_warnx("%s: invalid cpu mode", __progname);
			return (VMM_CPU_MODE_UNKNOWN);
		} else {
			/* Compatibility Modes */
			if (cs & CS_D) /* XXX Add Compat32 mode */
				return (VMM_CPU_MODE_UNKNOWN);
			return (VMM_CPU_MODE_COMPAT);
		}
	}

	/* Check for 32-bit Protected Mode. */
	if (cs & CS_D)
		return (VMM_CPU_MODE_PROT32);

	/* Check for virtual 8086 mode. */
	if (rflags & EFLAGS_VM) {
		/* XXX add Virtual8086 mode */
		log_warnx("%s: Virtual 8086 mode", __progname);
		return (VMM_CPU_MODE_UNKNOWN);
	}

	/* Can't determine mode. */
	log_warnx("%s: invalid cpu mode", __progname);
	return (VMM_CPU_MODE_UNKNOWN);
}
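
/*
 * Summary of the checks above (a reading of the code, not of the SDM):
 *
 *	CR0.PE clear			-> REAL
 *	EFER.LME, CR4.PAE and CR0.PG set:
 *		CS.L set, CS.D clear	-> LONG (64-bit)
 *		CS.L set, CS.D set	-> invalid, UNKNOWN
 *		CS.L clear, CS.D set	-> 32-bit compat, UNKNOWN for now
 *		CS.L clear, CS.D clear	-> COMPAT
 *	otherwise:
 *		CS.D set		-> PROT32
 *		EFLAGS.VM set		-> virtual 8086, UNKNOWN for now
 *		anything else		-> UNKNOWN
 */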

static enum decode_result
decode_prefix(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res = DECODE_ERROR;
	struct x86_prefix *prefix;
	uint8_t byte;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	prefix = &insn->insn_prefix;
	memset(prefix, 0, sizeof(*prefix));

	/*
	 * Decode prefixes. The last prefix seen in each group wins; the
	 * behavior for repeated prefixes is undefined in the Intel SDM
	 * (see Vol. 2, 2.1.1 Instruction Prefixes).
	 */
	while ((res = peek_byte(state, &byte)) != DECODE_ERROR) {
		switch (byte) {
		case LEG_1_LOCK:
		case LEG_1_REPNE:
		case LEG_1_REP:
			prefix->pfx_group1 = byte;
			break;
		case LEG_2_CS:
		case LEG_2_SS:
		case LEG_2_DS:
		case LEG_2_ES:
		case LEG_2_FS:
		case LEG_2_GS:
			prefix->pfx_group2 = byte;
			break;
		case LEG_3_OPSZ:
			prefix->pfx_group3 = byte;
			break;
		case LEG_4_ADDRSZ:
			prefix->pfx_group4 = byte;
			break;
		case REX_BASE...REX_BASE + 0x0F:
			if (insn->insn_cpu_mode == VMM_CPU_MODE_LONG)
				prefix->pfx_rex = byte;
			else /* INC encountered */
				return (DECODE_ERROR);
			break;
		case VEX_2_BYTE:
		case VEX_3_BYTE:
			log_warnx("%s: VEX not supported", __func__);
			return (DECODE_ERROR);
		default:
			/* Something other than a valid prefix. */
			return (DECODE_MORE);
		}
		/* Advance our position. */
		next_byte(state, NULL);
	}

	return (res);
}
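
/*
 * For illustration: given the bytes 0x66 0x48 0x8B ... in long mode,
 * decode_prefix() records 0x66 as the group 3 (operand-size) prefix and
 * 0x48 as the REX prefix, then stops at 0x8B and returns DECODE_MORE so
 * opcode decoding can continue. Outside of long mode a byte in the REX
 * range (0x40-0x4F) is a legacy INC/DEC encoding, not a prefix, so the
 * decode is rejected with DECODE_ERROR.
 */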

static enum decode_result
decode_modrm(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res;
	uint8_t byte = 0;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	insn->insn_modrm_valid = 0;

	/* Check the operand encoding to see if we fetch a byte or abort. */
	switch (insn->insn_opcode.op_encoding) {
	case OP_ENC_MR:
	case OP_ENC_RM:
	case OP_ENC_MI:
		res = next_byte(state, &byte);
		if (res == DECODE_ERROR) {
			log_warnx("%s: failed to get modrm byte", __func__);
			break;
		}
		insn->insn_modrm = byte;
		insn->insn_modrm_valid = 1;
		break;

	case OP_ENC_I:
	case OP_ENC_OI:
		log_warnx("%s: instruction does not need memory assist",
		    __func__);
		res = DECODE_ERROR;
		break;

	default:
		/* Peek to see if we're done decoding. */
		res = peek_byte(state, NULL);
	}

	return (res);
}

static int
get_modrm_reg(struct x86_insn *insn)
{
	if (insn == NULL)
		return (-1);

	if (insn->insn_modrm_valid) {
		switch (MODRM_REGOP(insn->insn_modrm)) {
		case 0:
			insn->insn_reg = VCPU_REGS_RAX;
			break;
		case 1:
			insn->insn_reg = VCPU_REGS_RCX;
			break;
		case 2:
			insn->insn_reg = VCPU_REGS_RDX;
			break;
		case 3:
			insn->insn_reg = VCPU_REGS_RBX;
			break;
		case 4:
			insn->insn_reg = VCPU_REGS_RSP;
			break;
		case 5:
			insn->insn_reg = VCPU_REGS_RBP;
			break;
		case 6:
			insn->insn_reg = VCPU_REGS_RSI;
			break;
		case 7:
			insn->insn_reg = VCPU_REGS_RDI;
			break;
		}
	}

	/* REX R bit selects extended registers in LONG mode. */
	if (insn->insn_prefix.pfx_rex & REX_R)
		insn->insn_reg += 8;

	return (0);
}
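
/*
 * The ModR/M byte is laid out as mod[7:6] reg[5:3] rm[2:0]. Worked example
 * (illustrative): ModR/M 0x07 has mod=00, reg=000 and rm=111, so
 * get_modrm_reg() above selects RAX as the register operand and
 * get_modrm_addr() below takes the guest virtual address from RDI. With a
 * REX prefix whose R bit is set, the same reg field selects the
 * corresponding extended register instead.
 */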

static int
get_modrm_addr(struct x86_insn *insn, struct vcpu_reg_state *vrs)
{
	uint8_t mod, rm;
	vaddr_t addr = 0x0UL;

	if (insn == NULL || vrs == NULL)
		return (-1);

	if (insn->insn_modrm_valid) {
		rm = MODRM_RM(insn->insn_modrm);
		mod = MODRM_MOD(insn->insn_modrm);

		switch (rm) {
		case 0b000:
			addr = vrs->vrs_gprs[VCPU_REGS_RAX];
			break;
		case 0b001:
			addr = vrs->vrs_gprs[VCPU_REGS_RCX];
			break;
		case 0b010:
			addr = vrs->vrs_gprs[VCPU_REGS_RDX];
			break;
		case 0b011:
			addr = vrs->vrs_gprs[VCPU_REGS_RBX];
			break;
		case 0b100:
			if (mod == 0b11)
				addr = vrs->vrs_gprs[VCPU_REGS_RSP];
			break;
		case 0b101:
			if (mod != 0b00)
				addr = vrs->vrs_gprs[VCPU_REGS_RBP];
			break;
		case 0b110:
			addr = vrs->vrs_gprs[VCPU_REGS_RSI];
			break;
		case 0b111:
			addr = vrs->vrs_gprs[VCPU_REGS_RDI];
			break;
		}

		insn->insn_gva = addr;
	}

	return (0);
}
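
/*
 * Note on the rm cases above (a reading of the code, not a full treatment
 * of x86 addressing): rm=0b100 with mod != 0b11 means a SIB byte follows,
 * and rm=0b101 with mod=0b00 is the disp32 (RIP-relative in long mode)
 * form. Both cases leave insn_gva at 0 here; the SIB byte and displacement
 * are captured separately by decode_sib() and decode_disp().
 */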

static enum decode_result
decode_disp(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res = DECODE_ERROR;
	uint64_t disp = 0;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	if (!insn->insn_modrm_valid)
		return (DECODE_ERROR);

	switch (MODRM_MOD(insn->insn_modrm)) {
	case 0x00:
		insn->insn_disp_type = DISP_0;
		res = DECODE_MORE;
		break;
	case 0x01:
		insn->insn_disp_type = DISP_1;
		res = next_value(state, 1, &disp);
		if (res == DECODE_ERROR)
			return (res);
		insn->insn_disp = disp;
		break;
	case 0x02:
		if (insn->insn_prefix.pfx_group4 == LEG_4_ADDRSZ) {
			insn->insn_disp_type = DISP_2;
			res = next_value(state, 2, &disp);
		} else {
			insn->insn_disp_type = DISP_4;
			res = next_value(state, 4, &disp);
		}
		if (res == DECODE_ERROR)
			return (res);
		insn->insn_disp = disp;
		break;
	default:
		insn->insn_disp_type = DISP_NONE;
		res = DECODE_MORE;
	}

	return (res);
}
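
/*
 * For illustration: in "MOV 0x8B 0x47 0x08" the ModR/M byte 0x47 has
 * mod=01, so decode_disp() reads one displacement byte (0x08) and records
 * it as DISP_1. mod=10 takes a 4-byte displacement, or a 2-byte one when
 * the address-size prefix was seen; mod=00 and mod=11 read no displacement
 * bytes (DISP_0 and DISP_NONE respectively).
 */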

static enum decode_result
decode_opcode(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res;
	enum x86_opcode_type type;
	enum x86_operand_enc enc;
	struct x86_opcode *opcode = &insn->insn_opcode;
	uint8_t byte, byte2;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	memset(opcode, 0, sizeof(*opcode));

	res = next_byte(state, &byte);
	if (res == DECODE_ERROR)
		return (res);

	type = x86_1byte_opcode_tbl[byte];
	switch (type) {
	case OP_UNKNOWN:
	case OP_UNSUPPORTED:
		log_warnx("%s: unsupported opcode", __func__);
		return (DECODE_ERROR);

	case OP_TWO_BYTE:
		res = next_byte(state, &byte2);
		if (res == DECODE_ERROR)
			return (res);

		type = x86_2byte_opcode_tbl[byte2];
		if (type == OP_UNKNOWN || type == OP_UNSUPPORTED) {
			log_warnx("%s: unsupported 2-byte opcode", __func__);
			return (DECODE_ERROR);
		}

		opcode->op_bytes[0] = byte;
		opcode->op_bytes[1] = byte2;
		opcode->op_bytes_len = 2;
		enc = x86_2byte_operand_enc_table[byte2];
		break;

	default:
		/* We've potentially got a known 1-byte opcode. */
		opcode->op_bytes[0] = byte;
		opcode->op_bytes_len = 1;
		enc = x86_1byte_operand_enc_tbl[byte];
	}

	if (enc == OP_ENC_UNKNOWN)
		return (DECODE_ERROR);

	opcode->op_type = type;
	opcode->op_encoding = enc;

	return (res);
}

static enum decode_result
decode_sib(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res;
	uint8_t byte;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	/* SIB is optional, so assume we will be continuing. */
	res = DECODE_MORE;

	insn->insn_sib_valid = 0;
	if (!insn->insn_modrm_valid)
		return (res);

	/* XXX is SIB valid in all cpu modes? */
	if (MODRM_RM(insn->insn_modrm) == 0b100) {
		res = next_byte(state, &byte);
		if (res != DECODE_ERROR) {
			insn->insn_sib_valid = 1;
			insn->insn_sib = byte;
		}
	}

	return (res);
}
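
/*
 * The SIB byte is laid out as scale[7:6] index[5:3] base[2:0]; for example,
 * 0x88 encodes scale=0b10 (multiplier 4), index=0b001 and base=0b000. Only
 * the raw byte is recorded here; it is not yet folded into insn_gva.
 */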

static enum decode_result
decode_imm(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res;
	size_t num_bytes;
	uint64_t value;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	/* Only handle MI encoded instructions. Others shouldn't need assist. */
	if (insn->insn_opcode.op_encoding != OP_ENC_MI)
		return (DECODE_DONE);

	/* Exceptions related to MOV instructions. */
	if (insn->insn_opcode.op_type == OP_MOV) {
		switch (insn->insn_opcode.op_bytes[0]) {
		case 0xC6:
			num_bytes = 1;
			break;
		case 0xC7:
			if (insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
				num_bytes = 2;
			else
				num_bytes = 4;
			break;
		default:
			log_warnx("%s: cannot decode immediate bytes for MOV",
			    __func__);
			return (DECODE_ERROR);
		}
	} else {
		/* Fallback to interpreting based on cpu mode and REX. */
		if (insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
			num_bytes = 2;
		else if (insn->insn_prefix.pfx_rex == REX_NONE)
			num_bytes = 4;
		else
			num_bytes = 8;
	}

	res = next_value(state, num_bytes, &value);
	if (res != DECODE_ERROR) {
		insn->insn_immediate = value;
		insn->insn_immediate_len = num_bytes;
	}

	return (res);
}
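
/*
 * For illustration (hypothetical, since the opcode tables above do not
 * currently admit an MI-encoded MOV): a MOV like 0xC7 would carry a 4-byte
 * immediate outside of real mode and a 2-byte one in real mode, which
 * decode_imm() would fetch via next_value() into insn_immediate. All other
 * encodings return DECODE_DONE without consuming bytes.
 */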

/*
 * insn_decode
 *
 * Decode an x86 instruction from the provided instruction bytes.
 *
 * Return values:
 *  0: successful decode
 *  Non-zero: an exception occurred during decode
 */
int
insn_decode(struct vm_exit *exit, struct x86_insn *insn)
{
	enum decode_result res;
	struct vcpu_reg_state *vrs = &exit->vrs;
	struct x86_decode_state state;
	uint8_t *bytes, len;
	int mode;

	if (exit == NULL || insn == NULL) {
		log_warnx("%s: invalid input", __func__);
		return (-1);
	}

	bytes = exit->vee.vee_insn_bytes;
	len = exit->vee.vee_insn_len;

	/* 0. Initialize state and instruction objects. */
	memset(insn, 0, sizeof(*insn));
	memset(&state, 0, sizeof(state));
	state.s_len = len;
	memcpy(&state.s_bytes, bytes, len);

	/* 1. Detect CPU mode. */
	mode = detect_cpu_mode(vrs);
	if (mode == VMM_CPU_MODE_UNKNOWN) {
		log_warnx("%s: failed to identify cpu mode", __func__);
#ifdef MMIO_DEBUG
		dump_regs(vrs);
#endif
		return (-1);
	}
	insn->insn_cpu_mode = mode;

#ifdef MMIO_DEBUG
	log_info("%s: cpu mode %s detected", __progname, str_cpu_mode(mode));
	printf("%s: got bytes: [ ", __progname);
	for (int i = 0; i < len; i++) {
		printf("%02x ", bytes[i]);
	}
	printf("]\n");
#endif
	/* 2. Decode prefixes. */
	res = decode_prefix(&state, insn);
	if (res == DECODE_ERROR) {
		log_warnx("%s: error decoding prefixes", __func__);
		goto err;
	} else if (res == DECODE_DONE)
		goto done;

#ifdef MMIO_DEBUG
	log_info("%s: prefixes {g1: 0x%02x, g2: 0x%02x, g3: 0x%02x, g4: 0x%02x,"
	    " rex: 0x%02x }", __progname, insn->insn_prefix.pfx_group1,
	    insn->insn_prefix.pfx_group2, insn->insn_prefix.pfx_group3,
	    insn->insn_prefix.pfx_group4, insn->insn_prefix.pfx_rex);
#endif

	/* 3. Pick apart opcode. Here we can start short-circuiting. */
	res = decode_opcode(&state, insn);
	if (res == DECODE_ERROR) {
		log_warnx("%s: error decoding opcode", __func__);
		goto err;
	} else if (res == DECODE_DONE)
		goto done;

#ifdef MMIO_DEBUG
	log_info("%s: found opcode %s (operand encoding %s) (%s)", __progname,
	    str_opcode(&insn->insn_opcode), str_operand_enc(&insn->insn_opcode),
	    str_decode_res(res));
#endif

	/* 4. Process the optional ModR/M byte. */
	res = decode_modrm(&state, insn);
	if (res == DECODE_ERROR) {
		log_warnx("%s: error decoding modrm", __func__);
		goto err;
	}
	if (get_modrm_addr(insn, vrs) != 0)
		goto err;
	if (get_modrm_reg(insn) != 0)
		goto err;
	if (res == DECODE_DONE)
		goto done;

#ifdef MMIO_DEBUG
	if (insn->insn_modrm_valid)
		log_info("%s: found ModRM 0x%02x (%s)", __progname,
		    insn->insn_modrm, str_decode_res(res));
#endif

	/* 5. Process the optional SIB byte. */
	res = decode_sib(&state, insn);
	if (res == DECODE_ERROR) {
		log_warnx("%s: error decoding sib", __func__);
		goto err;
	} else if (res == DECODE_DONE)
		goto done;

#ifdef MMIO_DEBUG
	if (insn->insn_sib_valid)
		log_info("%s: found SIB 0x%02x (%s)", __progname,
		    insn->insn_sib, str_decode_res(res));
#endif

	/* 6. Process any displacement bytes. */
	res = decode_disp(&state, insn);
	if (res == DECODE_ERROR) {
		log_warnx("%s: error decoding displacement", __func__);
		goto err;
	} else if (res == DECODE_DONE)
		goto done;

	/* 7. Process any immediate data bytes. */
	res = decode_imm(&state, insn);
	if (res == DECODE_ERROR) {
		log_warnx("%s: error decoding immediate bytes", __func__);
		goto err;
	}

done:
	insn->insn_bytes_len = state.s_idx;

#ifdef MMIO_DEBUG
	log_info("%s: final instruction length is %u", __func__,
	    insn->insn_bytes_len);
	dump_insn(insn);
	log_info("%s: modrm: {mod: %d, regop: %d, rm: %d}", __func__,
	    MODRM_MOD(insn->insn_modrm), MODRM_REGOP(insn->insn_modrm),
	    MODRM_RM(insn->insn_modrm));
	dump_regs(vrs);
#endif /* MMIO_DEBUG */
	return (0);

err:
#ifdef MMIO_DEBUG
	dump_insn(insn);
	log_info("%s: modrm: {mod: %d, regop: %d, rm: %d}", __func__,
	    MODRM_MOD(insn->insn_modrm), MODRM_REGOP(insn->insn_modrm),
	    MODRM_RM(insn->insn_modrm));
	dump_regs(vrs);
#endif /* MMIO_DEBUG */
	return (-1);
}
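
/*
 * Typical usage (a sketch only, not a verbatim copy of the caller in vmd's
 * vcpu exit handling): on an MMIO assist exit, decode the faulting
 * instruction against the exit register state, emulate it, and let the
 * updated registers (including the advanced RIP) be written back before
 * the guest resumes:
 *
 *	struct x86_insn insn;
 *
 *	if (insn_decode(exit, &insn) == 0 &&
 *	    insn_emulate(exit, &insn) != 0)
 *		log_warnx("failed to emulate MMIO instruction");
 *
 * where `exit' is the struct vm_exit for the vcpu that exited.
 */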

static int
emulate_mov(struct x86_insn *insn, struct vm_exit *exit)
{
	/* XXX Only supports read to register for now */
	if (insn->insn_opcode.op_encoding != OP_ENC_RM)
		return (-1);

	/* XXX No device emulation yet. Fill with 0xFFs. */
	exit->vrs.vrs_gprs[insn->insn_reg] = 0xFFFFFFFFFFFFFFFF;

	return (0);
}

static int
emulate_movzx(struct x86_insn *insn, struct vm_exit *exit)
{
	uint8_t byte, len, src = 1, dst = 2;
	uint64_t value = 0;

	/* Only RM is valid for MOVZX. */
	if (insn->insn_opcode.op_encoding != OP_ENC_RM) {
		log_warnx("invalid op encoding for MOVZX: %d",
		    insn->insn_opcode.op_encoding);
		return (-1);
	}

	len = insn->insn_opcode.op_bytes_len;
	if (len < 1 || len > sizeof(insn->insn_opcode.op_bytes)) {
		log_warnx("invalid opcode byte length: %d", len);
		return (-1);
	}

	byte = insn->insn_opcode.op_bytes[len - 1];
	switch (byte) {
	case 0xB6:
		src = 1;
		if (insn->insn_cpu_mode == VMM_CPU_MODE_PROT
		    || insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
			dst = 2;
		else if (insn->insn_prefix.pfx_rex == REX_NONE)
			dst = 4;
		else /* XXX validate CPU mode */
			dst = 8;
		break;
	case 0xB7:
		src = 2;
		if (insn->insn_prefix.pfx_rex == REX_NONE)
			dst = 4;
		else /* XXX validate CPU mode */
			dst = 8;
		break;
	default:
		log_warnx("invalid byte in MOVZX opcode: %x", byte);
		return (-1);
	}

	if (dst == 4)
		exit->vrs.vrs_gprs[insn->insn_reg] &= 0xFFFFFFFF00000000;
	else
		exit->vrs.vrs_gprs[insn->insn_reg] = 0x0UL;

	/* XXX No device emulation yet. Fill with 0xFFs. */
	switch (src) {
	case 1: value = 0xFF; break;
	case 2: value = 0xFFFF; break;
	case 4: value = 0xFFFFFFFF; break;
	case 8: value = 0xFFFFFFFFFFFFFFFF; break;
	default:
		log_warnx("invalid source size: %d", src);
		return (-1);
	}

	exit->vrs.vrs_gprs[insn->insn_reg] |= value;

	return (0);
}
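
/*
 * For illustration: emulating MOVZX 0x0F 0xB6 in long mode without a REX
 * prefix treats the source as 1 byte and the destination as 4 bytes, so the
 * destination register ends up with 0xFF (the placeholder value used until
 * device emulation is wired up) in its low 32 bits.
 */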

/*
 * insn_emulate
 *
 * Returns:
 *  0: success
 *  EINVAL: exception occurred
 *  EFAULT: page fault occurred, requires retry
 *  ENOTSUP: an unsupported instruction was provided
 */
int
insn_emulate(struct vm_exit *exit, struct x86_insn *insn)
{
	int res;

	switch (insn->insn_opcode.op_type) {
	case OP_MOV:
		res = emulate_mov(insn, exit);
		break;

	case OP_MOVZX:
		res = emulate_movzx(insn, exit);
		break;

	default:
		log_warnx("%s: emulation not defined for %s", __func__,
		    str_opcode(&insn->insn_opcode));
		res = ENOTSUP;
	}

	if (res == 0)
		exit->vrs.vrs_gprs[VCPU_REGS_RIP] += insn->insn_bytes_len;

	return (res);
}