1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/types.h>
30
31#include <machine/vmm.h>
32#include <machine/vmm_dev.h>
33#include <machine/vmm_instruction_emul.h>
34#include <amd64/vmm/intel/vmcs.h>
35#include <x86/apicreg.h>
36
37#include <assert.h>
38#include <err.h>
39#include <errno.h>
40#include <stdlib.h>
41#include <strings.h>
42#include <unistd.h>
43
44#include <vmmapi.h>
45
46#include "bhyverun.h"
47#include "config.h"
48#include "debug.h"
49#include "gdb.h"
50#include "inout.h"
51#include "mem.h"
52#ifdef BHYVE_SNAPSHOT
53#include "snapshot.h"
54#endif
55#include "spinup_ap.h"
56#include "vmexit.h"
57#include "xmsr.h"
58
/*
 * Forward 'vector', 'errcode_valid' and 'errcode' to vm_inject_exception()
 * for 'vcpu', always passing 1 for the restart-instruction argument.  The
 * call is asserted to succeed.
 */
void
vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode)
{
	const int restart_instruction = 1;
	int rc;

	rc = vm_inject_exception(vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(rc == 0);
}
71
72static int
73vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
74{
75	struct vm_exit *vme;
76	int error;
77	int bytes, port, in;
78
79	vme = vmrun->vm_exit;
80	port = vme->u.inout.port;
81	bytes = vme->u.inout.bytes;
82	in = vme->u.inout.in;
83
84	error = emulate_inout(ctx, vcpu, vme);
85	if (error) {
86		EPRINTLN("Unhandled %s%c 0x%04x at 0x%lx",
87		    in ? "in" : "out",
88		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
89		    port, vme->rip);
90		return (VMEXIT_ABORT);
91	} else {
92		return (VMEXIT_CONTINUE);
93	}
94}
95
96static int
97vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
98    struct vm_run *vmrun)
99{
100	struct vm_exit *vme;
101	uint64_t val;
102	uint32_t eax, edx;
103	int error;
104
105	vme = vmrun->vm_exit;
106
107	val = 0;
108	error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
109	if (error != 0) {
110		EPRINTLN("rdmsr to register %#x on vcpu %d",
111		    vme->u.msr.code, vcpu_id(vcpu));
112		if (get_config_bool("x86.strictmsr")) {
113			vm_inject_gp(vcpu);
114			return (VMEXIT_CONTINUE);
115		}
116	}
117
118	eax = val;
119	error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
120	assert(error == 0);
121
122	edx = val >> 32;
123	error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
124	assert(error == 0);
125
126	return (VMEXIT_CONTINUE);
127}
128
129static int
130vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
131    struct vm_run *vmrun)
132{
133	struct vm_exit *vme;
134	int error;
135
136	vme = vmrun->vm_exit;
137
138	error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
139	if (error != 0) {
140		EPRINTLN("wrmsr to register %#x(%#lx) on vcpu %d",
141		    vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
142		if (get_config_bool("x86.strictmsr")) {
143			vm_inject_gp(vcpu);
144			return (VMEXIT_CONTINUE);
145		}
146	}
147	return (VMEXIT_CONTINUE);
148}
149
150static const char * const vmx_exit_reason_desc[] = {
151	[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
152	[EXIT_REASON_EXT_INTR] = "External interrupt",
153	[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
154	[EXIT_REASON_INIT] = "INIT signal",
155	[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
156	[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
157	[EXIT_REASON_SMI] = "Other SMI",
158	[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
159	[EXIT_REASON_NMI_WINDOW] = "NMI window",
160	[EXIT_REASON_TASK_SWITCH] = "Task switch",
161	[EXIT_REASON_CPUID] = "CPUID",
162	[EXIT_REASON_GETSEC] = "GETSEC",
163	[EXIT_REASON_HLT] = "HLT",
164	[EXIT_REASON_INVD] = "INVD",
165	[EXIT_REASON_INVLPG] = "INVLPG",
166	[EXIT_REASON_RDPMC] = "RDPMC",
167	[EXIT_REASON_RDTSC] = "RDTSC",
168	[EXIT_REASON_RSM] = "RSM",
169	[EXIT_REASON_VMCALL] = "VMCALL",
170	[EXIT_REASON_VMCLEAR] = "VMCLEAR",
171	[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
172	[EXIT_REASON_VMPTRLD] = "VMPTRLD",
173	[EXIT_REASON_VMPTRST] = "VMPTRST",
174	[EXIT_REASON_VMREAD] = "VMREAD",
175	[EXIT_REASON_VMRESUME] = "VMRESUME",
176	[EXIT_REASON_VMWRITE] = "VMWRITE",
177	[EXIT_REASON_VMXOFF] = "VMXOFF",
178	[EXIT_REASON_VMXON] = "VMXON",
179	[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
180	[EXIT_REASON_DR_ACCESS] = "MOV DR",
181	[EXIT_REASON_INOUT] = "I/O instruction",
182	[EXIT_REASON_RDMSR] = "RDMSR",
183	[EXIT_REASON_WRMSR] = "WRMSR",
184	[EXIT_REASON_INVAL_VMCS] =
185	    "VM-entry failure due to invalid guest state",
186	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
187	[EXIT_REASON_MWAIT] = "MWAIT",
188	[EXIT_REASON_MTF] = "Monitor trap flag",
189	[EXIT_REASON_MONITOR] = "MONITOR",
190	[EXIT_REASON_PAUSE] = "PAUSE",
191	[EXIT_REASON_MCE_DURING_ENTRY] =
192	    "VM-entry failure due to machine-check event",
193	[EXIT_REASON_TPR] = "TPR below threshold",
194	[EXIT_REASON_APIC_ACCESS] = "APIC access",
195	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
196	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
197	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
198	[EXIT_REASON_EPT_FAULT] = "EPT violation",
199	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
200	[EXIT_REASON_INVEPT] = "INVEPT",
201	[EXIT_REASON_RDTSCP] = "RDTSCP",
202	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
203	[EXIT_REASON_INVVPID] = "INVVPID",
204	[EXIT_REASON_WBINVD] = "WBINVD",
205	[EXIT_REASON_XSETBV] = "XSETBV",
206	[EXIT_REASON_APIC_WRITE] = "APIC write",
207	[EXIT_REASON_RDRAND] = "RDRAND",
208	[EXIT_REASON_INVPCID] = "INVPCID",
209	[EXIT_REASON_VMFUNC] = "VMFUNC",
210	[EXIT_REASON_ENCLS] = "ENCLS",
211	[EXIT_REASON_RDSEED] = "RDSEED",
212	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
213	[EXIT_REASON_XSAVES] = "XSAVES",
214	[EXIT_REASON_XRSTORS] = "XRSTORS"
215};
216
217static const char *
218vmexit_vmx_desc(uint32_t exit_reason)
219{
220
221	if (exit_reason >= nitems(vmx_exit_reason_desc) ||
222	    vmx_exit_reason_desc[exit_reason] == NULL)
223		return ("Unknown");
224	return (vmx_exit_reason_desc[exit_reason]);
225}
226
227#define	DEBUG_EPT_MISCONFIG
228#ifdef DEBUG_EPT_MISCONFIG
229#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
230
231static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
232static int ept_misconfig_ptenum;
233#endif
234
235static int
236vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
237{
238	struct vm_exit *vme;
239
240	vme = vmrun->vm_exit;
241
242	EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
243	EPRINTLN("\treason\t\tVMX");
244	EPRINTLN("\trip\t\t0x%016lx", vme->rip);
245	EPRINTLN("\tinst_length\t%d", vme->inst_length);
246	EPRINTLN("\tstatus\t\t%d", vme->u.vmx.status);
247	EPRINTLN("\texit_reason\t%u (%s)", vme->u.vmx.exit_reason,
248	    vmexit_vmx_desc(vme->u.vmx.exit_reason));
249	EPRINTLN("\tqualification\t0x%016lx",
250	    vme->u.vmx.exit_qualification);
251	EPRINTLN("\tinst_type\t\t%d", vme->u.vmx.inst_type);
252	EPRINTLN("\tinst_error\t\t%d", vme->u.vmx.inst_error);
253#ifdef DEBUG_EPT_MISCONFIG
254	if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
255		vm_get_register(vcpu,
256		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
257		    &ept_misconfig_gpa);
258		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
259		    &ept_misconfig_ptenum);
260		EPRINTLN("\tEPT misconfiguration:");
261		EPRINTLN("\t\tGPA: %#lx", ept_misconfig_gpa);
262		EPRINTLN("\t\tPTE(%d): %#lx %#lx %#lx %#lx",
263		    ept_misconfig_ptenum, ept_misconfig_pte[0],
264		    ept_misconfig_pte[1], ept_misconfig_pte[2],
265		    ept_misconfig_pte[3]);
266	}
267#endif	/* DEBUG_EPT_MISCONFIG */
268	return (VMEXIT_ABORT);
269}
270
271static int
272vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
273{
274	struct vm_exit *vme;
275
276	vme = vmrun->vm_exit;
277
278	EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
279	EPRINTLN("\treason\t\tSVM");
280	EPRINTLN("\trip\t\t0x%016lx", vme->rip);
281	EPRINTLN("\tinst_length\t%d", vme->inst_length);
282	EPRINTLN("\texitcode\t%#lx", vme->u.svm.exitcode);
283	EPRINTLN("\texitinfo1\t%#lx", vme->u.svm.exitinfo1);
284	EPRINTLN("\texitinfo2\t%#lx", vme->u.svm.exitinfo2);
285	return (VMEXIT_ABORT);
286}
287
288static int
289vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
290    struct vm_run *vmrun)
291{
292	assert(vmrun->vm_exit->inst_length == 0);
293
294	return (VMEXIT_CONTINUE);
295}
296
297static int
298vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
299    struct vm_run *vmrun)
300{
301	assert(vmrun->vm_exit->inst_length == 0);
302
303	return (VMEXIT_CONTINUE);
304}
305
306static int
307vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
308    struct vm_run *vmrun __unused)
309{
310	/*
311	 * Just continue execution with the next instruction. We use
312	 * the HLT VM exit as a way to be friendly with the host
313	 * scheduler.
314	 */
315	return (VMEXIT_CONTINUE);
316}
317
318static int
319vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
320    struct vm_run *vmrun __unused)
321{
322	return (VMEXIT_CONTINUE);
323}
324
325static int
326vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
327    struct vm_run *vmrun)
328{
329	assert(vmrun->vm_exit->inst_length == 0);
330
331#ifdef BHYVE_SNAPSHOT
332	checkpoint_cpu_suspend(vcpu_id(vcpu));
333#endif
334	gdb_cpu_mtrap(vcpu);
335#ifdef BHYVE_SNAPSHOT
336	checkpoint_cpu_resume(vcpu_id(vcpu));
337#endif
338
339	return (VMEXIT_CONTINUE);
340}
341
342static int
343vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
344    struct vm_run *vmrun)
345{
346	struct vm_exit *vme;
347	struct vie *vie;
348	int err, i, cs_d;
349	enum vm_cpu_mode mode;
350
351	vme = vmrun->vm_exit;
352
353	vie = &vme->u.inst_emul.vie;
354	if (!vie->decoded) {
355		/*
356		 * Attempt to decode in userspace as a fallback.  This allows
357		 * updating instruction decode in bhyve without rebooting the
358		 * kernel (rapid prototyping), albeit with much slower
359		 * emulation.
360		 */
361		vie_restart(vie);
362		mode = vme->u.inst_emul.paging.cpu_mode;
363		cs_d = vme->u.inst_emul.cs_d;
364		if (vmm_decode_instruction(mode, cs_d, vie) != 0)
365			goto fail;
366		if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
367		    vme->rip + vie->num_processed) != 0)
368			goto fail;
369	}
370
371	err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
372	    &vme->u.inst_emul.paging);
373	if (err) {
374		if (err == ESRCH) {
375			EPRINTLN("Unhandled memory access to 0x%lx\n",
376			    vme->u.inst_emul.gpa);
377		}
378		goto fail;
379	}
380
381	return (VMEXIT_CONTINUE);
382
383fail:
384	fprintf(stderr, "Failed to emulate instruction sequence [ ");
385	for (i = 0; i < vie->num_valid; i++)
386		fprintf(stderr, "%02x", vie->inst[i]);
387	FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
388	return (VMEXIT_ABORT);
389}
390
391static int
392vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
393{
394	struct vm_exit *vme;
395	enum vm_suspend_how how;
396	int vcpuid = vcpu_id(vcpu);
397
398	vme = vmrun->vm_exit;
399
400	how = vme->u.suspended.how;
401
402	fbsdrun_deletecpu(vcpuid);
403
404	switch (how) {
405	case VM_SUSPEND_RESET:
406		exit(0);
407	case VM_SUSPEND_POWEROFF:
408		if (get_config_bool_default("destroy_on_poweroff", false))
409			vm_destroy(ctx);
410		exit(1);
411	case VM_SUSPEND_HALT:
412		exit(2);
413	case VM_SUSPEND_TRIPLEFAULT:
414		exit(3);
415	default:
416		EPRINTLN("vmexit_suspend: invalid reason %d", how);
417		exit(100);
418	}
419	return (0);	/* NOTREACHED */
420}
421
422static int
423vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
424    struct vm_run *vmrun __unused)
425{
426
427#ifdef BHYVE_SNAPSHOT
428	checkpoint_cpu_suspend(vcpu_id(vcpu));
429#endif
430	gdb_cpu_suspend(vcpu);
431#ifdef BHYVE_SNAPSHOT
432	checkpoint_cpu_resume(vcpu_id(vcpu));
433#endif
434	/*
435	 * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
436	 * window between activation of the vCPU thread and the STARTUP IPI.
437	 */
438	usleep(1000);
439	return (VMEXIT_CONTINUE);
440}
441
442static int
443vmexit_db(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
444{
445
446#ifdef BHYVE_SNAPSHOT
447	checkpoint_cpu_suspend(vcpu_id(vcpu));
448#endif
449	gdb_cpu_debug(vcpu, vmrun->vm_exit);
450#ifdef BHYVE_SNAPSHOT
451	checkpoint_cpu_resume(vcpu_id(vcpu));
452#endif
453	return (VMEXIT_CONTINUE);
454}
455
456static int
457vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
458    struct vm_run *vmrun)
459{
460	gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
461	return (VMEXIT_CONTINUE);
462}
463
464static int
465vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
466    struct vm_run *vmrun)
467{
468	struct vm_exit *vme;
469	cpuset_t *dmask;
470	int error = -1;
471	int i;
472
473	dmask = vmrun->cpuset;
474	vme = vmrun->vm_exit;
475
476	switch (vme->u.ipi.mode) {
477	case APIC_DELMODE_INIT:
478		CPU_FOREACH_ISSET(i, dmask) {
479			error = fbsdrun_suspendcpu(i);
480			if (error) {
481				warnx("failed to suspend cpu %d", i);
482				break;
483			}
484		}
485		break;
486	case APIC_DELMODE_STARTUP:
487		CPU_FOREACH_ISSET(i, dmask) {
488			spinup_ap(fbsdrun_vcpu(i),
489			    vme->u.ipi.vector << PAGE_SHIFT);
490		}
491		error = 0;
492		break;
493	default:
494		break;
495	}
496
497	return (error);
498}
499
500int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);
501
502const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
503	[VM_EXITCODE_INOUT]  = vmexit_inout,
504	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,
505	[VM_EXITCODE_VMX]    = vmexit_vmx,
506	[VM_EXITCODE_SVM]    = vmexit_svm,
507	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
508	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
509	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
510	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
511	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
512	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
513	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
514	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
515	[VM_EXITCODE_DEBUG] = vmexit_debug,
516	[VM_EXITCODE_BPT] = vmexit_breakpoint,
517	[VM_EXITCODE_IPI] = vmexit_ipi,
518	[VM_EXITCODE_HLT] = vmexit_hlt,
519	[VM_EXITCODE_PAUSE] = vmexit_pause,
520	[VM_EXITCODE_DB] = vmexit_db,
521};
522