1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/types.h>
33221828Sgrehan#include <sys/mman.h>
34221828Sgrehan#include <sys/time.h>
35221828Sgrehan
36221828Sgrehan#include <machine/segments.h>
37221828Sgrehan
38221828Sgrehan#include <stdio.h>
39221828Sgrehan#include <stdlib.h>
40257396Sneel#include <string.h>
41256176Sneel#include <err.h>
42221828Sgrehan#include <libgen.h>
43221828Sgrehan#include <unistd.h>
44221828Sgrehan#include <assert.h>
45221828Sgrehan#include <errno.h>
46221828Sgrehan#include <pthread.h>
47242404Sgrehan#include <pthread_np.h>
48256176Sneel#include <sysexits.h>
49221828Sgrehan
50221828Sgrehan#include <machine/vmm.h>
51221828Sgrehan#include <vmmapi.h>
52221828Sgrehan
53244167Sgrehan#include "bhyverun.h"
54243327Sgrehan#include "acpi.h"
55221828Sgrehan#include "inout.h"
56221828Sgrehan#include "dbgport.h"
57257396Sneel#include "legacy_irq.h"
58241744Sgrehan#include "mem.h"
59221828Sgrehan#include "mevent.h"
60242131Sgrehan#include "mptbl.h"
61221828Sgrehan#include "pci_emul.h"
62257396Sneel#include "pci_lpc.h"
63221828Sgrehan#include "xmsr.h"
64239045Sneel#include "ioapic.h"
65240912Sneel#include "spinup_ap.h"
66253181Sgrehan#include "rtc.h"
67221828Sgrehan
/* I/O port used for guest upcalls to the host. */
#define	GUEST_NIO_PORT		0x488

/*
 * Codes a vmexit handler hands back to the run loop to say how guest
 * execution should proceed.
 */
#define	VMEXIT_SWITCH		0	/* force vcpu switch in mux mode */
#define	VMEXIT_CONTINUE		1	/* continue from next instruction */
#define	VMEXIT_RESTART		2	/* restart current instruction */
#define	VMEXIT_ABORT		3	/* abort the vm run loop */
#define	VMEXIT_RESET		4	/* guest machine has reset */

/* Size helpers, expressed in bytes. */
#define	MB			(1024UL * 1024)
#define	GB			(1024UL * MB)
78221828Sgrehan
79221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
80221828Sgrehan
81221828Sgrehanchar *vmname;
82221828Sgrehan
83221828Sgrehanint guest_ncpus;
84221828Sgrehan
85221828Sgrehanstatic int pincpu = -1;
86240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;
87256755Sgrehanstatic int virtio_msix = 1;
88221828Sgrehan
89221828Sgrehanstatic int foundcpus;
90221828Sgrehan
91222105Sgrehanstatic int strictio;
92222105Sgrehan
93243327Sgrehanstatic int acpi;
94243327Sgrehan
95221828Sgrehanstatic char *progname;
96221828Sgrehanstatic const int BSP = 0;
97221828Sgrehan
98221828Sgrehanstatic int cpumask;
99221828Sgrehan
100221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
101221828Sgrehan
102221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU];
103221828Sgrehan
/*
 * Event counters kept by the vmexit handlers in this file.  The
 * vmexit_bogus_switch and cpu_switch_* members are not updated anywhere
 * in this file.
 */
struct bhyvestats {
	uint64_t	vmexit_bogus;		/* bumped by vmexit_bogus() */
	uint64_t	vmexit_bogus_switch;
	uint64_t	vmexit_hlt;		/* bumped by vmexit_hlt() */
	uint64_t	vmexit_pause;		/* bumped by vmexit_pause() */
	uint64_t	vmexit_mtrap;		/* bumped by vmexit_mtrap() */
	uint64_t	vmexit_inst_emul;	/* bumped by vmexit_inst_emul() */
	uint64_t	cpu_switch_rotate;
	uint64_t	cpu_switch_direct;
	int		io_reset;		/* bumped by vmexit_catch_reset() */
} stats;
115221828Sgrehan
/*
 * Per-vcpu thread bookkeeping.  fbsdrun_addcpu() fills in the context and
 * vcpu number and lets pthread_create() store the thread handle; the
 * entry is then passed to the new thread as its argument.
 */
struct mt_vmm_info {
	pthread_t	mt_thr;		/* thread running this vcpu */
	struct vmctx	*mt_ctx;	/* VM context handed to vm_loop() */
	int		mt_vcpu;	/* vcpu number served by the thread */
} mt_vmm_info[VM_MAXCPU];
121221828Sgrehan
122221828Sgrehanstatic void
123221828Sgrehanusage(int code)
124221828Sgrehan{
125221828Sgrehan
126221828Sgrehan        fprintf(stderr,
127257396Sneel                "Usage: %s [-aehAHIPW] [-g <gdb port>] [-s <pci>] [-S <pci>]\n"
128257396Sneel		"       %*s [-c vcpus] [-p pincpu] [-m mem] [-l <lpc>] <vm>\n"
129240943Sneel		"       -a: local apic is in XAPIC mode (default is X2APIC)\n"
130243327Sgrehan		"       -A: create an ACPI table\n"
131256156Sneel		"       -g: gdb port\n"
132221828Sgrehan		"       -c: # cpus (default 1)\n"
133221828Sgrehan		"       -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
134221828Sgrehan		"       -H: vmexit from the guest on hlt\n"
135221828Sgrehan		"       -P: vmexit from the guest on pause\n"
136257396Sneel		"       -W: force virtio to use single-vector MSI\n"
137257396Sneel		"       -e: exit on unhandled I/O access\n"
138221828Sgrehan		"       -h: help\n"
139221828Sgrehan		"       -s: <slot,driver,configinfo> PCI slot config\n"
140234938Sgrehan		"       -S: <slot,driver,configinfo> legacy PCI slot config\n"
141257396Sneel		"       -l: LPC device configuration\n"
142256062Sgrehan		"       -m: memory size in MB\n",
143257396Sneel		progname, (int)strlen(progname), "");
144256062Sgrehan
145221828Sgrehan	exit(code);
146221828Sgrehan}
147221828Sgrehan
/*
 * Translate the guest physical range [gaddr, gaddr + len) by handing it
 * to vm_map_gpa(); the result of that call is returned unchanged.
 */
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{
	void *hostaddr;

	hostaddr = vm_map_gpa(ctx, gaddr, len);
	return (hostaddr);
}
154221828Sgrehan
155221828Sgrehanint
156240943Sneelfbsdrun_disable_x2apic(void)
157240943Sneel{
158240943Sneel
159240943Sneel	return (disable_x2apic);
160240943Sneel}
161240943Sneel
162240943Sneelint
163221828Sgrehanfbsdrun_vmexit_on_pause(void)
164221828Sgrehan{
165221828Sgrehan
166221828Sgrehan	return (guest_vmexit_on_pause);
167221828Sgrehan}
168221828Sgrehan
169221828Sgrehanint
170221828Sgrehanfbsdrun_vmexit_on_hlt(void)
171221828Sgrehan{
172221828Sgrehan
173221828Sgrehan	return (guest_vmexit_on_hlt);
174221828Sgrehan}
175221828Sgrehan
176256755Sgrehanint
177256755Sgrehanfbsdrun_virtio_msix(void)
178256755Sgrehan{
179256755Sgrehan
180256755Sgrehan	return (virtio_msix);
181256755Sgrehan}
182256755Sgrehan
183221942Sjhbstatic void *
184221828Sgrehanfbsdrun_start_thread(void *param)
185221828Sgrehan{
186242404Sgrehan	char tname[MAXCOMLEN + 1];
187242404Sgrehan	struct mt_vmm_info *mtp;
188221828Sgrehan	int vcpu;
189221828Sgrehan
190242404Sgrehan	mtp = param;
191221828Sgrehan	vcpu = mtp->mt_vcpu;
192242404Sgrehan
193259496Sgrehan	snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
194242404Sgrehan	pthread_set_name_np(mtp->mt_thr, tname);
195242404Sgrehan
196221828Sgrehan	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
197221828Sgrehan
198221828Sgrehan	/* not reached */
199221828Sgrehan	exit(1);
200221828Sgrehan	return (NULL);
201221828Sgrehan}
202221828Sgrehan
203221828Sgrehanvoid
204221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
205221828Sgrehan{
206221828Sgrehan	int error;
207221828Sgrehan
208221828Sgrehan	if (cpumask & (1 << vcpu)) {
209242385Sgrehan		fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
210242385Sgrehan		    vcpu);
211221828Sgrehan		exit(1);
212221828Sgrehan	}
213221828Sgrehan
214221828Sgrehan	cpumask |= 1 << vcpu;
215221828Sgrehan	foundcpus++;
216221828Sgrehan
217221828Sgrehan	/*
218221828Sgrehan	 * Set up the vmexit struct to allow execution to start
219221828Sgrehan	 * at the given RIP
220221828Sgrehan	 */
221221828Sgrehan	vmexit[vcpu].rip = rip;
222221828Sgrehan	vmexit[vcpu].inst_length = 0;
223221828Sgrehan
224256072Sneel	mt_vmm_info[vcpu].mt_ctx = ctx;
225256072Sneel	mt_vmm_info[vcpu].mt_vcpu = vcpu;
226256072Sneel
227256072Sneel	error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
228256072Sneel	    fbsdrun_start_thread, &mt_vmm_info[vcpu]);
229256072Sneel	assert(error == 0);
230221828Sgrehan}
231221828Sgrehan
232221828Sgrehanstatic int
233221828Sgrehanvmexit_catch_reset(void)
234221828Sgrehan{
235221828Sgrehan        stats.io_reset++;
236221828Sgrehan        return (VMEXIT_RESET);
237221828Sgrehan}
238221828Sgrehan
239221942Sjhbstatic int
240221828Sgrehanvmexit_catch_inout(void)
241221828Sgrehan{
242221828Sgrehan	return (VMEXIT_ABORT);
243221828Sgrehan}
244221828Sgrehan
245221942Sjhbstatic int
246221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
247221828Sgrehan		     uint32_t eax)
248221828Sgrehan{
249256062Sgrehan#if BHYVE_DEBUG
250256062Sgrehan	/*
251256062Sgrehan	 * put guest-driven debug here
252256062Sgrehan	 */
253221828Sgrehan#endif
254221828Sgrehan        return (VMEXIT_CONTINUE);
255221828Sgrehan}
256221828Sgrehan
257221828Sgrehanstatic int
258221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
259221828Sgrehan{
260221828Sgrehan	int error;
261221828Sgrehan	int bytes, port, in, out;
262221828Sgrehan	uint32_t eax;
263221828Sgrehan	int vcpu;
264221828Sgrehan
265221828Sgrehan	vcpu = *pvcpu;
266221828Sgrehan
267221828Sgrehan	port = vme->u.inout.port;
268221828Sgrehan	bytes = vme->u.inout.bytes;
269221828Sgrehan	eax = vme->u.inout.eax;
270221828Sgrehan	in = vme->u.inout.in;
271221828Sgrehan	out = !in;
272221828Sgrehan
273221828Sgrehan	/* We don't deal with these */
274221828Sgrehan	if (vme->u.inout.string || vme->u.inout.rep)
275221828Sgrehan		return (VMEXIT_ABORT);
276221828Sgrehan
277221828Sgrehan	/* Special case of guest reset */
278221828Sgrehan	if (out && port == 0x64 && (uint8_t)eax == 0xFE)
279221828Sgrehan		return (vmexit_catch_reset());
280221828Sgrehan
281221828Sgrehan        /* Extra-special case of host notifications */
282221828Sgrehan        if (out && port == GUEST_NIO_PORT)
283221828Sgrehan                return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
284221828Sgrehan
285222105Sgrehan	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
286221828Sgrehan	if (error == 0 && in)
287221828Sgrehan		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
288221828Sgrehan
289221828Sgrehan	if (error == 0)
290221828Sgrehan		return (VMEXIT_CONTINUE);
291221828Sgrehan	else {
292221828Sgrehan		fprintf(stderr, "Unhandled %s%c 0x%04x\n",
293221828Sgrehan			in ? "in" : "out",
294221828Sgrehan			bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
295221828Sgrehan		return (vmexit_catch_inout());
296221828Sgrehan	}
297221828Sgrehan}
298221828Sgrehan
299221828Sgrehanstatic int
300221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
301221828Sgrehan{
302242385Sgrehan	fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code,
303242385Sgrehan	    *pvcpu);
304221828Sgrehan	return (VMEXIT_ABORT);
305221828Sgrehan}
306221828Sgrehan
307221828Sgrehanstatic int
308221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
309221828Sgrehan{
310221828Sgrehan	int newcpu;
311221828Sgrehan	int retval = VMEXIT_CONTINUE;
312221828Sgrehan
313221828Sgrehan	newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval);
314221828Sgrehan
315221828Sgrehan        return (retval);
316221828Sgrehan}
317221828Sgrehan
318221828Sgrehanstatic int
319240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
320240912Sneel{
321240912Sneel	int newcpu;
322240912Sneel	int retval = VMEXIT_CONTINUE;
323240912Sneel
324240912Sneel	newcpu = spinup_ap(ctx, *pvcpu,
325240912Sneel			   vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
326240912Sneel
327240912Sneel	return (retval);
328240912Sneel}
329240912Sneel
330240912Sneelstatic int
331221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
332221828Sgrehan{
333221828Sgrehan
334242385Sgrehan	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
335242385Sgrehan	fprintf(stderr, "\treason\t\tVMX\n");
336242385Sgrehan	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
337242385Sgrehan	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
338242385Sgrehan	fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error);
339242385Sgrehan	fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
340242385Sgrehan	fprintf(stderr, "\tqualification\t0x%016lx\n",
341242385Sgrehan	    vmexit->u.vmx.exit_qualification);
342221828Sgrehan
343221828Sgrehan	return (VMEXIT_ABORT);
344221828Sgrehan}
345221828Sgrehan
346221828Sgrehanstatic int
347221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
348221828Sgrehan{
349256062Sgrehan
350221828Sgrehan	stats.vmexit_bogus++;
351221828Sgrehan
352256062Sgrehan	return (VMEXIT_RESTART);
353221828Sgrehan}
354221828Sgrehan
355221828Sgrehanstatic int
356221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
357221828Sgrehan{
358256062Sgrehan
359221828Sgrehan	stats.vmexit_hlt++;
360256062Sgrehan
361256062Sgrehan	/*
362256062Sgrehan	 * Just continue execution with the next instruction. We use
363256062Sgrehan	 * the HLT VM exit as a way to be friendly with the host
364256062Sgrehan	 * scheduler.
365256062Sgrehan	 */
366256062Sgrehan	return (VMEXIT_CONTINUE);
367221828Sgrehan}
368221828Sgrehan
369221828Sgrehanstatic int
370221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
371221828Sgrehan{
372256062Sgrehan
373221828Sgrehan	stats.vmexit_pause++;
374221828Sgrehan
375256062Sgrehan	return (VMEXIT_CONTINUE);
376221828Sgrehan}
377221828Sgrehan
378221828Sgrehanstatic int
379221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
380221828Sgrehan{
381256062Sgrehan
382221828Sgrehan	stats.vmexit_mtrap++;
383221828Sgrehan
384221828Sgrehan	return (VMEXIT_RESTART);
385221828Sgrehan}
386221828Sgrehan
387234761Sgrehanstatic int
388256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
389234761Sgrehan{
390241744Sgrehan	int err;
391256072Sneel	stats.vmexit_inst_emul++;
392234761Sgrehan
393256072Sneel	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
394256072Sneel			  &vmexit->u.inst_emul.vie);
395241744Sgrehan
396241744Sgrehan	if (err) {
397241744Sgrehan		if (err == EINVAL) {
398242385Sgrehan			fprintf(stderr,
399242385Sgrehan			    "Failed to emulate instruction at 0x%lx\n",
400242385Sgrehan			    vmexit->rip);
401241744Sgrehan		} else if (err == ESRCH) {
402242385Sgrehan			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
403256072Sneel			    vmexit->u.inst_emul.gpa);
404241744Sgrehan		}
405241744Sgrehan
406234761Sgrehan		return (VMEXIT_ABORT);
407234761Sgrehan	}
408234761Sgrehan
409234761Sgrehan	return (VMEXIT_CONTINUE);
410234761Sgrehan}
411234761Sgrehan
412221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = {
413234761Sgrehan	[VM_EXITCODE_INOUT]  = vmexit_inout,
414234761Sgrehan	[VM_EXITCODE_VMX]    = vmexit_vmx,
415234761Sgrehan	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
416234761Sgrehan	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
417234761Sgrehan	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
418234761Sgrehan	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
419256072Sneel	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
420240912Sneel	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
421221828Sgrehan};
422221828Sgrehan
423221828Sgrehanstatic void
424221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
425221828Sgrehan{
426246686Sneel	cpuset_t mask;
427221828Sgrehan	int error, rc, prevcpu;
428253452Sgrehan	enum vm_exitcode exitcode;
429221828Sgrehan
430221828Sgrehan	if (pincpu >= 0) {
431246686Sneel		CPU_ZERO(&mask);
432246686Sneel		CPU_SET(pincpu + vcpu, &mask);
433246686Sneel		error = pthread_setaffinity_np(pthread_self(),
434246686Sneel					       sizeof(mask), &mask);
435221828Sgrehan		assert(error == 0);
436221828Sgrehan	}
437221828Sgrehan
438221828Sgrehan	while (1) {
439221828Sgrehan		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
440241490Sneel		if (error != 0) {
441241490Sneel			/*
442241490Sneel			 * It is possible that 'vmmctl' or some other process
443241490Sneel			 * has transitioned the vcpu to CANNOT_RUN state right
444241490Sneel			 * before we tried to transition it to RUNNING.
445241490Sneel			 *
446241490Sneel			 * This is expected to be temporary so just retry.
447241490Sneel			 */
448241490Sneel			if (errno == EBUSY)
449241490Sneel				continue;
450241490Sneel			else
451241490Sneel				break;
452241490Sneel		}
453221828Sgrehan
454221828Sgrehan		prevcpu = vcpu;
455253452Sgrehan
456253452Sgrehan		exitcode = vmexit[vcpu].exitcode;
457253452Sgrehan		if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
458253452Sgrehan			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
459253452Sgrehan			    exitcode);
460253452Sgrehan			exit(1);
461253452Sgrehan		}
462253452Sgrehan
463253452Sgrehan                rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
464253452Sgrehan
465221828Sgrehan		switch (rc) {
466221828Sgrehan		case VMEXIT_CONTINUE:
467221828Sgrehan                        rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
468221828Sgrehan			break;
469221828Sgrehan		case VMEXIT_RESTART:
470221828Sgrehan                        rip = vmexit[vcpu].rip;
471221828Sgrehan			break;
472221828Sgrehan		case VMEXIT_RESET:
473221828Sgrehan			exit(0);
474221828Sgrehan		default:
475221828Sgrehan			exit(1);
476221828Sgrehan		}
477221828Sgrehan	}
478221828Sgrehan	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
479221828Sgrehan}
480221828Sgrehan
481245020Sneelstatic int
482245020Sneelnum_vcpus_allowed(struct vmctx *ctx)
483245020Sneel{
484245020Sneel	int tmp, error;
485221828Sgrehan
486245020Sneel	error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
487245020Sneel
488245020Sneel	/*
489245020Sneel	 * The guest is allowed to spinup more than one processor only if the
490245020Sneel	 * UNRESTRICTED_GUEST capability is available.
491245020Sneel	 */
492245020Sneel	if (error == 0)
493245020Sneel		return (VM_MAXCPU);
494245020Sneel	else
495245020Sneel		return (1);
496245020Sneel}
497245020Sneel
498256869Sneelvoid
499256869Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
500256869Sneel{
501256869Sneel	int err, tmp;
502256869Sneel
503256869Sneel	if (fbsdrun_vmexit_on_hlt()) {
504256869Sneel		err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
505256869Sneel		if (err < 0) {
506256869Sneel			fprintf(stderr, "VM exit on HLT not supported\n");
507256869Sneel			exit(1);
508256869Sneel		}
509256869Sneel		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
510256869Sneel		if (cpu == BSP)
511256869Sneel			handler[VM_EXITCODE_HLT] = vmexit_hlt;
512256869Sneel	}
513256869Sneel
514256869Sneel        if (fbsdrun_vmexit_on_pause()) {
515256869Sneel		/*
516256869Sneel		 * pause exit support required for this mode
517256869Sneel		 */
518256869Sneel		err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
519256869Sneel		if (err < 0) {
520256869Sneel			fprintf(stderr,
521256869Sneel			    "SMP mux requested, no pause support\n");
522256869Sneel			exit(1);
523256869Sneel		}
524256869Sneel		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
525256869Sneel		if (cpu == BSP)
526256869Sneel			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
527256869Sneel        }
528256869Sneel
529256869Sneel	if (fbsdrun_disable_x2apic())
530256869Sneel		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
531256869Sneel	else
532256869Sneel		err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
533256869Sneel
534256869Sneel	if (err) {
535256869Sneel		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
536256869Sneel		exit(1);
537256869Sneel	}
538256869Sneel
539256869Sneel	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
540256869Sneel}
541256869Sneel
542221828Sgrehanint
543221828Sgrehanmain(int argc, char *argv[])
544221828Sgrehan{
545259496Sgrehan	int c, error, gdb_port, err, bvmcons;
546245020Sneel	int max_vcpus;
547221828Sgrehan	struct vmctx *ctx;
548221828Sgrehan	uint64_t rip;
549248477Sneel	size_t memsize;
550221828Sgrehan
551242192Sneel	bvmcons = 0;
552221828Sgrehan	progname = basename(argv[0]);
553256156Sneel	gdb_port = 0;
554221828Sgrehan	guest_ncpus = 1;
555248477Sneel	memsize = 256 * MB;
556221828Sgrehan
557257396Sneel	while ((c = getopt(argc, argv, "abehAHIPWp:g:c:s:S:m:l:")) != -1) {
558221828Sgrehan		switch (c) {
559240943Sneel		case 'a':
560240943Sneel			disable_x2apic = 1;
561240943Sneel			break;
562243327Sgrehan		case 'A':
563243327Sgrehan			acpi = 1;
564243327Sgrehan			break;
565242192Sneel		case 'b':
566242192Sneel			bvmcons = 1;
567242192Sneel			break;
568221828Sgrehan		case 'p':
569221828Sgrehan			pincpu = atoi(optarg);
570221828Sgrehan			break;
571221828Sgrehan                case 'c':
572221828Sgrehan			guest_ncpus = atoi(optarg);
573221828Sgrehan			break;
574221828Sgrehan		case 'g':
575221828Sgrehan			gdb_port = atoi(optarg);
576221828Sgrehan			break;
577257396Sneel		case 'l':
578257396Sneel			if (lpc_device_parse(optarg) != 0) {
579257396Sneel				errx(EX_USAGE, "invalid lpc device "
580257396Sneel				    "configuration '%s'", optarg);
581257396Sneel			}
582257396Sneel			break;
583221828Sgrehan		case 's':
584249916Sneel			if (pci_parse_slot(optarg, 0) != 0)
585249916Sneel				exit(1);
586249916Sneel			else
587249916Sneel				break;
588234938Sgrehan		case 'S':
589249916Sneel			if (pci_parse_slot(optarg, 1) != 0)
590249916Sneel				exit(1);
591249916Sneel			else
592249916Sneel				break;
593221828Sgrehan                case 'm':
594256176Sneel			error = vm_parse_memsize(optarg, &memsize);
595256176Sneel			if (error)
596256176Sneel				errx(EX_USAGE, "invalid memsize '%s'", optarg);
597221828Sgrehan			break;
598221828Sgrehan		case 'H':
599221828Sgrehan			guest_vmexit_on_hlt = 1;
600221828Sgrehan			break;
601239043Sneel		case 'I':
602259496Sgrehan			/*
603259496Sgrehan			 * The "-I" option was used to add an ioapic to the
604259496Sgrehan			 * virtual machine.
605259496Sgrehan			 *
606259496Sgrehan			 * An ioapic is now provided unconditionally for each
607259496Sgrehan			 * virtual machine and this option is now deprecated.
608259496Sgrehan			 */
609239043Sneel			break;
610221828Sgrehan		case 'P':
611221828Sgrehan			guest_vmexit_on_pause = 1;
612221828Sgrehan			break;
613222105Sgrehan		case 'e':
614222105Sgrehan			strictio = 1;
615222105Sgrehan			break;
616256755Sgrehan		case 'W':
617256755Sgrehan			virtio_msix = 0;
618256755Sgrehan			break;
619221828Sgrehan		case 'h':
620221828Sgrehan			usage(0);
621221828Sgrehan		default:
622221828Sgrehan			usage(1);
623221828Sgrehan		}
624221828Sgrehan	}
625221828Sgrehan	argc -= optind;
626221828Sgrehan	argv += optind;
627221828Sgrehan
628221828Sgrehan	if (argc != 1)
629221828Sgrehan		usage(1);
630221828Sgrehan
631221828Sgrehan	vmname = argv[0];
632221828Sgrehan
633221828Sgrehan	ctx = vm_open(vmname);
634221828Sgrehan	if (ctx == NULL) {
635221828Sgrehan		perror("vm_open");
636221828Sgrehan		exit(1);
637221828Sgrehan	}
638221828Sgrehan
639245020Sneel	max_vcpus = num_vcpus_allowed(ctx);
640245020Sneel	if (guest_ncpus > max_vcpus) {
641245020Sneel		fprintf(stderr, "%d vCPUs requested but only %d available\n",
642245020Sneel			guest_ncpus, max_vcpus);
643245020Sneel		exit(1);
644245020Sneel	}
645245020Sneel
646256869Sneel	fbsdrun_set_capabilities(ctx, BSP);
647221828Sgrehan
648248477Sneel	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
649248477Sneel	if (err) {
650248477Sneel		fprintf(stderr, "Unable to setup memory (%d)\n", err);
651248477Sneel		exit(1);
652221828Sgrehan	}
653221828Sgrehan
654249343Sneel	init_mem();
655221828Sgrehan	init_inout();
656257396Sneel	legacy_irq_init();
657252682Sgrehan
658253181Sgrehan	rtc_init(ctx);
659253181Sgrehan
660252682Sgrehan	/*
661252682Sgrehan	 * Exit if a device emulation finds an error in it's initilization
662252682Sgrehan	 */
663252682Sgrehan	if (init_pci(ctx) != 0)
664252682Sgrehan		exit(1);
665252682Sgrehan
666259496Sgrehan	ioapic_init(0);
667221828Sgrehan
668221828Sgrehan	if (gdb_port != 0)
669221828Sgrehan		init_dbgport(gdb_port);
670221828Sgrehan
671242192Sneel	if (bvmcons)
672242192Sneel		init_bvmcons();
673242192Sneel
674221828Sgrehan	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
675221828Sgrehan	assert(error == 0);
676221828Sgrehan
677221828Sgrehan	/*
678221828Sgrehan	 * build the guest tables, MP etc.
679221828Sgrehan	 */
680259496Sgrehan	mptable_build(ctx, guest_ncpus);
681221828Sgrehan
682243327Sgrehan	if (acpi) {
683259496Sgrehan		error = acpi_build(ctx, guest_ncpus);
684243327Sgrehan		assert(error == 0);
685243327Sgrehan	}
686243327Sgrehan
687221828Sgrehan	/*
688259496Sgrehan	 * Change the proc title to include the VM name.
689259496Sgrehan	 */
690259496Sgrehan	setproctitle("%s", vmname);
691259496Sgrehan
692259496Sgrehan	/*
693221828Sgrehan	 * Add CPU 0
694221828Sgrehan	 */
695221828Sgrehan	fbsdrun_addcpu(ctx, BSP, rip);
696221828Sgrehan
697221828Sgrehan	/*
698221828Sgrehan	 * Head off to the main event dispatch loop
699221828Sgrehan	 */
700221828Sgrehan	mevent_dispatch();
701221828Sgrehan
702221828Sgrehan	exit(1);
703221828Sgrehan}
704