bhyverun.c revision 234938
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/types.h>
33221828Sgrehan#include <sys/mman.h>
34221828Sgrehan#include <sys/time.h>
35221828Sgrehan
36221828Sgrehan#include <machine/segments.h>
37221828Sgrehan
38221828Sgrehan#include <stdio.h>
39221828Sgrehan#include <stdlib.h>
40221828Sgrehan#include <libgen.h>
41221828Sgrehan#include <unistd.h>
42221828Sgrehan#include <assert.h>
43221828Sgrehan#include <errno.h>
44221828Sgrehan#include <signal.h>
45221828Sgrehan#include <pthread.h>
46221828Sgrehan
47221828Sgrehan#include <machine/vmm.h>
48221828Sgrehan#include <vmmapi.h>
49221828Sgrehan
50221828Sgrehan#include "fbsdrun.h"
51221828Sgrehan#include "inout.h"
52221828Sgrehan#include "dbgport.h"
53221828Sgrehan#include "mevent.h"
54221828Sgrehan#include "pci_emul.h"
55221828Sgrehan#include "xmsr.h"
56234761Sgrehan#include "instruction_emul.h"
57221828Sgrehan
/* Default guest clock rate and default vcpu-mux timeslice rate. */
#define	DEFAULT_GUEST_HZ	100
#define	DEFAULT_GUEST_TSLICE	200

/* I/O port the guest writes to in order to make an upcall to the host. */
#define	GUEST_NIO_PORT		0x488

/*
 * Codes returned by the vmexit handlers.  vm_loop() uses them to decide
 * how (and whether) the guest is resumed.
 */
#define	VMEXIT_SWITCH		0	/* force vcpu switch in mux mode */
#define	VMEXIT_CONTINUE		1	/* continue from next instruction */
#define	VMEXIT_RESTART		2	/* restart current instruction */
#define	VMEXIT_ABORT		3	/* abort the vm run loop */
#define	VMEXIT_RESET		4	/* guest machine has reset */

/* Byte counts, typed unsigned long. */
#define	MB			(1UL << 20)
#define	GB			(1UL << 30)
71221828Sgrehan
72221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
73221828Sgrehan
74221828Sgrehanint guest_tslice = DEFAULT_GUEST_TSLICE;
75221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ;
76221828Sgrehanchar *vmname;
77221828Sgrehan
78221828Sgrehanu_long lomem_sz;
79221828Sgrehanu_long himem_sz;
80221828Sgrehan
81221828Sgrehanint guest_ncpus;
82221828Sgrehan
83221828Sgrehanstatic int pincpu = -1;
84221828Sgrehanstatic int guest_vcpu_mux;
85221828Sgrehanstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause;
86221828Sgrehan
87221828Sgrehanstatic int foundcpus;
88221828Sgrehan
89222105Sgrehanstatic int strictio;
90222105Sgrehan
91221828Sgrehanstatic char *lomem_addr;
92221828Sgrehanstatic char *himem_addr;
93221828Sgrehan
94221828Sgrehanstatic char *progname;
95221828Sgrehanstatic const int BSP = 0;
96221828Sgrehan
97221828Sgrehanstatic int cpumask;
98221828Sgrehan
99221828Sgrehanstatic void *oem_tbl_start;
100221828Sgrehanstatic int oem_tbl_size;
101221828Sgrehan
102221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
103221828Sgrehan
104221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU];
105221828Sgrehan
/*
 * Event counters bumped by the vmexit handlers and by vm_loop().
 * A single instance, 'stats', is shared by the whole file.
 */
struct fbsdstats {
	uint64_t	vmexit_bogus;		/* vmexit_bogus() calls */
	uint64_t	vmexit_bogus_switch;	/* bogus exits that switched vcpu */
	uint64_t	vmexit_hlt;		/* vmexit_hlt() calls */
	uint64_t	vmexit_pause;		/* vmexit_pause() calls */
	uint64_t	vmexit_mtrap;		/* vmexit_mtrap() calls */
	uint64_t	vmexit_paging;		/* vmexit_paging() calls */
	uint64_t	cpu_switch_rotate;	/* switches to the next vcpu in order */
	uint64_t	cpu_switch_direct;	/* switches to a handler-chosen vcpu */
	int		io_reset;		/* vmexit_catch_reset() calls */
} stats;
117221828Sgrehan
/*
 * Per-vcpu thread bookkeeping.  fbsdrun_addcpu() fills in slot 'vcpu' and
 * passes its address to the thread it creates.
 */
struct mt_vmm_info {
	pthread_t	mt_thr;		/* thread created for this vcpu */
	struct vmctx	*mt_ctx;	/* VM context the thread runs against */
	int		mt_vcpu;	/* vcpu id the thread starts on */
} mt_vmm_info[VM_MAXCPU];
123221828Sgrehan
124221828Sgrehanstatic void
125221828Sgrehanusage(int code)
126221828Sgrehan{
127221828Sgrehan
128221828Sgrehan        fprintf(stderr,
129222105Sgrehan                "Usage: %s [-ehBHP][-g <gdb port>][-z <hz>][-s <pci>][-p pincpu]"
130221828Sgrehan		"[-n <pci>][-m lowmem][-M highmem] <vm>\n"
131221828Sgrehan		"       -g: gdb port (default is %d and 0 means don't open)\n"
132221828Sgrehan		"       -c: # cpus (default 1)\n"
133221828Sgrehan		"       -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
134221828Sgrehan		"       -B: inject breakpoint exception on vm entry\n"
135221828Sgrehan		"       -H: vmexit from the guest on hlt\n"
136221828Sgrehan		"       -P: vmexit from the guest on pause\n"
137222105Sgrehan		"	-e: exit on unhandled i/o access\n"
138221828Sgrehan		"       -h: help\n"
139221828Sgrehan		"       -z: guest hz (default is %d)\n"
140221828Sgrehan		"       -s: <slot,driver,configinfo> PCI slot config\n"
141234938Sgrehan		"       -S: <slot,driver,configinfo> legacy PCI slot config\n"
142221828Sgrehan		"	-n: <slot,name> PCI slot naming\n"
143221828Sgrehan		"       -m: lowmem in MB\n"
144221828Sgrehan		"       -M: highmem in MB\n"
145221828Sgrehan		"       -x: mux vcpus to 1 hcpu\n"
146221828Sgrehan		"       -t: mux vcpu timeslice hz (default %d)\n",
147221828Sgrehan		progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
148221828Sgrehan		DEFAULT_GUEST_TSLICE);
149221828Sgrehan	exit(code);
150221828Sgrehan}
151221828Sgrehan
152221828Sgrehanvoid *
153221828Sgrehanpaddr_guest2host(uintptr_t gaddr)
154221828Sgrehan{
155221828Sgrehan	if (lomem_sz == 0)
156221828Sgrehan		return (NULL);
157221828Sgrehan
158221828Sgrehan	if (gaddr < lomem_sz) {
159221828Sgrehan		return ((void *)(lomem_addr + gaddr));
160221828Sgrehan	} else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) {
161221828Sgrehan		return ((void *)(himem_addr + gaddr - 4*GB));
162221828Sgrehan	} else
163221828Sgrehan		return (NULL);
164221828Sgrehan}
165221828Sgrehan
166221828Sgrehanvoid
167221828Sgrehanfbsdrun_add_oemtbl(void *tbl, int tblsz)
168221828Sgrehan{
169221828Sgrehan	oem_tbl_start = tbl;
170221828Sgrehan	oem_tbl_size = tblsz;
171221828Sgrehan}
172221828Sgrehan
173221828Sgrehanint
174221828Sgrehanfbsdrun_vmexit_on_pause(void)
175221828Sgrehan{
176221828Sgrehan
177221828Sgrehan	return (guest_vmexit_on_pause);
178221828Sgrehan}
179221828Sgrehan
180221828Sgrehanint
181221828Sgrehanfbsdrun_vmexit_on_hlt(void)
182221828Sgrehan{
183221828Sgrehan
184221828Sgrehan	return (guest_vmexit_on_hlt);
185221828Sgrehan}
186221828Sgrehan
187221828Sgrehanint
188221828Sgrehanfbsdrun_muxed(void)
189221828Sgrehan{
190221828Sgrehan
191221828Sgrehan	return (guest_vcpu_mux);
192221828Sgrehan}
193221828Sgrehan
194221942Sjhbstatic void *
195221828Sgrehanfbsdrun_start_thread(void *param)
196221828Sgrehan{
197221828Sgrehan	int vcpu;
198221828Sgrehan	struct mt_vmm_info *mtp = param;
199221828Sgrehan
200221828Sgrehan	vcpu = mtp->mt_vcpu;
201221828Sgrehan	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
202221828Sgrehan
203221828Sgrehan	/* not reached */
204221828Sgrehan	exit(1);
205221828Sgrehan	return (NULL);
206221828Sgrehan}
207221828Sgrehan
208221828Sgrehanvoid
209221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
210221828Sgrehan{
211221828Sgrehan	int error;
212221828Sgrehan
213221828Sgrehan	if (cpumask & (1 << vcpu)) {
214221828Sgrehan		printf("addcpu: attempting to add existing cpu %d\n", vcpu);
215221828Sgrehan		exit(1);
216221828Sgrehan	}
217221828Sgrehan
218221828Sgrehan	cpumask |= 1 << vcpu;
219221828Sgrehan	foundcpus++;
220221828Sgrehan
221221828Sgrehan	/*
222221828Sgrehan	 * Set up the vmexit struct to allow execution to start
223221828Sgrehan	 * at the given RIP
224221828Sgrehan	 */
225221828Sgrehan	vmexit[vcpu].rip = rip;
226221828Sgrehan	vmexit[vcpu].inst_length = 0;
227221828Sgrehan
228221828Sgrehan	if (vcpu == BSP || !guest_vcpu_mux){
229221828Sgrehan		mt_vmm_info[vcpu].mt_ctx = ctx;
230221828Sgrehan		mt_vmm_info[vcpu].mt_vcpu = vcpu;
231221828Sgrehan
232221828Sgrehan		error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
233221828Sgrehan				fbsdrun_start_thread, &mt_vmm_info[vcpu]);
234221828Sgrehan		assert(error == 0);
235221828Sgrehan	}
236221828Sgrehan}
237221828Sgrehan
238221828Sgrehanstatic int
239221828Sgrehanfbsdrun_get_next_cpu(int curcpu)
240221828Sgrehan{
241221828Sgrehan
242221828Sgrehan	/*
243221828Sgrehan	 * Get the next available CPU. Assumes they arrive
244221828Sgrehan	 * in ascending order with no gaps.
245221828Sgrehan	 */
246221828Sgrehan	return ((curcpu + 1) % foundcpus);
247221828Sgrehan}
248221828Sgrehan
249221942Sjhbstatic int
250221828Sgrehanvmexit_catch_reset(void)
251221828Sgrehan{
252221828Sgrehan        stats.io_reset++;
253221828Sgrehan        return (VMEXIT_RESET);
254221828Sgrehan}
255221828Sgrehan
256221942Sjhbstatic int
257221828Sgrehanvmexit_catch_inout(void)
258221828Sgrehan{
259221828Sgrehan	return (VMEXIT_ABORT);
260221828Sgrehan}
261221828Sgrehan
262221942Sjhbstatic int
263221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
264221828Sgrehan		     uint32_t eax)
265221828Sgrehan{
266221828Sgrehan#if PG_DEBUG /* put all types of debug here */
267221828Sgrehan        if (eax == 0) {
268221828Sgrehan		pause_noswitch = 1;
269221828Sgrehan	} else if (eax == 1) {
270221828Sgrehan		pause_noswitch = 0;
271221828Sgrehan	} else {
272221828Sgrehan		pause_noswitch = 0;
273221828Sgrehan		if (eax == 5) {
274221828Sgrehan			vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
275221828Sgrehan		}
276221828Sgrehan	}
277221828Sgrehan#endif
278221828Sgrehan        return (VMEXIT_CONTINUE);
279221828Sgrehan}
280221828Sgrehan
281221828Sgrehanstatic int
282221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
283221828Sgrehan{
284221828Sgrehan	int error;
285221828Sgrehan	int bytes, port, in, out;
286221828Sgrehan	uint32_t eax;
287221828Sgrehan	int vcpu;
288221828Sgrehan
289221828Sgrehan	vcpu = *pvcpu;
290221828Sgrehan
291221828Sgrehan	port = vme->u.inout.port;
292221828Sgrehan	bytes = vme->u.inout.bytes;
293221828Sgrehan	eax = vme->u.inout.eax;
294221828Sgrehan	in = vme->u.inout.in;
295221828Sgrehan	out = !in;
296221828Sgrehan
297221828Sgrehan	/* We don't deal with these */
298221828Sgrehan	if (vme->u.inout.string || vme->u.inout.rep)
299221828Sgrehan		return (VMEXIT_ABORT);
300221828Sgrehan
301221828Sgrehan	/* Special case of guest reset */
302221828Sgrehan	if (out && port == 0x64 && (uint8_t)eax == 0xFE)
303221828Sgrehan		return (vmexit_catch_reset());
304221828Sgrehan
305221828Sgrehan        /* Extra-special case of host notifications */
306221828Sgrehan        if (out && port == GUEST_NIO_PORT)
307221828Sgrehan                return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
308221828Sgrehan
309222105Sgrehan	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
310221828Sgrehan	if (error == 0 && in)
311221828Sgrehan		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
312221828Sgrehan
313221828Sgrehan	if (error == 0)
314221828Sgrehan		return (VMEXIT_CONTINUE);
315221828Sgrehan	else {
316221828Sgrehan		fprintf(stderr, "Unhandled %s%c 0x%04x\n",
317221828Sgrehan			in ? "in" : "out",
318221828Sgrehan			bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
319221828Sgrehan		return (vmexit_catch_inout());
320221828Sgrehan	}
321221828Sgrehan}
322221828Sgrehan
323221828Sgrehanstatic int
324221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
325221828Sgrehan{
326221828Sgrehan	printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu);
327221828Sgrehan	return (VMEXIT_ABORT);
328221828Sgrehan}
329221828Sgrehan
330221828Sgrehanstatic int
331221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
332221828Sgrehan{
333221828Sgrehan	int newcpu;
334221828Sgrehan	int retval = VMEXIT_CONTINUE;
335221828Sgrehan
336221828Sgrehan	newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval);
337221828Sgrehan
338221828Sgrehan	if (guest_vcpu_mux && *pvcpu != newcpu) {
339221828Sgrehan                retval = VMEXIT_SWITCH;
340221828Sgrehan                *pvcpu = newcpu;
341221828Sgrehan        }
342221828Sgrehan
343221828Sgrehan        return (retval);
344221828Sgrehan}
345221828Sgrehan
346221828Sgrehanstatic int
347221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
348221828Sgrehan{
349221828Sgrehan
350221828Sgrehan	printf("vm exit[%d]\n", *pvcpu);
351221828Sgrehan	printf("\treason\t\tVMX\n");
352221828Sgrehan	printf("\trip\t\t0x%016lx\n", vmexit->rip);
353221828Sgrehan	printf("\tinst_length\t%d\n", vmexit->inst_length);
354221828Sgrehan	printf("\terror\t\t%d\n", vmexit->u.vmx.error);
355221828Sgrehan	printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
356221828Sgrehan	printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification);
357221828Sgrehan
358221828Sgrehan	return (VMEXIT_ABORT);
359221828Sgrehan}
360221828Sgrehan
361221828Sgrehanstatic int bogus_noswitch = 1;
362221828Sgrehan
363221828Sgrehanstatic int
364221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
365221828Sgrehan{
366221828Sgrehan	stats.vmexit_bogus++;
367221828Sgrehan
368221828Sgrehan	if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
369221828Sgrehan		return (VMEXIT_RESTART);
370221828Sgrehan	} else {
371221828Sgrehan		stats.vmexit_bogus_switch++;
372221828Sgrehan		vmexit->inst_length = 0;
373221828Sgrehan		*pvcpu = -1;
374221828Sgrehan		return (VMEXIT_SWITCH);
375221828Sgrehan	}
376221828Sgrehan}
377221828Sgrehan
378221828Sgrehanstatic int
379221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
380221828Sgrehan{
381221828Sgrehan	stats.vmexit_hlt++;
382221828Sgrehan	if (fbsdrun_muxed()) {
383221828Sgrehan		*pvcpu = -1;
384221828Sgrehan		return (VMEXIT_SWITCH);
385221828Sgrehan	} else {
386221828Sgrehan		/*
387221828Sgrehan		 * Just continue execution with the next instruction. We use
388221828Sgrehan		 * the HLT VM exit as a way to be friendly with the host
389221828Sgrehan		 * scheduler.
390221828Sgrehan		 */
391221828Sgrehan		return (VMEXIT_CONTINUE);
392221828Sgrehan	}
393221828Sgrehan}
394221828Sgrehan
395221828Sgrehanstatic int pause_noswitch;
396221828Sgrehan
397221828Sgrehanstatic int
398221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
399221828Sgrehan{
400221828Sgrehan	stats.vmexit_pause++;
401221828Sgrehan
402221828Sgrehan	if (fbsdrun_muxed() && !pause_noswitch) {
403221828Sgrehan		*pvcpu = -1;
404221828Sgrehan		return (VMEXIT_SWITCH);
405221828Sgrehan        } else {
406221828Sgrehan		return (VMEXIT_CONTINUE);
407221828Sgrehan	}
408221828Sgrehan}
409221828Sgrehan
410221828Sgrehanstatic int
411221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
412221828Sgrehan{
413221828Sgrehan	stats.vmexit_mtrap++;
414221828Sgrehan
415221828Sgrehan	return (VMEXIT_RESTART);
416221828Sgrehan}
417221828Sgrehan
418234761Sgrehanstatic int
419234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
420234761Sgrehan{
421234761Sgrehan
422234761Sgrehan	stats.vmexit_paging++;
423234761Sgrehan
424234761Sgrehan	if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) {
425234761Sgrehan		printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip);
426234761Sgrehan		return (VMEXIT_ABORT);
427234761Sgrehan	}
428234761Sgrehan
429234761Sgrehan	return (VMEXIT_CONTINUE);
430234761Sgrehan}
431234761Sgrehan
/*
 * SIGALRM handler installed by setup_timeslice().  Deliberately empty.
 * NOTE(review): presumably only the delivery of the signal matters (to
 * interrupt the running vcpu) - confirm.
 */
static void
sigalrm(int sig)
{
	(void)sig;
}
437221828Sgrehan
438221828Sgrehanstatic void
439221828Sgrehansetup_timeslice(void)
440221828Sgrehan{
441221828Sgrehan	struct sigaction sa;
442221828Sgrehan	struct itimerval itv;
443221828Sgrehan	int error;
444221828Sgrehan
445221828Sgrehan	/*
446221828Sgrehan	 * Setup a realtime timer to generate a SIGALRM at a
447221828Sgrehan	 * frequency of 'guest_tslice' ticks per second.
448221828Sgrehan	 */
449221828Sgrehan	sigemptyset(&sa.sa_mask);
450221828Sgrehan	sa.sa_flags = 0;
451221828Sgrehan	sa.sa_handler = sigalrm;
452221828Sgrehan
453221828Sgrehan	error = sigaction(SIGALRM, &sa, NULL);
454221828Sgrehan	assert(error == 0);
455221828Sgrehan
456221828Sgrehan	itv.it_interval.tv_sec = 0;
457221828Sgrehan	itv.it_interval.tv_usec = 1000000 / guest_tslice;
458221828Sgrehan	itv.it_value.tv_sec = 0;
459221828Sgrehan	itv.it_value.tv_usec = 1000000 / guest_tslice;
460221828Sgrehan
461221828Sgrehan	error = setitimer(ITIMER_REAL, &itv, NULL);
462221828Sgrehan	assert(error == 0);
463221828Sgrehan}
464221828Sgrehan
465221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = {
466234761Sgrehan	[VM_EXITCODE_INOUT]  = vmexit_inout,
467234761Sgrehan	[VM_EXITCODE_VMX]    = vmexit_vmx,
468234761Sgrehan	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
469234761Sgrehan	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
470234761Sgrehan	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
471234761Sgrehan	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
472234761Sgrehan	[VM_EXITCODE_PAGING] = vmexit_paging
473221828Sgrehan};
474221828Sgrehan
475221828Sgrehanstatic void
476221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
477221828Sgrehan{
478221828Sgrehan	int error, rc, prevcpu;
479221828Sgrehan
480221828Sgrehan	if (guest_vcpu_mux)
481221828Sgrehan		setup_timeslice();
482221828Sgrehan
483221828Sgrehan	if (pincpu >= 0) {
484221828Sgrehan		error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
485221828Sgrehan		assert(error == 0);
486221828Sgrehan	}
487221828Sgrehan
488221828Sgrehan	while (1) {
489221828Sgrehan		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
490221828Sgrehan		if (error != 0)
491221828Sgrehan			break;
492221828Sgrehan
493221828Sgrehan		prevcpu = vcpu;
494221828Sgrehan                rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
495221828Sgrehan                                                       &vcpu);
496221828Sgrehan		switch (rc) {
497221828Sgrehan                case VMEXIT_SWITCH:
498221828Sgrehan			assert(guest_vcpu_mux);
499221828Sgrehan			if (vcpu == -1) {
500221828Sgrehan				stats.cpu_switch_rotate++;
501221828Sgrehan				vcpu = fbsdrun_get_next_cpu(prevcpu);
502221828Sgrehan			} else {
503221828Sgrehan				stats.cpu_switch_direct++;
504221828Sgrehan			}
505221828Sgrehan			/* fall through */
506221828Sgrehan		case VMEXIT_CONTINUE:
507221828Sgrehan                        rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
508221828Sgrehan			break;
509221828Sgrehan		case VMEXIT_RESTART:
510221828Sgrehan                        rip = vmexit[vcpu].rip;
511221828Sgrehan			break;
512221828Sgrehan		case VMEXIT_RESET:
513221828Sgrehan			exit(0);
514221828Sgrehan		default:
515221828Sgrehan			exit(1);
516221828Sgrehan		}
517221828Sgrehan	}
518221828Sgrehan	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
519221828Sgrehan}
520221828Sgrehan
521221828Sgrehan
522221828Sgrehanint
523221828Sgrehanmain(int argc, char *argv[])
524221828Sgrehan{
525221828Sgrehan	int c, error, gdb_port, inject_bkpt, tmp, err;
526221828Sgrehan	struct vmctx *ctx;
527221828Sgrehan	uint64_t rip;
528221828Sgrehan
529221828Sgrehan	inject_bkpt = 0;
530221828Sgrehan	progname = basename(argv[0]);
531221828Sgrehan	gdb_port = DEFAULT_GDB_PORT;
532221828Sgrehan	guest_ncpus = 1;
533221828Sgrehan
534234938Sgrehan	while ((c = getopt(argc, argv, "ehBHPxp:g:c:z:s:S:n:m:M:")) != -1) {
535221828Sgrehan		switch (c) {
536221828Sgrehan		case 'B':
537221828Sgrehan			inject_bkpt = 1;
538221828Sgrehan			break;
539221828Sgrehan		case 'x':
540221828Sgrehan			guest_vcpu_mux = 1;
541221828Sgrehan			break;
542221828Sgrehan		case 'p':
543221828Sgrehan			pincpu = atoi(optarg);
544221828Sgrehan			break;
545221828Sgrehan                case 'c':
546221828Sgrehan			guest_ncpus = atoi(optarg);
547221828Sgrehan			break;
548221828Sgrehan		case 'g':
549221828Sgrehan			gdb_port = atoi(optarg);
550221828Sgrehan			break;
551221828Sgrehan		case 'z':
552221828Sgrehan			guest_hz = atoi(optarg);
553221828Sgrehan			break;
554221828Sgrehan		case 't':
555221828Sgrehan			guest_tslice = atoi(optarg);
556221828Sgrehan			break;
557221828Sgrehan		case 's':
558234938Sgrehan			pci_parse_slot(optarg, 0);
559221828Sgrehan			break;
560234938Sgrehan		case 'S':
561234938Sgrehan			pci_parse_slot(optarg, 1);
562234938Sgrehan			break;
563221828Sgrehan		case 'n':
564221828Sgrehan			pci_parse_name(optarg);
565221828Sgrehan			break;
566221828Sgrehan                case 'm':
567221828Sgrehan			lomem_sz = strtoul(optarg, NULL, 0) * MB;
568221828Sgrehan			break;
569221828Sgrehan                case 'M':
570221828Sgrehan			himem_sz = strtoul(optarg, NULL, 0) * MB;
571221828Sgrehan			break;
572221828Sgrehan		case 'H':
573221828Sgrehan			guest_vmexit_on_hlt = 1;
574221828Sgrehan			break;
575221828Sgrehan		case 'P':
576221828Sgrehan			guest_vmexit_on_pause = 1;
577221828Sgrehan			break;
578222105Sgrehan		case 'e':
579222105Sgrehan			strictio = 1;
580222105Sgrehan			break;
581221828Sgrehan		case 'h':
582221828Sgrehan			usage(0);
583221828Sgrehan		default:
584221828Sgrehan			usage(1);
585221828Sgrehan		}
586221828Sgrehan	}
587221828Sgrehan	argc -= optind;
588221828Sgrehan	argv += optind;
589221828Sgrehan
590221828Sgrehan	if (argc != 1)
591221828Sgrehan		usage(1);
592221828Sgrehan
593221828Sgrehan	/* No need to mux if guest is uni-processor */
594221828Sgrehan	if (guest_ncpus <= 1)
595221828Sgrehan		guest_vcpu_mux = 0;
596221828Sgrehan
597221828Sgrehan	/* vmexit on hlt if guest is muxed */
598221828Sgrehan	if (guest_vcpu_mux) {
599221828Sgrehan		guest_vmexit_on_hlt = 1;
600221828Sgrehan		guest_vmexit_on_pause = 1;
601221828Sgrehan	}
602221828Sgrehan
603221828Sgrehan	vmname = argv[0];
604221828Sgrehan
605221828Sgrehan	ctx = vm_open(vmname);
606221828Sgrehan	if (ctx == NULL) {
607221828Sgrehan		perror("vm_open");
608221828Sgrehan		exit(1);
609221828Sgrehan	}
610221828Sgrehan
611221828Sgrehan	if (fbsdrun_vmexit_on_hlt()) {
612221828Sgrehan		err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
613221828Sgrehan		if (err < 0) {
614221828Sgrehan			printf("VM exit on HLT not supported\n");
615221828Sgrehan			exit(1);
616221828Sgrehan		}
617221828Sgrehan		vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
618221828Sgrehan		handler[VM_EXITCODE_HLT] = vmexit_hlt;
619221828Sgrehan	}
620221828Sgrehan
621221828Sgrehan        if (fbsdrun_vmexit_on_pause()) {
622221828Sgrehan		/*
623221828Sgrehan		 * pause exit support required for this mode
624221828Sgrehan		 */
625221828Sgrehan		err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
626221828Sgrehan		if (err < 0) {
627221828Sgrehan			printf("SMP mux requested, no pause support\n");
628221828Sgrehan			exit(1);
629221828Sgrehan		}
630221828Sgrehan		vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
631221828Sgrehan		handler[VM_EXITCODE_PAUSE] = vmexit_pause;
632221828Sgrehan        }
633221828Sgrehan
634221828Sgrehan	if (lomem_sz != 0) {
635221828Sgrehan		lomem_addr = vm_map_memory(ctx, 0, lomem_sz);
636221828Sgrehan		if (lomem_addr == (char *) MAP_FAILED) {
637221828Sgrehan			lomem_sz = 0;
638221828Sgrehan		} else if (himem_sz != 0) {
639221828Sgrehan			himem_addr = vm_map_memory(ctx, 4*GB, himem_sz);
640221828Sgrehan			if (himem_addr == (char *) MAP_FAILED) {
641221828Sgrehan				lomem_sz = 0;
642221828Sgrehan				himem_sz = 0;
643221828Sgrehan			}
644221828Sgrehan		}
645221828Sgrehan	}
646221828Sgrehan
647221828Sgrehan	init_inout();
648221828Sgrehan	init_pci(ctx);
649221828Sgrehan
650221828Sgrehan	if (gdb_port != 0)
651221828Sgrehan		init_dbgport(gdb_port);
652221828Sgrehan
653221828Sgrehan	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
654221828Sgrehan	assert(error == 0);
655221828Sgrehan
656221828Sgrehan	if (inject_bkpt) {
657221828Sgrehan		error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
658221828Sgrehan		assert(error == 0);
659221828Sgrehan	}
660221828Sgrehan
661221828Sgrehan	/*
662221828Sgrehan	 * build the guest tables, MP etc.
663221828Sgrehan	 */
664221828Sgrehan	vm_build_tables(ctx, guest_ncpus, oem_tbl_start, oem_tbl_size);
665221828Sgrehan
666221828Sgrehan	/*
667221828Sgrehan	 * Add CPU 0
668221828Sgrehan	 */
669221828Sgrehan	fbsdrun_addcpu(ctx, BSP, rip);
670221828Sgrehan
671221828Sgrehan	/*
672221828Sgrehan	 * Head off to the main event dispatch loop
673221828Sgrehan	 */
674221828Sgrehan	mevent_dispatch();
675221828Sgrehan
676221828Sgrehan	exit(1);
677221828Sgrehan}
678