bhyverun.c revision 234761
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
28353944Sdim
29353944Sdim#include <sys/cdefs.h>
30353944Sdim__FBSDID("$FreeBSD$");
31353944Sdim
32353944Sdim#include <sys/types.h>
33353944Sdim#include <sys/mman.h>
34353944Sdim#include <sys/time.h>
35353944Sdim
36353944Sdim#include <machine/segments.h>
37353944Sdim
38353944Sdim#include <stdio.h>
39353944Sdim#include <stdlib.h>
40353944Sdim#include <libgen.h>
41353944Sdim#include <unistd.h>
42353944Sdim#include <assert.h>
43353944Sdim#include <errno.h>
44353944Sdim#include <signal.h>
45353944Sdim#include <pthread.h>
46353944Sdim
47353944Sdim#include <machine/vmm.h>
48353944Sdim#include <vmmapi.h>
49353944Sdim
50353944Sdim#include "fbsdrun.h"
51353944Sdim#include "inout.h"
52353944Sdim#include "dbgport.h"
53353944Sdim#include "mevent.h"
54353944Sdim#include "pci_emul.h"
55353944Sdim#include "xmsr.h"
56353944Sdim#include "instruction_emul.h"
57353944Sdim
/* Default guest clock rate and mux-mode timeslice rate, both in Hz. */
#define	DEFAULT_GUEST_HZ	100
#define	DEFAULT_GUEST_TSLICE	200

/* I/O port the guest writes to in order to make upcalls. */
#define	GUEST_NIO_PORT		0x488

/*
 * Values returned by the vmexit handlers; vm_loop() switches on them to
 * decide how (or whether) to resume the vcpu.
 */
#define	VMEXIT_SWITCH		0	/* force vcpu switch in mux mode */
#define	VMEXIT_CONTINUE		1	/* continue from next instruction */
#define	VMEXIT_RESTART		2	/* restart current instruction */
#define	VMEXIT_ABORT		3	/* abort the vm run loop */
#define	VMEXIT_RESET		4	/* guest machine has reset */

/* Size multipliers. */
#define	MB			(1024UL * 1024)
#define	GB			(1024UL * MB)

/*
 * Signature of a vmexit handler.  The last argument points at the current
 * vcpu id; handlers may overwrite it to request a switch to another vcpu.
 */
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
73353944Sdim
74353944Sdimint guest_tslice = DEFAULT_GUEST_TSLICE;
75353944Sdimint guest_hz = DEFAULT_GUEST_HZ;
76353944Sdimchar *vmname;
77353944Sdim
78353944Sdimu_long lomem_sz;
79353944Sdimu_long himem_sz;
80353944Sdim
81353944Sdimint guest_ncpus;
82353944Sdim
83353944Sdimstatic int pincpu = -1;
84353944Sdimstatic int guest_vcpu_mux;
85353944Sdimstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause;
86353944Sdim
87353944Sdimstatic int foundcpus;
88353944Sdim
89353944Sdimstatic int strictio;
90353944Sdim
91353944Sdimstatic char *lomem_addr;
92353944Sdimstatic char *himem_addr;
93353944Sdim
94353944Sdimstatic char *progname;
95353944Sdimstatic const int BSP = 0;
96353944Sdim
97353944Sdimstatic int cpumask;
98353944Sdim
99353944Sdimstatic void *oem_tbl_start;
100353944Sdimstatic int oem_tbl_size;
101353944Sdim
102353944Sdimstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
103353944Sdim
104353944Sdimstruct vm_exit vmexit[VM_MAXCPU];
105353944Sdim
/*
 * Event counters bumped by the vmexit handlers and by vm_loop().
 */
struct fbsdstats {
	uint64_t	vmexit_bogus;		/* bogus exits seen */
	uint64_t	vmexit_bogus_switch;	/* bogus exits that switched vcpu */
	uint64_t	vmexit_hlt;		/* HLT exits */
	uint64_t	vmexit_pause;		/* PAUSE exits */
	uint64_t	vmexit_mtrap;		/* MTRAP exits */
	uint64_t	vmexit_paging;		/* paging exits */
	uint64_t	cpu_switch_rotate;	/* switches to the next vcpu in order */
	uint64_t	cpu_switch_direct;	/* switches to a handler-chosen vcpu */
	int		io_reset;		/* guest resets caught via i/o */
} stats;
117353944Sdim
/*
 * Per-vcpu thread bookkeeping; a slot is filled in by fbsdrun_addcpu()
 * and handed to the thread as its start argument.
 */
struct mt_vmm_info {
	pthread_t	mt_thr;		/* thread running the vcpu loop */
	struct vmctx	*mt_ctx;	/* VM context passed to vm_loop() */
	int		mt_vcpu;	/* vcpu id served by the thread */
} mt_vmm_info[VM_MAXCPU];
123353944Sdim
124353944Sdimstatic void
125353944Sdimusage(int code)
126353944Sdim{
127353944Sdim
128353944Sdim        fprintf(stderr,
129353944Sdim                "Usage: %s [-ehBHP][-g <gdb port>][-z <hz>][-s <pci>][-p pincpu]"
130353944Sdim		"[-n <pci>][-m lowmem][-M highmem] <vm>\n"
131353944Sdim		"       -g: gdb port (default is %d and 0 means don't open)\n"
132353944Sdim		"       -c: # cpus (default 1)\n"
133353944Sdim		"       -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
134353944Sdim		"       -B: inject breakpoint exception on vm entry\n"
135353944Sdim		"       -H: vmexit from the guest on hlt\n"
136353944Sdim		"       -P: vmexit from the guest on pause\n"
137353944Sdim		"	-e: exit on unhandled i/o access\n"
138353944Sdim		"       -h: help\n"
139353944Sdim		"       -z: guest hz (default is %d)\n"
140353944Sdim		"       -s: <slot,driver,configinfo> PCI slot config\n"
141353944Sdim		"	-n: <slot,name> PCI slot naming\n"
142353944Sdim		"       -m: lowmem in MB\n"
143353944Sdim		"       -M: highmem in MB\n"
144353944Sdim		"       -x: mux vcpus to 1 hcpu\n"
145353944Sdim		"       -t: mux vcpu timeslice hz (default %d)\n",
146353944Sdim		progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
147353944Sdim		DEFAULT_GUEST_TSLICE);
148353944Sdim	exit(code);
149353944Sdim}
150353944Sdim
151353944Sdimvoid *
152353944Sdimpaddr_guest2host(uintptr_t gaddr)
153353944Sdim{
154353944Sdim	if (lomem_sz == 0)
155353944Sdim		return (NULL);
156353944Sdim
157353944Sdim	if (gaddr < lomem_sz) {
158353944Sdim		return ((void *)(lomem_addr + gaddr));
159353944Sdim	} else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) {
160353944Sdim		return ((void *)(himem_addr + gaddr - 4*GB));
161353944Sdim	} else
162353944Sdim		return (NULL);
163353944Sdim}
164353944Sdim
165353944Sdimvoid
166353944Sdimfbsdrun_add_oemtbl(void *tbl, int tblsz)
167353944Sdim{
168353944Sdim	oem_tbl_start = tbl;
169353944Sdim	oem_tbl_size = tblsz;
170353944Sdim}
171353944Sdim
172353944Sdimint
173353944Sdimfbsdrun_vmexit_on_pause(void)
174353944Sdim{
175353944Sdim
176353944Sdim	return (guest_vmexit_on_pause);
177353944Sdim}
178353944Sdim
179353944Sdimint
180353944Sdimfbsdrun_vmexit_on_hlt(void)
181353944Sdim{
182353944Sdim
183353944Sdim	return (guest_vmexit_on_hlt);
184353944Sdim}
185353944Sdim
186353944Sdimint
187353944Sdimfbsdrun_muxed(void)
188353944Sdim{
189353944Sdim
190353944Sdim	return (guest_vcpu_mux);
191353944Sdim}
192353944Sdim
193353944Sdimstatic void *
194353944Sdimfbsdrun_start_thread(void *param)
195353944Sdim{
196353944Sdim	int vcpu;
197353944Sdim	struct mt_vmm_info *mtp = param;
198353944Sdim
199353944Sdim	vcpu = mtp->mt_vcpu;
200353944Sdim	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
201353944Sdim
202353944Sdim	/* not reached */
203353944Sdim	exit(1);
204353944Sdim	return (NULL);
205353944Sdim}
206353944Sdim
207353944Sdimvoid
208353944Sdimfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
209353944Sdim{
210353944Sdim	int error;
211353944Sdim
212353944Sdim	if (cpumask & (1 << vcpu)) {
213353944Sdim		printf("addcpu: attempting to add existing cpu %d\n", vcpu);
214353944Sdim		exit(1);
215353944Sdim	}
216353944Sdim
217353944Sdim	cpumask |= 1 << vcpu;
218353944Sdim	foundcpus++;
219353944Sdim
220353944Sdim	/*
221353944Sdim	 * Set up the vmexit struct to allow execution to start
222353944Sdim	 * at the given RIP
223353944Sdim	 */
224353944Sdim	vmexit[vcpu].rip = rip;
225353944Sdim	vmexit[vcpu].inst_length = 0;
226353944Sdim
227353944Sdim	if (vcpu == BSP || !guest_vcpu_mux){
228353944Sdim		mt_vmm_info[vcpu].mt_ctx = ctx;
229353944Sdim		mt_vmm_info[vcpu].mt_vcpu = vcpu;
230353944Sdim
231353944Sdim		error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
232353944Sdim				fbsdrun_start_thread, &mt_vmm_info[vcpu]);
233353944Sdim		assert(error == 0);
234353944Sdim	}
235353944Sdim}
236353944Sdim
237353944Sdimstatic int
238353944Sdimfbsdrun_get_next_cpu(int curcpu)
239353944Sdim{
240353944Sdim
241353944Sdim	/*
242353944Sdim	 * Get the next available CPU. Assumes they arrive
243353944Sdim	 * in ascending order with no gaps.
244353944Sdim	 */
245353944Sdim	return ((curcpu + 1) % foundcpus);
246353944Sdim}
247353944Sdim
248353944Sdimstatic int
249353944Sdimvmexit_catch_reset(void)
250353944Sdim{
251353944Sdim        stats.io_reset++;
252353944Sdim        return (VMEXIT_RESET);
253353944Sdim}
254353944Sdim
255353944Sdimstatic int
256353944Sdimvmexit_catch_inout(void)
257353944Sdim{
258353944Sdim	return (VMEXIT_ABORT);
259353944Sdim}
260353944Sdim
261353944Sdimstatic int
262353944Sdimvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
263353944Sdim		     uint32_t eax)
264353944Sdim{
265353944Sdim#if PG_DEBUG /* put all types of debug here */
266353944Sdim        if (eax == 0) {
267353944Sdim		pause_noswitch = 1;
268353944Sdim	} else if (eax == 1) {
269353944Sdim		pause_noswitch = 0;
270353944Sdim	} else {
271353944Sdim		pause_noswitch = 0;
272353944Sdim		if (eax == 5) {
273353944Sdim			vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
274353944Sdim		}
275353944Sdim	}
276353944Sdim#endif
277353944Sdim        return (VMEXIT_CONTINUE);
278353944Sdim}
279353944Sdim
280353944Sdimstatic int
281353944Sdimvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
282353944Sdim{
283353944Sdim	int error;
284353944Sdim	int bytes, port, in, out;
285353944Sdim	uint32_t eax;
286353944Sdim	int vcpu;
287353944Sdim
288353944Sdim	vcpu = *pvcpu;
289353944Sdim
290353944Sdim	port = vme->u.inout.port;
291353944Sdim	bytes = vme->u.inout.bytes;
292353944Sdim	eax = vme->u.inout.eax;
293353944Sdim	in = vme->u.inout.in;
294353944Sdim	out = !in;
295353944Sdim
296353944Sdim	/* We don't deal with these */
297353944Sdim	if (vme->u.inout.string || vme->u.inout.rep)
298353944Sdim		return (VMEXIT_ABORT);
299353944Sdim
300353944Sdim	/* Special case of guest reset */
301353944Sdim	if (out && port == 0x64 && (uint8_t)eax == 0xFE)
302353944Sdim		return (vmexit_catch_reset());
303353944Sdim
304353944Sdim        /* Extra-special case of host notifications */
305353944Sdim        if (out && port == GUEST_NIO_PORT)
306353944Sdim                return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
307353944Sdim
308353944Sdim	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
309353944Sdim	if (error == 0 && in)
310353944Sdim		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
311353944Sdim
312353944Sdim	if (error == 0)
313353944Sdim		return (VMEXIT_CONTINUE);
314353944Sdim	else {
315353944Sdim		fprintf(stderr, "Unhandled %s%c 0x%04x\n",
316353944Sdim			in ? "in" : "out",
317353944Sdim			bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
318353944Sdim		return (vmexit_catch_inout());
319	}
320}
321
322static int
323vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
324{
325	printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu);
326	return (VMEXIT_ABORT);
327}
328
329static int
330vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
331{
332	int newcpu;
333	int retval = VMEXIT_CONTINUE;
334
335	newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval);
336
337	if (guest_vcpu_mux && *pvcpu != newcpu) {
338                retval = VMEXIT_SWITCH;
339                *pvcpu = newcpu;
340        }
341
342        return (retval);
343}
344
345static int
346vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
347{
348
349	printf("vm exit[%d]\n", *pvcpu);
350	printf("\treason\t\tVMX\n");
351	printf("\trip\t\t0x%016lx\n", vmexit->rip);
352	printf("\tinst_length\t%d\n", vmexit->inst_length);
353	printf("\terror\t\t%d\n", vmexit->u.vmx.error);
354	printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
355	printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification);
356
357	return (VMEXIT_ABORT);
358}
359
360static int bogus_noswitch = 1;
361
362static int
363vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
364{
365	stats.vmexit_bogus++;
366
367	if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
368		return (VMEXIT_RESTART);
369	} else {
370		stats.vmexit_bogus_switch++;
371		vmexit->inst_length = 0;
372		*pvcpu = -1;
373		return (VMEXIT_SWITCH);
374	}
375}
376
377static int
378vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
379{
380	stats.vmexit_hlt++;
381	if (fbsdrun_muxed()) {
382		*pvcpu = -1;
383		return (VMEXIT_SWITCH);
384	} else {
385		/*
386		 * Just continue execution with the next instruction. We use
387		 * the HLT VM exit as a way to be friendly with the host
388		 * scheduler.
389		 */
390		return (VMEXIT_CONTINUE);
391	}
392}
393
394static int pause_noswitch;
395
396static int
397vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
398{
399	stats.vmexit_pause++;
400
401	if (fbsdrun_muxed() && !pause_noswitch) {
402		*pvcpu = -1;
403		return (VMEXIT_SWITCH);
404        } else {
405		return (VMEXIT_CONTINUE);
406	}
407}
408
409static int
410vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
411{
412	stats.vmexit_mtrap++;
413
414	return (VMEXIT_RESTART);
415}
416
417static int
418vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
419{
420
421	stats.vmexit_paging++;
422
423	if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) {
424		printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip);
425		return (VMEXIT_ABORT);
426	}
427
428	return (VMEXIT_CONTINUE);
429}
430
/*
 * SIGALRM handler installed by setup_timeslice().  It intentionally
 * does nothing with the signal.
 */
static void
sigalrm(int sig)
{

	(void)sig;
}
436
437static void
438setup_timeslice(void)
439{
440	struct sigaction sa;
441	struct itimerval itv;
442	int error;
443
444	/*
445	 * Setup a realtime timer to generate a SIGALRM at a
446	 * frequency of 'guest_tslice' ticks per second.
447	 */
448	sigemptyset(&sa.sa_mask);
449	sa.sa_flags = 0;
450	sa.sa_handler = sigalrm;
451
452	error = sigaction(SIGALRM, &sa, NULL);
453	assert(error == 0);
454
455	itv.it_interval.tv_sec = 0;
456	itv.it_interval.tv_usec = 1000000 / guest_tslice;
457	itv.it_value.tv_sec = 0;
458	itv.it_value.tv_usec = 1000000 / guest_tslice;
459
460	error = setitimer(ITIMER_REAL, &itv, NULL);
461	assert(error == 0);
462}
463
464static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
465	[VM_EXITCODE_INOUT]  = vmexit_inout,
466	[VM_EXITCODE_VMX]    = vmexit_vmx,
467	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
468	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
469	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
470	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
471	[VM_EXITCODE_PAGING] = vmexit_paging
472};
473
474static void
475vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
476{
477	int error, rc, prevcpu;
478
479	if (guest_vcpu_mux)
480		setup_timeslice();
481
482	if (pincpu >= 0) {
483		error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
484		assert(error == 0);
485	}
486
487	while (1) {
488		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
489		if (error != 0)
490			break;
491
492		prevcpu = vcpu;
493                rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
494                                                       &vcpu);
495		switch (rc) {
496                case VMEXIT_SWITCH:
497			assert(guest_vcpu_mux);
498			if (vcpu == -1) {
499				stats.cpu_switch_rotate++;
500				vcpu = fbsdrun_get_next_cpu(prevcpu);
501			} else {
502				stats.cpu_switch_direct++;
503			}
504			/* fall through */
505		case VMEXIT_CONTINUE:
506                        rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
507			break;
508		case VMEXIT_RESTART:
509                        rip = vmexit[vcpu].rip;
510			break;
511		case VMEXIT_RESET:
512			exit(0);
513		default:
514			exit(1);
515		}
516	}
517	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
518}
519
520
521int
522main(int argc, char *argv[])
523{
524	int c, error, gdb_port, inject_bkpt, tmp, err;
525	struct vmctx *ctx;
526	uint64_t rip;
527
528	inject_bkpt = 0;
529	progname = basename(argv[0]);
530	gdb_port = DEFAULT_GDB_PORT;
531	guest_ncpus = 1;
532
533	while ((c = getopt(argc, argv, "ehBHPxp:g:c:z:s:n:m:M:")) != -1) {
534		switch (c) {
535		case 'B':
536			inject_bkpt = 1;
537			break;
538		case 'x':
539			guest_vcpu_mux = 1;
540			break;
541		case 'p':
542			pincpu = atoi(optarg);
543			break;
544                case 'c':
545			guest_ncpus = atoi(optarg);
546			break;
547		case 'g':
548			gdb_port = atoi(optarg);
549			break;
550		case 'z':
551			guest_hz = atoi(optarg);
552			break;
553		case 't':
554			guest_tslice = atoi(optarg);
555			break;
556		case 's':
557			pci_parse_slot(optarg);
558			break;
559		case 'n':
560			pci_parse_name(optarg);
561			break;
562                case 'm':
563			lomem_sz = strtoul(optarg, NULL, 0) * MB;
564			break;
565                case 'M':
566			himem_sz = strtoul(optarg, NULL, 0) * MB;
567			break;
568		case 'H':
569			guest_vmexit_on_hlt = 1;
570			break;
571		case 'P':
572			guest_vmexit_on_pause = 1;
573			break;
574		case 'e':
575			strictio = 1;
576			break;
577		case 'h':
578			usage(0);
579		default:
580			usage(1);
581		}
582	}
583	argc -= optind;
584	argv += optind;
585
586	if (argc != 1)
587		usage(1);
588
589	/* No need to mux if guest is uni-processor */
590	if (guest_ncpus <= 1)
591		guest_vcpu_mux = 0;
592
593	/* vmexit on hlt if guest is muxed */
594	if (guest_vcpu_mux) {
595		guest_vmexit_on_hlt = 1;
596		guest_vmexit_on_pause = 1;
597	}
598
599	vmname = argv[0];
600
601	ctx = vm_open(vmname);
602	if (ctx == NULL) {
603		perror("vm_open");
604		exit(1);
605	}
606
607	if (fbsdrun_vmexit_on_hlt()) {
608		err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
609		if (err < 0) {
610			printf("VM exit on HLT not supported\n");
611			exit(1);
612		}
613		vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
614		handler[VM_EXITCODE_HLT] = vmexit_hlt;
615	}
616
617        if (fbsdrun_vmexit_on_pause()) {
618		/*
619		 * pause exit support required for this mode
620		 */
621		err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
622		if (err < 0) {
623			printf("SMP mux requested, no pause support\n");
624			exit(1);
625		}
626		vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
627		handler[VM_EXITCODE_PAUSE] = vmexit_pause;
628        }
629
630	if (lomem_sz != 0) {
631		lomem_addr = vm_map_memory(ctx, 0, lomem_sz);
632		if (lomem_addr == (char *) MAP_FAILED) {
633			lomem_sz = 0;
634		} else if (himem_sz != 0) {
635			himem_addr = vm_map_memory(ctx, 4*GB, himem_sz);
636			if (himem_addr == (char *) MAP_FAILED) {
637				lomem_sz = 0;
638				himem_sz = 0;
639			}
640		}
641	}
642
643	init_inout();
644	init_pci(ctx);
645
646	if (gdb_port != 0)
647		init_dbgport(gdb_port);
648
649	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
650	assert(error == 0);
651
652	if (inject_bkpt) {
653		error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
654		assert(error == 0);
655	}
656
657	/*
658	 * build the guest tables, MP etc.
659	 */
660	vm_build_tables(ctx, guest_ncpus, oem_tbl_start, oem_tbl_size);
661
662	/*
663	 * Add CPU 0
664	 */
665	fbsdrun_addcpu(ctx, BSP, rip);
666
667	/*
668	 * Head off to the main event dispatch loop
669	 */
670	mevent_dispatch();
671
672	exit(1);
673}
674