bhyverun.c revision 243640
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/types.h>
33221828Sgrehan#include <sys/mman.h>
34221828Sgrehan#include <sys/time.h>
35221828Sgrehan
36221828Sgrehan#include <machine/segments.h>
37221828Sgrehan
38221828Sgrehan#include <stdio.h>
39221828Sgrehan#include <stdlib.h>
40221828Sgrehan#include <libgen.h>
41221828Sgrehan#include <unistd.h>
42221828Sgrehan#include <assert.h>
43221828Sgrehan#include <errno.h>
44221828Sgrehan#include <signal.h>
45221828Sgrehan#include <pthread.h>
46242404Sgrehan#include <pthread_np.h>
47221828Sgrehan
48221828Sgrehan#include <machine/vmm.h>
49221828Sgrehan#include <vmmapi.h>
50221828Sgrehan
51221828Sgrehan#include "fbsdrun.h"
52243327Sgrehan#include "acpi.h"
53221828Sgrehan#include "inout.h"
54221828Sgrehan#include "dbgport.h"
55241744Sgrehan#include "mem.h"
56221828Sgrehan#include "mevent.h"
57242131Sgrehan#include "mptbl.h"
58221828Sgrehan#include "pci_emul.h"
59221828Sgrehan#include "xmsr.h"
60239045Sneel#include "ioapic.h"
61240912Sneel#include "spinup_ap.h"
62221828Sgrehan
/* Defaults used when -z / -t are not given on the command line. */
#define	DEFAULT_GUEST_HZ	100
#define	DEFAULT_GUEST_TSLICE	200

#define	GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */

/*
 * Return values of the vmexit handlers; vm_loop() uses them to decide how
 * (and on which vcpu) to resume the guest.
 */
#define	VMEXIT_SWITCH		0	/* force vcpu switch in mux mode */
#define	VMEXIT_CONTINUE		1	/* continue from next instruction */
#define	VMEXIT_RESTART		2	/* restart current instruction */
#define	VMEXIT_ABORT		3	/* abort the vm run loop */
#define	VMEXIT_RESET		4	/* guest machine has reset */

/* Size helpers. */
#define	MB		(1024UL * 1024)
#define	GB		(1024UL * MB)

/*
 * Common signature of all vmexit handlers.  The vcpu is passed by pointer
 * because a handler may overwrite it when it returns VMEXIT_SWITCH.
 */
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
78221828Sgrehan
79221828Sgrehanint guest_tslice = DEFAULT_GUEST_TSLICE;
80221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ;
81221828Sgrehanchar *vmname;
82221828Sgrehan
83221828Sgrehanu_long lomem_sz;
84221828Sgrehanu_long himem_sz;
85221828Sgrehan
86221828Sgrehanint guest_ncpus;
87221828Sgrehan
88221828Sgrehanstatic int pincpu = -1;
89221828Sgrehanstatic int guest_vcpu_mux;
90240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;
91221828Sgrehan
92221828Sgrehanstatic int foundcpus;
93221828Sgrehan
94222105Sgrehanstatic int strictio;
95222105Sgrehan
96243327Sgrehanstatic int acpi;
97243327Sgrehan
98221828Sgrehanstatic char *lomem_addr;
99221828Sgrehanstatic char *himem_addr;
100221828Sgrehan
101221828Sgrehanstatic char *progname;
102221828Sgrehanstatic const int BSP = 0;
103221828Sgrehan
104221828Sgrehanstatic int cpumask;
105221828Sgrehan
106221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
107221828Sgrehan
108221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU];
109221828Sgrehan
/*
 * Event counters maintained by the vmexit handlers and by vm_loop().
 */
struct fbsdstats {
	uint64_t	vmexit_bogus;		/* calls to vmexit_bogus() */
	uint64_t	vmexit_bogus_switch;	/* ... that switched vcpu */
	uint64_t	vmexit_hlt;		/* calls to vmexit_hlt() */
	uint64_t	vmexit_pause;		/* calls to vmexit_pause() */
	uint64_t	vmexit_mtrap;		/* calls to vmexit_mtrap() */
	uint64_t	vmexit_paging;		/* calls to vmexit_paging() */
	uint64_t	cpu_switch_rotate;	/* switches to the next vcpu */
	uint64_t	cpu_switch_direct;	/* switches to a chosen vcpu */
	int		io_reset;		/* guest resets seen on i/o */
} stats;
121221828Sgrehan
/*
 * Per-vcpu thread bookkeeping, filled in by fbsdrun_addcpu() and handed to
 * fbsdrun_start_thread() as its argument.
 */
struct mt_vmm_info {
	pthread_t	mt_thr;		/* thread running this vcpu */
	struct vmctx	*mt_ctx;	/* VM context passed to vm_loop() */
	int		mt_vcpu;	/* vcpu id */
} mt_vmm_info[VM_MAXCPU];
127221828Sgrehan
128221828Sgrehanstatic void
129221828Sgrehanusage(int code)
130221828Sgrehan{
131221828Sgrehan
132221828Sgrehan        fprintf(stderr,
133243327Sgrehan                "Usage: %s [-aehABHIP][-g <gdb port>][-z <hz>][-s <pci>]"
134239043Sneel		"[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem] <vm>\n"
135240943Sneel		"       -a: local apic is in XAPIC mode (default is X2APIC)\n"
136243327Sgrehan		"       -A: create an ACPI table\n"
137221828Sgrehan		"       -g: gdb port (default is %d and 0 means don't open)\n"
138221828Sgrehan		"       -c: # cpus (default 1)\n"
139221828Sgrehan		"       -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
140221828Sgrehan		"       -B: inject breakpoint exception on vm entry\n"
141221828Sgrehan		"       -H: vmexit from the guest on hlt\n"
142239043Sneel		"       -I: present an ioapic to the guest\n"
143221828Sgrehan		"       -P: vmexit from the guest on pause\n"
144222105Sgrehan		"	-e: exit on unhandled i/o access\n"
145221828Sgrehan		"       -h: help\n"
146221828Sgrehan		"       -z: guest hz (default is %d)\n"
147221828Sgrehan		"       -s: <slot,driver,configinfo> PCI slot config\n"
148234938Sgrehan		"       -S: <slot,driver,configinfo> legacy PCI slot config\n"
149221828Sgrehan		"       -m: lowmem in MB\n"
150221828Sgrehan		"       -M: highmem in MB\n"
151221828Sgrehan		"       -x: mux vcpus to 1 hcpu\n"
152221828Sgrehan		"       -t: mux vcpu timeslice hz (default %d)\n",
153221828Sgrehan		progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
154221828Sgrehan		DEFAULT_GUEST_TSLICE);
155221828Sgrehan	exit(code);
156221828Sgrehan}
157221828Sgrehan
158221828Sgrehanvoid *
159221828Sgrehanpaddr_guest2host(uintptr_t gaddr)
160221828Sgrehan{
161221828Sgrehan	if (lomem_sz == 0)
162221828Sgrehan		return (NULL);
163221828Sgrehan
164221828Sgrehan	if (gaddr < lomem_sz) {
165221828Sgrehan		return ((void *)(lomem_addr + gaddr));
166221828Sgrehan	} else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) {
167221828Sgrehan		return ((void *)(himem_addr + gaddr - 4*GB));
168221828Sgrehan	} else
169221828Sgrehan		return (NULL);
170221828Sgrehan}
171221828Sgrehan
172221828Sgrehanint
173240943Sneelfbsdrun_disable_x2apic(void)
174240943Sneel{
175240943Sneel
176240943Sneel	return (disable_x2apic);
177240943Sneel}
178240943Sneel
179240943Sneelint
180221828Sgrehanfbsdrun_vmexit_on_pause(void)
181221828Sgrehan{
182221828Sgrehan
183221828Sgrehan	return (guest_vmexit_on_pause);
184221828Sgrehan}
185221828Sgrehan
186221828Sgrehanint
187221828Sgrehanfbsdrun_vmexit_on_hlt(void)
188221828Sgrehan{
189221828Sgrehan
190221828Sgrehan	return (guest_vmexit_on_hlt);
191221828Sgrehan}
192221828Sgrehan
193221828Sgrehanint
194221828Sgrehanfbsdrun_muxed(void)
195221828Sgrehan{
196221828Sgrehan
197221828Sgrehan	return (guest_vcpu_mux);
198221828Sgrehan}
199221828Sgrehan
200221942Sjhbstatic void *
201221828Sgrehanfbsdrun_start_thread(void *param)
202221828Sgrehan{
203242404Sgrehan	char tname[MAXCOMLEN + 1];
204242404Sgrehan	struct mt_vmm_info *mtp;
205221828Sgrehan	int vcpu;
206221828Sgrehan
207242404Sgrehan	mtp = param;
208221828Sgrehan	vcpu = mtp->mt_vcpu;
209242404Sgrehan
210242404Sgrehan	snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu);
211242404Sgrehan	pthread_set_name_np(mtp->mt_thr, tname);
212242404Sgrehan
213221828Sgrehan	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
214221828Sgrehan
215221828Sgrehan	/* not reached */
216221828Sgrehan	exit(1);
217221828Sgrehan	return (NULL);
218221828Sgrehan}
219221828Sgrehan
220221828Sgrehanvoid
221221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
222221828Sgrehan{
223221828Sgrehan	int error;
224221828Sgrehan
225221828Sgrehan	if (cpumask & (1 << vcpu)) {
226242385Sgrehan		fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
227242385Sgrehan		    vcpu);
228221828Sgrehan		exit(1);
229221828Sgrehan	}
230221828Sgrehan
231221828Sgrehan	cpumask |= 1 << vcpu;
232221828Sgrehan	foundcpus++;
233221828Sgrehan
234221828Sgrehan	/*
235221828Sgrehan	 * Set up the vmexit struct to allow execution to start
236221828Sgrehan	 * at the given RIP
237221828Sgrehan	 */
238221828Sgrehan	vmexit[vcpu].rip = rip;
239221828Sgrehan	vmexit[vcpu].inst_length = 0;
240221828Sgrehan
241221828Sgrehan	if (vcpu == BSP || !guest_vcpu_mux){
242221828Sgrehan		mt_vmm_info[vcpu].mt_ctx = ctx;
243221828Sgrehan		mt_vmm_info[vcpu].mt_vcpu = vcpu;
244221828Sgrehan
245221828Sgrehan		error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
246221828Sgrehan				fbsdrun_start_thread, &mt_vmm_info[vcpu]);
247221828Sgrehan		assert(error == 0);
248221828Sgrehan	}
249221828Sgrehan}
250221828Sgrehan
251221828Sgrehanstatic int
252221828Sgrehanfbsdrun_get_next_cpu(int curcpu)
253221828Sgrehan{
254221828Sgrehan
255221828Sgrehan	/*
256221828Sgrehan	 * Get the next available CPU. Assumes they arrive
257221828Sgrehan	 * in ascending order with no gaps.
258221828Sgrehan	 */
259221828Sgrehan	return ((curcpu + 1) % foundcpus);
260221828Sgrehan}
261221828Sgrehan
262221942Sjhbstatic int
263221828Sgrehanvmexit_catch_reset(void)
264221828Sgrehan{
265221828Sgrehan        stats.io_reset++;
266221828Sgrehan        return (VMEXIT_RESET);
267221828Sgrehan}
268221828Sgrehan
269221942Sjhbstatic int
270221828Sgrehanvmexit_catch_inout(void)
271221828Sgrehan{
272221828Sgrehan	return (VMEXIT_ABORT);
273221828Sgrehan}
274221828Sgrehan
275221942Sjhbstatic int
276221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
277221828Sgrehan		     uint32_t eax)
278221828Sgrehan{
279221828Sgrehan#if PG_DEBUG /* put all types of debug here */
280221828Sgrehan        if (eax == 0) {
281221828Sgrehan		pause_noswitch = 1;
282221828Sgrehan	} else if (eax == 1) {
283221828Sgrehan		pause_noswitch = 0;
284221828Sgrehan	} else {
285221828Sgrehan		pause_noswitch = 0;
286221828Sgrehan		if (eax == 5) {
287221828Sgrehan			vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
288221828Sgrehan		}
289221828Sgrehan	}
290221828Sgrehan#endif
291221828Sgrehan        return (VMEXIT_CONTINUE);
292221828Sgrehan}
293221828Sgrehan
294221828Sgrehanstatic int
295221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
296221828Sgrehan{
297221828Sgrehan	int error;
298221828Sgrehan	int bytes, port, in, out;
299221828Sgrehan	uint32_t eax;
300221828Sgrehan	int vcpu;
301221828Sgrehan
302221828Sgrehan	vcpu = *pvcpu;
303221828Sgrehan
304221828Sgrehan	port = vme->u.inout.port;
305221828Sgrehan	bytes = vme->u.inout.bytes;
306221828Sgrehan	eax = vme->u.inout.eax;
307221828Sgrehan	in = vme->u.inout.in;
308221828Sgrehan	out = !in;
309221828Sgrehan
310221828Sgrehan	/* We don't deal with these */
311221828Sgrehan	if (vme->u.inout.string || vme->u.inout.rep)
312221828Sgrehan		return (VMEXIT_ABORT);
313221828Sgrehan
314221828Sgrehan	/* Special case of guest reset */
315221828Sgrehan	if (out && port == 0x64 && (uint8_t)eax == 0xFE)
316221828Sgrehan		return (vmexit_catch_reset());
317221828Sgrehan
318221828Sgrehan        /* Extra-special case of host notifications */
319221828Sgrehan        if (out && port == GUEST_NIO_PORT)
320221828Sgrehan                return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
321221828Sgrehan
322222105Sgrehan	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
323221828Sgrehan	if (error == 0 && in)
324221828Sgrehan		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
325221828Sgrehan
326221828Sgrehan	if (error == 0)
327221828Sgrehan		return (VMEXIT_CONTINUE);
328221828Sgrehan	else {
329221828Sgrehan		fprintf(stderr, "Unhandled %s%c 0x%04x\n",
330221828Sgrehan			in ? "in" : "out",
331221828Sgrehan			bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
332221828Sgrehan		return (vmexit_catch_inout());
333221828Sgrehan	}
334221828Sgrehan}
335221828Sgrehan
336221828Sgrehanstatic int
337221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
338221828Sgrehan{
339242385Sgrehan	fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code,
340242385Sgrehan	    *pvcpu);
341221828Sgrehan	return (VMEXIT_ABORT);
342221828Sgrehan}
343221828Sgrehan
344221828Sgrehanstatic int
345221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
346221828Sgrehan{
347221828Sgrehan	int newcpu;
348221828Sgrehan	int retval = VMEXIT_CONTINUE;
349221828Sgrehan
350221828Sgrehan	newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval);
351221828Sgrehan
352221828Sgrehan	if (guest_vcpu_mux && *pvcpu != newcpu) {
353221828Sgrehan                retval = VMEXIT_SWITCH;
354221828Sgrehan                *pvcpu = newcpu;
355221828Sgrehan        }
356221828Sgrehan
357221828Sgrehan        return (retval);
358221828Sgrehan}
359221828Sgrehan
360221828Sgrehanstatic int
361240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
362240912Sneel{
363240912Sneel	int newcpu;
364240912Sneel	int retval = VMEXIT_CONTINUE;
365240912Sneel
366240912Sneel	newcpu = spinup_ap(ctx, *pvcpu,
367240912Sneel			   vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
368240912Sneel
369240912Sneel	if (guest_vcpu_mux && *pvcpu != newcpu) {
370240912Sneel		retval = VMEXIT_SWITCH;
371240912Sneel		*pvcpu = newcpu;
372240912Sneel	}
373240912Sneel
374240912Sneel	return (retval);
375240912Sneel}
376240912Sneel
377240912Sneelstatic int
378221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
379221828Sgrehan{
380221828Sgrehan
381242385Sgrehan	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
382242385Sgrehan	fprintf(stderr, "\treason\t\tVMX\n");
383242385Sgrehan	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
384242385Sgrehan	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
385242385Sgrehan	fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error);
386242385Sgrehan	fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
387242385Sgrehan	fprintf(stderr, "\tqualification\t0x%016lx\n",
388242385Sgrehan	    vmexit->u.vmx.exit_qualification);
389221828Sgrehan
390221828Sgrehan	return (VMEXIT_ABORT);
391221828Sgrehan}
392221828Sgrehan
393221828Sgrehanstatic int bogus_noswitch = 1;
394221828Sgrehan
395221828Sgrehanstatic int
396221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
397221828Sgrehan{
398221828Sgrehan	stats.vmexit_bogus++;
399221828Sgrehan
400221828Sgrehan	if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
401221828Sgrehan		return (VMEXIT_RESTART);
402221828Sgrehan	} else {
403221828Sgrehan		stats.vmexit_bogus_switch++;
404221828Sgrehan		vmexit->inst_length = 0;
405221828Sgrehan		*pvcpu = -1;
406221828Sgrehan		return (VMEXIT_SWITCH);
407221828Sgrehan	}
408221828Sgrehan}
409221828Sgrehan
410221828Sgrehanstatic int
411221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
412221828Sgrehan{
413221828Sgrehan	stats.vmexit_hlt++;
414221828Sgrehan	if (fbsdrun_muxed()) {
415221828Sgrehan		*pvcpu = -1;
416221828Sgrehan		return (VMEXIT_SWITCH);
417221828Sgrehan	} else {
418221828Sgrehan		/*
419221828Sgrehan		 * Just continue execution with the next instruction. We use
420221828Sgrehan		 * the HLT VM exit as a way to be friendly with the host
421221828Sgrehan		 * scheduler.
422221828Sgrehan		 */
423221828Sgrehan		return (VMEXIT_CONTINUE);
424221828Sgrehan	}
425221828Sgrehan}
426221828Sgrehan
427221828Sgrehanstatic int pause_noswitch;
428221828Sgrehan
429221828Sgrehanstatic int
430221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
431221828Sgrehan{
432221828Sgrehan	stats.vmexit_pause++;
433221828Sgrehan
434221828Sgrehan	if (fbsdrun_muxed() && !pause_noswitch) {
435221828Sgrehan		*pvcpu = -1;
436221828Sgrehan		return (VMEXIT_SWITCH);
437221828Sgrehan        } else {
438221828Sgrehan		return (VMEXIT_CONTINUE);
439221828Sgrehan	}
440221828Sgrehan}
441221828Sgrehan
442221828Sgrehanstatic int
443221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
444221828Sgrehan{
445221828Sgrehan	stats.vmexit_mtrap++;
446221828Sgrehan
447221828Sgrehan	return (VMEXIT_RESTART);
448221828Sgrehan}
449221828Sgrehan
450234761Sgrehanstatic int
451234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
452234761Sgrehan{
453241744Sgrehan	int err;
454234761Sgrehan	stats.vmexit_paging++;
455234761Sgrehan
456241744Sgrehan	err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip,
457243640Sneel			  vmexit->u.paging.cr3, vmexit->u.paging.rwx,
458243640Sneel			  &vmexit->u.paging.vie);
459241744Sgrehan
460241744Sgrehan	if (err) {
461241744Sgrehan		if (err == EINVAL) {
462242385Sgrehan			fprintf(stderr,
463242385Sgrehan			    "Failed to emulate instruction at 0x%lx\n",
464242385Sgrehan			    vmexit->rip);
465241744Sgrehan		} else if (err == ESRCH) {
466242385Sgrehan			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
467242385Sgrehan			    vmexit->u.paging.gpa);
468241744Sgrehan		}
469241744Sgrehan
470234761Sgrehan		return (VMEXIT_ABORT);
471234761Sgrehan	}
472234761Sgrehan
473234761Sgrehan	return (VMEXIT_CONTINUE);
474234761Sgrehan}
475234761Sgrehan
/*
 * SIGALRM handler installed by setup_timeslice().  It intentionally does
 * nothing.
 * NOTE(review): presumably the signal exists only to interrupt whatever
 * the vcpu thread is blocked in -- confirm against vm_run().
 */
static void
sigalrm(int sig)
{
	(void)sig;
}
481221828Sgrehan
482221828Sgrehanstatic void
483221828Sgrehansetup_timeslice(void)
484221828Sgrehan{
485221828Sgrehan	struct sigaction sa;
486221828Sgrehan	struct itimerval itv;
487221828Sgrehan	int error;
488221828Sgrehan
489221828Sgrehan	/*
490221828Sgrehan	 * Setup a realtime timer to generate a SIGALRM at a
491221828Sgrehan	 * frequency of 'guest_tslice' ticks per second.
492221828Sgrehan	 */
493221828Sgrehan	sigemptyset(&sa.sa_mask);
494221828Sgrehan	sa.sa_flags = 0;
495221828Sgrehan	sa.sa_handler = sigalrm;
496221828Sgrehan
497221828Sgrehan	error = sigaction(SIGALRM, &sa, NULL);
498221828Sgrehan	assert(error == 0);
499221828Sgrehan
500221828Sgrehan	itv.it_interval.tv_sec = 0;
501221828Sgrehan	itv.it_interval.tv_usec = 1000000 / guest_tslice;
502221828Sgrehan	itv.it_value.tv_sec = 0;
503221828Sgrehan	itv.it_value.tv_usec = 1000000 / guest_tslice;
504221828Sgrehan
505221828Sgrehan	error = setitimer(ITIMER_REAL, &itv, NULL);
506221828Sgrehan	assert(error == 0);
507221828Sgrehan}
508221828Sgrehan
509221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = {
510234761Sgrehan	[VM_EXITCODE_INOUT]  = vmexit_inout,
511234761Sgrehan	[VM_EXITCODE_VMX]    = vmexit_vmx,
512234761Sgrehan	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
513234761Sgrehan	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
514234761Sgrehan	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
515234761Sgrehan	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
516240912Sneel	[VM_EXITCODE_PAGING] = vmexit_paging,
517240912Sneel	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
518221828Sgrehan};
519221828Sgrehan
520221828Sgrehanstatic void
521221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
522221828Sgrehan{
523221828Sgrehan	int error, rc, prevcpu;
524221828Sgrehan
525221828Sgrehan	if (guest_vcpu_mux)
526221828Sgrehan		setup_timeslice();
527221828Sgrehan
528221828Sgrehan	if (pincpu >= 0) {
529221828Sgrehan		error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
530221828Sgrehan		assert(error == 0);
531221828Sgrehan	}
532221828Sgrehan
533221828Sgrehan	while (1) {
534221828Sgrehan		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
535241490Sneel		if (error != 0) {
536241490Sneel			/*
537241490Sneel			 * It is possible that 'vmmctl' or some other process
538241490Sneel			 * has transitioned the vcpu to CANNOT_RUN state right
539241490Sneel			 * before we tried to transition it to RUNNING.
540241490Sneel			 *
541241490Sneel			 * This is expected to be temporary so just retry.
542241490Sneel			 */
543241490Sneel			if (errno == EBUSY)
544241490Sneel				continue;
545241490Sneel			else
546241490Sneel				break;
547241490Sneel		}
548221828Sgrehan
549221828Sgrehan		prevcpu = vcpu;
550221828Sgrehan                rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
551221828Sgrehan                                                       &vcpu);
552221828Sgrehan		switch (rc) {
553221828Sgrehan                case VMEXIT_SWITCH:
554221828Sgrehan			assert(guest_vcpu_mux);
555221828Sgrehan			if (vcpu == -1) {
556221828Sgrehan				stats.cpu_switch_rotate++;
557221828Sgrehan				vcpu = fbsdrun_get_next_cpu(prevcpu);
558221828Sgrehan			} else {
559221828Sgrehan				stats.cpu_switch_direct++;
560221828Sgrehan			}
561221828Sgrehan			/* fall through */
562221828Sgrehan		case VMEXIT_CONTINUE:
563221828Sgrehan                        rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
564221828Sgrehan			break;
565221828Sgrehan		case VMEXIT_RESTART:
566221828Sgrehan                        rip = vmexit[vcpu].rip;
567221828Sgrehan			break;
568221828Sgrehan		case VMEXIT_RESET:
569221828Sgrehan			exit(0);
570221828Sgrehan		default:
571221828Sgrehan			exit(1);
572221828Sgrehan		}
573221828Sgrehan	}
574221828Sgrehan	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
575221828Sgrehan}
576221828Sgrehan
577221828Sgrehan
578221828Sgrehanint
579221828Sgrehanmain(int argc, char *argv[])
580221828Sgrehan{
581242192Sneel	int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons;
582221828Sgrehan	struct vmctx *ctx;
583221828Sgrehan	uint64_t rip;
584221828Sgrehan
585242192Sneel	bvmcons = 0;
586221828Sgrehan	inject_bkpt = 0;
587221828Sgrehan	progname = basename(argv[0]);
588221828Sgrehan	gdb_port = DEFAULT_GDB_PORT;
589221828Sgrehan	guest_ncpus = 1;
590239043Sneel	ioapic = 0;
591221828Sgrehan
592243327Sgrehan	while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:n:m:M:")) != -1) {
593221828Sgrehan		switch (c) {
594240943Sneel		case 'a':
595240943Sneel			disable_x2apic = 1;
596240943Sneel			break;
597243327Sgrehan		case 'A':
598243327Sgrehan			acpi = 1;
599243327Sgrehan			break;
600242192Sneel		case 'b':
601242192Sneel			bvmcons = 1;
602242192Sneel			break;
603221828Sgrehan		case 'B':
604221828Sgrehan			inject_bkpt = 1;
605221828Sgrehan			break;
606221828Sgrehan		case 'x':
607221828Sgrehan			guest_vcpu_mux = 1;
608221828Sgrehan			break;
609221828Sgrehan		case 'p':
610221828Sgrehan			pincpu = atoi(optarg);
611221828Sgrehan			break;
612221828Sgrehan                case 'c':
613221828Sgrehan			guest_ncpus = atoi(optarg);
614221828Sgrehan			break;
615221828Sgrehan		case 'g':
616221828Sgrehan			gdb_port = atoi(optarg);
617221828Sgrehan			break;
618221828Sgrehan		case 'z':
619221828Sgrehan			guest_hz = atoi(optarg);
620221828Sgrehan			break;
621221828Sgrehan		case 't':
622221828Sgrehan			guest_tslice = atoi(optarg);
623221828Sgrehan			break;
624221828Sgrehan		case 's':
625234938Sgrehan			pci_parse_slot(optarg, 0);
626221828Sgrehan			break;
627234938Sgrehan		case 'S':
628234938Sgrehan			pci_parse_slot(optarg, 1);
629234938Sgrehan			break;
630221828Sgrehan                case 'm':
631221828Sgrehan			lomem_sz = strtoul(optarg, NULL, 0) * MB;
632221828Sgrehan			break;
633221828Sgrehan                case 'M':
634221828Sgrehan			himem_sz = strtoul(optarg, NULL, 0) * MB;
635221828Sgrehan			break;
636221828Sgrehan		case 'H':
637221828Sgrehan			guest_vmexit_on_hlt = 1;
638221828Sgrehan			break;
639239043Sneel		case 'I':
640239043Sneel			ioapic = 1;
641239043Sneel			break;
642221828Sgrehan		case 'P':
643221828Sgrehan			guest_vmexit_on_pause = 1;
644221828Sgrehan			break;
645222105Sgrehan		case 'e':
646222105Sgrehan			strictio = 1;
647222105Sgrehan			break;
648221828Sgrehan		case 'h':
649221828Sgrehan			usage(0);
650221828Sgrehan		default:
651221828Sgrehan			usage(1);
652221828Sgrehan		}
653221828Sgrehan	}
654221828Sgrehan	argc -= optind;
655221828Sgrehan	argv += optind;
656221828Sgrehan
657221828Sgrehan	if (argc != 1)
658221828Sgrehan		usage(1);
659221828Sgrehan
660221828Sgrehan	/* No need to mux if guest is uni-processor */
661221828Sgrehan	if (guest_ncpus <= 1)
662221828Sgrehan		guest_vcpu_mux = 0;
663221828Sgrehan
664242385Sgrehan	if (guest_ncpus > VM_MAXCPU) {
665242385Sgrehan		fprintf(stderr, "%d vCPUs requested, max %d\n",
666242385Sgrehan		    guest_ncpus, VM_MAXCPU);
667242385Sgrehan		exit(1);
668242385Sgrehan	}
669242385Sgrehan
670221828Sgrehan	/* vmexit on hlt if guest is muxed */
671221828Sgrehan	if (guest_vcpu_mux) {
672221828Sgrehan		guest_vmexit_on_hlt = 1;
673221828Sgrehan		guest_vmexit_on_pause = 1;
674221828Sgrehan	}
675221828Sgrehan
676221828Sgrehan	vmname = argv[0];
677221828Sgrehan
678221828Sgrehan	ctx = vm_open(vmname);
679221828Sgrehan	if (ctx == NULL) {
680221828Sgrehan		perror("vm_open");
681221828Sgrehan		exit(1);
682221828Sgrehan	}
683221828Sgrehan
684221828Sgrehan	if (fbsdrun_vmexit_on_hlt()) {
685221828Sgrehan		err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
686221828Sgrehan		if (err < 0) {
687242385Sgrehan			fprintf(stderr, "VM exit on HLT not supported\n");
688221828Sgrehan			exit(1);
689221828Sgrehan		}
690221828Sgrehan		vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
691221828Sgrehan		handler[VM_EXITCODE_HLT] = vmexit_hlt;
692221828Sgrehan	}
693221828Sgrehan
694221828Sgrehan        if (fbsdrun_vmexit_on_pause()) {
695221828Sgrehan		/*
696221828Sgrehan		 * pause exit support required for this mode
697221828Sgrehan		 */
698221828Sgrehan		err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
699221828Sgrehan		if (err < 0) {
700242385Sgrehan			fprintf(stderr,
701242385Sgrehan			    "SMP mux requested, no pause support\n");
702221828Sgrehan			exit(1);
703221828Sgrehan		}
704221828Sgrehan		vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
705221828Sgrehan		handler[VM_EXITCODE_PAUSE] = vmexit_pause;
706221828Sgrehan        }
707221828Sgrehan
708240943Sneel	if (fbsdrun_disable_x2apic())
709240943Sneel		err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED);
710240943Sneel	else
711240943Sneel		err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED);
712240943Sneel
713240943Sneel	if (err) {
714242385Sgrehan		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
715240943Sneel		exit(1);
716240943Sneel	}
717240943Sneel
718221828Sgrehan	if (lomem_sz != 0) {
719221828Sgrehan		lomem_addr = vm_map_memory(ctx, 0, lomem_sz);
720221828Sgrehan		if (lomem_addr == (char *) MAP_FAILED) {
721221828Sgrehan			lomem_sz = 0;
722221828Sgrehan		} else if (himem_sz != 0) {
723221828Sgrehan			himem_addr = vm_map_memory(ctx, 4*GB, himem_sz);
724221828Sgrehan			if (himem_addr == (char *) MAP_FAILED) {
725221828Sgrehan				lomem_sz = 0;
726221828Sgrehan				himem_sz = 0;
727221828Sgrehan			}
728221828Sgrehan		}
729221828Sgrehan	}
730221828Sgrehan
731221828Sgrehan	init_inout();
732221828Sgrehan	init_pci(ctx);
733239045Sneel	if (ioapic)
734239045Sneel		ioapic_init(0);
735221828Sgrehan
736221828Sgrehan	if (gdb_port != 0)
737221828Sgrehan		init_dbgport(gdb_port);
738221828Sgrehan
739242192Sneel	if (bvmcons)
740242192Sneel		init_bvmcons();
741242192Sneel
742221828Sgrehan	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
743221828Sgrehan	assert(error == 0);
744221828Sgrehan
745221828Sgrehan	if (inject_bkpt) {
746221828Sgrehan		error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
747221828Sgrehan		assert(error == 0);
748221828Sgrehan	}
749221828Sgrehan
750221828Sgrehan	/*
751221828Sgrehan	 * build the guest tables, MP etc.
752221828Sgrehan	 */
753242131Sgrehan	mptable_build(ctx, guest_ncpus, ioapic);
754221828Sgrehan
755243327Sgrehan	if (acpi) {
756243327Sgrehan		error = acpi_build(ctx, guest_ncpus, ioapic);
757243327Sgrehan		assert(error == 0);
758243327Sgrehan	}
759243327Sgrehan
760221828Sgrehan	/*
761221828Sgrehan	 * Add CPU 0
762221828Sgrehan	 */
763221828Sgrehan	fbsdrun_addcpu(ctx, BSP, rip);
764221828Sgrehan
765221828Sgrehan	/*
766221828Sgrehan	 * Head off to the main event dispatch loop
767221828Sgrehan	 */
768221828Sgrehan	mevent_dispatch();
769221828Sgrehan
770221828Sgrehan	exit(1);
771221828Sgrehan}
772