bhyverun.c revision 284539
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 284539 2015-06-18 06:00:17Z neel $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 284539 2015-06-18 06:00:17Z neel $"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36259081Sneel#include <machine/atomic.h> 37221828Sgrehan#include <machine/segments.h> 38221828Sgrehan 39221828Sgrehan#include <stdio.h> 40221828Sgrehan#include <stdlib.h> 41257018Sneel#include <string.h> 42256176Sneel#include <err.h> 43221828Sgrehan#include <libgen.h> 44221828Sgrehan#include <unistd.h> 45221828Sgrehan#include <assert.h> 46221828Sgrehan#include <errno.h> 47221828Sgrehan#include <pthread.h> 48242404Sgrehan#include <pthread_np.h> 49256176Sneel#include <sysexits.h> 50284539Sneel#include <stdbool.h> 51221828Sgrehan 52221828Sgrehan#include <machine/vmm.h> 53221828Sgrehan#include <vmmapi.h> 54221828Sgrehan 55244167Sgrehan#include "bhyverun.h" 56243327Sgrehan#include "acpi.h" 57221828Sgrehan#include "inout.h" 58221828Sgrehan#include "dbgport.h" 59261268Sjhb#include "ioapic.h" 60241744Sgrehan#include "mem.h" 61221828Sgrehan#include "mevent.h" 62242131Sgrehan#include "mptbl.h" 63221828Sgrehan#include "pci_emul.h" 64266125Sjhb#include "pci_irq.h" 65257293Sneel#include "pci_lpc.h" 66262744Stychon#include "smbiostbl.h" 67221828Sgrehan#include "xmsr.h" 68240912Sneel#include "spinup_ap.h" 69253181Sgrehan#include "rtc.h" 70221828Sgrehan 71221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 72221828Sgrehan 73221828Sgrehan#define MB (1024UL * 1024) 74221828Sgrehan#define GB (1024UL * MB) 75221828Sgrehan 76221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 77268777Sneelextern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 78221828Sgrehan 79221828Sgrehanchar *vmname; 80221828Sgrehan 81221828Sgrehanint guest_ncpus; 82262744Stychonchar *guest_uuid_str; 83221828Sgrehan 84262236Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 85256711Sgrehanstatic int virtio_msix = 1; 86262236Sneelstatic int x2apic_mode = 0; /* default is xAPIC */ 87221828Sgrehan 88222105Sgrehanstatic int strictio; 89259635Sneelstatic int strictmsr = 1; 90222105Sgrehan 91243327Sgrehanstatic int acpi; 92243327Sgrehan 93221828Sgrehanstatic char *progname; 94221828Sgrehanstatic const int BSP = 0; 95221828Sgrehan 96263432Sneelstatic cpuset_t cpumask; 97221828Sgrehan 98221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 99221828Sgrehan 100269042Sneelstatic struct vm_exit vmexit[VM_MAXCPU]; 101221828Sgrehan 102256062Sgrehanstruct bhyvestats { 103221828Sgrehan uint64_t vmexit_bogus; 104283657Sneel uint64_t vmexit_reqidle; 105221828Sgrehan uint64_t vmexit_hlt; 106221828Sgrehan uint64_t vmexit_pause; 107221828Sgrehan uint64_t vmexit_mtrap; 108256072Sneel uint64_t vmexit_inst_emul; 109221828Sgrehan uint64_t cpu_switch_rotate; 110221828Sgrehan uint64_t cpu_switch_direct; 111221828Sgrehan} stats; 112221828Sgrehan 113221828Sgrehanstruct mt_vmm_info { 114221828Sgrehan pthread_t mt_thr; 115221828Sgrehan struct vmctx *mt_ctx; 116221828Sgrehan int mt_vcpu; 117221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 118221828Sgrehan 119265376Sneelstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 120265376Sneel 121221828Sgrehanstatic void 122221828Sgrehanusage(int code) 123221828Sgrehan{ 124221828Sgrehan 125221828Sgrehan fprintf(stderr, 126284539Sneel "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 127267959Sjhb " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 128262236Sneel " -a: local apic is in xAPIC mode (deprecated)\n" 129267934Sjhb " -A: create ACPI tables\n" 130221828Sgrehan " -c: # cpus (default 1)\n" 131265951Sneel " -C: include guest memory in core file\n" 132257018Sneel " -e: exit on unhandled I/O access\n" 133267959Sjhb " -g: gdb port\n" 134221828Sgrehan " -h: help\n" 135267959Sjhb " -H: vmexit from the guest on hlt\n" 136257293Sneel " -l: LPC device configuration\n" 137259635Sneel " -m: memory size in MB\n" 138267959Sjhb " -p: pin 'vcpu' to 'hostcpu'\n" 139267959Sjhb " -P: vmexit from the guest on pause\n" 140267959Sjhb " -s: <slot,driver,configinfo> PCI slot config\n" 141284539Sneel " -S: guest memory cannot be swapped\n" 142279225Sneel " -u: RTC keeps UTC time\n" 143267959Sjhb " -U: uuid\n" 144262236Sneel " -w: ignore unimplemented MSRs\n" 145267959Sjhb " -W: force virtio to use single-vector MSI\n" 146262744Stychon " -x: local apic is in x2APIC mode\n" 147267959Sjhb " -Y: disable MPtable generation\n", 148257018Sneel progname, (int)strlen(progname), ""); 149256062Sgrehan 150221828Sgrehan exit(code); 151221828Sgrehan} 152221828Sgrehan 153265376Sneelstatic int 154265376Sneelpincpu_parse(const char *opt) 155265376Sneel{ 156265376Sneel int vcpu, pcpu; 157265376Sneel 158265376Sneel if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 159265376Sneel fprintf(stderr, "invalid format: %s\n", opt); 160265376Sneel return (-1); 161265376Sneel } 162265376Sneel 163265376Sneel if (vcpu < 0 || vcpu >= VM_MAXCPU) { 164265376Sneel fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 165265376Sneel vcpu, VM_MAXCPU - 1); 166265376Sneel return (-1); 167265376Sneel } 168265376Sneel 169265376Sneel if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 170265376Sneel fprintf(stderr, "hostcpu '%d' outside valid range from " 171265376Sneel "0 to %d\n", pcpu, CPU_SETSIZE - 1); 172265376Sneel return (-1); 173265376Sneel } 174265376Sneel 175265376Sneel if (vcpumap[vcpu] == NULL) { 176265376Sneel if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 177265376Sneel perror("malloc"); 178265376Sneel return (-1); 179265376Sneel } 180265376Sneel CPU_ZERO(vcpumap[vcpu]); 181265376Sneel } 182265376Sneel CPU_SET(pcpu, vcpumap[vcpu]); 183265376Sneel return (0); 184265376Sneel} 185265376Sneel 186269042Sneelvoid 187269042Sneelvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 188269042Sneel int errcode) 189269042Sneel{ 190269042Sneel struct vmctx *ctx; 191277310Sneel int error, restart_instruction; 192269042Sneel 193269042Sneel ctx = arg; 194277310Sneel restart_instruction = 1; 195277310Sneel 196277310Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 197277310Sneel restart_instruction); 198269042Sneel assert(error == 0); 199269042Sneel} 200269042Sneel 201221828Sgrehanvoid * 202248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 203221828Sgrehan{ 204221828Sgrehan 205248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 206221828Sgrehan} 207221828Sgrehan 208221828Sgrehanint 209221828Sgrehanfbsdrun_vmexit_on_pause(void) 210221828Sgrehan{ 211221828Sgrehan 212221828Sgrehan return (guest_vmexit_on_pause); 213221828Sgrehan} 214221828Sgrehan 215221828Sgrehanint 216221828Sgrehanfbsdrun_vmexit_on_hlt(void) 217221828Sgrehan{ 218221828Sgrehan 219221828Sgrehan return (guest_vmexit_on_hlt); 220221828Sgrehan} 221221828Sgrehan 222256711Sgrehanint 223256711Sgrehanfbsdrun_virtio_msix(void) 224256711Sgrehan{ 225256711Sgrehan 226256711Sgrehan return (virtio_msix); 227256711Sgrehan} 228256711Sgrehan 229221942Sjhbstatic void * 230221828Sgrehanfbsdrun_start_thread(void *param) 231221828Sgrehan{ 232242404Sgrehan char tname[MAXCOMLEN + 1]; 233242404Sgrehan struct mt_vmm_info *mtp; 234221828Sgrehan int vcpu; 235221828Sgrehan 236242404Sgrehan mtp = param; 237221828Sgrehan vcpu = mtp->mt_vcpu; 238242404Sgrehan 239257729Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 240242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 241242404Sgrehan 242221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 243221828Sgrehan 244221828Sgrehan /* not reached */ 245221828Sgrehan exit(1); 246221828Sgrehan return (NULL); 247221828Sgrehan} 248221828Sgrehan 249221828Sgrehanvoid 250263432Sneelfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 251221828Sgrehan{ 252221828Sgrehan int error; 253221828Sgrehan 254263432Sneel assert(fromcpu == BSP); 255221828Sgrehan 256266933Sneel /* 257266933Sneel * The 'newcpu' must be activated in the context of 'fromcpu'. If 258266933Sneel * vm_activate_cpu() is delayed until newcpu's pthread starts running 259266933Sneel * then vmm.ko is out-of-sync with bhyve and this can create a race 260266933Sneel * with vm_suspend(). 261266933Sneel */ 262266933Sneel error = vm_activate_cpu(ctx, newcpu); 263266933Sneel assert(error == 0); 264266933Sneel 265263432Sneel CPU_SET_ATOMIC(newcpu, &cpumask); 266221828Sgrehan 267221828Sgrehan /* 268221828Sgrehan * Set up the vmexit struct to allow execution to start 269221828Sgrehan * at the given RIP 270221828Sgrehan */ 271263432Sneel vmexit[newcpu].rip = rip; 272263432Sneel vmexit[newcpu].inst_length = 0; 273221828Sgrehan 274263432Sneel mt_vmm_info[newcpu].mt_ctx = ctx; 275263432Sneel mt_vmm_info[newcpu].mt_vcpu = newcpu; 276256072Sneel 277263432Sneel error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 278263432Sneel fbsdrun_start_thread, &mt_vmm_info[newcpu]); 279256072Sneel assert(error == 0); 280221828Sgrehan} 281221828Sgrehan 282221828Sgrehanstatic int 283259081Sneelfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 284259081Sneel{ 285259081Sneel 286263432Sneel if (!CPU_ISSET(vcpu, &cpumask)) { 287265366Sneel fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 288259081Sneel exit(1); 289259081Sneel } 290259081Sneel 291263432Sneel CPU_CLR_ATOMIC(vcpu, &cpumask); 292263432Sneel return (CPU_EMPTY(&cpumask)); 293259081Sneel} 294259081Sneel 295259081Sneelstatic int 296221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 297221828Sgrehan uint32_t eax) 298221828Sgrehan{ 299256062Sgrehan#if BHYVE_DEBUG 300256062Sgrehan /* 301256062Sgrehan * put guest-driven debug here 302256062Sgrehan */ 303221828Sgrehan#endif 304221828Sgrehan return (VMEXIT_CONTINUE); 305221828Sgrehan} 306221828Sgrehan 307221828Sgrehanstatic int 308221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 309221828Sgrehan{ 310221828Sgrehan int error; 311266573Sneel int bytes, port, in, out, string; 312221828Sgrehan int vcpu; 313221828Sgrehan 314221828Sgrehan vcpu = *pvcpu; 315221828Sgrehan 316221828Sgrehan port = vme->u.inout.port; 317221828Sgrehan bytes = vme->u.inout.bytes; 318266573Sneel string = vme->u.inout.string; 319221828Sgrehan in = vme->u.inout.in; 320221828Sgrehan out = !in; 321221828Sgrehan 322221828Sgrehan /* Extra-special case of host notifications */ 323266573Sneel if (out && port == GUEST_NIO_PORT) { 324266573Sneel error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 325266573Sneel return (error); 326266573Sneel } 327221828Sgrehan 328266573Sneel error = emulate_inout(ctx, vcpu, vme, strictio); 329269094Sneel if (error) { 330281561Stychon fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 331281561Stychon in ? "in" : "out", 332281561Stychon bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 333281561Stychon port, vmexit->rip); 334269094Sneel return (VMEXIT_ABORT); 335269094Sneel } else { 336221828Sgrehan return (VMEXIT_CONTINUE); 337221828Sgrehan } 338221828Sgrehan} 339221828Sgrehan 340221828Sgrehanstatic int 341221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 342221828Sgrehan{ 343259635Sneel uint64_t val; 344259635Sneel uint32_t eax, edx; 345259635Sneel int error; 346259635Sneel 347259635Sneel val = 0; 348259635Sneel error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 349259635Sneel if (error != 0) { 350259635Sneel fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 351259635Sneel vme->u.msr.code, *pvcpu); 352262506Sneel if (strictmsr) { 353269042Sneel vm_inject_gp(ctx, *pvcpu); 354277310Sneel return (VMEXIT_CONTINUE); 355262506Sneel } 356259635Sneel } 357259635Sneel 358259635Sneel eax = val; 359259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 360259635Sneel assert(error == 0); 361259635Sneel 362259635Sneel edx = val >> 32; 363259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 364259635Sneel assert(error == 0); 365259635Sneel 366259635Sneel return (VMEXIT_CONTINUE); 367221828Sgrehan} 368221828Sgrehan 369221828Sgrehanstatic int 370221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 371221828Sgrehan{ 372259635Sneel int error; 373221828Sgrehan 374259635Sneel error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 375259635Sneel if (error != 0) { 376259635Sneel fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 377259635Sneel vme->u.msr.code, vme->u.msr.wval, *pvcpu); 378262506Sneel if (strictmsr) { 379269042Sneel vm_inject_gp(ctx, *pvcpu); 380277310Sneel return (VMEXIT_CONTINUE); 381262506Sneel } 382259635Sneel } 383259635Sneel return (VMEXIT_CONTINUE); 384221828Sgrehan} 385221828Sgrehan 386221828Sgrehanstatic int 387240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 388240912Sneel{ 389240912Sneel int newcpu; 390240912Sneel int retval = VMEXIT_CONTINUE; 391240912Sneel 392240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 393240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 394240912Sneel 395240912Sneel return (retval); 396240912Sneel} 397240912Sneel 398267966Sneel#define DEBUG_EPT_MISCONFIG 399267966Sneel#ifdef DEBUG_EPT_MISCONFIG 400267966Sneel#define EXIT_REASON_EPT_MISCONFIG 49 401267966Sneel#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 402267966Sneel#define VMCS_IDENT(x) ((x) | 0x80000000) 403267966Sneel 404267966Sneelstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 405267966Sneelstatic int ept_misconfig_ptenum; 406267966Sneel#endif 407267966Sneel 408240912Sneelstatic int 409221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 410221828Sgrehan{ 411221828Sgrehan 412242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 413242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 414242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 415242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 416260167Sneel fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 417242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 418242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 419242385Sgrehan vmexit->u.vmx.exit_qualification); 420260167Sneel fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 421260167Sneel fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 422267966Sneel#ifdef DEBUG_EPT_MISCONFIG 423267966Sneel if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 424267966Sneel vm_get_register(ctx, *pvcpu, 425267966Sneel VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 426267966Sneel &ept_misconfig_gpa); 427267966Sneel vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 428267966Sneel &ept_misconfig_ptenum); 429267966Sneel fprintf(stderr, "\tEPT misconfiguration:\n"); 430267966Sneel fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 431267966Sneel fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 432267966Sneel ept_misconfig_ptenum, ept_misconfig_pte[0], 433267966Sneel ept_misconfig_pte[1], ept_misconfig_pte[2], 434267966Sneel ept_misconfig_pte[3]); 435267966Sneel } 436267966Sneel#endif /* DEBUG_EPT_MISCONFIG */ 437221828Sgrehan return (VMEXIT_ABORT); 438221828Sgrehan} 439221828Sgrehan 440221828Sgrehanstatic int 441273375Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 442273375Sneel{ 443273375Sneel 444273375Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 445273375Sneel fprintf(stderr, "\treason\t\tSVM\n"); 446273375Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 447273375Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 448273375Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 449273375Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 450273375Sneel fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 451273375Sneel return (VMEXIT_ABORT); 452273375Sneel} 453273375Sneel 454273375Sneelstatic int 455221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 456221828Sgrehan{ 457256062Sgrehan 458277310Sneel assert(vmexit->inst_length == 0); 459277310Sneel 460221828Sgrehan stats.vmexit_bogus++; 461221828Sgrehan 462277310Sneel return (VMEXIT_CONTINUE); 463221828Sgrehan} 464221828Sgrehan 465221828Sgrehanstatic int 466283657Sneelvmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 467283657Sneel{ 468283657Sneel 469283657Sneel assert(vmexit->inst_length == 0); 470283657Sneel 471283657Sneel stats.vmexit_reqidle++; 472283657Sneel 473283657Sneel return (VMEXIT_CONTINUE); 474283657Sneel} 475283657Sneel 476283657Sneelstatic int 477221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 478221828Sgrehan{ 479256062Sgrehan 480221828Sgrehan stats.vmexit_hlt++; 481256062Sgrehan 482256062Sgrehan /* 483256062Sgrehan * Just continue execution with the next instruction. We use 484256062Sgrehan * the HLT VM exit as a way to be friendly with the host 485256062Sgrehan * scheduler. 486256062Sgrehan */ 487256062Sgrehan return (VMEXIT_CONTINUE); 488221828Sgrehan} 489221828Sgrehan 490221828Sgrehanstatic int 491221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 492221828Sgrehan{ 493256062Sgrehan 494221828Sgrehan stats.vmexit_pause++; 495221828Sgrehan 496256062Sgrehan return (VMEXIT_CONTINUE); 497221828Sgrehan} 498221828Sgrehan 499221828Sgrehanstatic int 500221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 501221828Sgrehan{ 502256062Sgrehan 503277310Sneel assert(vmexit->inst_length == 0); 504277310Sneel 505221828Sgrehan stats.vmexit_mtrap++; 506221828Sgrehan 507277310Sneel return (VMEXIT_CONTINUE); 508221828Sgrehan} 509221828Sgrehan 510234761Sgrehanstatic int 511256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 512234761Sgrehan{ 513280968Stychon int err, i; 514280968Stychon struct vie *vie; 515280968Stychon 516256072Sneel stats.vmexit_inst_emul++; 517234761Sgrehan 518280968Stychon vie = &vmexit->u.inst_emul.vie; 519256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 520280968Stychon vie, &vmexit->u.inst_emul.paging); 521241744Sgrehan 522241744Sgrehan if (err) { 523280968Stychon if (err == ESRCH) { 524242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 525256072Sneel vmexit->u.inst_emul.gpa); 526241744Sgrehan } 527241744Sgrehan 528280968Stychon fprintf(stderr, "Failed to emulate instruction ["); 529280968Stychon for (i = 0; i < vie->num_valid; i++) { 530280968Stychon fprintf(stderr, "0x%02x%s", vie->inst[i], 531280968Stychon i != (vie->num_valid - 1) ? " " : ""); 532280968Stychon } 533280968Stychon fprintf(stderr, "] at 0x%lx\n", vmexit->rip); 534234761Sgrehan return (VMEXIT_ABORT); 535234761Sgrehan } 536234761Sgrehan 537234761Sgrehan return (VMEXIT_CONTINUE); 538234761Sgrehan} 539234761Sgrehan 540263780Sneelstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 541263780Sneelstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 542263780Sneel 543263780Sneelstatic int 544263780Sneelvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 545263780Sneel{ 546265062Sneel enum vm_suspend_how how; 547263780Sneel 548265062Sneel how = vmexit->u.suspended.how; 549265062Sneel 550263780Sneel fbsdrun_deletecpu(ctx, *pvcpu); 551263780Sneel 552265062Sneel if (*pvcpu != BSP) { 553263780Sneel pthread_mutex_lock(&resetcpu_mtx); 554263780Sneel pthread_cond_signal(&resetcpu_cond); 555263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 556263780Sneel pthread_exit(NULL); 557263780Sneel } 558263780Sneel 559263780Sneel pthread_mutex_lock(&resetcpu_mtx); 560263780Sneel while (!CPU_EMPTY(&cpumask)) { 561263780Sneel pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 562263780Sneel } 563263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 564265062Sneel 565265203Sneel switch (how) { 566265203Sneel case VM_SUSPEND_RESET: 567265062Sneel exit(0); 568265203Sneel case VM_SUSPEND_POWEROFF: 569265062Sneel exit(1); 570265203Sneel case VM_SUSPEND_HALT: 571265203Sneel exit(2); 572268889Sneel case VM_SUSPEND_TRIPLEFAULT: 573268889Sneel exit(3); 574265203Sneel default: 575265203Sneel fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 576265203Sneel exit(100); 577265203Sneel } 578265062Sneel return (0); /* NOTREACHED */ 579263780Sneel} 580263780Sneel 581221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 582234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 583266573Sneel [VM_EXITCODE_INOUT_STR] = vmexit_inout, 584234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 585273375Sneel [VM_EXITCODE_SVM] = vmexit_svm, 586234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 587283657Sneel [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 588234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 589234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 590234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 591256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 592240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 593268777Sneel [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 594268777Sneel [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 595221828Sgrehan}; 596221828Sgrehan 597221828Sgrehanstatic void 598277310Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 599221828Sgrehan{ 600221828Sgrehan int error, rc, prevcpu; 601253452Sgrehan enum vm_exitcode exitcode; 602266933Sneel cpuset_t active_cpus; 603221828Sgrehan 604265376Sneel if (vcpumap[vcpu] != NULL) { 605246686Sneel error = pthread_setaffinity_np(pthread_self(), 606265376Sneel sizeof(cpuset_t), vcpumap[vcpu]); 607221828Sgrehan assert(error == 0); 608221828Sgrehan } 609221828Sgrehan 610266933Sneel error = vm_active_cpus(ctx, &active_cpus); 611266933Sneel assert(CPU_ISSET(vcpu, &active_cpus)); 612266933Sneel 613277310Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 614277310Sneel assert(error == 0); 615277310Sneel 616221828Sgrehan while (1) { 617277310Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 618259737Sneel if (error != 0) 619259737Sneel break; 620221828Sgrehan 621221828Sgrehan prevcpu = vcpu; 622253452Sgrehan 623253452Sgrehan exitcode = vmexit[vcpu].exitcode; 624253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 625253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 626253452Sgrehan exitcode); 627253452Sgrehan exit(1); 628253452Sgrehan } 629253452Sgrehan 630253452Sgrehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 631253452Sgrehan 632221828Sgrehan switch (rc) { 633221828Sgrehan case VMEXIT_CONTINUE: 634221828Sgrehan break; 635265941Sneel case VMEXIT_ABORT: 636265941Sneel abort(); 637221828Sgrehan default: 638221828Sgrehan exit(1); 639221828Sgrehan } 640221828Sgrehan } 641221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 642221828Sgrehan} 643221828Sgrehan 644245020Sneelstatic int 645245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 646245020Sneel{ 647245020Sneel int tmp, error; 648221828Sgrehan 649245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 650245020Sneel 651245020Sneel /* 652245020Sneel * The guest is allowed to spinup more than one processor only if the 653245020Sneel * UNRESTRICTED_GUEST capability is available. 654245020Sneel */ 655245020Sneel if (error == 0) 656245020Sneel return (VM_MAXCPU); 657245020Sneel else 658245020Sneel return (1); 659245020Sneel} 660245020Sneel 661256645Sneelvoid 662256645Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 663256645Sneel{ 664256645Sneel int err, tmp; 665256645Sneel 666256645Sneel if (fbsdrun_vmexit_on_hlt()) { 667256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 668256645Sneel if (err < 0) { 669256645Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 670256645Sneel exit(1); 671256645Sneel } 672256645Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 673256645Sneel if (cpu == BSP) 674256645Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 675256645Sneel } 676256645Sneel 677256645Sneel if (fbsdrun_vmexit_on_pause()) { 678256645Sneel /* 679256645Sneel * pause exit support required for this mode 680256645Sneel */ 681256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 682256645Sneel if (err < 0) { 683256645Sneel fprintf(stderr, 684256645Sneel "SMP mux requested, no pause support\n"); 685256645Sneel exit(1); 686256645Sneel } 687256645Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 688256645Sneel if (cpu == BSP) 689256645Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 690256645Sneel } 691256645Sneel 692262236Sneel if (x2apic_mode) 693262236Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 694262236Sneel else 695256645Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 696256645Sneel 697256645Sneel if (err) { 698256645Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 699256645Sneel exit(1); 700256645Sneel } 701256645Sneel 702256645Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 703256645Sneel} 704256645Sneel 705284539Sneelstatic struct vmctx * 706284539Sneeldo_open(const char *vmname) 707284539Sneel{ 708284539Sneel struct vmctx *ctx; 709284539Sneel int error; 710284539Sneel bool reinit, romboot; 711284539Sneel 712284539Sneel reinit = romboot = false; 713284539Sneel 714284539Sneel if (lpc_bootrom()) 715284539Sneel romboot = true; 716284539Sneel 717284539Sneel error = vm_create(vmname); 718284539Sneel if (error) { 719284539Sneel if (errno == EEXIST) { 720284539Sneel if (romboot) { 721284539Sneel reinit = true; 722284539Sneel } else { 723284539Sneel /* 724284539Sneel * The virtual machine has been setup by the 725284539Sneel * userspace bootloader. 726284539Sneel */ 727284539Sneel } 728284539Sneel } else { 729284539Sneel perror("vm_create"); 730284539Sneel exit(1); 731284539Sneel } 732284539Sneel } else { 733284539Sneel if (!romboot) { 734284539Sneel /* 735284539Sneel * If the virtual machine was just created then a 736284539Sneel * bootrom must be configured to boot it. 737284539Sneel */ 738284539Sneel fprintf(stderr, "virtual machine cannot be booted\n"); 739284539Sneel exit(1); 740284539Sneel } 741284539Sneel } 742284539Sneel 743284539Sneel ctx = vm_open(vmname); 744284539Sneel if (ctx == NULL) { 745284539Sneel perror("vm_open"); 746284539Sneel exit(1); 747284539Sneel } 748284539Sneel 749284539Sneel if (reinit) { 750284539Sneel error = vm_reinit(ctx); 751284539Sneel if (error) { 752284539Sneel perror("vm_reinit"); 753284539Sneel exit(1); 754284539Sneel } 755284539Sneel } 756284539Sneel return (ctx); 757284539Sneel} 758284539Sneel 759221828Sgrehanint 760221828Sgrehanmain(int argc, char *argv[]) 761221828Sgrehan{ 762257423Sneel int c, error, gdb_port, err, bvmcons; 763284539Sneel int max_vcpus, mptgen, memflags; 764279225Sneel int rtc_localtime; 765221828Sgrehan struct vmctx *ctx; 766221828Sgrehan uint64_t rip; 767248477Sneel size_t memsize; 768284539Sneel char *optstr; 769221828Sgrehan 770242192Sneel bvmcons = 0; 771221828Sgrehan progname = basename(argv[0]); 772256156Sneel gdb_port = 0; 773221828Sgrehan guest_ncpus = 1; 774248477Sneel memsize = 256 * MB; 775265211Sneel mptgen = 1; 776279225Sneel rtc_localtime = 1; 777284539Sneel memflags = 0; 778221828Sgrehan 779284539Sneel optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; 780284539Sneel while ((c = getopt(argc, argv, optstr)) != -1) { 781221828Sgrehan switch (c) { 782240943Sneel case 'a': 783262236Sneel x2apic_mode = 0; 784240943Sneel break; 785243327Sgrehan case 'A': 786243327Sgrehan acpi = 1; 787243327Sgrehan break; 788242192Sneel case 'b': 789242192Sneel bvmcons = 1; 790242192Sneel break; 791221828Sgrehan case 'p': 792265376Sneel if (pincpu_parse(optarg) != 0) { 793265376Sneel errx(EX_USAGE, "invalid vcpu pinning " 794265376Sneel "configuration '%s'", optarg); 795265376Sneel } 796221828Sgrehan break; 797221828Sgrehan case 'c': 798221828Sgrehan guest_ncpus = atoi(optarg); 799221828Sgrehan break; 800265951Sneel case 'C': 801284539Sneel memflags |= VM_MEM_F_INCORE; 802265951Sneel break; 803221828Sgrehan case 'g': 804221828Sgrehan gdb_port = atoi(optarg); 805221828Sgrehan break; 806257293Sneel case 'l': 807257293Sneel if (lpc_device_parse(optarg) != 0) { 808257293Sneel errx(EX_USAGE, "invalid lpc device " 809257293Sneel "configuration '%s'", optarg); 810257293Sneel } 811257293Sneel break; 812221828Sgrehan case 's': 813261217Sjhb if (pci_parse_slot(optarg) != 0) 814249916Sneel exit(1); 815249916Sneel else 816249916Sneel break; 817284539Sneel case 'S': 818284539Sneel memflags |= VM_MEM_F_WIRED; 819284539Sneel break; 820221828Sgrehan case 'm': 821256176Sneel error = vm_parse_memsize(optarg, &memsize); 822256176Sneel if (error) 823256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 824221828Sgrehan break; 825221828Sgrehan case 'H': 826221828Sgrehan guest_vmexit_on_hlt = 1; 827221828Sgrehan break; 828239043Sneel case 'I': 829257423Sneel /* 830257423Sneel * The "-I" option was used to add an ioapic to the 831257423Sneel * virtual machine. 832257423Sneel * 833257423Sneel * An ioapic is now provided unconditionally for each 834257423Sneel * virtual machine and this option is now deprecated. 835257423Sneel */ 836239043Sneel break; 837221828Sgrehan case 'P': 838221828Sgrehan guest_vmexit_on_pause = 1; 839221828Sgrehan break; 840222105Sgrehan case 'e': 841222105Sgrehan strictio = 1; 842222105Sgrehan break; 843279225Sneel case 'u': 844279225Sneel rtc_localtime = 0; 845279225Sneel break; 846262744Stychon case 'U': 847262744Stychon guest_uuid_str = optarg; 848262744Stychon break; 849259635Sneel case 'w': 850259635Sneel strictmsr = 0; 851259635Sneel break; 852256711Sgrehan case 'W': 853256711Sgrehan virtio_msix = 0; 854256711Sgrehan break; 855262236Sneel case 'x': 856262236Sneel x2apic_mode = 1; 857262236Sneel break; 858265211Sneel case 'Y': 859265211Sneel mptgen = 0; 860265211Sneel break; 861221828Sgrehan case 'h': 862221828Sgrehan usage(0); 863221828Sgrehan default: 864221828Sgrehan usage(1); 865221828Sgrehan } 866221828Sgrehan } 867221828Sgrehan argc -= optind; 868221828Sgrehan argv += optind; 869221828Sgrehan 870221828Sgrehan if (argc != 1) 871221828Sgrehan usage(1); 872221828Sgrehan 873221828Sgrehan vmname = argv[0]; 874284539Sneel ctx = do_open(vmname); 875221828Sgrehan 876281611Sneel if (guest_ncpus < 1) { 877281611Sneel fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 878281611Sneel exit(1); 879281611Sneel } 880281611Sneel 881245020Sneel max_vcpus = num_vcpus_allowed(ctx); 882245020Sneel if (guest_ncpus > max_vcpus) { 883245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 884245020Sneel guest_ncpus, max_vcpus); 885245020Sneel exit(1); 886245020Sneel } 887245020Sneel 888256645Sneel fbsdrun_set_capabilities(ctx, BSP); 889221828Sgrehan 890284539Sneel vm_set_memflags(ctx, memflags); 891248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 892248477Sneel if (err) { 893284539Sneel fprintf(stderr, "Unable to setup memory (%d)\n", errno); 894248477Sneel exit(1); 895221828Sgrehan } 896221828Sgrehan 897271888Sneel error = init_msr(); 898271888Sneel if (error) { 899271888Sneel fprintf(stderr, "init_msr error %d", error); 900271888Sneel exit(1); 901271888Sneel } 902271888Sneel 903249343Sneel init_mem(); 904221828Sgrehan init_inout(); 905266125Sjhb pci_irq_init(ctx); 906261268Sjhb ioapic_init(ctx); 907252682Sgrehan 908279225Sneel rtc_init(ctx, rtc_localtime); 909266125Sjhb sci_init(ctx); 910253181Sgrehan 911252682Sgrehan /* 912252682Sgrehan * Exit if a device emulation finds an error in it's initilization 913252682Sgrehan */ 914252682Sgrehan if (init_pci(ctx) != 0) 915252682Sgrehan exit(1); 916252682Sgrehan 917221828Sgrehan if (gdb_port != 0) 918221828Sgrehan init_dbgport(gdb_port); 919221828Sgrehan 920242192Sneel if (bvmcons) 921242192Sneel init_bvmcons(); 922242192Sneel 923284539Sneel if (lpc_bootrom()) { 924284539Sneel if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 925284539Sneel fprintf(stderr, "ROM boot failed: unrestricted guest " 926284539Sneel "capability not available\n"); 927284539Sneel exit(1); 928284539Sneel } 929284539Sneel error = vcpu_reset(ctx, BSP); 930284539Sneel assert(error == 0); 931284539Sneel } 932284539Sneel 933221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 934221828Sgrehan assert(error == 0); 935221828Sgrehan 936221828Sgrehan /* 937221828Sgrehan * build the guest tables, MP etc. 938221828Sgrehan */ 939265211Sneel if (mptgen) { 940265211Sneel error = mptable_build(ctx, guest_ncpus); 941265211Sneel if (error) 942265211Sneel exit(1); 943265211Sneel } 944221828Sgrehan 945262744Stychon error = smbios_build(ctx); 946262744Stychon assert(error == 0); 947262744Stychon 948243327Sgrehan if (acpi) { 949257423Sneel error = acpi_build(ctx, guest_ncpus); 950243327Sgrehan assert(error == 0); 951243327Sgrehan } 952243327Sgrehan 953221828Sgrehan /* 954257729Sgrehan * Change the proc title to include the VM name. 955257729Sgrehan */ 956257729Sgrehan setproctitle("%s", vmname); 957257729Sgrehan 958257729Sgrehan /* 959221828Sgrehan * Add CPU 0 960221828Sgrehan */ 961263432Sneel fbsdrun_addcpu(ctx, BSP, BSP, rip); 962221828Sgrehan 963221828Sgrehan /* 964221828Sgrehan * Head off to the main event dispatch loop 965221828Sgrehan */ 966221828Sgrehan mevent_dispatch(); 967221828Sgrehan 968221828Sgrehan exit(1); 969221828Sgrehan} 970