bhyverun.c revision 277310
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 277310 2015-01-18 03:08:30Z neel $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 277310 2015-01-18 03:08:30Z neel $"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36259081Sneel#include <machine/atomic.h> 37221828Sgrehan#include <machine/segments.h> 38221828Sgrehan 39221828Sgrehan#include <stdio.h> 40221828Sgrehan#include <stdlib.h> 41257018Sneel#include <string.h> 42256176Sneel#include <err.h> 43221828Sgrehan#include <libgen.h> 44221828Sgrehan#include <unistd.h> 45221828Sgrehan#include <assert.h> 46221828Sgrehan#include <errno.h> 47221828Sgrehan#include <pthread.h> 48242404Sgrehan#include <pthread_np.h> 49256176Sneel#include <sysexits.h> 50221828Sgrehan 51221828Sgrehan#include <machine/vmm.h> 52221828Sgrehan#include <vmmapi.h> 53221828Sgrehan 54244167Sgrehan#include "bhyverun.h" 55243327Sgrehan#include "acpi.h" 56221828Sgrehan#include "inout.h" 57221828Sgrehan#include "dbgport.h" 58261268Sjhb#include "ioapic.h" 59241744Sgrehan#include "mem.h" 60221828Sgrehan#include "mevent.h" 61242131Sgrehan#include "mptbl.h" 62221828Sgrehan#include "pci_emul.h" 63266125Sjhb#include "pci_irq.h" 64257293Sneel#include "pci_lpc.h" 65262744Stychon#include "smbiostbl.h" 66221828Sgrehan#include "xmsr.h" 67240912Sneel#include "spinup_ap.h" 68253181Sgrehan#include "rtc.h" 69221828Sgrehan 70221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 71221828Sgrehan 72221828Sgrehan#define MB (1024UL * 1024) 73221828Sgrehan#define GB (1024UL * MB) 74221828Sgrehan 75221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 76268777Sneelextern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 77221828Sgrehan 78221828Sgrehanchar *vmname; 79221828Sgrehan 80221828Sgrehanint guest_ncpus; 81262744Stychonchar *guest_uuid_str; 82221828Sgrehan 83262236Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 84256711Sgrehanstatic int virtio_msix = 1; 85262236Sneelstatic int x2apic_mode = 0; /* default is xAPIC */ 86221828Sgrehan 87222105Sgrehanstatic int strictio; 88259635Sneelstatic int strictmsr = 1; 89222105Sgrehan 90243327Sgrehanstatic int acpi; 91243327Sgrehan 92221828Sgrehanstatic char *progname; 93221828Sgrehanstatic const int BSP = 0; 94221828Sgrehan 95263432Sneelstatic cpuset_t cpumask; 96221828Sgrehan 97221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 98221828Sgrehan 99269042Sneelstatic struct vm_exit vmexit[VM_MAXCPU]; 100221828Sgrehan 101256062Sgrehanstruct bhyvestats { 102221828Sgrehan uint64_t vmexit_bogus; 103221828Sgrehan uint64_t vmexit_bogus_switch; 104221828Sgrehan uint64_t vmexit_hlt; 105221828Sgrehan uint64_t vmexit_pause; 106221828Sgrehan uint64_t vmexit_mtrap; 107256072Sneel uint64_t vmexit_inst_emul; 108221828Sgrehan uint64_t cpu_switch_rotate; 109221828Sgrehan uint64_t cpu_switch_direct; 110221828Sgrehan} stats; 111221828Sgrehan 112221828Sgrehanstruct mt_vmm_info { 113221828Sgrehan pthread_t mt_thr; 114221828Sgrehan struct vmctx *mt_ctx; 115221828Sgrehan int mt_vcpu; 116221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 117221828Sgrehan 118265376Sneelstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 119265376Sneel 120221828Sgrehanstatic void 121221828Sgrehanusage(int code) 122221828Sgrehan{ 123221828Sgrehan 124221828Sgrehan fprintf(stderr, 125267959Sjhb "Usage: %s [-abehwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 126267959Sjhb " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 127262236Sneel " -a: local apic is in xAPIC mode (deprecated)\n" 128267934Sjhb " -A: create ACPI tables\n" 129221828Sgrehan " -c: # cpus (default 1)\n" 130265951Sneel " -C: include guest memory in core file\n" 131257018Sneel " -e: exit on unhandled I/O access\n" 132267959Sjhb " -g: gdb port\n" 133221828Sgrehan " -h: help\n" 134267959Sjhb " -H: vmexit from the guest on hlt\n" 135257293Sneel " -l: LPC device configuration\n" 136259635Sneel " -m: memory size in MB\n" 137267959Sjhb " -p: pin 'vcpu' to 'hostcpu'\n" 138267959Sjhb " -P: vmexit from the guest on pause\n" 139267959Sjhb " -s: <slot,driver,configinfo> PCI slot config\n" 140267959Sjhb " -U: uuid\n" 141262236Sneel " -w: ignore unimplemented MSRs\n" 142267959Sjhb " -W: force virtio to use single-vector MSI\n" 143262744Stychon " -x: local apic is in x2APIC mode\n" 144267959Sjhb " -Y: disable MPtable generation\n", 145257018Sneel progname, (int)strlen(progname), ""); 146256062Sgrehan 147221828Sgrehan exit(code); 148221828Sgrehan} 149221828Sgrehan 150265376Sneelstatic int 151265376Sneelpincpu_parse(const char *opt) 152265376Sneel{ 153265376Sneel int vcpu, pcpu; 154265376Sneel 155265376Sneel if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 156265376Sneel fprintf(stderr, "invalid format: %s\n", opt); 157265376Sneel return (-1); 158265376Sneel } 159265376Sneel 160265376Sneel if (vcpu < 0 || vcpu >= VM_MAXCPU) { 161265376Sneel fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 162265376Sneel vcpu, VM_MAXCPU - 1); 163265376Sneel return (-1); 164265376Sneel } 165265376Sneel 166265376Sneel if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 167265376Sneel fprintf(stderr, "hostcpu '%d' outside valid range from " 168265376Sneel "0 to %d\n", pcpu, CPU_SETSIZE - 1); 169265376Sneel return (-1); 170265376Sneel } 171265376Sneel 172265376Sneel if (vcpumap[vcpu] == NULL) { 173265376Sneel if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 174265376Sneel perror("malloc"); 175265376Sneel return (-1); 176265376Sneel } 177265376Sneel CPU_ZERO(vcpumap[vcpu]); 178265376Sneel } 179265376Sneel CPU_SET(pcpu, vcpumap[vcpu]); 180265376Sneel return (0); 181265376Sneel} 182265376Sneel 183269042Sneelvoid 184269042Sneelvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 185269042Sneel int errcode) 186269042Sneel{ 187269042Sneel struct vmctx *ctx; 188277310Sneel int error, restart_instruction; 189269042Sneel 190269042Sneel ctx = arg; 191277310Sneel restart_instruction = 1; 192277310Sneel 193277310Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 194277310Sneel restart_instruction); 195269042Sneel assert(error == 0); 196269042Sneel} 197269042Sneel 198221828Sgrehanvoid * 199248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 200221828Sgrehan{ 201221828Sgrehan 202248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 203221828Sgrehan} 204221828Sgrehan 205221828Sgrehanint 206221828Sgrehanfbsdrun_vmexit_on_pause(void) 207221828Sgrehan{ 208221828Sgrehan 209221828Sgrehan return (guest_vmexit_on_pause); 210221828Sgrehan} 211221828Sgrehan 212221828Sgrehanint 213221828Sgrehanfbsdrun_vmexit_on_hlt(void) 214221828Sgrehan{ 215221828Sgrehan 216221828Sgrehan return (guest_vmexit_on_hlt); 217221828Sgrehan} 218221828Sgrehan 219256711Sgrehanint 220256711Sgrehanfbsdrun_virtio_msix(void) 221256711Sgrehan{ 222256711Sgrehan 223256711Sgrehan return (virtio_msix); 224256711Sgrehan} 225256711Sgrehan 226221942Sjhbstatic void * 227221828Sgrehanfbsdrun_start_thread(void *param) 228221828Sgrehan{ 229242404Sgrehan char tname[MAXCOMLEN + 1]; 230242404Sgrehan struct mt_vmm_info *mtp; 231221828Sgrehan int vcpu; 232221828Sgrehan 233242404Sgrehan mtp = param; 234221828Sgrehan vcpu = mtp->mt_vcpu; 235242404Sgrehan 236257729Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 237242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 238242404Sgrehan 239221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 240221828Sgrehan 241221828Sgrehan /* not reached */ 242221828Sgrehan exit(1); 243221828Sgrehan return (NULL); 244221828Sgrehan} 245221828Sgrehan 246221828Sgrehanvoid 247263432Sneelfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 248221828Sgrehan{ 249221828Sgrehan int error; 250221828Sgrehan 251263432Sneel assert(fromcpu == BSP); 252221828Sgrehan 253266933Sneel /* 254266933Sneel * The 'newcpu' must be activated in the context of 'fromcpu'. If 255266933Sneel * vm_activate_cpu() is delayed until newcpu's pthread starts running 256266933Sneel * then vmm.ko is out-of-sync with bhyve and this can create a race 257266933Sneel * with vm_suspend(). 258266933Sneel */ 259266933Sneel error = vm_activate_cpu(ctx, newcpu); 260266933Sneel assert(error == 0); 261266933Sneel 262263432Sneel CPU_SET_ATOMIC(newcpu, &cpumask); 263221828Sgrehan 264221828Sgrehan /* 265221828Sgrehan * Set up the vmexit struct to allow execution to start 266221828Sgrehan * at the given RIP 267221828Sgrehan */ 268263432Sneel vmexit[newcpu].rip = rip; 269263432Sneel vmexit[newcpu].inst_length = 0; 270221828Sgrehan 271263432Sneel mt_vmm_info[newcpu].mt_ctx = ctx; 272263432Sneel mt_vmm_info[newcpu].mt_vcpu = newcpu; 273256072Sneel 274263432Sneel error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 275263432Sneel fbsdrun_start_thread, &mt_vmm_info[newcpu]); 276256072Sneel assert(error == 0); 277221828Sgrehan} 278221828Sgrehan 279221828Sgrehanstatic int 280259081Sneelfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 281259081Sneel{ 282259081Sneel 283263432Sneel if (!CPU_ISSET(vcpu, &cpumask)) { 284265366Sneel fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 285259081Sneel exit(1); 286259081Sneel } 287259081Sneel 288263432Sneel CPU_CLR_ATOMIC(vcpu, &cpumask); 289263432Sneel return (CPU_EMPTY(&cpumask)); 290259081Sneel} 291259081Sneel 292259081Sneelstatic int 293221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 294221828Sgrehan uint32_t eax) 295221828Sgrehan{ 296256062Sgrehan#if BHYVE_DEBUG 297256062Sgrehan /* 298256062Sgrehan * put guest-driven debug here 299256062Sgrehan */ 300221828Sgrehan#endif 301221828Sgrehan return (VMEXIT_CONTINUE); 302221828Sgrehan} 303221828Sgrehan 304221828Sgrehanstatic int 305221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 306221828Sgrehan{ 307221828Sgrehan int error; 308266573Sneel int bytes, port, in, out, string; 309221828Sgrehan int vcpu; 310221828Sgrehan 311221828Sgrehan vcpu = *pvcpu; 312221828Sgrehan 313221828Sgrehan port = vme->u.inout.port; 314221828Sgrehan bytes = vme->u.inout.bytes; 315266573Sneel string = vme->u.inout.string; 316221828Sgrehan in = vme->u.inout.in; 317221828Sgrehan out = !in; 318221828Sgrehan 319221828Sgrehan /* Extra-special case of host notifications */ 320266573Sneel if (out && port == GUEST_NIO_PORT) { 321266573Sneel error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 322266573Sneel return (error); 323266573Sneel } 324221828Sgrehan 325266573Sneel error = emulate_inout(ctx, vcpu, vme, strictio); 326269094Sneel if (error) { 327269094Sneel fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", 328269094Sneel bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 329269094Sneel return (VMEXIT_ABORT); 330269094Sneel } else { 331221828Sgrehan return (VMEXIT_CONTINUE); 332221828Sgrehan } 333221828Sgrehan} 334221828Sgrehan 335221828Sgrehanstatic int 336221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 337221828Sgrehan{ 338259635Sneel uint64_t val; 339259635Sneel uint32_t eax, edx; 340259635Sneel int error; 341259635Sneel 342259635Sneel val = 0; 343259635Sneel error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 344259635Sneel if (error != 0) { 345259635Sneel fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 346259635Sneel vme->u.msr.code, *pvcpu); 347262506Sneel if (strictmsr) { 348269042Sneel vm_inject_gp(ctx, *pvcpu); 349277310Sneel return (VMEXIT_CONTINUE); 350262506Sneel } 351259635Sneel } 352259635Sneel 353259635Sneel eax = val; 354259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 355259635Sneel assert(error == 0); 356259635Sneel 357259635Sneel edx = val >> 32; 358259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 359259635Sneel assert(error == 0); 360259635Sneel 361259635Sneel return (VMEXIT_CONTINUE); 362221828Sgrehan} 363221828Sgrehan 364221828Sgrehanstatic int 365221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 366221828Sgrehan{ 367259635Sneel int error; 368221828Sgrehan 369259635Sneel error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 370259635Sneel if (error != 0) { 371259635Sneel fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 372259635Sneel vme->u.msr.code, vme->u.msr.wval, *pvcpu); 373262506Sneel if (strictmsr) { 374269042Sneel vm_inject_gp(ctx, *pvcpu); 375277310Sneel return (VMEXIT_CONTINUE); 376262506Sneel } 377259635Sneel } 378259635Sneel return (VMEXIT_CONTINUE); 379221828Sgrehan} 380221828Sgrehan 381221828Sgrehanstatic int 382240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 383240912Sneel{ 384240912Sneel int newcpu; 385240912Sneel int retval = VMEXIT_CONTINUE; 386240912Sneel 387240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 388240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 389240912Sneel 390240912Sneel return (retval); 391240912Sneel} 392240912Sneel 393267966Sneel#define DEBUG_EPT_MISCONFIG 394267966Sneel#ifdef DEBUG_EPT_MISCONFIG 395267966Sneel#define EXIT_REASON_EPT_MISCONFIG 49 396267966Sneel#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 397267966Sneel#define VMCS_IDENT(x) ((x) | 0x80000000) 398267966Sneel 399267966Sneelstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 400267966Sneelstatic int ept_misconfig_ptenum; 401267966Sneel#endif 402267966Sneel 403240912Sneelstatic int 404221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 405221828Sgrehan{ 406221828Sgrehan 407242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 408242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 409242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 410242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 411260167Sneel fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 412242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 413242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 414242385Sgrehan vmexit->u.vmx.exit_qualification); 415260167Sneel fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 416260167Sneel fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 417267966Sneel#ifdef DEBUG_EPT_MISCONFIG 418267966Sneel if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 419267966Sneel vm_get_register(ctx, *pvcpu, 420267966Sneel VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 421267966Sneel &ept_misconfig_gpa); 422267966Sneel vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 423267966Sneel &ept_misconfig_ptenum); 424267966Sneel fprintf(stderr, "\tEPT misconfiguration:\n"); 425267966Sneel fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 426267966Sneel fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 427267966Sneel ept_misconfig_ptenum, ept_misconfig_pte[0], 428267966Sneel ept_misconfig_pte[1], ept_misconfig_pte[2], 429267966Sneel ept_misconfig_pte[3]); 430267966Sneel } 431267966Sneel#endif /* DEBUG_EPT_MISCONFIG */ 432221828Sgrehan return (VMEXIT_ABORT); 433221828Sgrehan} 434221828Sgrehan 435221828Sgrehanstatic int 436273375Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 437273375Sneel{ 438273375Sneel 439273375Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 440273375Sneel fprintf(stderr, "\treason\t\tSVM\n"); 441273375Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 442273375Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 443273375Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 444273375Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 445273375Sneel fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 446273375Sneel return (VMEXIT_ABORT); 447273375Sneel} 448273375Sneel 449273375Sneelstatic int 450221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 451221828Sgrehan{ 452256062Sgrehan 453277310Sneel assert(vmexit->inst_length == 0); 454277310Sneel 455221828Sgrehan stats.vmexit_bogus++; 456221828Sgrehan 457277310Sneel return (VMEXIT_CONTINUE); 458221828Sgrehan} 459221828Sgrehan 460221828Sgrehanstatic int 461221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 462221828Sgrehan{ 463256062Sgrehan 464221828Sgrehan stats.vmexit_hlt++; 465256062Sgrehan 466256062Sgrehan /* 467256062Sgrehan * Just continue execution with the next instruction. We use 468256062Sgrehan * the HLT VM exit as a way to be friendly with the host 469256062Sgrehan * scheduler. 470256062Sgrehan */ 471256062Sgrehan return (VMEXIT_CONTINUE); 472221828Sgrehan} 473221828Sgrehan 474221828Sgrehanstatic int 475221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 476221828Sgrehan{ 477256062Sgrehan 478221828Sgrehan stats.vmexit_pause++; 479221828Sgrehan 480256062Sgrehan return (VMEXIT_CONTINUE); 481221828Sgrehan} 482221828Sgrehan 483221828Sgrehanstatic int 484221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 485221828Sgrehan{ 486256062Sgrehan 487277310Sneel assert(vmexit->inst_length == 0); 488277310Sneel 489221828Sgrehan stats.vmexit_mtrap++; 490221828Sgrehan 491277310Sneel return (VMEXIT_CONTINUE); 492221828Sgrehan} 493221828Sgrehan 494234761Sgrehanstatic int 495256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 496234761Sgrehan{ 497241744Sgrehan int err; 498256072Sneel stats.vmexit_inst_emul++; 499234761Sgrehan 500256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 501269008Sneel &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging); 502241744Sgrehan 503241744Sgrehan if (err) { 504241744Sgrehan if (err == EINVAL) { 505242385Sgrehan fprintf(stderr, 506242385Sgrehan "Failed to emulate instruction at 0x%lx\n", 507242385Sgrehan vmexit->rip); 508241744Sgrehan } else if (err == ESRCH) { 509242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 510256072Sneel vmexit->u.inst_emul.gpa); 511241744Sgrehan } 512241744Sgrehan 513234761Sgrehan return (VMEXIT_ABORT); 514234761Sgrehan } 515234761Sgrehan 516234761Sgrehan return (VMEXIT_CONTINUE); 517234761Sgrehan} 518234761Sgrehan 519263780Sneelstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 520263780Sneelstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 521263780Sneel 522263780Sneelstatic int 523263780Sneelvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 524263780Sneel{ 525265062Sneel enum vm_suspend_how how; 526263780Sneel 527265062Sneel how = vmexit->u.suspended.how; 528265062Sneel 529263780Sneel fbsdrun_deletecpu(ctx, *pvcpu); 530263780Sneel 531265062Sneel if (*pvcpu != BSP) { 532263780Sneel pthread_mutex_lock(&resetcpu_mtx); 533263780Sneel pthread_cond_signal(&resetcpu_cond); 534263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 535263780Sneel pthread_exit(NULL); 536263780Sneel } 537263780Sneel 538263780Sneel pthread_mutex_lock(&resetcpu_mtx); 539263780Sneel while (!CPU_EMPTY(&cpumask)) { 540263780Sneel pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 541263780Sneel } 542263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 543265062Sneel 544265203Sneel switch (how) { 545265203Sneel case VM_SUSPEND_RESET: 546265062Sneel exit(0); 547265203Sneel case VM_SUSPEND_POWEROFF: 548265062Sneel exit(1); 549265203Sneel case VM_SUSPEND_HALT: 550265203Sneel exit(2); 551268889Sneel case VM_SUSPEND_TRIPLEFAULT: 552268889Sneel exit(3); 553265203Sneel default: 554265203Sneel fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 555265203Sneel exit(100); 556265203Sneel } 557265062Sneel return (0); /* NOTREACHED */ 558263780Sneel} 559263780Sneel 560221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 561234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 562266573Sneel [VM_EXITCODE_INOUT_STR] = vmexit_inout, 563234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 564273375Sneel [VM_EXITCODE_SVM] = vmexit_svm, 565234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 566234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 567234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 568234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 569256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 570240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 571268777Sneel [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 572268777Sneel [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 573221828Sgrehan}; 574221828Sgrehan 575221828Sgrehanstatic void 576277310Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 577221828Sgrehan{ 578221828Sgrehan int error, rc, prevcpu; 579253452Sgrehan enum vm_exitcode exitcode; 580266933Sneel cpuset_t active_cpus; 581221828Sgrehan 582265376Sneel if (vcpumap[vcpu] != NULL) { 583246686Sneel error = pthread_setaffinity_np(pthread_self(), 584265376Sneel sizeof(cpuset_t), vcpumap[vcpu]); 585221828Sgrehan assert(error == 0); 586221828Sgrehan } 587221828Sgrehan 588266933Sneel error = vm_active_cpus(ctx, &active_cpus); 589266933Sneel assert(CPU_ISSET(vcpu, &active_cpus)); 590266933Sneel 591277310Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 592277310Sneel assert(error == 0); 593277310Sneel 594221828Sgrehan while (1) { 595277310Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 596259737Sneel if (error != 0) 597259737Sneel break; 598221828Sgrehan 599221828Sgrehan prevcpu = vcpu; 600253452Sgrehan 601253452Sgrehan exitcode = vmexit[vcpu].exitcode; 602253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 603253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 604253452Sgrehan exitcode); 605253452Sgrehan exit(1); 606253452Sgrehan } 607253452Sgrehan 608253452Sgrehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 609253452Sgrehan 610221828Sgrehan switch (rc) { 611221828Sgrehan case VMEXIT_CONTINUE: 612221828Sgrehan break; 613265941Sneel case VMEXIT_ABORT: 614265941Sneel abort(); 615221828Sgrehan default: 616221828Sgrehan exit(1); 617221828Sgrehan } 618221828Sgrehan } 619221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 620221828Sgrehan} 621221828Sgrehan 622245020Sneelstatic int 623245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 624245020Sneel{ 625245020Sneel int tmp, error; 626221828Sgrehan 627245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 628245020Sneel 629245020Sneel /* 630245020Sneel * The guest is allowed to spinup more than one processor only if the 631245020Sneel * UNRESTRICTED_GUEST capability is available. 632245020Sneel */ 633245020Sneel if (error == 0) 634245020Sneel return (VM_MAXCPU); 635245020Sneel else 636245020Sneel return (1); 637245020Sneel} 638245020Sneel 639256645Sneelvoid 640256645Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 641256645Sneel{ 642256645Sneel int err, tmp; 643256645Sneel 644256645Sneel if (fbsdrun_vmexit_on_hlt()) { 645256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 646256645Sneel if (err < 0) { 647256645Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 648256645Sneel exit(1); 649256645Sneel } 650256645Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 651256645Sneel if (cpu == BSP) 652256645Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 653256645Sneel } 654256645Sneel 655256645Sneel if (fbsdrun_vmexit_on_pause()) { 656256645Sneel /* 657256645Sneel * pause exit support required for this mode 658256645Sneel */ 659256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 660256645Sneel if (err < 0) { 661256645Sneel fprintf(stderr, 662256645Sneel "SMP mux requested, no pause support\n"); 663256645Sneel exit(1); 664256645Sneel } 665256645Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 666256645Sneel if (cpu == BSP) 667256645Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 668256645Sneel } 669256645Sneel 670262236Sneel if (x2apic_mode) 671262236Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 672262236Sneel else 673256645Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 674256645Sneel 675256645Sneel if (err) { 676256645Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 677256645Sneel exit(1); 678256645Sneel } 679256645Sneel 680256645Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 681256645Sneel} 682256645Sneel 683221828Sgrehanint 684221828Sgrehanmain(int argc, char *argv[]) 685221828Sgrehan{ 686257423Sneel int c, error, gdb_port, err, bvmcons; 687265951Sneel int dump_guest_memory, max_vcpus, mptgen; 688221828Sgrehan struct vmctx *ctx; 689221828Sgrehan uint64_t rip; 690248477Sneel size_t memsize; 691221828Sgrehan 692242192Sneel bvmcons = 0; 693265951Sneel dump_guest_memory = 0; 694221828Sgrehan progname = basename(argv[0]); 695256156Sneel gdb_port = 0; 696221828Sgrehan guest_ncpus = 1; 697248477Sneel memsize = 256 * MB; 698265211Sneel mptgen = 1; 699221828Sgrehan 700265951Sneel while ((c = getopt(argc, argv, "abehwxACHIPWYp:g:c:s:m:l:U:")) != -1) { 701221828Sgrehan switch (c) { 702240943Sneel case 'a': 703262236Sneel x2apic_mode = 0; 704240943Sneel break; 705243327Sgrehan case 'A': 706243327Sgrehan acpi = 1; 707243327Sgrehan break; 708242192Sneel case 'b': 709242192Sneel bvmcons = 1; 710242192Sneel break; 711221828Sgrehan case 'p': 712265376Sneel if (pincpu_parse(optarg) != 0) { 713265376Sneel errx(EX_USAGE, "invalid vcpu pinning " 714265376Sneel "configuration '%s'", optarg); 715265376Sneel } 716221828Sgrehan break; 717221828Sgrehan case 'c': 718221828Sgrehan guest_ncpus = atoi(optarg); 719221828Sgrehan break; 720265951Sneel case 'C': 721265951Sneel dump_guest_memory = 1; 722265951Sneel break; 723221828Sgrehan case 'g': 724221828Sgrehan gdb_port = atoi(optarg); 725221828Sgrehan break; 726257293Sneel case 'l': 727257293Sneel if (lpc_device_parse(optarg) != 0) { 728257293Sneel errx(EX_USAGE, "invalid lpc device " 729257293Sneel "configuration '%s'", optarg); 730257293Sneel } 731257293Sneel break; 732221828Sgrehan case 's': 733261217Sjhb if (pci_parse_slot(optarg) != 0) 734249916Sneel exit(1); 735249916Sneel else 736249916Sneel break; 737221828Sgrehan case 'm': 738256176Sneel error = vm_parse_memsize(optarg, &memsize); 739256176Sneel if (error) 740256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 741221828Sgrehan break; 742221828Sgrehan case 'H': 743221828Sgrehan guest_vmexit_on_hlt = 1; 744221828Sgrehan break; 745239043Sneel case 'I': 746257423Sneel /* 747257423Sneel * The "-I" option was used to add an ioapic to the 748257423Sneel * virtual machine. 749257423Sneel * 750257423Sneel * An ioapic is now provided unconditionally for each 751257423Sneel * virtual machine and this option is now deprecated. 752257423Sneel */ 753239043Sneel break; 754221828Sgrehan case 'P': 755221828Sgrehan guest_vmexit_on_pause = 1; 756221828Sgrehan break; 757222105Sgrehan case 'e': 758222105Sgrehan strictio = 1; 759222105Sgrehan break; 760262744Stychon case 'U': 761262744Stychon guest_uuid_str = optarg; 762262744Stychon break; 763259635Sneel case 'w': 764259635Sneel strictmsr = 0; 765259635Sneel break; 766256711Sgrehan case 'W': 767256711Sgrehan virtio_msix = 0; 768256711Sgrehan break; 769262236Sneel case 'x': 770262236Sneel x2apic_mode = 1; 771262236Sneel break; 772265211Sneel case 'Y': 773265211Sneel mptgen = 0; 774265211Sneel break; 775221828Sgrehan case 'h': 776221828Sgrehan usage(0); 777221828Sgrehan default: 778221828Sgrehan usage(1); 779221828Sgrehan } 780221828Sgrehan } 781221828Sgrehan argc -= optind; 782221828Sgrehan argv += optind; 783221828Sgrehan 784221828Sgrehan if (argc != 1) 785221828Sgrehan usage(1); 786221828Sgrehan 787221828Sgrehan vmname = argv[0]; 788221828Sgrehan 789221828Sgrehan ctx = vm_open(vmname); 790221828Sgrehan if (ctx == NULL) { 791221828Sgrehan perror("vm_open"); 792221828Sgrehan exit(1); 793221828Sgrehan } 794221828Sgrehan 795245020Sneel max_vcpus = num_vcpus_allowed(ctx); 796245020Sneel if (guest_ncpus > max_vcpus) { 797245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 798245020Sneel guest_ncpus, max_vcpus); 799245020Sneel exit(1); 800245020Sneel } 801245020Sneel 802256645Sneel fbsdrun_set_capabilities(ctx, BSP); 803221828Sgrehan 804265951Sneel if (dump_guest_memory) 805265951Sneel vm_set_memflags(ctx, VM_MEM_F_INCORE); 806248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 807248477Sneel if (err) { 808248477Sneel fprintf(stderr, "Unable to setup memory (%d)\n", err); 809248477Sneel exit(1); 810221828Sgrehan } 811221828Sgrehan 812271888Sneel error = init_msr(); 813271888Sneel if (error) { 814271888Sneel fprintf(stderr, "init_msr error %d", error); 815271888Sneel exit(1); 816271888Sneel } 817271888Sneel 818249343Sneel init_mem(); 819221828Sgrehan init_inout(); 820266125Sjhb pci_irq_init(ctx); 821261268Sjhb ioapic_init(ctx); 822252682Sgrehan 823253181Sgrehan rtc_init(ctx); 824266125Sjhb sci_init(ctx); 825253181Sgrehan 826252682Sgrehan /* 827252682Sgrehan * Exit if a device emulation finds an error in it's initilization 828252682Sgrehan */ 829252682Sgrehan if (init_pci(ctx) != 0) 830252682Sgrehan exit(1); 831252682Sgrehan 832221828Sgrehan if (gdb_port != 0) 833221828Sgrehan init_dbgport(gdb_port); 834221828Sgrehan 835242192Sneel if (bvmcons) 836242192Sneel init_bvmcons(); 837242192Sneel 838221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 839221828Sgrehan assert(error == 0); 840221828Sgrehan 841221828Sgrehan /* 842221828Sgrehan * build the guest tables, MP etc. 843221828Sgrehan */ 844265211Sneel if (mptgen) { 845265211Sneel error = mptable_build(ctx, guest_ncpus); 846265211Sneel if (error) 847265211Sneel exit(1); 848265211Sneel } 849221828Sgrehan 850262744Stychon error = smbios_build(ctx); 851262744Stychon assert(error == 0); 852262744Stychon 853243327Sgrehan if (acpi) { 854257423Sneel error = acpi_build(ctx, guest_ncpus); 855243327Sgrehan assert(error == 0); 856243327Sgrehan } 857243327Sgrehan 858221828Sgrehan /* 859257729Sgrehan * Change the proc title to include the VM name. 860257729Sgrehan */ 861257729Sgrehan setproctitle("%s", vmname); 862257729Sgrehan 863257729Sgrehan /* 864221828Sgrehan * Add CPU 0 865221828Sgrehan */ 866263432Sneel fbsdrun_addcpu(ctx, BSP, BSP, rip); 867221828Sgrehan 868221828Sgrehan /* 869221828Sgrehan * Head off to the main event dispatch loop 870221828Sgrehan */ 871221828Sgrehan mevent_dispatch(); 872221828Sgrehan 873221828Sgrehan exit(1); 874221828Sgrehan} 875