bhyverun.c revision 284894
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD: stable/10/usr.sbin/bhyve/bhyverun.c 284894 2015-06-27 22:48:22Z neel $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/bhyverun.c 284894 2015-06-27 22:48:22Z neel $"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36262350Sjhb#include <machine/atomic.h> 37221828Sgrehan#include <machine/segments.h> 38221828Sgrehan 39221828Sgrehan#include <stdio.h> 40221828Sgrehan#include <stdlib.h> 41257396Sneel#include <string.h> 42256176Sneel#include <err.h> 43221828Sgrehan#include <libgen.h> 44221828Sgrehan#include <unistd.h> 45221828Sgrehan#include <assert.h> 46221828Sgrehan#include <errno.h> 47221828Sgrehan#include <pthread.h> 48242404Sgrehan#include <pthread_np.h> 49256176Sneel#include <sysexits.h> 50221828Sgrehan 51221828Sgrehan#include <machine/vmm.h> 52221828Sgrehan#include <vmmapi.h> 53221828Sgrehan 54244167Sgrehan#include "bhyverun.h" 55243327Sgrehan#include "acpi.h" 56221828Sgrehan#include "inout.h" 57221828Sgrehan#include "dbgport.h" 58267393Sjhb#include "ioapic.h" 59241744Sgrehan#include "mem.h" 60221828Sgrehan#include "mevent.h" 61242131Sgrehan#include "mptbl.h" 62221828Sgrehan#include "pci_emul.h" 63268972Sjhb#include "pci_irq.h" 64257396Sneel#include "pci_lpc.h" 65267450Sjhb#include "smbiostbl.h" 66221828Sgrehan#include "xmsr.h" 67240912Sneel#include "spinup_ap.h" 68253181Sgrehan#include "rtc.h" 69221828Sgrehan 70221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 71221828Sgrehan 72221828Sgrehan#define MB (1024UL * 1024) 73221828Sgrehan#define GB (1024UL * MB) 74221828Sgrehan 75221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 76270159Sgrehanextern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 77221828Sgrehan 78221828Sgrehanchar *vmname; 79221828Sgrehan 80221828Sgrehanint guest_ncpus; 81267450Sjhbchar *guest_uuid_str; 82221828Sgrehan 83267447Sjhbstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 84256755Sgrehanstatic int virtio_msix = 1; 85267447Sjhbstatic int x2apic_mode = 0; /* default is xAPIC */ 86221828Sgrehan 87222105Sgrehanstatic int strictio; 88264273Sjhbstatic int strictmsr = 1; 89222105Sgrehan 90243327Sgrehanstatic int acpi; 91243327Sgrehan 92221828Sgrehanstatic char *progname; 93221828Sgrehanstatic const int BSP = 0; 94221828Sgrehan 95268894Sjhbstatic cpuset_t cpumask; 96221828Sgrehan 97221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 98221828Sgrehan 99270159Sgrehanstatic struct vm_exit vmexit[VM_MAXCPU]; 100221828Sgrehan 101256062Sgrehanstruct bhyvestats { 102221828Sgrehan uint64_t vmexit_bogus; 103221828Sgrehan uint64_t vmexit_bogus_switch; 104221828Sgrehan uint64_t vmexit_hlt; 105221828Sgrehan uint64_t vmexit_pause; 106221828Sgrehan uint64_t vmexit_mtrap; 107256072Sneel uint64_t vmexit_inst_emul; 108221828Sgrehan uint64_t cpu_switch_rotate; 109221828Sgrehan uint64_t cpu_switch_direct; 110221828Sgrehan} stats; 111221828Sgrehan 112221828Sgrehanstruct mt_vmm_info { 113221828Sgrehan pthread_t mt_thr; 114221828Sgrehan struct vmctx *mt_ctx; 115221828Sgrehan int mt_vcpu; 116221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 117221828Sgrehan 118268894Sjhbstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 119268894Sjhb 120221828Sgrehanstatic void 121221828Sgrehanusage(int code) 122221828Sgrehan{ 123221828Sgrehan 124221828Sgrehan fprintf(stderr, 125284894Sneel "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 126270159Sgrehan " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 127267447Sjhb " -a: local apic is in xAPIC mode (deprecated)\n" 128270159Sgrehan " -A: create ACPI tables\n" 129221828Sgrehan " -c: # cpus (default 1)\n" 130268953Sjhb " -C: include guest memory in core file\n" 131257396Sneel " -e: exit on unhandled I/O access\n" 132270159Sgrehan " -g: gdb port\n" 133221828Sgrehan " -h: help\n" 134270159Sgrehan " -H: vmexit from the guest on hlt\n" 135257396Sneel " -l: LPC device configuration\n" 136264273Sjhb " -m: memory size in MB\n" 137270159Sgrehan " -p: pin 'vcpu' to 'hostcpu'\n" 138270159Sgrehan " -P: vmexit from the guest on pause\n" 139270159Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 140284894Sneel " -u: RTC keeps UTC time\n" 141270159Sgrehan " -U: uuid\n" 142267447Sjhb " -w: ignore unimplemented MSRs\n" 143270159Sgrehan " -W: force virtio to use single-vector MSI\n" 144267450Sjhb " -x: local apic is in x2APIC mode\n" 145270159Sgrehan " -Y: disable MPtable generation\n", 146257396Sneel progname, (int)strlen(progname), ""); 147256062Sgrehan 148221828Sgrehan exit(code); 149221828Sgrehan} 150221828Sgrehan 151268894Sjhbstatic int 152268894Sjhbpincpu_parse(const char *opt) 153268894Sjhb{ 154268894Sjhb int vcpu, pcpu; 155268894Sjhb 156268894Sjhb if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 157268894Sjhb fprintf(stderr, "invalid format: %s\n", opt); 158268894Sjhb return (-1); 159268894Sjhb } 160268894Sjhb 161268894Sjhb if (vcpu < 0 || vcpu >= VM_MAXCPU) { 162268894Sjhb fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 163268894Sjhb vcpu, VM_MAXCPU - 1); 164268894Sjhb return (-1); 165268894Sjhb } 166268894Sjhb 167268894Sjhb if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 168268894Sjhb fprintf(stderr, "hostcpu '%d' outside valid range from " 169268894Sjhb "0 to %d\n", pcpu, CPU_SETSIZE - 1); 170268894Sjhb return (-1); 171268894Sjhb } 172268894Sjhb 173268894Sjhb if (vcpumap[vcpu] == NULL) { 174268894Sjhb if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 175268894Sjhb perror("malloc"); 176268894Sjhb return (-1); 177268894Sjhb } 178268894Sjhb CPU_ZERO(vcpumap[vcpu]); 179268894Sjhb } 180268894Sjhb CPU_SET(pcpu, vcpumap[vcpu]); 181268894Sjhb return (0); 182268894Sjhb} 183268894Sjhb 184270159Sgrehanvoid 185270159Sgrehanvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 186270159Sgrehan int errcode) 187270159Sgrehan{ 188270159Sgrehan struct vmctx *ctx; 189284894Sneel int error, restart_instruction; 190270159Sgrehan 191270159Sgrehan ctx = arg; 192284894Sneel restart_instruction = 1; 193284894Sneel 194284894Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 195284894Sneel restart_instruction); 196270159Sgrehan assert(error == 0); 197270159Sgrehan} 198270159Sgrehan 199221828Sgrehanvoid * 200248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 201221828Sgrehan{ 202221828Sgrehan 203248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 204221828Sgrehan} 205221828Sgrehan 206221828Sgrehanint 207221828Sgrehanfbsdrun_vmexit_on_pause(void) 208221828Sgrehan{ 209221828Sgrehan 210221828Sgrehan return (guest_vmexit_on_pause); 211221828Sgrehan} 212221828Sgrehan 213221828Sgrehanint 214221828Sgrehanfbsdrun_vmexit_on_hlt(void) 215221828Sgrehan{ 216221828Sgrehan 217221828Sgrehan return (guest_vmexit_on_hlt); 218221828Sgrehan} 219221828Sgrehan 220256755Sgrehanint 221256755Sgrehanfbsdrun_virtio_msix(void) 222256755Sgrehan{ 223256755Sgrehan 224256755Sgrehan return (virtio_msix); 225256755Sgrehan} 226256755Sgrehan 227221942Sjhbstatic void * 228221828Sgrehanfbsdrun_start_thread(void *param) 229221828Sgrehan{ 230242404Sgrehan char tname[MAXCOMLEN + 1]; 231242404Sgrehan struct mt_vmm_info *mtp; 232221828Sgrehan int vcpu; 233221828Sgrehan 234242404Sgrehan mtp = param; 235221828Sgrehan vcpu = mtp->mt_vcpu; 236242404Sgrehan 237259301Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 238242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 239242404Sgrehan 240221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 241221828Sgrehan 242221828Sgrehan /* not reached */ 243221828Sgrehan exit(1); 244221828Sgrehan return (NULL); 245221828Sgrehan} 246221828Sgrehan 247221828Sgrehanvoid 248268894Sjhbfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 249221828Sgrehan{ 250221828Sgrehan int error; 251221828Sgrehan 252268894Sjhb assert(fromcpu == BSP); 253221828Sgrehan 254270070Sgrehan /* 255270070Sgrehan * The 'newcpu' must be activated in the context of 'fromcpu'. If 256270070Sgrehan * vm_activate_cpu() is delayed until newcpu's pthread starts running 257270070Sgrehan * then vmm.ko is out-of-sync with bhyve and this can create a race 258270070Sgrehan * with vm_suspend(). 259270070Sgrehan */ 260270070Sgrehan error = vm_activate_cpu(ctx, newcpu); 261270070Sgrehan assert(error == 0); 262270070Sgrehan 263268894Sjhb CPU_SET_ATOMIC(newcpu, &cpumask); 264221828Sgrehan 265221828Sgrehan /* 266221828Sgrehan * Set up the vmexit struct to allow execution to start 267221828Sgrehan * at the given RIP 268221828Sgrehan */ 269268894Sjhb vmexit[newcpu].rip = rip; 270268894Sjhb vmexit[newcpu].inst_length = 0; 271221828Sgrehan 272268894Sjhb mt_vmm_info[newcpu].mt_ctx = ctx; 273268894Sjhb mt_vmm_info[newcpu].mt_vcpu = newcpu; 274256072Sneel 275268894Sjhb error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 276268894Sjhb fbsdrun_start_thread, &mt_vmm_info[newcpu]); 277256072Sneel assert(error == 0); 278221828Sgrehan} 279221828Sgrehan 280221828Sgrehanstatic int 281262350Sjhbfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 282262350Sjhb{ 283262350Sjhb 284268894Sjhb if (!CPU_ISSET(vcpu, &cpumask)) { 285268894Sjhb fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 286262350Sjhb exit(1); 287262350Sjhb } 288262350Sjhb 289268894Sjhb CPU_CLR_ATOMIC(vcpu, &cpumask); 290268894Sjhb return (CPU_EMPTY(&cpumask)); 291262350Sjhb} 292262350Sjhb 293262350Sjhbstatic int 294221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 295221828Sgrehan uint32_t eax) 296221828Sgrehan{ 297256062Sgrehan#if BHYVE_DEBUG 298256062Sgrehan /* 299256062Sgrehan * put guest-driven debug here 300256062Sgrehan */ 301221828Sgrehan#endif 302221828Sgrehan return (VMEXIT_CONTINUE); 303221828Sgrehan} 304221828Sgrehan 305221828Sgrehanstatic int 306221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 307221828Sgrehan{ 308221828Sgrehan int error; 309268976Sjhb int bytes, port, in, out, string; 310221828Sgrehan int vcpu; 311221828Sgrehan 312221828Sgrehan vcpu = *pvcpu; 313221828Sgrehan 314221828Sgrehan port = vme->u.inout.port; 315221828Sgrehan bytes = vme->u.inout.bytes; 316268976Sjhb string = vme->u.inout.string; 317221828Sgrehan in = vme->u.inout.in; 318221828Sgrehan out = !in; 319221828Sgrehan 320221828Sgrehan /* Extra-special case of host notifications */ 321268976Sjhb if (out && port == GUEST_NIO_PORT) { 322268976Sjhb error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 323268976Sjhb return (error); 324268976Sjhb } 325221828Sgrehan 326268976Sjhb error = emulate_inout(ctx, vcpu, vme, strictio); 327270159Sgrehan if (error) { 328270159Sgrehan fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", 329270159Sgrehan bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 330270159Sgrehan return (VMEXIT_ABORT); 331270159Sgrehan } else { 332221828Sgrehan return (VMEXIT_CONTINUE); 333221828Sgrehan } 334221828Sgrehan} 335221828Sgrehan 336221828Sgrehanstatic int 337221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 338221828Sgrehan{ 339264273Sjhb uint64_t val; 340264273Sjhb uint32_t eax, edx; 341264273Sjhb int error; 342264273Sjhb 343264273Sjhb val = 0; 344264273Sjhb error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 345264273Sjhb if (error != 0) { 346264273Sjhb fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 347264273Sjhb vme->u.msr.code, *pvcpu); 348267427Sjhb if (strictmsr) { 349270159Sgrehan vm_inject_gp(ctx, *pvcpu); 350284894Sneel return (VMEXIT_CONTINUE); 351267427Sjhb } 352264273Sjhb } 353264273Sjhb 354264273Sjhb eax = val; 355264273Sjhb error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 356264273Sjhb assert(error == 0); 357264273Sjhb 358264273Sjhb edx = val >> 32; 359264273Sjhb error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 360264273Sjhb assert(error == 0); 361264273Sjhb 362264273Sjhb return (VMEXIT_CONTINUE); 363221828Sgrehan} 364221828Sgrehan 365221828Sgrehanstatic int 366221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 367221828Sgrehan{ 368264273Sjhb int error; 369221828Sgrehan 370264273Sjhb error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 371264273Sjhb if (error != 0) { 372264273Sjhb fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 373264273Sjhb vme->u.msr.code, vme->u.msr.wval, *pvcpu); 374267427Sjhb if (strictmsr) { 375270159Sgrehan vm_inject_gp(ctx, *pvcpu); 376284894Sneel return (VMEXIT_CONTINUE); 377267427Sjhb } 378264273Sjhb } 379264273Sjhb return (VMEXIT_CONTINUE); 380221828Sgrehan} 381221828Sgrehan 382221828Sgrehanstatic int 383240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 384240912Sneel{ 385240912Sneel int newcpu; 386240912Sneel int retval = VMEXIT_CONTINUE; 387240912Sneel 388240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 389240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 390240912Sneel 391240912Sneel return (retval); 392240912Sneel} 393240912Sneel 394270159Sgrehan#define DEBUG_EPT_MISCONFIG 395270159Sgrehan#ifdef DEBUG_EPT_MISCONFIG 396270159Sgrehan#define EXIT_REASON_EPT_MISCONFIG 49 397270159Sgrehan#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 398270159Sgrehan#define VMCS_IDENT(x) ((x) | 0x80000000) 399270159Sgrehan 400270159Sgrehanstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 401270159Sgrehanstatic int ept_misconfig_ptenum; 402270159Sgrehan#endif 403270159Sgrehan 404240912Sneelstatic int 405221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 406221828Sgrehan{ 407221828Sgrehan 408242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 409242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 410242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 411242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 412264619Sjhb fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 413242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 414242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 415242385Sgrehan vmexit->u.vmx.exit_qualification); 416264619Sjhb fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 417264619Sjhb fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 418270159Sgrehan#ifdef DEBUG_EPT_MISCONFIG 419270159Sgrehan if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 420270159Sgrehan vm_get_register(ctx, *pvcpu, 421270159Sgrehan VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 422270159Sgrehan &ept_misconfig_gpa); 423270159Sgrehan vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 424270159Sgrehan &ept_misconfig_ptenum); 425270159Sgrehan fprintf(stderr, "\tEPT misconfiguration:\n"); 426270159Sgrehan fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 427270159Sgrehan fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 428270159Sgrehan ept_misconfig_ptenum, ept_misconfig_pte[0], 429270159Sgrehan ept_misconfig_pte[1], ept_misconfig_pte[2], 430270159Sgrehan ept_misconfig_pte[3]); 431270159Sgrehan } 432270159Sgrehan#endif /* DEBUG_EPT_MISCONFIG */ 433221828Sgrehan return (VMEXIT_ABORT); 434221828Sgrehan} 435221828Sgrehan 436221828Sgrehanstatic int 437276403Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 438276403Sneel{ 439276403Sneel 440276403Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 441276403Sneel fprintf(stderr, "\treason\t\tSVM\n"); 442276403Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 443276403Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 444276403Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 445276403Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 446276403Sneel fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 447276403Sneel return (VMEXIT_ABORT); 448276403Sneel} 449276403Sneel 450276403Sneelstatic int 451221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 452221828Sgrehan{ 453256062Sgrehan 454284894Sneel assert(vmexit->inst_length == 0); 455284894Sneel 456221828Sgrehan stats.vmexit_bogus++; 457221828Sgrehan 458284894Sneel return (VMEXIT_CONTINUE); 459221828Sgrehan} 460221828Sgrehan 461221828Sgrehanstatic int 462221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 463221828Sgrehan{ 464256062Sgrehan 465221828Sgrehan stats.vmexit_hlt++; 466256062Sgrehan 467256062Sgrehan /* 468256062Sgrehan * Just continue execution with the next instruction. We use 469256062Sgrehan * the HLT VM exit as a way to be friendly with the host 470256062Sgrehan * scheduler. 471256062Sgrehan */ 472256062Sgrehan return (VMEXIT_CONTINUE); 473221828Sgrehan} 474221828Sgrehan 475221828Sgrehanstatic int 476221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 477221828Sgrehan{ 478256062Sgrehan 479221828Sgrehan stats.vmexit_pause++; 480221828Sgrehan 481256062Sgrehan return (VMEXIT_CONTINUE); 482221828Sgrehan} 483221828Sgrehan 484221828Sgrehanstatic int 485221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 486221828Sgrehan{ 487256062Sgrehan 488284894Sneel assert(vmexit->inst_length == 0); 489284894Sneel 490221828Sgrehan stats.vmexit_mtrap++; 491221828Sgrehan 492284894Sneel return (VMEXIT_CONTINUE); 493221828Sgrehan} 494221828Sgrehan 495234761Sgrehanstatic int 496256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 497234761Sgrehan{ 498241744Sgrehan int err; 499256072Sneel stats.vmexit_inst_emul++; 500234761Sgrehan 501256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 502270159Sgrehan &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging); 503241744Sgrehan 504241744Sgrehan if (err) { 505241744Sgrehan if (err == EINVAL) { 506242385Sgrehan fprintf(stderr, 507242385Sgrehan "Failed to emulate instruction at 0x%lx\n", 508242385Sgrehan vmexit->rip); 509241744Sgrehan } else if (err == ESRCH) { 510242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 511256072Sneel vmexit->u.inst_emul.gpa); 512241744Sgrehan } 513241744Sgrehan 514234761Sgrehan return (VMEXIT_ABORT); 515234761Sgrehan } 516234761Sgrehan 517234761Sgrehan return (VMEXIT_CONTINUE); 518234761Sgrehan} 519234761Sgrehan 520268935Sjhbstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 521268935Sjhbstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 522268935Sjhb 523268935Sjhbstatic int 524268935Sjhbvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 525268935Sjhb{ 526268935Sjhb enum vm_suspend_how how; 527268935Sjhb 528268935Sjhb how = vmexit->u.suspended.how; 529268935Sjhb 530268935Sjhb fbsdrun_deletecpu(ctx, *pvcpu); 531268935Sjhb 532268935Sjhb if (*pvcpu != BSP) { 533268935Sjhb pthread_mutex_lock(&resetcpu_mtx); 534268935Sjhb pthread_cond_signal(&resetcpu_cond); 535268935Sjhb pthread_mutex_unlock(&resetcpu_mtx); 536268935Sjhb pthread_exit(NULL); 537268935Sjhb } 538268935Sjhb 539268935Sjhb pthread_mutex_lock(&resetcpu_mtx); 540268935Sjhb while (!CPU_EMPTY(&cpumask)) { 541268935Sjhb pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 542268935Sjhb } 543268935Sjhb pthread_mutex_unlock(&resetcpu_mtx); 544268935Sjhb 545268935Sjhb switch (how) { 546268935Sjhb case VM_SUSPEND_RESET: 547268935Sjhb exit(0); 548268935Sjhb case VM_SUSPEND_POWEROFF: 549268935Sjhb exit(1); 550268935Sjhb case VM_SUSPEND_HALT: 551268935Sjhb exit(2); 552270159Sgrehan case VM_SUSPEND_TRIPLEFAULT: 553270159Sgrehan exit(3); 554268935Sjhb default: 555268935Sjhb fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 556268935Sjhb exit(100); 557268935Sjhb } 558268935Sjhb return (0); /* NOTREACHED */ 559268935Sjhb} 560268935Sjhb 561221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 562234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 563268976Sjhb [VM_EXITCODE_INOUT_STR] = vmexit_inout, 564234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 565276403Sneel [VM_EXITCODE_SVM] = vmexit_svm, 566234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 567234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 568234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 569234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 570256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 571240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 572270159Sgrehan [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 573270159Sgrehan [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 574221828Sgrehan}; 575221828Sgrehan 576221828Sgrehanstatic void 577284894Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 578221828Sgrehan{ 579221828Sgrehan int error, rc, prevcpu; 580253452Sgrehan enum vm_exitcode exitcode; 581270070Sgrehan cpuset_t active_cpus; 582221828Sgrehan 583268894Sjhb if (vcpumap[vcpu] != NULL) { 584246686Sneel error = pthread_setaffinity_np(pthread_self(), 585268894Sjhb sizeof(cpuset_t), vcpumap[vcpu]); 586221828Sgrehan assert(error == 0); 587221828Sgrehan } 588221828Sgrehan 589270070Sgrehan error = vm_active_cpus(ctx, &active_cpus); 590270070Sgrehan assert(CPU_ISSET(vcpu, &active_cpus)); 591270070Sgrehan 592284894Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 593284894Sneel assert(error == 0); 594284894Sneel 595221828Sgrehan while (1) { 596284894Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 597266393Sjhb if (error != 0) 598266393Sjhb break; 599221828Sgrehan 600221828Sgrehan prevcpu = vcpu; 601253452Sgrehan 602253452Sgrehan exitcode = vmexit[vcpu].exitcode; 603253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 604253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 605253452Sgrehan exitcode); 606253452Sgrehan exit(1); 607253452Sgrehan } 608253452Sgrehan 609253452Sgrehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 610253452Sgrehan 611221828Sgrehan switch (rc) { 612221828Sgrehan case VMEXIT_CONTINUE: 613221828Sgrehan break; 614268953Sjhb case VMEXIT_ABORT: 615268953Sjhb abort(); 616221828Sgrehan default: 617221828Sgrehan exit(1); 618221828Sgrehan } 619221828Sgrehan } 620221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 621221828Sgrehan} 622221828Sgrehan 623245020Sneelstatic int 624245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 625245020Sneel{ 626245020Sneel int tmp, error; 627221828Sgrehan 628245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 629245020Sneel 630245020Sneel /* 631245020Sneel * The guest is allowed to spinup more than one processor only if the 632245020Sneel * UNRESTRICTED_GUEST capability is available. 633245020Sneel */ 634245020Sneel if (error == 0) 635245020Sneel return (VM_MAXCPU); 636245020Sneel else 637245020Sneel return (1); 638245020Sneel} 639245020Sneel 640256869Sneelvoid 641256869Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 642256869Sneel{ 643256869Sneel int err, tmp; 644256869Sneel 645256869Sneel if (fbsdrun_vmexit_on_hlt()) { 646256869Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 647256869Sneel if (err < 0) { 648256869Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 649256869Sneel exit(1); 650256869Sneel } 651256869Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 652256869Sneel if (cpu == BSP) 653256869Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 654256869Sneel } 655256869Sneel 656256869Sneel if (fbsdrun_vmexit_on_pause()) { 657256869Sneel /* 658256869Sneel * pause exit support required for this mode 659256869Sneel */ 660256869Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 661256869Sneel if (err < 0) { 662256869Sneel fprintf(stderr, 663256869Sneel "SMP mux requested, no pause support\n"); 664256869Sneel exit(1); 665256869Sneel } 666256869Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 667256869Sneel if (cpu == BSP) 668256869Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 669256869Sneel } 670256869Sneel 671267447Sjhb if (x2apic_mode) 672267447Sjhb err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 673267447Sjhb else 674256869Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 675256869Sneel 676256869Sneel if (err) { 677256869Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 678256869Sneel exit(1); 679256869Sneel } 680256869Sneel 681256869Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 682256869Sneel} 683256869Sneel 684221828Sgrehanint 685221828Sgrehanmain(int argc, char *argv[]) 686221828Sgrehan{ 687259301Sgrehan int c, error, gdb_port, err, bvmcons; 688268953Sjhb int dump_guest_memory, max_vcpus, mptgen; 689284894Sneel int rtc_localtime; 690221828Sgrehan struct vmctx *ctx; 691221828Sgrehan uint64_t rip; 692248477Sneel size_t memsize; 693221828Sgrehan 694242192Sneel bvmcons = 0; 695268953Sjhb dump_guest_memory = 0; 696221828Sgrehan progname = basename(argv[0]); 697256156Sneel gdb_port = 0; 698221828Sgrehan guest_ncpus = 1; 699248477Sneel memsize = 256 * MB; 700268887Sjhb mptgen = 1; 701284894Sneel rtc_localtime = 1; 702221828Sgrehan 703284894Sneel while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) { 704221828Sgrehan switch (c) { 705240943Sneel case 'a': 706267447Sjhb x2apic_mode = 0; 707240943Sneel break; 708243327Sgrehan case 'A': 709243327Sgrehan acpi = 1; 710243327Sgrehan break; 711242192Sneel case 'b': 712242192Sneel bvmcons = 1; 713242192Sneel break; 714221828Sgrehan case 'p': 715268894Sjhb if (pincpu_parse(optarg) != 0) { 716268894Sjhb errx(EX_USAGE, "invalid vcpu pinning " 717268894Sjhb "configuration '%s'", optarg); 718268894Sjhb } 719221828Sgrehan break; 720221828Sgrehan case 'c': 721221828Sgrehan guest_ncpus = atoi(optarg); 722221828Sgrehan break; 723268953Sjhb case 'C': 724268953Sjhb dump_guest_memory = 1; 725268953Sjhb break; 726221828Sgrehan case 'g': 727221828Sgrehan gdb_port = atoi(optarg); 728221828Sgrehan break; 729257396Sneel case 'l': 730257396Sneel if (lpc_device_parse(optarg) != 0) { 731257396Sneel errx(EX_USAGE, "invalid lpc device " 732257396Sneel "configuration '%s'", optarg); 733257396Sneel } 734257396Sneel break; 735221828Sgrehan case 's': 736267341Sjhb if (pci_parse_slot(optarg) != 0) 737249916Sneel exit(1); 738249916Sneel else 739249916Sneel break; 740221828Sgrehan case 'm': 741256176Sneel error = vm_parse_memsize(optarg, &memsize); 742256176Sneel if (error) 743256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 744221828Sgrehan break; 745221828Sgrehan case 'H': 746221828Sgrehan guest_vmexit_on_hlt = 1; 747221828Sgrehan break; 748239043Sneel case 'I': 749259301Sgrehan /* 750259301Sgrehan * The "-I" option was used to add an ioapic to the 751259301Sgrehan * virtual machine. 752259301Sgrehan * 753259301Sgrehan * An ioapic is now provided unconditionally for each 754259301Sgrehan * virtual machine and this option is now deprecated. 755259301Sgrehan */ 756239043Sneel break; 757221828Sgrehan case 'P': 758221828Sgrehan guest_vmexit_on_pause = 1; 759221828Sgrehan break; 760222105Sgrehan case 'e': 761222105Sgrehan strictio = 1; 762222105Sgrehan break; 763284894Sneel case 'u': 764284894Sneel rtc_localtime = 0; 765284894Sneel break; 766267450Sjhb case 'U': 767267450Sjhb guest_uuid_str = optarg; 768267450Sjhb break; 769264273Sjhb case 'w': 770264273Sjhb strictmsr = 0; 771264273Sjhb break; 772256755Sgrehan case 'W': 773256755Sgrehan virtio_msix = 0; 774256755Sgrehan break; 775267447Sjhb case 'x': 776267447Sjhb x2apic_mode = 1; 777267447Sjhb break; 778268887Sjhb case 'Y': 779268887Sjhb mptgen = 0; 780268887Sjhb break; 781221828Sgrehan case 'h': 782221828Sgrehan usage(0); 783221828Sgrehan default: 784221828Sgrehan usage(1); 785221828Sgrehan } 786221828Sgrehan } 787221828Sgrehan argc -= optind; 788221828Sgrehan argv += optind; 789221828Sgrehan 790221828Sgrehan if (argc != 1) 791221828Sgrehan usage(1); 792221828Sgrehan 793221828Sgrehan vmname = argv[0]; 794221828Sgrehan 795221828Sgrehan ctx = vm_open(vmname); 796221828Sgrehan if (ctx == NULL) { 797221828Sgrehan perror("vm_open"); 798221828Sgrehan exit(1); 799221828Sgrehan } 800221828Sgrehan 801245020Sneel max_vcpus = num_vcpus_allowed(ctx); 802245020Sneel if (guest_ncpus > max_vcpus) { 803245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 804245020Sneel guest_ncpus, max_vcpus); 805245020Sneel exit(1); 806245020Sneel } 807245020Sneel 808256869Sneel fbsdrun_set_capabilities(ctx, BSP); 809221828Sgrehan 810268953Sjhb if (dump_guest_memory) 811268953Sjhb vm_set_memflags(ctx, VM_MEM_F_INCORE); 812248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 813248477Sneel if (err) { 814248477Sneel fprintf(stderr, "Unable to setup memory (%d)\n", err); 815248477Sneel exit(1); 816221828Sgrehan } 817221828Sgrehan 818276349Sneel error = init_msr(); 819276349Sneel if (error) { 820276349Sneel fprintf(stderr, "init_msr error %d", error); 821276349Sneel exit(1); 822276349Sneel } 823276349Sneel 824249343Sneel init_mem(); 825221828Sgrehan init_inout(); 826268972Sjhb pci_irq_init(ctx); 827267393Sjhb ioapic_init(ctx); 828252682Sgrehan 829284894Sneel rtc_init(ctx, rtc_localtime); 830268972Sjhb sci_init(ctx); 831253181Sgrehan 832252682Sgrehan /* 833252682Sgrehan * Exit if a device emulation finds an error in it's initilization 834252682Sgrehan */ 835252682Sgrehan if (init_pci(ctx) != 0) 836252682Sgrehan exit(1); 837252682Sgrehan 838221828Sgrehan if (gdb_port != 0) 839221828Sgrehan init_dbgport(gdb_port); 840221828Sgrehan 841242192Sneel if (bvmcons) 842242192Sneel init_bvmcons(); 843242192Sneel 844221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 845221828Sgrehan assert(error == 0); 846221828Sgrehan 847221828Sgrehan /* 848221828Sgrehan * build the guest tables, MP etc. 849221828Sgrehan */ 850268887Sjhb if (mptgen) { 851268887Sjhb error = mptable_build(ctx, guest_ncpus); 852268887Sjhb if (error) 853268887Sjhb exit(1); 854268887Sjhb } 855221828Sgrehan 856267450Sjhb error = smbios_build(ctx); 857267450Sjhb assert(error == 0); 858267450Sjhb 859243327Sgrehan if (acpi) { 860259301Sgrehan error = acpi_build(ctx, guest_ncpus); 861243327Sgrehan assert(error == 0); 862243327Sgrehan } 863243327Sgrehan 864221828Sgrehan /* 865259301Sgrehan * Change the proc title to include the VM name. 866259301Sgrehan */ 867259301Sgrehan setproctitle("%s", vmname); 868259301Sgrehan 869259301Sgrehan /* 870221828Sgrehan * Add CPU 0 871221828Sgrehan */ 872268894Sjhb fbsdrun_addcpu(ctx, BSP, BSP, rip); 873221828Sgrehan 874221828Sgrehan /* 875221828Sgrehan * Head off to the main event dispatch loop 876221828Sgrehan */ 877221828Sgrehan mevent_dispatch(); 878221828Sgrehan 879221828Sgrehan exit(1); 880221828Sgrehan} 881