/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25221828Sgrehan * 26221828Sgrehan * $FreeBSD: releng/10.2/usr.sbin/bhyve/bhyverun.c 284900 2015-06-28 03:22:26Z neel $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: releng/10.2/usr.sbin/bhyve/bhyverun.c 284900 2015-06-28 03:22:26Z neel $"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36262350Sjhb#include <machine/atomic.h> 37221828Sgrehan#include <machine/segments.h> 38221828Sgrehan 39221828Sgrehan#include <stdio.h> 40221828Sgrehan#include <stdlib.h> 41257396Sneel#include <string.h> 42256176Sneel#include <err.h> 43221828Sgrehan#include <libgen.h> 44221828Sgrehan#include <unistd.h> 45221828Sgrehan#include <assert.h> 46221828Sgrehan#include <errno.h> 47221828Sgrehan#include <pthread.h> 48242404Sgrehan#include <pthread_np.h> 49256176Sneel#include <sysexits.h> 50221828Sgrehan 51221828Sgrehan#include <machine/vmm.h> 52221828Sgrehan#include <vmmapi.h> 53221828Sgrehan 54244167Sgrehan#include "bhyverun.h" 55243327Sgrehan#include "acpi.h" 56221828Sgrehan#include "inout.h" 57221828Sgrehan#include "dbgport.h" 58267393Sjhb#include "ioapic.h" 59241744Sgrehan#include "mem.h" 60221828Sgrehan#include "mevent.h" 61242131Sgrehan#include "mptbl.h" 62221828Sgrehan#include "pci_emul.h" 63268972Sjhb#include "pci_irq.h" 64257396Sneel#include "pci_lpc.h" 65267450Sjhb#include "smbiostbl.h" 66221828Sgrehan#include "xmsr.h" 67240912Sneel#include "spinup_ap.h" 68253181Sgrehan#include "rtc.h" 69221828Sgrehan 70221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 71221828Sgrehan 72221828Sgrehan#define MB (1024UL * 1024) 73221828Sgrehan#define GB (1024UL * MB) 74221828Sgrehan 75221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 76270159Sgrehanextern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 77221828Sgrehan 78221828Sgrehanchar *vmname; 
79221828Sgrehan 80221828Sgrehanint guest_ncpus; 81267450Sjhbchar *guest_uuid_str; 82221828Sgrehan 83267447Sjhbstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 84256755Sgrehanstatic int virtio_msix = 1; 85267447Sjhbstatic int x2apic_mode = 0; /* default is xAPIC */ 86221828Sgrehan 87222105Sgrehanstatic int strictio; 88264273Sjhbstatic int strictmsr = 1; 89222105Sgrehan 90243327Sgrehanstatic int acpi; 91243327Sgrehan 92221828Sgrehanstatic char *progname; 93221828Sgrehanstatic const int BSP = 0; 94221828Sgrehan 95268894Sjhbstatic cpuset_t cpumask; 96221828Sgrehan 97221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 98221828Sgrehan 99270159Sgrehanstatic struct vm_exit vmexit[VM_MAXCPU]; 100221828Sgrehan 101256062Sgrehanstruct bhyvestats { 102221828Sgrehan uint64_t vmexit_bogus; 103284900Sneel uint64_t vmexit_reqidle; 104221828Sgrehan uint64_t vmexit_hlt; 105221828Sgrehan uint64_t vmexit_pause; 106221828Sgrehan uint64_t vmexit_mtrap; 107256072Sneel uint64_t vmexit_inst_emul; 108221828Sgrehan uint64_t cpu_switch_rotate; 109221828Sgrehan uint64_t cpu_switch_direct; 110221828Sgrehan} stats; 111221828Sgrehan 112221828Sgrehanstruct mt_vmm_info { 113221828Sgrehan pthread_t mt_thr; 114221828Sgrehan struct vmctx *mt_ctx; 115221828Sgrehan int mt_vcpu; 116221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 117221828Sgrehan 118268894Sjhbstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 119268894Sjhb 120221828Sgrehanstatic void 121221828Sgrehanusage(int code) 122221828Sgrehan{ 123221828Sgrehan 124221828Sgrehan fprintf(stderr, 125284894Sneel "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 126270159Sgrehan " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 127267447Sjhb " -a: local apic is in xAPIC mode (deprecated)\n" 128270159Sgrehan " -A: create ACPI tables\n" 129221828Sgrehan " -c: # cpus (default 1)\n" 130268953Sjhb " -C: include guest memory in core file\n" 131257396Sneel " -e: exit on unhandled I/O access\n" 132270159Sgrehan " 
-g: gdb port\n" 133221828Sgrehan " -h: help\n" 134270159Sgrehan " -H: vmexit from the guest on hlt\n" 135257396Sneel " -l: LPC device configuration\n" 136264273Sjhb " -m: memory size in MB\n" 137270159Sgrehan " -p: pin 'vcpu' to 'hostcpu'\n" 138270159Sgrehan " -P: vmexit from the guest on pause\n" 139270159Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 140284894Sneel " -u: RTC keeps UTC time\n" 141270159Sgrehan " -U: uuid\n" 142267447Sjhb " -w: ignore unimplemented MSRs\n" 143270159Sgrehan " -W: force virtio to use single-vector MSI\n" 144267450Sjhb " -x: local apic is in x2APIC mode\n" 145270159Sgrehan " -Y: disable MPtable generation\n", 146257396Sneel progname, (int)strlen(progname), ""); 147256062Sgrehan 148221828Sgrehan exit(code); 149221828Sgrehan} 150221828Sgrehan 151268894Sjhbstatic int 152268894Sjhbpincpu_parse(const char *opt) 153268894Sjhb{ 154268894Sjhb int vcpu, pcpu; 155268894Sjhb 156268894Sjhb if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 157268894Sjhb fprintf(stderr, "invalid format: %s\n", opt); 158268894Sjhb return (-1); 159268894Sjhb } 160268894Sjhb 161268894Sjhb if (vcpu < 0 || vcpu >= VM_MAXCPU) { 162268894Sjhb fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 163268894Sjhb vcpu, VM_MAXCPU - 1); 164268894Sjhb return (-1); 165268894Sjhb } 166268894Sjhb 167268894Sjhb if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 168268894Sjhb fprintf(stderr, "hostcpu '%d' outside valid range from " 169268894Sjhb "0 to %d\n", pcpu, CPU_SETSIZE - 1); 170268894Sjhb return (-1); 171268894Sjhb } 172268894Sjhb 173268894Sjhb if (vcpumap[vcpu] == NULL) { 174268894Sjhb if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 175268894Sjhb perror("malloc"); 176268894Sjhb return (-1); 177268894Sjhb } 178268894Sjhb CPU_ZERO(vcpumap[vcpu]); 179268894Sjhb } 180268894Sjhb CPU_SET(pcpu, vcpumap[vcpu]); 181268894Sjhb return (0); 182268894Sjhb} 183268894Sjhb 184270159Sgrehanvoid 185270159Sgrehanvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 
186270159Sgrehan int errcode) 187270159Sgrehan{ 188270159Sgrehan struct vmctx *ctx; 189284894Sneel int error, restart_instruction; 190270159Sgrehan 191270159Sgrehan ctx = arg; 192284894Sneel restart_instruction = 1; 193284894Sneel 194284894Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 195284894Sneel restart_instruction); 196270159Sgrehan assert(error == 0); 197270159Sgrehan} 198270159Sgrehan 199221828Sgrehanvoid * 200248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 201221828Sgrehan{ 202221828Sgrehan 203248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 204221828Sgrehan} 205221828Sgrehan 206221828Sgrehanint 207221828Sgrehanfbsdrun_vmexit_on_pause(void) 208221828Sgrehan{ 209221828Sgrehan 210221828Sgrehan return (guest_vmexit_on_pause); 211221828Sgrehan} 212221828Sgrehan 213221828Sgrehanint 214221828Sgrehanfbsdrun_vmexit_on_hlt(void) 215221828Sgrehan{ 216221828Sgrehan 217221828Sgrehan return (guest_vmexit_on_hlt); 218221828Sgrehan} 219221828Sgrehan 220256755Sgrehanint 221256755Sgrehanfbsdrun_virtio_msix(void) 222256755Sgrehan{ 223256755Sgrehan 224256755Sgrehan return (virtio_msix); 225256755Sgrehan} 226256755Sgrehan 227221942Sjhbstatic void * 228221828Sgrehanfbsdrun_start_thread(void *param) 229221828Sgrehan{ 230242404Sgrehan char tname[MAXCOMLEN + 1]; 231242404Sgrehan struct mt_vmm_info *mtp; 232221828Sgrehan int vcpu; 233221828Sgrehan 234242404Sgrehan mtp = param; 235221828Sgrehan vcpu = mtp->mt_vcpu; 236242404Sgrehan 237259301Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 238242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 239242404Sgrehan 240221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 241221828Sgrehan 242221828Sgrehan /* not reached */ 243221828Sgrehan exit(1); 244221828Sgrehan return (NULL); 245221828Sgrehan} 246221828Sgrehan 247221828Sgrehanvoid 248268894Sjhbfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 249221828Sgrehan{ 250221828Sgrehan int error; 
251221828Sgrehan 252268894Sjhb assert(fromcpu == BSP); 253221828Sgrehan 254270070Sgrehan /* 255270070Sgrehan * The 'newcpu' must be activated in the context of 'fromcpu'. If 256270070Sgrehan * vm_activate_cpu() is delayed until newcpu's pthread starts running 257270070Sgrehan * then vmm.ko is out-of-sync with bhyve and this can create a race 258270070Sgrehan * with vm_suspend(). 259270070Sgrehan */ 260270070Sgrehan error = vm_activate_cpu(ctx, newcpu); 261270070Sgrehan assert(error == 0); 262270070Sgrehan 263268894Sjhb CPU_SET_ATOMIC(newcpu, &cpumask); 264221828Sgrehan 265221828Sgrehan /* 266221828Sgrehan * Set up the vmexit struct to allow execution to start 267221828Sgrehan * at the given RIP 268221828Sgrehan */ 269268894Sjhb vmexit[newcpu].rip = rip; 270268894Sjhb vmexit[newcpu].inst_length = 0; 271221828Sgrehan 272268894Sjhb mt_vmm_info[newcpu].mt_ctx = ctx; 273268894Sjhb mt_vmm_info[newcpu].mt_vcpu = newcpu; 274256072Sneel 275268894Sjhb error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 276268894Sjhb fbsdrun_start_thread, &mt_vmm_info[newcpu]); 277256072Sneel assert(error == 0); 278221828Sgrehan} 279221828Sgrehan 280221828Sgrehanstatic int 281262350Sjhbfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 282262350Sjhb{ 283262350Sjhb 284268894Sjhb if (!CPU_ISSET(vcpu, &cpumask)) { 285268894Sjhb fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 286262350Sjhb exit(1); 287262350Sjhb } 288262350Sjhb 289268894Sjhb CPU_CLR_ATOMIC(vcpu, &cpumask); 290268894Sjhb return (CPU_EMPTY(&cpumask)); 291262350Sjhb} 292262350Sjhb 293262350Sjhbstatic int 294221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 295221828Sgrehan uint32_t eax) 296221828Sgrehan{ 297256062Sgrehan#if BHYVE_DEBUG 298256062Sgrehan /* 299256062Sgrehan * put guest-driven debug here 300256062Sgrehan */ 301221828Sgrehan#endif 302221828Sgrehan return (VMEXIT_CONTINUE); 303221828Sgrehan} 304221828Sgrehan 305221828Sgrehanstatic int 
306221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 307221828Sgrehan{ 308221828Sgrehan int error; 309268976Sjhb int bytes, port, in, out, string; 310221828Sgrehan int vcpu; 311221828Sgrehan 312221828Sgrehan vcpu = *pvcpu; 313221828Sgrehan 314221828Sgrehan port = vme->u.inout.port; 315221828Sgrehan bytes = vme->u.inout.bytes; 316268976Sjhb string = vme->u.inout.string; 317221828Sgrehan in = vme->u.inout.in; 318221828Sgrehan out = !in; 319221828Sgrehan 320221828Sgrehan /* Extra-special case of host notifications */ 321268976Sjhb if (out && port == GUEST_NIO_PORT) { 322268976Sjhb error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 323268976Sjhb return (error); 324268976Sjhb } 325221828Sgrehan 326268976Sjhb error = emulate_inout(ctx, vcpu, vme, strictio); 327270159Sgrehan if (error) { 328284899Sneel fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 329284899Sneel in ? "in" : "out", 330284899Sneel bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 331284899Sneel port, vmexit->rip); 332270159Sgrehan return (VMEXIT_ABORT); 333270159Sgrehan } else { 334221828Sgrehan return (VMEXIT_CONTINUE); 335221828Sgrehan } 336221828Sgrehan} 337221828Sgrehan 338221828Sgrehanstatic int 339221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 340221828Sgrehan{ 341264273Sjhb uint64_t val; 342264273Sjhb uint32_t eax, edx; 343264273Sjhb int error; 344264273Sjhb 345264273Sjhb val = 0; 346264273Sjhb error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 347264273Sjhb if (error != 0) { 348264273Sjhb fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 349264273Sjhb vme->u.msr.code, *pvcpu); 350267427Sjhb if (strictmsr) { 351270159Sgrehan vm_inject_gp(ctx, *pvcpu); 352284894Sneel return (VMEXIT_CONTINUE); 353267427Sjhb } 354264273Sjhb } 355264273Sjhb 356264273Sjhb eax = val; 357264273Sjhb error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 358264273Sjhb assert(error == 0); 359264273Sjhb 360264273Sjhb edx = val >> 32; 
361264273Sjhb error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 362264273Sjhb assert(error == 0); 363264273Sjhb 364264273Sjhb return (VMEXIT_CONTINUE); 365221828Sgrehan} 366221828Sgrehan 367221828Sgrehanstatic int 368221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 369221828Sgrehan{ 370264273Sjhb int error; 371221828Sgrehan 372264273Sjhb error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 373264273Sjhb if (error != 0) { 374264273Sjhb fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 375264273Sjhb vme->u.msr.code, vme->u.msr.wval, *pvcpu); 376267427Sjhb if (strictmsr) { 377270159Sgrehan vm_inject_gp(ctx, *pvcpu); 378284894Sneel return (VMEXIT_CONTINUE); 379267427Sjhb } 380264273Sjhb } 381264273Sjhb return (VMEXIT_CONTINUE); 382221828Sgrehan} 383221828Sgrehan 384221828Sgrehanstatic int 385240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 386240912Sneel{ 387240912Sneel int newcpu; 388240912Sneel int retval = VMEXIT_CONTINUE; 389240912Sneel 390240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 391240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 392240912Sneel 393240912Sneel return (retval); 394240912Sneel} 395240912Sneel 396270159Sgrehan#define DEBUG_EPT_MISCONFIG 397270159Sgrehan#ifdef DEBUG_EPT_MISCONFIG 398270159Sgrehan#define EXIT_REASON_EPT_MISCONFIG 49 399270159Sgrehan#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 400270159Sgrehan#define VMCS_IDENT(x) ((x) | 0x80000000) 401270159Sgrehan 402270159Sgrehanstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 403270159Sgrehanstatic int ept_misconfig_ptenum; 404270159Sgrehan#endif 405270159Sgrehan 406240912Sneelstatic int 407221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 408221828Sgrehan{ 409221828Sgrehan 410242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 411242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 412242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 
413242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 414264619Sjhb fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 415242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 416242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 417242385Sgrehan vmexit->u.vmx.exit_qualification); 418264619Sjhb fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 419264619Sjhb fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 420270159Sgrehan#ifdef DEBUG_EPT_MISCONFIG 421270159Sgrehan if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 422270159Sgrehan vm_get_register(ctx, *pvcpu, 423270159Sgrehan VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 424270159Sgrehan &ept_misconfig_gpa); 425270159Sgrehan vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 426270159Sgrehan &ept_misconfig_ptenum); 427270159Sgrehan fprintf(stderr, "\tEPT misconfiguration:\n"); 428270159Sgrehan fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 429270159Sgrehan fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 430270159Sgrehan ept_misconfig_ptenum, ept_misconfig_pte[0], 431270159Sgrehan ept_misconfig_pte[1], ept_misconfig_pte[2], 432270159Sgrehan ept_misconfig_pte[3]); 433270159Sgrehan } 434270159Sgrehan#endif /* DEBUG_EPT_MISCONFIG */ 435221828Sgrehan return (VMEXIT_ABORT); 436221828Sgrehan} 437221828Sgrehan 438221828Sgrehanstatic int 439276403Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 440276403Sneel{ 441276403Sneel 442276403Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 443276403Sneel fprintf(stderr, "\treason\t\tSVM\n"); 444276403Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 445276403Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 446276403Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 447276403Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 448276403Sneel fprintf(stderr, 
"\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 449276403Sneel return (VMEXIT_ABORT); 450276403Sneel} 451276403Sneel 452276403Sneelstatic int 453221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 454221828Sgrehan{ 455256062Sgrehan 456284894Sneel assert(vmexit->inst_length == 0); 457284894Sneel 458221828Sgrehan stats.vmexit_bogus++; 459221828Sgrehan 460284894Sneel return (VMEXIT_CONTINUE); 461221828Sgrehan} 462221828Sgrehan 463221828Sgrehanstatic int 464284900Sneelvmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 465284900Sneel{ 466284900Sneel 467284900Sneel assert(vmexit->inst_length == 0); 468284900Sneel 469284900Sneel stats.vmexit_reqidle++; 470284900Sneel 471284900Sneel return (VMEXIT_CONTINUE); 472284900Sneel} 473284900Sneel 474284900Sneelstatic int 475221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 476221828Sgrehan{ 477256062Sgrehan 478221828Sgrehan stats.vmexit_hlt++; 479256062Sgrehan 480256062Sgrehan /* 481256062Sgrehan * Just continue execution with the next instruction. We use 482256062Sgrehan * the HLT VM exit as a way to be friendly with the host 483256062Sgrehan * scheduler. 
484256062Sgrehan */ 485256062Sgrehan return (VMEXIT_CONTINUE); 486221828Sgrehan} 487221828Sgrehan 488221828Sgrehanstatic int 489221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 490221828Sgrehan{ 491256062Sgrehan 492221828Sgrehan stats.vmexit_pause++; 493221828Sgrehan 494256062Sgrehan return (VMEXIT_CONTINUE); 495221828Sgrehan} 496221828Sgrehan 497221828Sgrehanstatic int 498221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 499221828Sgrehan{ 500256062Sgrehan 501284894Sneel assert(vmexit->inst_length == 0); 502284894Sneel 503221828Sgrehan stats.vmexit_mtrap++; 504221828Sgrehan 505284894Sneel return (VMEXIT_CONTINUE); 506221828Sgrehan} 507221828Sgrehan 508234761Sgrehanstatic int 509256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 510234761Sgrehan{ 511284899Sneel int err, i; 512284899Sneel struct vie *vie; 513284899Sneel 514256072Sneel stats.vmexit_inst_emul++; 515234761Sgrehan 516284899Sneel vie = &vmexit->u.inst_emul.vie; 517256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 518284899Sneel vie, &vmexit->u.inst_emul.paging); 519241744Sgrehan 520241744Sgrehan if (err) { 521284899Sneel if (err == ESRCH) { 522242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 523256072Sneel vmexit->u.inst_emul.gpa); 524241744Sgrehan } 525241744Sgrehan 526284899Sneel fprintf(stderr, "Failed to emulate instruction ["); 527284899Sneel for (i = 0; i < vie->num_valid; i++) { 528284899Sneel fprintf(stderr, "0x%02x%s", vie->inst[i], 529284899Sneel i != (vie->num_valid - 1) ? 
" " : ""); 530284899Sneel } 531284899Sneel fprintf(stderr, "] at 0x%lx\n", vmexit->rip); 532234761Sgrehan return (VMEXIT_ABORT); 533234761Sgrehan } 534234761Sgrehan 535234761Sgrehan return (VMEXIT_CONTINUE); 536234761Sgrehan} 537234761Sgrehan 538268935Sjhbstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 539268935Sjhbstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 540268935Sjhb 541268935Sjhbstatic int 542268935Sjhbvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 543268935Sjhb{ 544268935Sjhb enum vm_suspend_how how; 545268935Sjhb 546268935Sjhb how = vmexit->u.suspended.how; 547268935Sjhb 548268935Sjhb fbsdrun_deletecpu(ctx, *pvcpu); 549268935Sjhb 550268935Sjhb if (*pvcpu != BSP) { 551268935Sjhb pthread_mutex_lock(&resetcpu_mtx); 552268935Sjhb pthread_cond_signal(&resetcpu_cond); 553268935Sjhb pthread_mutex_unlock(&resetcpu_mtx); 554268935Sjhb pthread_exit(NULL); 555268935Sjhb } 556268935Sjhb 557268935Sjhb pthread_mutex_lock(&resetcpu_mtx); 558268935Sjhb while (!CPU_EMPTY(&cpumask)) { 559268935Sjhb pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 560268935Sjhb } 561268935Sjhb pthread_mutex_unlock(&resetcpu_mtx); 562268935Sjhb 563268935Sjhb switch (how) { 564268935Sjhb case VM_SUSPEND_RESET: 565268935Sjhb exit(0); 566268935Sjhb case VM_SUSPEND_POWEROFF: 567268935Sjhb exit(1); 568268935Sjhb case VM_SUSPEND_HALT: 569268935Sjhb exit(2); 570270159Sgrehan case VM_SUSPEND_TRIPLEFAULT: 571270159Sgrehan exit(3); 572268935Sjhb default: 573268935Sjhb fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 574268935Sjhb exit(100); 575268935Sjhb } 576268935Sjhb return (0); /* NOTREACHED */ 577268935Sjhb} 578268935Sjhb 579221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 580234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 581268976Sjhb [VM_EXITCODE_INOUT_STR] = vmexit_inout, 582234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 583276403Sneel [VM_EXITCODE_SVM] = vmexit_svm, 584234761Sgrehan [VM_EXITCODE_BOGUS] = 
vmexit_bogus, 585284900Sneel [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 586234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 587234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 588234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 589256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 590240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 591270159Sgrehan [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 592270159Sgrehan [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 593221828Sgrehan}; 594221828Sgrehan 595221828Sgrehanstatic void 596284894Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 597221828Sgrehan{ 598221828Sgrehan int error, rc, prevcpu; 599253452Sgrehan enum vm_exitcode exitcode; 600270070Sgrehan cpuset_t active_cpus; 601221828Sgrehan 602268894Sjhb if (vcpumap[vcpu] != NULL) { 603246686Sneel error = pthread_setaffinity_np(pthread_self(), 604268894Sjhb sizeof(cpuset_t), vcpumap[vcpu]); 605221828Sgrehan assert(error == 0); 606221828Sgrehan } 607221828Sgrehan 608270070Sgrehan error = vm_active_cpus(ctx, &active_cpus); 609270070Sgrehan assert(CPU_ISSET(vcpu, &active_cpus)); 610270070Sgrehan 611284894Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 612284894Sneel assert(error == 0); 613284894Sneel 614221828Sgrehan while (1) { 615284894Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 616266393Sjhb if (error != 0) 617266393Sjhb break; 618221828Sgrehan 619221828Sgrehan prevcpu = vcpu; 620253452Sgrehan 621253452Sgrehan exitcode = vmexit[vcpu].exitcode; 622253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 623253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 624253452Sgrehan exitcode); 625253452Sgrehan exit(1); 626253452Sgrehan } 627253452Sgrehan 628253452Sgrehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 629253452Sgrehan 630221828Sgrehan switch (rc) { 631221828Sgrehan case VMEXIT_CONTINUE: 632221828Sgrehan break; 633268953Sjhb case VMEXIT_ABORT: 634268953Sjhb abort(); 
635221828Sgrehan default: 636221828Sgrehan exit(1); 637221828Sgrehan } 638221828Sgrehan } 639221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 640221828Sgrehan} 641221828Sgrehan 642245020Sneelstatic int 643245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 644245020Sneel{ 645245020Sneel int tmp, error; 646221828Sgrehan 647245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 648245020Sneel 649245020Sneel /* 650245020Sneel * The guest is allowed to spinup more than one processor only if the 651245020Sneel * UNRESTRICTED_GUEST capability is available. 652245020Sneel */ 653245020Sneel if (error == 0) 654245020Sneel return (VM_MAXCPU); 655245020Sneel else 656245020Sneel return (1); 657245020Sneel} 658245020Sneel 659256869Sneelvoid 660256869Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 661256869Sneel{ 662256869Sneel int err, tmp; 663256869Sneel 664256869Sneel if (fbsdrun_vmexit_on_hlt()) { 665256869Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 666256869Sneel if (err < 0) { 667256869Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 668256869Sneel exit(1); 669256869Sneel } 670256869Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 671256869Sneel if (cpu == BSP) 672256869Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 673256869Sneel } 674256869Sneel 675256869Sneel if (fbsdrun_vmexit_on_pause()) { 676256869Sneel /* 677256869Sneel * pause exit support required for this mode 678256869Sneel */ 679256869Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 680256869Sneel if (err < 0) { 681256869Sneel fprintf(stderr, 682256869Sneel "SMP mux requested, no pause support\n"); 683256869Sneel exit(1); 684256869Sneel } 685256869Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 686256869Sneel if (cpu == BSP) 687256869Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 688256869Sneel } 689256869Sneel 690267447Sjhb if (x2apic_mode) 691267447Sjhb err = vm_set_x2apic_state(ctx, cpu, 
X2APIC_ENABLED); 692267447Sjhb else 693256869Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 694256869Sneel 695256869Sneel if (err) { 696256869Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 697256869Sneel exit(1); 698256869Sneel } 699256869Sneel 700256869Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 701256869Sneel} 702256869Sneel 703221828Sgrehanint 704221828Sgrehanmain(int argc, char *argv[]) 705221828Sgrehan{ 706259301Sgrehan int c, error, gdb_port, err, bvmcons; 707268953Sjhb int dump_guest_memory, max_vcpus, mptgen; 708284894Sneel int rtc_localtime; 709221828Sgrehan struct vmctx *ctx; 710221828Sgrehan uint64_t rip; 711248477Sneel size_t memsize; 712221828Sgrehan 713242192Sneel bvmcons = 0; 714268953Sjhb dump_guest_memory = 0; 715221828Sgrehan progname = basename(argv[0]); 716256156Sneel gdb_port = 0; 717221828Sgrehan guest_ncpus = 1; 718248477Sneel memsize = 256 * MB; 719268887Sjhb mptgen = 1; 720284894Sneel rtc_localtime = 1; 721221828Sgrehan 722284894Sneel while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) { 723221828Sgrehan switch (c) { 724240943Sneel case 'a': 725267447Sjhb x2apic_mode = 0; 726240943Sneel break; 727243327Sgrehan case 'A': 728243327Sgrehan acpi = 1; 729243327Sgrehan break; 730242192Sneel case 'b': 731242192Sneel bvmcons = 1; 732242192Sneel break; 733221828Sgrehan case 'p': 734268894Sjhb if (pincpu_parse(optarg) != 0) { 735268894Sjhb errx(EX_USAGE, "invalid vcpu pinning " 736268894Sjhb "configuration '%s'", optarg); 737268894Sjhb } 738221828Sgrehan break; 739221828Sgrehan case 'c': 740221828Sgrehan guest_ncpus = atoi(optarg); 741221828Sgrehan break; 742268953Sjhb case 'C': 743268953Sjhb dump_guest_memory = 1; 744268953Sjhb break; 745221828Sgrehan case 'g': 746221828Sgrehan gdb_port = atoi(optarg); 747221828Sgrehan break; 748257396Sneel case 'l': 749257396Sneel if (lpc_device_parse(optarg) != 0) { 750257396Sneel errx(EX_USAGE, "invalid lpc device " 751257396Sneel "configuration 
'%s'", optarg); 752257396Sneel } 753257396Sneel break; 754221828Sgrehan case 's': 755267341Sjhb if (pci_parse_slot(optarg) != 0) 756249916Sneel exit(1); 757249916Sneel else 758249916Sneel break; 759221828Sgrehan case 'm': 760256176Sneel error = vm_parse_memsize(optarg, &memsize); 761256176Sneel if (error) 762256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 763221828Sgrehan break; 764221828Sgrehan case 'H': 765221828Sgrehan guest_vmexit_on_hlt = 1; 766221828Sgrehan break; 767239043Sneel case 'I': 768259301Sgrehan /* 769259301Sgrehan * The "-I" option was used to add an ioapic to the 770259301Sgrehan * virtual machine. 771259301Sgrehan * 772259301Sgrehan * An ioapic is now provided unconditionally for each 773259301Sgrehan * virtual machine and this option is now deprecated. 774259301Sgrehan */ 775239043Sneel break; 776221828Sgrehan case 'P': 777221828Sgrehan guest_vmexit_on_pause = 1; 778221828Sgrehan break; 779222105Sgrehan case 'e': 780222105Sgrehan strictio = 1; 781222105Sgrehan break; 782284894Sneel case 'u': 783284894Sneel rtc_localtime = 0; 784284894Sneel break; 785267450Sjhb case 'U': 786267450Sjhb guest_uuid_str = optarg; 787267450Sjhb break; 788264273Sjhb case 'w': 789264273Sjhb strictmsr = 0; 790264273Sjhb break; 791256755Sgrehan case 'W': 792256755Sgrehan virtio_msix = 0; 793256755Sgrehan break; 794267447Sjhb case 'x': 795267447Sjhb x2apic_mode = 1; 796267447Sjhb break; 797268887Sjhb case 'Y': 798268887Sjhb mptgen = 0; 799268887Sjhb break; 800221828Sgrehan case 'h': 801221828Sgrehan usage(0); 802221828Sgrehan default: 803221828Sgrehan usage(1); 804221828Sgrehan } 805221828Sgrehan } 806221828Sgrehan argc -= optind; 807221828Sgrehan argv += optind; 808221828Sgrehan 809221828Sgrehan if (argc != 1) 810221828Sgrehan usage(1); 811221828Sgrehan 812221828Sgrehan vmname = argv[0]; 813221828Sgrehan 814221828Sgrehan ctx = vm_open(vmname); 815221828Sgrehan if (ctx == NULL) { 816221828Sgrehan perror("vm_open"); 817221828Sgrehan exit(1); 818221828Sgrehan } 
819221828Sgrehan 820284899Sneel if (guest_ncpus < 1) { 821284899Sneel fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 822284899Sneel exit(1); 823284899Sneel } 824284899Sneel 825245020Sneel max_vcpus = num_vcpus_allowed(ctx); 826245020Sneel if (guest_ncpus > max_vcpus) { 827245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 828245020Sneel guest_ncpus, max_vcpus); 829245020Sneel exit(1); 830245020Sneel } 831245020Sneel 832256869Sneel fbsdrun_set_capabilities(ctx, BSP); 833221828Sgrehan 834268953Sjhb if (dump_guest_memory) 835268953Sjhb vm_set_memflags(ctx, VM_MEM_F_INCORE); 836248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 837248477Sneel if (err) { 838248477Sneel fprintf(stderr, "Unable to setup memory (%d)\n", err); 839248477Sneel exit(1); 840221828Sgrehan } 841221828Sgrehan 842276349Sneel error = init_msr(); 843276349Sneel if (error) { 844276349Sneel fprintf(stderr, "init_msr error %d", error); 845276349Sneel exit(1); 846276349Sneel } 847276349Sneel 848249343Sneel init_mem(); 849221828Sgrehan init_inout(); 850268972Sjhb pci_irq_init(ctx); 851267393Sjhb ioapic_init(ctx); 852252682Sgrehan 853284894Sneel rtc_init(ctx, rtc_localtime); 854268972Sjhb sci_init(ctx); 855253181Sgrehan 856252682Sgrehan /* 857252682Sgrehan * Exit if a device emulation finds an error in it's initilization 858252682Sgrehan */ 859252682Sgrehan if (init_pci(ctx) != 0) 860252682Sgrehan exit(1); 861252682Sgrehan 862221828Sgrehan if (gdb_port != 0) 863221828Sgrehan init_dbgport(gdb_port); 864221828Sgrehan 865242192Sneel if (bvmcons) 866242192Sneel init_bvmcons(); 867242192Sneel 868221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 869221828Sgrehan assert(error == 0); 870221828Sgrehan 871221828Sgrehan /* 872221828Sgrehan * build the guest tables, MP etc. 
873221828Sgrehan */ 874268887Sjhb if (mptgen) { 875268887Sjhb error = mptable_build(ctx, guest_ncpus); 876268887Sjhb if (error) 877268887Sjhb exit(1); 878268887Sjhb } 879221828Sgrehan 880267450Sjhb error = smbios_build(ctx); 881267450Sjhb assert(error == 0); 882267450Sjhb 883243327Sgrehan if (acpi) { 884259301Sgrehan error = acpi_build(ctx, guest_ncpus); 885243327Sgrehan assert(error == 0); 886243327Sgrehan } 887243327Sgrehan 888221828Sgrehan /* 889259301Sgrehan * Change the proc title to include the VM name. 890259301Sgrehan */ 891259301Sgrehan setproctitle("%s", vmname); 892259301Sgrehan 893259301Sgrehan /* 894221828Sgrehan * Add CPU 0 895221828Sgrehan */ 896268894Sjhb fbsdrun_addcpu(ctx, BSP, BSP, rip); 897221828Sgrehan 898221828Sgrehan /* 899221828Sgrehan * Head off to the main event dispatch loop 900221828Sgrehan */ 901221828Sgrehan mevent_dispatch(); 902221828Sgrehan 903221828Sgrehan exit(1); 904221828Sgrehan} 905