1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD: releng/11.0/usr.sbin/bhyve/bhyverun.c 302362 2016-07-06 04:56:45Z ngie $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/bhyverun.c 302362 2016-07-06 04:56:45Z ngie $"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36259081Sneel#include <machine/atomic.h> 37221828Sgrehan#include <machine/segments.h> 38221828Sgrehan 39221828Sgrehan#include <stdio.h> 40221828Sgrehan#include <stdlib.h> 41257018Sneel#include <string.h> 42256176Sneel#include <err.h> 43221828Sgrehan#include <libgen.h> 44221828Sgrehan#include <unistd.h> 45221828Sgrehan#include <assert.h> 46221828Sgrehan#include <errno.h> 47221828Sgrehan#include <pthread.h> 48242404Sgrehan#include <pthread_np.h> 49256176Sneel#include <sysexits.h> 50284539Sneel#include <stdbool.h> 51221828Sgrehan 52221828Sgrehan#include <machine/vmm.h> 53221828Sgrehan#include <vmmapi.h> 54221828Sgrehan 55244167Sgrehan#include "bhyverun.h" 56243327Sgrehan#include "acpi.h" 57302332Sgrehan#include "atkbdc.h" 58221828Sgrehan#include "inout.h" 59221828Sgrehan#include "dbgport.h" 60288522Sgrehan#include "fwctl.h" 61261268Sjhb#include "ioapic.h" 62241744Sgrehan#include "mem.h" 63221828Sgrehan#include "mevent.h" 64242131Sgrehan#include "mptbl.h" 65221828Sgrehan#include "pci_emul.h" 66266125Sjhb#include "pci_irq.h" 67257293Sneel#include "pci_lpc.h" 68262744Stychon#include "smbiostbl.h" 69221828Sgrehan#include "xmsr.h" 70240912Sneel#include "spinup_ap.h" 71253181Sgrehan#include "rtc.h" 72221828Sgrehan 73221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 74221828Sgrehan 75221828Sgrehan#define MB (1024UL * 1024) 76221828Sgrehan#define GB (1024UL * MB) 77221828Sgrehan 78221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 79268777Sneelextern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 80221828Sgrehan 81221828Sgrehanchar *vmname; 82221828Sgrehan 83221828Sgrehanint guest_ncpus; 84262744Stychonchar *guest_uuid_str; 85221828Sgrehan 86262236Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 87256711Sgrehanstatic int virtio_msix = 1; 88262236Sneelstatic int x2apic_mode = 0; /* default is xAPIC */ 89221828Sgrehan 90222105Sgrehanstatic int strictio; 91259635Sneelstatic int strictmsr = 1; 92222105Sgrehan 93243327Sgrehanstatic int acpi; 94243327Sgrehan 95221828Sgrehanstatic char *progname; 96221828Sgrehanstatic const int BSP = 0; 97221828Sgrehan 98263432Sneelstatic cpuset_t cpumask; 99221828Sgrehan 100221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 101221828Sgrehan 102269042Sneelstatic struct vm_exit vmexit[VM_MAXCPU]; 103221828Sgrehan 104256062Sgrehanstruct bhyvestats { 105221828Sgrehan uint64_t vmexit_bogus; 106283657Sneel uint64_t vmexit_reqidle; 107221828Sgrehan uint64_t vmexit_hlt; 108221828Sgrehan uint64_t vmexit_pause; 109221828Sgrehan uint64_t vmexit_mtrap; 110256072Sneel uint64_t vmexit_inst_emul; 111221828Sgrehan uint64_t cpu_switch_rotate; 112221828Sgrehan uint64_t cpu_switch_direct; 113221828Sgrehan} stats; 114221828Sgrehan 115221828Sgrehanstruct mt_vmm_info { 116221828Sgrehan pthread_t mt_thr; 117221828Sgrehan struct vmctx *mt_ctx; 118221828Sgrehan int mt_vcpu; 119221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 120221828Sgrehan 121265376Sneelstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 122265376Sneel 123221828Sgrehanstatic void 124221828Sgrehanusage(int code) 125221828Sgrehan{ 126221828Sgrehan 127221828Sgrehan fprintf(stderr, 128284539Sneel "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 129302332Sgrehan " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 130262236Sneel " -a: local apic is in xAPIC mode (deprecated)\n" 131267934Sjhb " -A: create ACPI tables\n" 132221828Sgrehan " -c: # cpus (default 1)\n" 133265951Sneel " -C: include guest memory in core file\n" 134257018Sneel " -e: exit on unhandled I/O access\n" 135267959Sjhb " -g: gdb port\n" 136221828Sgrehan " -h: help\n" 137267959Sjhb " -H: vmexit from the guest on hlt\n" 138257293Sneel " -l: LPC device configuration\n" 139302332Sgrehan " -m: memory size in MB\n" 140267959Sjhb " -p: pin 'vcpu' to 'hostcpu'\n" 141267959Sjhb " -P: vmexit from the guest on pause\n" 142267959Sjhb " -s: <slot,driver,configinfo> PCI slot config\n" 143284539Sneel " -S: guest memory cannot be swapped\n" 144279225Sneel " -u: RTC keeps UTC time\n" 145267959Sjhb " -U: uuid\n" 146262236Sneel " -w: ignore unimplemented MSRs\n" 147267959Sjhb " -W: force virtio to use single-vector MSI\n" 148262744Stychon " -x: local apic is in x2APIC mode\n" 149267959Sjhb " -Y: disable MPtable generation\n", 150257018Sneel progname, (int)strlen(progname), ""); 151256062Sgrehan 152221828Sgrehan exit(code); 153221828Sgrehan} 154221828Sgrehan 155265376Sneelstatic int 156265376Sneelpincpu_parse(const char *opt) 157265376Sneel{ 158265376Sneel int vcpu, pcpu; 159265376Sneel 160265376Sneel if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 161265376Sneel fprintf(stderr, "invalid format: %s\n", opt); 162265376Sneel return (-1); 163265376Sneel } 164265376Sneel 165265376Sneel if (vcpu < 0 || vcpu >= VM_MAXCPU) { 166265376Sneel fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 167265376Sneel vcpu, VM_MAXCPU - 1); 168265376Sneel return (-1); 169265376Sneel } 170265376Sneel 171265376Sneel if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 172265376Sneel fprintf(stderr, "hostcpu '%d' outside valid range from " 173265376Sneel "0 to %d\n", pcpu, CPU_SETSIZE - 1); 174265376Sneel return (-1); 175265376Sneel } 176265376Sneel 177265376Sneel if (vcpumap[vcpu] == NULL) { 178265376Sneel if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 179265376Sneel perror("malloc"); 180265376Sneel return (-1); 181265376Sneel } 182265376Sneel CPU_ZERO(vcpumap[vcpu]); 183265376Sneel } 184265376Sneel CPU_SET(pcpu, vcpumap[vcpu]); 185265376Sneel return (0); 186265376Sneel} 187265376Sneel 188269042Sneelvoid 189269042Sneelvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 190269042Sneel int errcode) 191269042Sneel{ 192269042Sneel struct vmctx *ctx; 193277310Sneel int error, restart_instruction; 194269042Sneel 195269042Sneel ctx = arg; 196277310Sneel restart_instruction = 1; 197277310Sneel 198277310Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 199277310Sneel restart_instruction); 200269042Sneel assert(error == 0); 201269042Sneel} 202269042Sneel 203221828Sgrehanvoid * 204248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 205221828Sgrehan{ 206221828Sgrehan 207248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 208221828Sgrehan} 209221828Sgrehan 210221828Sgrehanint 211221828Sgrehanfbsdrun_vmexit_on_pause(void) 212221828Sgrehan{ 213221828Sgrehan 214221828Sgrehan return (guest_vmexit_on_pause); 215221828Sgrehan} 216221828Sgrehan 217221828Sgrehanint 218221828Sgrehanfbsdrun_vmexit_on_hlt(void) 219221828Sgrehan{ 220221828Sgrehan 221221828Sgrehan return (guest_vmexit_on_hlt); 222221828Sgrehan} 223221828Sgrehan 224256711Sgrehanint 225256711Sgrehanfbsdrun_virtio_msix(void) 226256711Sgrehan{ 227256711Sgrehan 228256711Sgrehan return (virtio_msix); 229256711Sgrehan} 230256711Sgrehan 231221942Sjhbstatic void * 232221828Sgrehanfbsdrun_start_thread(void *param) 233221828Sgrehan{ 234242404Sgrehan char tname[MAXCOMLEN + 1]; 235242404Sgrehan struct mt_vmm_info *mtp; 236221828Sgrehan int vcpu; 237221828Sgrehan 238242404Sgrehan mtp = param; 239221828Sgrehan vcpu = mtp->mt_vcpu; 240242404Sgrehan 241257729Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 242242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 243242404Sgrehan 244221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 245221828Sgrehan 246221828Sgrehan /* not reached */ 247221828Sgrehan exit(1); 248221828Sgrehan return (NULL); 249221828Sgrehan} 250221828Sgrehan 251221828Sgrehanvoid 252263432Sneelfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 253221828Sgrehan{ 254221828Sgrehan int error; 255221828Sgrehan 256263432Sneel assert(fromcpu == BSP); 257221828Sgrehan 258266933Sneel /* 259266933Sneel * The 'newcpu' must be activated in the context of 'fromcpu'. If 260266933Sneel * vm_activate_cpu() is delayed until newcpu's pthread starts running 261266933Sneel * then vmm.ko is out-of-sync with bhyve and this can create a race 262266933Sneel * with vm_suspend(). 263266933Sneel */ 264266933Sneel error = vm_activate_cpu(ctx, newcpu); 265289746Sngie if (error != 0) 266289746Sngie err(EX_OSERR, "could not activate CPU %d", newcpu); 267266933Sneel 268263432Sneel CPU_SET_ATOMIC(newcpu, &cpumask); 269221828Sgrehan 270221828Sgrehan /* 271221828Sgrehan * Set up the vmexit struct to allow execution to start 272221828Sgrehan * at the given RIP 273221828Sgrehan */ 274263432Sneel vmexit[newcpu].rip = rip; 275263432Sneel vmexit[newcpu].inst_length = 0; 276221828Sgrehan 277263432Sneel mt_vmm_info[newcpu].mt_ctx = ctx; 278263432Sneel mt_vmm_info[newcpu].mt_vcpu = newcpu; 279256072Sneel 280263432Sneel error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 281263432Sneel fbsdrun_start_thread, &mt_vmm_info[newcpu]); 282256072Sneel assert(error == 0); 283221828Sgrehan} 284221828Sgrehan 285221828Sgrehanstatic int 286259081Sneelfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 287259081Sneel{ 288259081Sneel 289263432Sneel if (!CPU_ISSET(vcpu, &cpumask)) { 290265366Sneel fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 291259081Sneel exit(1); 292259081Sneel } 293259081Sneel 294263432Sneel CPU_CLR_ATOMIC(vcpu, &cpumask); 295263432Sneel return (CPU_EMPTY(&cpumask)); 296259081Sneel} 297259081Sneel 298259081Sneelstatic int 299221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 300221828Sgrehan uint32_t eax) 301221828Sgrehan{ 302256062Sgrehan#if BHYVE_DEBUG 303256062Sgrehan /* 304256062Sgrehan * put guest-driven debug here 305256062Sgrehan */ 306221828Sgrehan#endif 307221828Sgrehan return (VMEXIT_CONTINUE); 308221828Sgrehan} 309221828Sgrehan 310221828Sgrehanstatic int 311221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 312221828Sgrehan{ 313221828Sgrehan int error; 314292981Saraujo int bytes, port, in, out; 315221828Sgrehan int vcpu; 316221828Sgrehan 317221828Sgrehan vcpu = *pvcpu; 318221828Sgrehan 319221828Sgrehan port = vme->u.inout.port; 320221828Sgrehan bytes = vme->u.inout.bytes; 321221828Sgrehan in = vme->u.inout.in; 322221828Sgrehan out = !in; 323221828Sgrehan 324221828Sgrehan /* Extra-special case of host notifications */ 325266573Sneel if (out && port == GUEST_NIO_PORT) { 326266573Sneel error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 327266573Sneel return (error); 328266573Sneel } 329221828Sgrehan 330266573Sneel error = emulate_inout(ctx, vcpu, vme, strictio); 331269094Sneel if (error) { 332281561Stychon fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 333281561Stychon in ? "in" : "out", 334281561Stychon bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 335281561Stychon port, vmexit->rip); 336269094Sneel return (VMEXIT_ABORT); 337269094Sneel } else { 338221828Sgrehan return (VMEXIT_CONTINUE); 339221828Sgrehan } 340221828Sgrehan} 341221828Sgrehan 342221828Sgrehanstatic int 343221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 344221828Sgrehan{ 345259635Sneel uint64_t val; 346259635Sneel uint32_t eax, edx; 347259635Sneel int error; 348259635Sneel 349259635Sneel val = 0; 350259635Sneel error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 351259635Sneel if (error != 0) { 352259635Sneel fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 353259635Sneel vme->u.msr.code, *pvcpu); 354262506Sneel if (strictmsr) { 355269042Sneel vm_inject_gp(ctx, *pvcpu); 356277310Sneel return (VMEXIT_CONTINUE); 357262506Sneel } 358259635Sneel } 359259635Sneel 360259635Sneel eax = val; 361259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 362259635Sneel assert(error == 0); 363259635Sneel 364259635Sneel edx = val >> 32; 365259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 366259635Sneel assert(error == 0); 367259635Sneel 368259635Sneel return (VMEXIT_CONTINUE); 369221828Sgrehan} 370221828Sgrehan 371221828Sgrehanstatic int 372221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 373221828Sgrehan{ 374259635Sneel int error; 375221828Sgrehan 376259635Sneel error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 377259635Sneel if (error != 0) { 378259635Sneel fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 379259635Sneel vme->u.msr.code, vme->u.msr.wval, *pvcpu); 380262506Sneel if (strictmsr) { 381269042Sneel vm_inject_gp(ctx, *pvcpu); 382277310Sneel return (VMEXIT_CONTINUE); 383262506Sneel } 384259635Sneel } 385259635Sneel return (VMEXIT_CONTINUE); 386221828Sgrehan} 387221828Sgrehan 388221828Sgrehanstatic int 389240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 390240912Sneel{ 391240912Sneel 392302362Sngie (void)spinup_ap(ctx, *pvcpu, 393302362Sngie vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 394240912Sneel 395302362Sngie return (VMEXIT_CONTINUE); 396240912Sneel} 397240912Sneel 398267966Sneel#define DEBUG_EPT_MISCONFIG 399267966Sneel#ifdef DEBUG_EPT_MISCONFIG 400267966Sneel#define EXIT_REASON_EPT_MISCONFIG 49 401267966Sneel#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 402267966Sneel#define VMCS_IDENT(x) ((x) | 0x80000000) 403267966Sneel 404267966Sneelstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 405267966Sneelstatic int ept_misconfig_ptenum; 406267966Sneel#endif 407267966Sneel 408240912Sneelstatic int 409221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 410221828Sgrehan{ 411221828Sgrehan 412242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 413242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 414242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 415242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 416260167Sneel fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 417242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 418242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 419242385Sgrehan vmexit->u.vmx.exit_qualification); 420260167Sneel fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 421260167Sneel fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 422267966Sneel#ifdef DEBUG_EPT_MISCONFIG 423267966Sneel if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 424267966Sneel vm_get_register(ctx, *pvcpu, 425267966Sneel VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 426267966Sneel &ept_misconfig_gpa); 427267966Sneel vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 428267966Sneel &ept_misconfig_ptenum); 429267966Sneel fprintf(stderr, "\tEPT misconfiguration:\n"); 430267966Sneel fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 431267966Sneel fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 432267966Sneel ept_misconfig_ptenum, ept_misconfig_pte[0], 433267966Sneel ept_misconfig_pte[1], ept_misconfig_pte[2], 434267966Sneel ept_misconfig_pte[3]); 435267966Sneel } 436267966Sneel#endif /* DEBUG_EPT_MISCONFIG */ 437221828Sgrehan return (VMEXIT_ABORT); 438221828Sgrehan} 439221828Sgrehan 440221828Sgrehanstatic int 441273375Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 442273375Sneel{ 443273375Sneel 444273375Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 445273375Sneel fprintf(stderr, "\treason\t\tSVM\n"); 446273375Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 447273375Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 448273375Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 449273375Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 450273375Sneel fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 451273375Sneel return (VMEXIT_ABORT); 452273375Sneel} 453273375Sneel 454273375Sneelstatic int 455221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 456221828Sgrehan{ 457256062Sgrehan 458277310Sneel assert(vmexit->inst_length == 0); 459277310Sneel 460221828Sgrehan stats.vmexit_bogus++; 461221828Sgrehan 462277310Sneel return (VMEXIT_CONTINUE); 463221828Sgrehan} 464221828Sgrehan 465221828Sgrehanstatic int 466283657Sneelvmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 467283657Sneel{ 468283657Sneel 469283657Sneel assert(vmexit->inst_length == 0); 470283657Sneel 471283657Sneel stats.vmexit_reqidle++; 472283657Sneel 473283657Sneel return (VMEXIT_CONTINUE); 474283657Sneel} 475283657Sneel 476283657Sneelstatic int 477221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 478221828Sgrehan{ 479256062Sgrehan 480221828Sgrehan stats.vmexit_hlt++; 481256062Sgrehan 482256062Sgrehan /* 483256062Sgrehan * Just continue execution with the next instruction. We use 484256062Sgrehan * the HLT VM exit as a way to be friendly with the host 485256062Sgrehan * scheduler. 486256062Sgrehan */ 487256062Sgrehan return (VMEXIT_CONTINUE); 488221828Sgrehan} 489221828Sgrehan 490221828Sgrehanstatic int 491221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 492221828Sgrehan{ 493256062Sgrehan 494221828Sgrehan stats.vmexit_pause++; 495221828Sgrehan 496256062Sgrehan return (VMEXIT_CONTINUE); 497221828Sgrehan} 498221828Sgrehan 499221828Sgrehanstatic int 500221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 501221828Sgrehan{ 502256062Sgrehan 503277310Sneel assert(vmexit->inst_length == 0); 504277310Sneel 505221828Sgrehan stats.vmexit_mtrap++; 506221828Sgrehan 507277310Sneel return (VMEXIT_CONTINUE); 508221828Sgrehan} 509221828Sgrehan 510234761Sgrehanstatic int 511256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 512234761Sgrehan{ 513280968Stychon int err, i; 514280968Stychon struct vie *vie; 515280968Stychon 516256072Sneel stats.vmexit_inst_emul++; 517234761Sgrehan 518280968Stychon vie = &vmexit->u.inst_emul.vie; 519256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 520280968Stychon vie, &vmexit->u.inst_emul.paging); 521241744Sgrehan 522241744Sgrehan if (err) { 523280968Stychon if (err == ESRCH) { 524242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 525256072Sneel vmexit->u.inst_emul.gpa); 526241744Sgrehan } 527241744Sgrehan 528280968Stychon fprintf(stderr, "Failed to emulate instruction ["); 529280968Stychon for (i = 0; i < vie->num_valid; i++) { 530280968Stychon fprintf(stderr, "0x%02x%s", vie->inst[i], 531280968Stychon i != (vie->num_valid - 1) ? " " : ""); 532280968Stychon } 533280968Stychon fprintf(stderr, "] at 0x%lx\n", vmexit->rip); 534234761Sgrehan return (VMEXIT_ABORT); 535234761Sgrehan } 536234761Sgrehan 537234761Sgrehan return (VMEXIT_CONTINUE); 538234761Sgrehan} 539234761Sgrehan 540263780Sneelstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 541263780Sneelstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 542263780Sneel 543263780Sneelstatic int 544263780Sneelvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 545263780Sneel{ 546265062Sneel enum vm_suspend_how how; 547263780Sneel 548265062Sneel how = vmexit->u.suspended.how; 549265062Sneel 550263780Sneel fbsdrun_deletecpu(ctx, *pvcpu); 551263780Sneel 552265062Sneel if (*pvcpu != BSP) { 553263780Sneel pthread_mutex_lock(&resetcpu_mtx); 554263780Sneel pthread_cond_signal(&resetcpu_cond); 555263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 556263780Sneel pthread_exit(NULL); 557263780Sneel } 558263780Sneel 559263780Sneel pthread_mutex_lock(&resetcpu_mtx); 560263780Sneel while (!CPU_EMPTY(&cpumask)) { 561263780Sneel pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 562263780Sneel } 563263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 564265062Sneel 565265203Sneel switch (how) { 566265203Sneel case VM_SUSPEND_RESET: 567265062Sneel exit(0); 568265203Sneel case VM_SUSPEND_POWEROFF: 569265062Sneel exit(1); 570265203Sneel case VM_SUSPEND_HALT: 571265203Sneel exit(2); 572268889Sneel case VM_SUSPEND_TRIPLEFAULT: 573268889Sneel exit(3); 574265203Sneel default: 575265203Sneel fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 576265203Sneel exit(100); 577265203Sneel } 578265062Sneel return (0); /* NOTREACHED */ 579263780Sneel} 580263780Sneel 581221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 582234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 583266573Sneel [VM_EXITCODE_INOUT_STR] = vmexit_inout, 584234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 585273375Sneel [VM_EXITCODE_SVM] = vmexit_svm, 586234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 587283657Sneel [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 588234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 589234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 590234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 591256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 592240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 593268777Sneel [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 594268777Sneel [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 595221828Sgrehan}; 596221828Sgrehan 597221828Sgrehanstatic void 598277310Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 599221828Sgrehan{ 600292982Sbz int error, rc; 601253452Sgrehan enum vm_exitcode exitcode; 602266933Sneel cpuset_t active_cpus; 603221828Sgrehan 604265376Sneel if (vcpumap[vcpu] != NULL) { 605246686Sneel error = pthread_setaffinity_np(pthread_self(), 606265376Sneel sizeof(cpuset_t), vcpumap[vcpu]); 607221828Sgrehan assert(error == 0); 608221828Sgrehan } 609221828Sgrehan 610266933Sneel error = vm_active_cpus(ctx, &active_cpus); 611266933Sneel assert(CPU_ISSET(vcpu, &active_cpus)); 612266933Sneel 613277310Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 614277310Sneel assert(error == 0); 615277310Sneel 616221828Sgrehan while (1) { 617277310Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 618259737Sneel if (error != 0) 619259737Sneel break; 620221828Sgrehan 621253452Sgrehan exitcode = vmexit[vcpu].exitcode; 622253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 623253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 624253452Sgrehan exitcode); 625253452Sgrehan exit(1); 626253452Sgrehan } 627253452Sgrehan 628292981Saraujo rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 629253452Sgrehan 630221828Sgrehan switch (rc) { 631221828Sgrehan case VMEXIT_CONTINUE: 632221828Sgrehan break; 633265941Sneel case VMEXIT_ABORT: 634265941Sneel abort(); 635221828Sgrehan default: 636221828Sgrehan exit(1); 637221828Sgrehan } 638221828Sgrehan } 639221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 640221828Sgrehan} 641221828Sgrehan 642245020Sneelstatic int 643245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 644245020Sneel{ 645245020Sneel int tmp, error; 646221828Sgrehan 647245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 648245020Sneel 649245020Sneel /* 650245020Sneel * The guest is allowed to spinup more than one processor only if the 651245020Sneel * UNRESTRICTED_GUEST capability is available. 652245020Sneel */ 653245020Sneel if (error == 0) 654245020Sneel return (VM_MAXCPU); 655245020Sneel else 656245020Sneel return (1); 657245020Sneel} 658245020Sneel 659256645Sneelvoid 660256645Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 661256645Sneel{ 662256645Sneel int err, tmp; 663256645Sneel 664256645Sneel if (fbsdrun_vmexit_on_hlt()) { 665256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 666256645Sneel if (err < 0) { 667256645Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 668256645Sneel exit(1); 669256645Sneel } 670256645Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 671256645Sneel if (cpu == BSP) 672256645Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 673256645Sneel } 674256645Sneel 675256645Sneel if (fbsdrun_vmexit_on_pause()) { 676256645Sneel /* 677256645Sneel * pause exit support required for this mode 678256645Sneel */ 679256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 680256645Sneel if (err < 0) { 681256645Sneel fprintf(stderr, 682256645Sneel "SMP mux requested, no pause support\n"); 683256645Sneel exit(1); 684256645Sneel } 685256645Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 686256645Sneel if (cpu == BSP) 687256645Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 688256645Sneel } 689256645Sneel 690262236Sneel if (x2apic_mode) 691262236Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 692262236Sneel else 693256645Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 694256645Sneel 695256645Sneel if (err) { 696256645Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 697256645Sneel exit(1); 698256645Sneel } 699256645Sneel 700256645Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 701256645Sneel} 702256645Sneel 703284539Sneelstatic struct vmctx * 704284539Sneeldo_open(const char *vmname) 705284539Sneel{ 706284539Sneel struct vmctx *ctx; 707284539Sneel int error; 708284539Sneel bool reinit, romboot; 709284539Sneel 710284539Sneel reinit = romboot = false; 711284539Sneel 712284539Sneel if (lpc_bootrom()) 713284539Sneel romboot = true; 714284539Sneel 715284539Sneel error = vm_create(vmname); 716284539Sneel if (error) { 717284539Sneel if (errno == EEXIST) { 718284539Sneel if (romboot) { 719284539Sneel reinit = true; 720284539Sneel } else { 721284539Sneel /* 722284539Sneel * The virtual machine has been setup by the 723284539Sneel * userspace bootloader. 724284539Sneel */ 725284539Sneel } 726284539Sneel } else { 727284539Sneel perror("vm_create"); 728284539Sneel exit(1); 729284539Sneel } 730284539Sneel } else { 731284539Sneel if (!romboot) { 732284539Sneel /* 733284539Sneel * If the virtual machine was just created then a 734284539Sneel * bootrom must be configured to boot it. 735284539Sneel */ 736284539Sneel fprintf(stderr, "virtual machine cannot be booted\n"); 737284539Sneel exit(1); 738284539Sneel } 739284539Sneel } 740284539Sneel 741284539Sneel ctx = vm_open(vmname); 742284539Sneel if (ctx == NULL) { 743284539Sneel perror("vm_open"); 744284539Sneel exit(1); 745284539Sneel } 746284539Sneel 747284539Sneel if (reinit) { 748284539Sneel error = vm_reinit(ctx); 749284539Sneel if (error) { 750284539Sneel perror("vm_reinit"); 751284539Sneel exit(1); 752284539Sneel } 753284539Sneel } 754284539Sneel return (ctx); 755284539Sneel} 756284539Sneel 757221828Sgrehanint 758221828Sgrehanmain(int argc, char *argv[]) 759221828Sgrehan{ 760257423Sneel int c, error, gdb_port, err, bvmcons; 761284539Sneel int max_vcpus, mptgen, memflags; 762279225Sneel int rtc_localtime; 763221828Sgrehan struct vmctx *ctx; 764221828Sgrehan uint64_t rip; 765248477Sneel size_t memsize; 766284539Sneel char *optstr; 767221828Sgrehan 768242192Sneel bvmcons = 0; 769221828Sgrehan progname = basename(argv[0]); 770256156Sneel gdb_port = 0; 771221828Sgrehan guest_ncpus = 1; 772248477Sneel memsize = 256 * MB; 773265211Sneel mptgen = 1; 774279225Sneel rtc_localtime = 1; 775284539Sneel memflags = 0; 776221828Sgrehan 777284539Sneel optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; 778284539Sneel while ((c = getopt(argc, argv, optstr)) != -1) { 779221828Sgrehan switch (c) { 780240943Sneel case 'a': 781262236Sneel x2apic_mode = 0; 782240943Sneel break; 783243327Sgrehan case 'A': 784243327Sgrehan acpi = 1; 785243327Sgrehan break; 786242192Sneel case 'b': 787242192Sneel bvmcons = 1; 788242192Sneel break; 789221828Sgrehan case 'p': 790265376Sneel if (pincpu_parse(optarg) != 0) { 791265376Sneel errx(EX_USAGE, "invalid vcpu pinning " 792265376Sneel "configuration '%s'", optarg); 793265376Sneel } 794221828Sgrehan break; 795221828Sgrehan case 'c': 796221828Sgrehan guest_ncpus = atoi(optarg); 797221828Sgrehan break; 798265951Sneel case 'C': 799284539Sneel memflags |= VM_MEM_F_INCORE; 800265951Sneel break; 801221828Sgrehan case 'g': 802221828Sgrehan gdb_port = atoi(optarg); 803221828Sgrehan break; 804257293Sneel case 'l': 805257293Sneel if (lpc_device_parse(optarg) != 0) { 806257293Sneel errx(EX_USAGE, "invalid lpc device " 807257293Sneel "configuration '%s'", optarg); 808257293Sneel } 809257293Sneel break; 810221828Sgrehan case 's': 811261217Sjhb if (pci_parse_slot(optarg) != 0) 812249916Sneel exit(1); 813249916Sneel else 814249916Sneel break; 815284539Sneel case 'S': 816284539Sneel memflags |= VM_MEM_F_WIRED; 817284539Sneel break; 818221828Sgrehan case 'm': 819256176Sneel error = vm_parse_memsize(optarg, &memsize); 820256176Sneel if (error) 821256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 822221828Sgrehan break; 823221828Sgrehan case 'H': 824221828Sgrehan guest_vmexit_on_hlt = 1; 825221828Sgrehan break; 826239043Sneel case 'I': 827257423Sneel /* 828257423Sneel * The "-I" option was used to add an ioapic to the 829257423Sneel * virtual machine. 830257423Sneel * 831257423Sneel * An ioapic is now provided unconditionally for each 832257423Sneel * virtual machine and this option is now deprecated. 833257423Sneel */ 834239043Sneel break; 835221828Sgrehan case 'P': 836221828Sgrehan guest_vmexit_on_pause = 1; 837221828Sgrehan break; 838222105Sgrehan case 'e': 839222105Sgrehan strictio = 1; 840222105Sgrehan break; 841279225Sneel case 'u': 842279225Sneel rtc_localtime = 0; 843279225Sneel break; 844262744Stychon case 'U': 845262744Stychon guest_uuid_str = optarg; 846262744Stychon break; 847259635Sneel case 'w': 848259635Sneel strictmsr = 0; 849259635Sneel break; 850256711Sgrehan case 'W': 851256711Sgrehan virtio_msix = 0; 852256711Sgrehan break; 853262236Sneel case 'x': 854262236Sneel x2apic_mode = 1; 855262236Sneel break; 856265211Sneel case 'Y': 857265211Sneel mptgen = 0; 858265211Sneel break; 859221828Sgrehan case 'h': 860221828Sgrehan usage(0); 861221828Sgrehan default: 862221828Sgrehan usage(1); 863221828Sgrehan } 864221828Sgrehan } 865221828Sgrehan argc -= optind; 866221828Sgrehan argv += optind; 867221828Sgrehan 868221828Sgrehan if (argc != 1) 869221828Sgrehan usage(1); 870221828Sgrehan 871221828Sgrehan vmname = argv[0]; 872284539Sneel ctx = do_open(vmname); 873221828Sgrehan 874281611Sneel if (guest_ncpus < 1) { 875281611Sneel fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 876281611Sneel exit(1); 877281611Sneel } 878281611Sneel 879245020Sneel max_vcpus = num_vcpus_allowed(ctx); 880245020Sneel if (guest_ncpus > max_vcpus) { 881245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 882245020Sneel guest_ncpus, max_vcpus); 883245020Sneel exit(1); 884245020Sneel } 885245020Sneel 886256645Sneel fbsdrun_set_capabilities(ctx, BSP); 887221828Sgrehan 888284539Sneel vm_set_memflags(ctx, memflags); 889248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 890248477Sneel if (err) { 891284539Sneel fprintf(stderr, "Unable to setup memory (%d)\n", errno); 892248477Sneel exit(1); 893221828Sgrehan } 894221828Sgrehan 895271888Sneel error = init_msr(); 896271888Sneel if (error) { 897271888Sneel fprintf(stderr, "init_msr error %d", error); 898271888Sneel exit(1); 899271888Sneel } 900271888Sneel 901249343Sneel init_mem(); 902221828Sgrehan init_inout(); 903302332Sgrehan atkbdc_init(ctx); 904266125Sjhb pci_irq_init(ctx); 905261268Sjhb ioapic_init(ctx); 906252682Sgrehan 907279225Sneel rtc_init(ctx, rtc_localtime); 908266125Sjhb sci_init(ctx); 909253181Sgrehan 910252682Sgrehan /* 911252682Sgrehan * Exit if a device emulation finds an error in it's initilization 912252682Sgrehan */ 913252682Sgrehan if (init_pci(ctx) != 0) 914252682Sgrehan exit(1); 915252682Sgrehan 916221828Sgrehan if (gdb_port != 0) 917221828Sgrehan init_dbgport(gdb_port); 918221828Sgrehan 919242192Sneel if (bvmcons) 920242192Sneel init_bvmcons(); 921242192Sneel 922284539Sneel if (lpc_bootrom()) { 923284539Sneel if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 924284539Sneel fprintf(stderr, "ROM boot failed: unrestricted guest " 925284539Sneel "capability not available\n"); 926284539Sneel exit(1); 927284539Sneel } 928284539Sneel error = vcpu_reset(ctx, BSP); 929284539Sneel assert(error == 0); 930284539Sneel } 931284539Sneel 932221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 933221828Sgrehan assert(error == 0); 934221828Sgrehan 935221828Sgrehan /* 936221828Sgrehan * build the guest tables, MP etc. 937221828Sgrehan */ 938265211Sneel if (mptgen) { 939265211Sneel error = mptable_build(ctx, guest_ncpus); 940265211Sneel if (error) 941265211Sneel exit(1); 942265211Sneel } 943221828Sgrehan 944262744Stychon error = smbios_build(ctx); 945262744Stychon assert(error == 0); 946262744Stychon 947243327Sgrehan if (acpi) { 948257423Sneel error = acpi_build(ctx, guest_ncpus); 949243327Sgrehan assert(error == 0); 950243327Sgrehan } 951243327Sgrehan 952288522Sgrehan if (lpc_bootrom()) 953288522Sgrehan fwctl_init(); 954288522Sgrehan 955221828Sgrehan /* 956257729Sgrehan * Change the proc title to include the VM name. 957257729Sgrehan */ 958257729Sgrehan setproctitle("%s", vmname); 959257729Sgrehan 960257729Sgrehan /* 961221828Sgrehan * Add CPU 0 962221828Sgrehan */ 963263432Sneel fbsdrun_addcpu(ctx, BSP, BSP, rip); 964221828Sgrehan 965221828Sgrehan /* 966221828Sgrehan * Head off to the main event dispatch loop 967221828Sgrehan */ 968221828Sgrehan mevent_dispatch(); 969221828Sgrehan 970221828Sgrehan exit(1); 971221828Sgrehan} 972