bhyverun.c revision 292982
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 292982 2015-12-31 10:55:50Z bz $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 292982 2015-12-31 10:55:50Z bz $");

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>

#include <machine/atomic.h>
#include <machine/segments.h>

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <libgen.h>
#include <limits.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "acpi.h"
#include "inout.h"
#include "dbgport.h"
#include "fwctl.h"
#include "ioapic.h"
#include "mem.h"
#include "mevent.h"
#include "mptbl.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"

#define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */

#define MB		(1024UL * 1024)
#define GB		(1024UL * MB)

typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
79221828Sgrehan 80221828Sgrehanchar *vmname; 81221828Sgrehan 82221828Sgrehanint guest_ncpus; 83262744Stychonchar *guest_uuid_str; 84221828Sgrehan 85262236Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 86256711Sgrehanstatic int virtio_msix = 1; 87262236Sneelstatic int x2apic_mode = 0; /* default is xAPIC */ 88221828Sgrehan 89222105Sgrehanstatic int strictio; 90259635Sneelstatic int strictmsr = 1; 91222105Sgrehan 92243327Sgrehanstatic int acpi; 93243327Sgrehan 94221828Sgrehanstatic char *progname; 95221828Sgrehanstatic const int BSP = 0; 96221828Sgrehan 97263432Sneelstatic cpuset_t cpumask; 98221828Sgrehan 99221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 100221828Sgrehan 101269042Sneelstatic struct vm_exit vmexit[VM_MAXCPU]; 102221828Sgrehan 103256062Sgrehanstruct bhyvestats { 104221828Sgrehan uint64_t vmexit_bogus; 105283657Sneel uint64_t vmexit_reqidle; 106221828Sgrehan uint64_t vmexit_hlt; 107221828Sgrehan uint64_t vmexit_pause; 108221828Sgrehan uint64_t vmexit_mtrap; 109256072Sneel uint64_t vmexit_inst_emul; 110221828Sgrehan uint64_t cpu_switch_rotate; 111221828Sgrehan uint64_t cpu_switch_direct; 112221828Sgrehan} stats; 113221828Sgrehan 114221828Sgrehanstruct mt_vmm_info { 115221828Sgrehan pthread_t mt_thr; 116221828Sgrehan struct vmctx *mt_ctx; 117221828Sgrehan int mt_vcpu; 118221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 119221828Sgrehan 120265376Sneelstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 121265376Sneel 122221828Sgrehanstatic void 123221828Sgrehanusage(int code) 124221828Sgrehan{ 125221828Sgrehan 126221828Sgrehan fprintf(stderr, 127284539Sneel "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 128267959Sjhb " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 129262236Sneel " -a: local apic is in xAPIC mode (deprecated)\n" 130267934Sjhb " -A: create ACPI tables\n" 131221828Sgrehan " -c: # cpus (default 1)\n" 132265951Sneel " -C: include guest memory in core file\n" 133257018Sneel " -e: 
exit on unhandled I/O access\n" 134267959Sjhb " -g: gdb port\n" 135221828Sgrehan " -h: help\n" 136267959Sjhb " -H: vmexit from the guest on hlt\n" 137257293Sneel " -l: LPC device configuration\n" 138259635Sneel " -m: memory size in MB\n" 139267959Sjhb " -p: pin 'vcpu' to 'hostcpu'\n" 140267959Sjhb " -P: vmexit from the guest on pause\n" 141267959Sjhb " -s: <slot,driver,configinfo> PCI slot config\n" 142284539Sneel " -S: guest memory cannot be swapped\n" 143279225Sneel " -u: RTC keeps UTC time\n" 144267959Sjhb " -U: uuid\n" 145262236Sneel " -w: ignore unimplemented MSRs\n" 146267959Sjhb " -W: force virtio to use single-vector MSI\n" 147262744Stychon " -x: local apic is in x2APIC mode\n" 148267959Sjhb " -Y: disable MPtable generation\n", 149257018Sneel progname, (int)strlen(progname), ""); 150256062Sgrehan 151221828Sgrehan exit(code); 152221828Sgrehan} 153221828Sgrehan 154265376Sneelstatic int 155265376Sneelpincpu_parse(const char *opt) 156265376Sneel{ 157265376Sneel int vcpu, pcpu; 158265376Sneel 159265376Sneel if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 160265376Sneel fprintf(stderr, "invalid format: %s\n", opt); 161265376Sneel return (-1); 162265376Sneel } 163265376Sneel 164265376Sneel if (vcpu < 0 || vcpu >= VM_MAXCPU) { 165265376Sneel fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 166265376Sneel vcpu, VM_MAXCPU - 1); 167265376Sneel return (-1); 168265376Sneel } 169265376Sneel 170265376Sneel if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 171265376Sneel fprintf(stderr, "hostcpu '%d' outside valid range from " 172265376Sneel "0 to %d\n", pcpu, CPU_SETSIZE - 1); 173265376Sneel return (-1); 174265376Sneel } 175265376Sneel 176265376Sneel if (vcpumap[vcpu] == NULL) { 177265376Sneel if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 178265376Sneel perror("malloc"); 179265376Sneel return (-1); 180265376Sneel } 181265376Sneel CPU_ZERO(vcpumap[vcpu]); 182265376Sneel } 183265376Sneel CPU_SET(pcpu, vcpumap[vcpu]); 184265376Sneel return (0); 185265376Sneel} 
186265376Sneel 187269042Sneelvoid 188269042Sneelvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 189269042Sneel int errcode) 190269042Sneel{ 191269042Sneel struct vmctx *ctx; 192277310Sneel int error, restart_instruction; 193269042Sneel 194269042Sneel ctx = arg; 195277310Sneel restart_instruction = 1; 196277310Sneel 197277310Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 198277310Sneel restart_instruction); 199269042Sneel assert(error == 0); 200269042Sneel} 201269042Sneel 202221828Sgrehanvoid * 203248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 204221828Sgrehan{ 205221828Sgrehan 206248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 207221828Sgrehan} 208221828Sgrehan 209221828Sgrehanint 210221828Sgrehanfbsdrun_vmexit_on_pause(void) 211221828Sgrehan{ 212221828Sgrehan 213221828Sgrehan return (guest_vmexit_on_pause); 214221828Sgrehan} 215221828Sgrehan 216221828Sgrehanint 217221828Sgrehanfbsdrun_vmexit_on_hlt(void) 218221828Sgrehan{ 219221828Sgrehan 220221828Sgrehan return (guest_vmexit_on_hlt); 221221828Sgrehan} 222221828Sgrehan 223256711Sgrehanint 224256711Sgrehanfbsdrun_virtio_msix(void) 225256711Sgrehan{ 226256711Sgrehan 227256711Sgrehan return (virtio_msix); 228256711Sgrehan} 229256711Sgrehan 230221942Sjhbstatic void * 231221828Sgrehanfbsdrun_start_thread(void *param) 232221828Sgrehan{ 233242404Sgrehan char tname[MAXCOMLEN + 1]; 234242404Sgrehan struct mt_vmm_info *mtp; 235221828Sgrehan int vcpu; 236221828Sgrehan 237242404Sgrehan mtp = param; 238221828Sgrehan vcpu = mtp->mt_vcpu; 239242404Sgrehan 240257729Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 241242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 242242404Sgrehan 243221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 244221828Sgrehan 245221828Sgrehan /* not reached */ 246221828Sgrehan exit(1); 247221828Sgrehan return (NULL); 248221828Sgrehan} 249221828Sgrehan 250221828Sgrehanvoid 
251263432Sneelfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 252221828Sgrehan{ 253221828Sgrehan int error; 254221828Sgrehan 255263432Sneel assert(fromcpu == BSP); 256221828Sgrehan 257266933Sneel /* 258266933Sneel * The 'newcpu' must be activated in the context of 'fromcpu'. If 259266933Sneel * vm_activate_cpu() is delayed until newcpu's pthread starts running 260266933Sneel * then vmm.ko is out-of-sync with bhyve and this can create a race 261266933Sneel * with vm_suspend(). 262266933Sneel */ 263266933Sneel error = vm_activate_cpu(ctx, newcpu); 264289746Sngie if (error != 0) 265289746Sngie err(EX_OSERR, "could not activate CPU %d", newcpu); 266266933Sneel 267263432Sneel CPU_SET_ATOMIC(newcpu, &cpumask); 268221828Sgrehan 269221828Sgrehan /* 270221828Sgrehan * Set up the vmexit struct to allow execution to start 271221828Sgrehan * at the given RIP 272221828Sgrehan */ 273263432Sneel vmexit[newcpu].rip = rip; 274263432Sneel vmexit[newcpu].inst_length = 0; 275221828Sgrehan 276263432Sneel mt_vmm_info[newcpu].mt_ctx = ctx; 277263432Sneel mt_vmm_info[newcpu].mt_vcpu = newcpu; 278256072Sneel 279263432Sneel error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 280263432Sneel fbsdrun_start_thread, &mt_vmm_info[newcpu]); 281256072Sneel assert(error == 0); 282221828Sgrehan} 283221828Sgrehan 284221828Sgrehanstatic int 285259081Sneelfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 286259081Sneel{ 287259081Sneel 288263432Sneel if (!CPU_ISSET(vcpu, &cpumask)) { 289265366Sneel fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 290259081Sneel exit(1); 291259081Sneel } 292259081Sneel 293263432Sneel CPU_CLR_ATOMIC(vcpu, &cpumask); 294263432Sneel return (CPU_EMPTY(&cpumask)); 295259081Sneel} 296259081Sneel 297259081Sneelstatic int 298221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 299221828Sgrehan uint32_t eax) 300221828Sgrehan{ 301256062Sgrehan#if BHYVE_DEBUG 302256062Sgrehan /* 303256062Sgrehan * put 
guest-driven debug here 304256062Sgrehan */ 305221828Sgrehan#endif 306221828Sgrehan return (VMEXIT_CONTINUE); 307221828Sgrehan} 308221828Sgrehan 309221828Sgrehanstatic int 310221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 311221828Sgrehan{ 312221828Sgrehan int error; 313292981Saraujo int bytes, port, in, out; 314221828Sgrehan int vcpu; 315221828Sgrehan 316221828Sgrehan vcpu = *pvcpu; 317221828Sgrehan 318221828Sgrehan port = vme->u.inout.port; 319221828Sgrehan bytes = vme->u.inout.bytes; 320221828Sgrehan in = vme->u.inout.in; 321221828Sgrehan out = !in; 322221828Sgrehan 323221828Sgrehan /* Extra-special case of host notifications */ 324266573Sneel if (out && port == GUEST_NIO_PORT) { 325266573Sneel error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 326266573Sneel return (error); 327266573Sneel } 328221828Sgrehan 329266573Sneel error = emulate_inout(ctx, vcpu, vme, strictio); 330269094Sneel if (error) { 331281561Stychon fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 332281561Stychon in ? "in" : "out", 333281561Stychon bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), 334281561Stychon port, vmexit->rip); 335269094Sneel return (VMEXIT_ABORT); 336269094Sneel } else { 337221828Sgrehan return (VMEXIT_CONTINUE); 338221828Sgrehan } 339221828Sgrehan} 340221828Sgrehan 341221828Sgrehanstatic int 342221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 343221828Sgrehan{ 344259635Sneel uint64_t val; 345259635Sneel uint32_t eax, edx; 346259635Sneel int error; 347259635Sneel 348259635Sneel val = 0; 349259635Sneel error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 350259635Sneel if (error != 0) { 351259635Sneel fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 352259635Sneel vme->u.msr.code, *pvcpu); 353262506Sneel if (strictmsr) { 354269042Sneel vm_inject_gp(ctx, *pvcpu); 355277310Sneel return (VMEXIT_CONTINUE); 356262506Sneel } 357259635Sneel } 358259635Sneel 359259635Sneel eax = val; 360259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 361259635Sneel assert(error == 0); 362259635Sneel 363259635Sneel edx = val >> 32; 364259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 365259635Sneel assert(error == 0); 366259635Sneel 367259635Sneel return (VMEXIT_CONTINUE); 368221828Sgrehan} 369221828Sgrehan 370221828Sgrehanstatic int 371221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 372221828Sgrehan{ 373259635Sneel int error; 374221828Sgrehan 375259635Sneel error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 376259635Sneel if (error != 0) { 377259635Sneel fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 378259635Sneel vme->u.msr.code, vme->u.msr.wval, *pvcpu); 379262506Sneel if (strictmsr) { 380269042Sneel vm_inject_gp(ctx, *pvcpu); 381277310Sneel return (VMEXIT_CONTINUE); 382262506Sneel } 383259635Sneel } 384259635Sneel return (VMEXIT_CONTINUE); 385221828Sgrehan} 386221828Sgrehan 387221828Sgrehanstatic int 388240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 389240912Sneel{ 
390240912Sneel int newcpu; 391240912Sneel int retval = VMEXIT_CONTINUE; 392240912Sneel 393240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 394240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 395240912Sneel 396240912Sneel return (retval); 397240912Sneel} 398240912Sneel 399267966Sneel#define DEBUG_EPT_MISCONFIG 400267966Sneel#ifdef DEBUG_EPT_MISCONFIG 401267966Sneel#define EXIT_REASON_EPT_MISCONFIG 49 402267966Sneel#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 403267966Sneel#define VMCS_IDENT(x) ((x) | 0x80000000) 404267966Sneel 405267966Sneelstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 406267966Sneelstatic int ept_misconfig_ptenum; 407267966Sneel#endif 408267966Sneel 409240912Sneelstatic int 410221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 411221828Sgrehan{ 412221828Sgrehan 413242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 414242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 415242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 416242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 417260167Sneel fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 418242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 419242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 420242385Sgrehan vmexit->u.vmx.exit_qualification); 421260167Sneel fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 422260167Sneel fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 423267966Sneel#ifdef DEBUG_EPT_MISCONFIG 424267966Sneel if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 425267966Sneel vm_get_register(ctx, *pvcpu, 426267966Sneel VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 427267966Sneel &ept_misconfig_gpa); 428267966Sneel vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 429267966Sneel &ept_misconfig_ptenum); 430267966Sneel fprintf(stderr, "\tEPT misconfiguration:\n"); 431267966Sneel fprintf(stderr, 
"\t\tGPA: %#lx\n", ept_misconfig_gpa); 432267966Sneel fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 433267966Sneel ept_misconfig_ptenum, ept_misconfig_pte[0], 434267966Sneel ept_misconfig_pte[1], ept_misconfig_pte[2], 435267966Sneel ept_misconfig_pte[3]); 436267966Sneel } 437267966Sneel#endif /* DEBUG_EPT_MISCONFIG */ 438221828Sgrehan return (VMEXIT_ABORT); 439221828Sgrehan} 440221828Sgrehan 441221828Sgrehanstatic int 442273375Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 443273375Sneel{ 444273375Sneel 445273375Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 446273375Sneel fprintf(stderr, "\treason\t\tSVM\n"); 447273375Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 448273375Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 449273375Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 450273375Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 451273375Sneel fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 452273375Sneel return (VMEXIT_ABORT); 453273375Sneel} 454273375Sneel 455273375Sneelstatic int 456221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 457221828Sgrehan{ 458256062Sgrehan 459277310Sneel assert(vmexit->inst_length == 0); 460277310Sneel 461221828Sgrehan stats.vmexit_bogus++; 462221828Sgrehan 463277310Sneel return (VMEXIT_CONTINUE); 464221828Sgrehan} 465221828Sgrehan 466221828Sgrehanstatic int 467283657Sneelvmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 468283657Sneel{ 469283657Sneel 470283657Sneel assert(vmexit->inst_length == 0); 471283657Sneel 472283657Sneel stats.vmexit_reqidle++; 473283657Sneel 474283657Sneel return (VMEXIT_CONTINUE); 475283657Sneel} 476283657Sneel 477283657Sneelstatic int 478221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 479221828Sgrehan{ 480256062Sgrehan 481221828Sgrehan stats.vmexit_hlt++; 482256062Sgrehan 483256062Sgrehan /* 
484256062Sgrehan * Just continue execution with the next instruction. We use 485256062Sgrehan * the HLT VM exit as a way to be friendly with the host 486256062Sgrehan * scheduler. 487256062Sgrehan */ 488256062Sgrehan return (VMEXIT_CONTINUE); 489221828Sgrehan} 490221828Sgrehan 491221828Sgrehanstatic int 492221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 493221828Sgrehan{ 494256062Sgrehan 495221828Sgrehan stats.vmexit_pause++; 496221828Sgrehan 497256062Sgrehan return (VMEXIT_CONTINUE); 498221828Sgrehan} 499221828Sgrehan 500221828Sgrehanstatic int 501221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 502221828Sgrehan{ 503256062Sgrehan 504277310Sneel assert(vmexit->inst_length == 0); 505277310Sneel 506221828Sgrehan stats.vmexit_mtrap++; 507221828Sgrehan 508277310Sneel return (VMEXIT_CONTINUE); 509221828Sgrehan} 510221828Sgrehan 511234761Sgrehanstatic int 512256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 513234761Sgrehan{ 514280968Stychon int err, i; 515280968Stychon struct vie *vie; 516280968Stychon 517256072Sneel stats.vmexit_inst_emul++; 518234761Sgrehan 519280968Stychon vie = &vmexit->u.inst_emul.vie; 520256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 521280968Stychon vie, &vmexit->u.inst_emul.paging); 522241744Sgrehan 523241744Sgrehan if (err) { 524280968Stychon if (err == ESRCH) { 525242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 526256072Sneel vmexit->u.inst_emul.gpa); 527241744Sgrehan } 528241744Sgrehan 529280968Stychon fprintf(stderr, "Failed to emulate instruction ["); 530280968Stychon for (i = 0; i < vie->num_valid; i++) { 531280968Stychon fprintf(stderr, "0x%02x%s", vie->inst[i], 532280968Stychon i != (vie->num_valid - 1) ? 
" " : ""); 533280968Stychon } 534280968Stychon fprintf(stderr, "] at 0x%lx\n", vmexit->rip); 535234761Sgrehan return (VMEXIT_ABORT); 536234761Sgrehan } 537234761Sgrehan 538234761Sgrehan return (VMEXIT_CONTINUE); 539234761Sgrehan} 540234761Sgrehan 541263780Sneelstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 542263780Sneelstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 543263780Sneel 544263780Sneelstatic int 545263780Sneelvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 546263780Sneel{ 547265062Sneel enum vm_suspend_how how; 548263780Sneel 549265062Sneel how = vmexit->u.suspended.how; 550265062Sneel 551263780Sneel fbsdrun_deletecpu(ctx, *pvcpu); 552263780Sneel 553265062Sneel if (*pvcpu != BSP) { 554263780Sneel pthread_mutex_lock(&resetcpu_mtx); 555263780Sneel pthread_cond_signal(&resetcpu_cond); 556263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 557263780Sneel pthread_exit(NULL); 558263780Sneel } 559263780Sneel 560263780Sneel pthread_mutex_lock(&resetcpu_mtx); 561263780Sneel while (!CPU_EMPTY(&cpumask)) { 562263780Sneel pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 563263780Sneel } 564263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 565265062Sneel 566265203Sneel switch (how) { 567265203Sneel case VM_SUSPEND_RESET: 568265062Sneel exit(0); 569265203Sneel case VM_SUSPEND_POWEROFF: 570265062Sneel exit(1); 571265203Sneel case VM_SUSPEND_HALT: 572265203Sneel exit(2); 573268889Sneel case VM_SUSPEND_TRIPLEFAULT: 574268889Sneel exit(3); 575265203Sneel default: 576265203Sneel fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 577265203Sneel exit(100); 578265203Sneel } 579265062Sneel return (0); /* NOTREACHED */ 580263780Sneel} 581263780Sneel 582221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 583234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 584266573Sneel [VM_EXITCODE_INOUT_STR] = vmexit_inout, 585234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 586273375Sneel [VM_EXITCODE_SVM] = 
vmexit_svm, 587234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 588283657Sneel [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 589234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 590234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 591234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 592256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 593240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 594268777Sneel [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 595268777Sneel [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 596221828Sgrehan}; 597221828Sgrehan 598221828Sgrehanstatic void 599277310Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 600221828Sgrehan{ 601292982Sbz int error, rc; 602253452Sgrehan enum vm_exitcode exitcode; 603266933Sneel cpuset_t active_cpus; 604221828Sgrehan 605265376Sneel if (vcpumap[vcpu] != NULL) { 606246686Sneel error = pthread_setaffinity_np(pthread_self(), 607265376Sneel sizeof(cpuset_t), vcpumap[vcpu]); 608221828Sgrehan assert(error == 0); 609221828Sgrehan } 610221828Sgrehan 611266933Sneel error = vm_active_cpus(ctx, &active_cpus); 612266933Sneel assert(CPU_ISSET(vcpu, &active_cpus)); 613266933Sneel 614277310Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 615277310Sneel assert(error == 0); 616277310Sneel 617221828Sgrehan while (1) { 618277310Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 619259737Sneel if (error != 0) 620259737Sneel break; 621221828Sgrehan 622253452Sgrehan exitcode = vmexit[vcpu].exitcode; 623253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 624253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 625253452Sgrehan exitcode); 626253452Sgrehan exit(1); 627253452Sgrehan } 628253452Sgrehan 629292981Saraujo rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 630253452Sgrehan 631221828Sgrehan switch (rc) { 632221828Sgrehan case VMEXIT_CONTINUE: 633221828Sgrehan break; 634265941Sneel case VMEXIT_ABORT: 635265941Sneel abort(); 636221828Sgrehan default: 
637221828Sgrehan exit(1); 638221828Sgrehan } 639221828Sgrehan } 640221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 641221828Sgrehan} 642221828Sgrehan 643245020Sneelstatic int 644245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 645245020Sneel{ 646245020Sneel int tmp, error; 647221828Sgrehan 648245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 649245020Sneel 650245020Sneel /* 651245020Sneel * The guest is allowed to spinup more than one processor only if the 652245020Sneel * UNRESTRICTED_GUEST capability is available. 653245020Sneel */ 654245020Sneel if (error == 0) 655245020Sneel return (VM_MAXCPU); 656245020Sneel else 657245020Sneel return (1); 658245020Sneel} 659245020Sneel 660256645Sneelvoid 661256645Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 662256645Sneel{ 663256645Sneel int err, tmp; 664256645Sneel 665256645Sneel if (fbsdrun_vmexit_on_hlt()) { 666256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 667256645Sneel if (err < 0) { 668256645Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 669256645Sneel exit(1); 670256645Sneel } 671256645Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 672256645Sneel if (cpu == BSP) 673256645Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 674256645Sneel } 675256645Sneel 676256645Sneel if (fbsdrun_vmexit_on_pause()) { 677256645Sneel /* 678256645Sneel * pause exit support required for this mode 679256645Sneel */ 680256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 681256645Sneel if (err < 0) { 682256645Sneel fprintf(stderr, 683256645Sneel "SMP mux requested, no pause support\n"); 684256645Sneel exit(1); 685256645Sneel } 686256645Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 687256645Sneel if (cpu == BSP) 688256645Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 689256645Sneel } 690256645Sneel 691262236Sneel if (x2apic_mode) 692262236Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 
693262236Sneel else 694256645Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 695256645Sneel 696256645Sneel if (err) { 697256645Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 698256645Sneel exit(1); 699256645Sneel } 700256645Sneel 701256645Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 702256645Sneel} 703256645Sneel 704284539Sneelstatic struct vmctx * 705284539Sneeldo_open(const char *vmname) 706284539Sneel{ 707284539Sneel struct vmctx *ctx; 708284539Sneel int error; 709284539Sneel bool reinit, romboot; 710284539Sneel 711284539Sneel reinit = romboot = false; 712284539Sneel 713284539Sneel if (lpc_bootrom()) 714284539Sneel romboot = true; 715284539Sneel 716284539Sneel error = vm_create(vmname); 717284539Sneel if (error) { 718284539Sneel if (errno == EEXIST) { 719284539Sneel if (romboot) { 720284539Sneel reinit = true; 721284539Sneel } else { 722284539Sneel /* 723284539Sneel * The virtual machine has been setup by the 724284539Sneel * userspace bootloader. 725284539Sneel */ 726284539Sneel } 727284539Sneel } else { 728284539Sneel perror("vm_create"); 729284539Sneel exit(1); 730284539Sneel } 731284539Sneel } else { 732284539Sneel if (!romboot) { 733284539Sneel /* 734284539Sneel * If the virtual machine was just created then a 735284539Sneel * bootrom must be configured to boot it. 
736284539Sneel */ 737284539Sneel fprintf(stderr, "virtual machine cannot be booted\n"); 738284539Sneel exit(1); 739284539Sneel } 740284539Sneel } 741284539Sneel 742284539Sneel ctx = vm_open(vmname); 743284539Sneel if (ctx == NULL) { 744284539Sneel perror("vm_open"); 745284539Sneel exit(1); 746284539Sneel } 747284539Sneel 748284539Sneel if (reinit) { 749284539Sneel error = vm_reinit(ctx); 750284539Sneel if (error) { 751284539Sneel perror("vm_reinit"); 752284539Sneel exit(1); 753284539Sneel } 754284539Sneel } 755284539Sneel return (ctx); 756284539Sneel} 757284539Sneel 758221828Sgrehanint 759221828Sgrehanmain(int argc, char *argv[]) 760221828Sgrehan{ 761257423Sneel int c, error, gdb_port, err, bvmcons; 762284539Sneel int max_vcpus, mptgen, memflags; 763279225Sneel int rtc_localtime; 764221828Sgrehan struct vmctx *ctx; 765221828Sgrehan uint64_t rip; 766248477Sneel size_t memsize; 767284539Sneel char *optstr; 768221828Sgrehan 769242192Sneel bvmcons = 0; 770221828Sgrehan progname = basename(argv[0]); 771256156Sneel gdb_port = 0; 772221828Sgrehan guest_ncpus = 1; 773248477Sneel memsize = 256 * MB; 774265211Sneel mptgen = 1; 775279225Sneel rtc_localtime = 1; 776284539Sneel memflags = 0; 777221828Sgrehan 778284539Sneel optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; 779284539Sneel while ((c = getopt(argc, argv, optstr)) != -1) { 780221828Sgrehan switch (c) { 781240943Sneel case 'a': 782262236Sneel x2apic_mode = 0; 783240943Sneel break; 784243327Sgrehan case 'A': 785243327Sgrehan acpi = 1; 786243327Sgrehan break; 787242192Sneel case 'b': 788242192Sneel bvmcons = 1; 789242192Sneel break; 790221828Sgrehan case 'p': 791265376Sneel if (pincpu_parse(optarg) != 0) { 792265376Sneel errx(EX_USAGE, "invalid vcpu pinning " 793265376Sneel "configuration '%s'", optarg); 794265376Sneel } 795221828Sgrehan break; 796221828Sgrehan case 'c': 797221828Sgrehan guest_ncpus = atoi(optarg); 798221828Sgrehan break; 799265951Sneel case 'C': 800284539Sneel memflags |= VM_MEM_F_INCORE; 801265951Sneel 
break; 802221828Sgrehan case 'g': 803221828Sgrehan gdb_port = atoi(optarg); 804221828Sgrehan break; 805257293Sneel case 'l': 806257293Sneel if (lpc_device_parse(optarg) != 0) { 807257293Sneel errx(EX_USAGE, "invalid lpc device " 808257293Sneel "configuration '%s'", optarg); 809257293Sneel } 810257293Sneel break; 811221828Sgrehan case 's': 812261217Sjhb if (pci_parse_slot(optarg) != 0) 813249916Sneel exit(1); 814249916Sneel else 815249916Sneel break; 816284539Sneel case 'S': 817284539Sneel memflags |= VM_MEM_F_WIRED; 818284539Sneel break; 819221828Sgrehan case 'm': 820256176Sneel error = vm_parse_memsize(optarg, &memsize); 821256176Sneel if (error) 822256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 823221828Sgrehan break; 824221828Sgrehan case 'H': 825221828Sgrehan guest_vmexit_on_hlt = 1; 826221828Sgrehan break; 827239043Sneel case 'I': 828257423Sneel /* 829257423Sneel * The "-I" option was used to add an ioapic to the 830257423Sneel * virtual machine. 831257423Sneel * 832257423Sneel * An ioapic is now provided unconditionally for each 833257423Sneel * virtual machine and this option is now deprecated. 
834257423Sneel */ 835239043Sneel break; 836221828Sgrehan case 'P': 837221828Sgrehan guest_vmexit_on_pause = 1; 838221828Sgrehan break; 839222105Sgrehan case 'e': 840222105Sgrehan strictio = 1; 841222105Sgrehan break; 842279225Sneel case 'u': 843279225Sneel rtc_localtime = 0; 844279225Sneel break; 845262744Stychon case 'U': 846262744Stychon guest_uuid_str = optarg; 847262744Stychon break; 848259635Sneel case 'w': 849259635Sneel strictmsr = 0; 850259635Sneel break; 851256711Sgrehan case 'W': 852256711Sgrehan virtio_msix = 0; 853256711Sgrehan break; 854262236Sneel case 'x': 855262236Sneel x2apic_mode = 1; 856262236Sneel break; 857265211Sneel case 'Y': 858265211Sneel mptgen = 0; 859265211Sneel break; 860221828Sgrehan case 'h': 861221828Sgrehan usage(0); 862221828Sgrehan default: 863221828Sgrehan usage(1); 864221828Sgrehan } 865221828Sgrehan } 866221828Sgrehan argc -= optind; 867221828Sgrehan argv += optind; 868221828Sgrehan 869221828Sgrehan if (argc != 1) 870221828Sgrehan usage(1); 871221828Sgrehan 872221828Sgrehan vmname = argv[0]; 873284539Sneel ctx = do_open(vmname); 874221828Sgrehan 875281611Sneel if (guest_ncpus < 1) { 876281611Sneel fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 877281611Sneel exit(1); 878281611Sneel } 879281611Sneel 880245020Sneel max_vcpus = num_vcpus_allowed(ctx); 881245020Sneel if (guest_ncpus > max_vcpus) { 882245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 883245020Sneel guest_ncpus, max_vcpus); 884245020Sneel exit(1); 885245020Sneel } 886245020Sneel 887256645Sneel fbsdrun_set_capabilities(ctx, BSP); 888221828Sgrehan 889284539Sneel vm_set_memflags(ctx, memflags); 890248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 891248477Sneel if (err) { 892284539Sneel fprintf(stderr, "Unable to setup memory (%d)\n", errno); 893248477Sneel exit(1); 894221828Sgrehan } 895221828Sgrehan 896271888Sneel error = init_msr(); 897271888Sneel if (error) { 898271888Sneel fprintf(stderr, "init_msr error %d", error); 
899271888Sneel exit(1); 900271888Sneel } 901271888Sneel 902249343Sneel init_mem(); 903221828Sgrehan init_inout(); 904266125Sjhb pci_irq_init(ctx); 905261268Sjhb ioapic_init(ctx); 906252682Sgrehan 907279225Sneel rtc_init(ctx, rtc_localtime); 908266125Sjhb sci_init(ctx); 909253181Sgrehan 910252682Sgrehan /* 911252682Sgrehan * Exit if a device emulation finds an error in it's initilization 912252682Sgrehan */ 913252682Sgrehan if (init_pci(ctx) != 0) 914252682Sgrehan exit(1); 915252682Sgrehan 916221828Sgrehan if (gdb_port != 0) 917221828Sgrehan init_dbgport(gdb_port); 918221828Sgrehan 919242192Sneel if (bvmcons) 920242192Sneel init_bvmcons(); 921242192Sneel 922284539Sneel if (lpc_bootrom()) { 923284539Sneel if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 924284539Sneel fprintf(stderr, "ROM boot failed: unrestricted guest " 925284539Sneel "capability not available\n"); 926284539Sneel exit(1); 927284539Sneel } 928284539Sneel error = vcpu_reset(ctx, BSP); 929284539Sneel assert(error == 0); 930284539Sneel } 931284539Sneel 932221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 933221828Sgrehan assert(error == 0); 934221828Sgrehan 935221828Sgrehan /* 936221828Sgrehan * build the guest tables, MP etc. 937221828Sgrehan */ 938265211Sneel if (mptgen) { 939265211Sneel error = mptable_build(ctx, guest_ncpus); 940265211Sneel if (error) 941265211Sneel exit(1); 942265211Sneel } 943221828Sgrehan 944262744Stychon error = smbios_build(ctx); 945262744Stychon assert(error == 0); 946262744Stychon 947243327Sgrehan if (acpi) { 948257423Sneel error = acpi_build(ctx, guest_ncpus); 949243327Sgrehan assert(error == 0); 950243327Sgrehan } 951243327Sgrehan 952288522Sgrehan if (lpc_bootrom()) 953288522Sgrehan fwctl_init(); 954288522Sgrehan 955221828Sgrehan /* 956257729Sgrehan * Change the proc title to include the VM name. 
957257729Sgrehan */ 958257729Sgrehan setproctitle("%s", vmname); 959257729Sgrehan 960257729Sgrehan /* 961221828Sgrehan * Add CPU 0 962221828Sgrehan */ 963263432Sneel fbsdrun_addcpu(ctx, BSP, BSP, rip); 964221828Sgrehan 965221828Sgrehan /* 966221828Sgrehan * Head off to the main event dispatch loop 967221828Sgrehan */ 968221828Sgrehan mevent_dispatch(); 969221828Sgrehan 970221828Sgrehan exit(1); 971221828Sgrehan} 972