/* bhyverun.c revision 269094 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 269094 2014-07-25 20:18:35Z neel $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 269094 2014-07-25 20:18:35Z neel $");

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>

#include <machine/atomic.h>
#include <machine/segments.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <libgen.h>
#include <limits.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <pthread_np.h>
#include <sysexits.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "acpi.h"
#include "inout.h"
#include "dbgport.h"
#include "ioapic.h"
#include "mem.h"
#include "mevent.h"
#include "mptbl.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"

#define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */

#define MB		(1024UL * 1024)
#define GB		(1024UL * MB)

typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);

char *vmname;

80221828Sgrehanint guest_ncpus; 81262744Stychonchar *guest_uuid_str; 82221828Sgrehan 83262236Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 84256711Sgrehanstatic int virtio_msix = 1; 85262236Sneelstatic int x2apic_mode = 0; /* default is xAPIC */ 86221828Sgrehan 87222105Sgrehanstatic int strictio; 88259635Sneelstatic int strictmsr = 1; 89222105Sgrehan 90243327Sgrehanstatic int acpi; 91243327Sgrehan 92221828Sgrehanstatic char *progname; 93221828Sgrehanstatic const int BSP = 0; 94221828Sgrehan 95263432Sneelstatic cpuset_t cpumask; 96221828Sgrehan 97221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 98221828Sgrehan 99269042Sneelstatic struct vm_exit vmexit[VM_MAXCPU]; 100221828Sgrehan 101256062Sgrehanstruct bhyvestats { 102221828Sgrehan uint64_t vmexit_bogus; 103221828Sgrehan uint64_t vmexit_bogus_switch; 104221828Sgrehan uint64_t vmexit_hlt; 105221828Sgrehan uint64_t vmexit_pause; 106221828Sgrehan uint64_t vmexit_mtrap; 107256072Sneel uint64_t vmexit_inst_emul; 108221828Sgrehan uint64_t cpu_switch_rotate; 109221828Sgrehan uint64_t cpu_switch_direct; 110221828Sgrehan} stats; 111221828Sgrehan 112221828Sgrehanstruct mt_vmm_info { 113221828Sgrehan pthread_t mt_thr; 114221828Sgrehan struct vmctx *mt_ctx; 115221828Sgrehan int mt_vcpu; 116221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 117221828Sgrehan 118265376Sneelstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 119265376Sneel 120221828Sgrehanstatic void 121221828Sgrehanusage(int code) 122221828Sgrehan{ 123221828Sgrehan 124221828Sgrehan fprintf(stderr, 125267959Sjhb "Usage: %s [-abehwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 126267959Sjhb " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 127262236Sneel " -a: local apic is in xAPIC mode (deprecated)\n" 128267934Sjhb " -A: create ACPI tables\n" 129221828Sgrehan " -c: # cpus (default 1)\n" 130265951Sneel " -C: include guest memory in core file\n" 131257018Sneel " -e: exit on unhandled I/O access\n" 132267959Sjhb " -g: gdb 
port\n" 133221828Sgrehan " -h: help\n" 134267959Sjhb " -H: vmexit from the guest on hlt\n" 135257293Sneel " -l: LPC device configuration\n" 136259635Sneel " -m: memory size in MB\n" 137267959Sjhb " -p: pin 'vcpu' to 'hostcpu'\n" 138267959Sjhb " -P: vmexit from the guest on pause\n" 139267959Sjhb " -s: <slot,driver,configinfo> PCI slot config\n" 140267959Sjhb " -U: uuid\n" 141262236Sneel " -w: ignore unimplemented MSRs\n" 142267959Sjhb " -W: force virtio to use single-vector MSI\n" 143262744Stychon " -x: local apic is in x2APIC mode\n" 144267959Sjhb " -Y: disable MPtable generation\n", 145257018Sneel progname, (int)strlen(progname), ""); 146256062Sgrehan 147221828Sgrehan exit(code); 148221828Sgrehan} 149221828Sgrehan 150265376Sneelstatic int 151265376Sneelpincpu_parse(const char *opt) 152265376Sneel{ 153265376Sneel int vcpu, pcpu; 154265376Sneel 155265376Sneel if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 156265376Sneel fprintf(stderr, "invalid format: %s\n", opt); 157265376Sneel return (-1); 158265376Sneel } 159265376Sneel 160265376Sneel if (vcpu < 0 || vcpu >= VM_MAXCPU) { 161265376Sneel fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 162265376Sneel vcpu, VM_MAXCPU - 1); 163265376Sneel return (-1); 164265376Sneel } 165265376Sneel 166265376Sneel if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 167265376Sneel fprintf(stderr, "hostcpu '%d' outside valid range from " 168265376Sneel "0 to %d\n", pcpu, CPU_SETSIZE - 1); 169265376Sneel return (-1); 170265376Sneel } 171265376Sneel 172265376Sneel if (vcpumap[vcpu] == NULL) { 173265376Sneel if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 174265376Sneel perror("malloc"); 175265376Sneel return (-1); 176265376Sneel } 177265376Sneel CPU_ZERO(vcpumap[vcpu]); 178265376Sneel } 179265376Sneel CPU_SET(pcpu, vcpumap[vcpu]); 180265376Sneel return (0); 181265376Sneel} 182265376Sneel 183269042Sneelvoid 184269042Sneelvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 185269042Sneel int errcode) 
186269042Sneel{ 187269042Sneel struct vmctx *ctx; 188269042Sneel int error; 189269042Sneel 190269042Sneel ctx = arg; 191269042Sneel if (errcode_valid) 192269042Sneel error = vm_inject_exception2(ctx, vcpu, vector, errcode); 193269042Sneel else 194269042Sneel error = vm_inject_exception(ctx, vcpu, vector); 195269042Sneel assert(error == 0); 196269042Sneel 197269042Sneel /* 198269042Sneel * Set the instruction length to 0 to ensure that the instruction is 199269042Sneel * restarted when the fault handler returns. 200269042Sneel */ 201269042Sneel vmexit[vcpu].inst_length = 0; 202269042Sneel} 203269042Sneel 204221828Sgrehanvoid * 205248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 206221828Sgrehan{ 207221828Sgrehan 208248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 209221828Sgrehan} 210221828Sgrehan 211221828Sgrehanint 212221828Sgrehanfbsdrun_vmexit_on_pause(void) 213221828Sgrehan{ 214221828Sgrehan 215221828Sgrehan return (guest_vmexit_on_pause); 216221828Sgrehan} 217221828Sgrehan 218221828Sgrehanint 219221828Sgrehanfbsdrun_vmexit_on_hlt(void) 220221828Sgrehan{ 221221828Sgrehan 222221828Sgrehan return (guest_vmexit_on_hlt); 223221828Sgrehan} 224221828Sgrehan 225256711Sgrehanint 226256711Sgrehanfbsdrun_virtio_msix(void) 227256711Sgrehan{ 228256711Sgrehan 229256711Sgrehan return (virtio_msix); 230256711Sgrehan} 231256711Sgrehan 232221942Sjhbstatic void * 233221828Sgrehanfbsdrun_start_thread(void *param) 234221828Sgrehan{ 235242404Sgrehan char tname[MAXCOMLEN + 1]; 236242404Sgrehan struct mt_vmm_info *mtp; 237221828Sgrehan int vcpu; 238221828Sgrehan 239242404Sgrehan mtp = param; 240221828Sgrehan vcpu = mtp->mt_vcpu; 241242404Sgrehan 242257729Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 243242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 244242404Sgrehan 245221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 246221828Sgrehan 247221828Sgrehan /* not reached */ 248221828Sgrehan exit(1); 249221828Sgrehan return (NULL); 
250221828Sgrehan} 251221828Sgrehan 252221828Sgrehanvoid 253263432Sneelfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 254221828Sgrehan{ 255221828Sgrehan int error; 256221828Sgrehan 257263432Sneel assert(fromcpu == BSP); 258221828Sgrehan 259266933Sneel /* 260266933Sneel * The 'newcpu' must be activated in the context of 'fromcpu'. If 261266933Sneel * vm_activate_cpu() is delayed until newcpu's pthread starts running 262266933Sneel * then vmm.ko is out-of-sync with bhyve and this can create a race 263266933Sneel * with vm_suspend(). 264266933Sneel */ 265266933Sneel error = vm_activate_cpu(ctx, newcpu); 266266933Sneel assert(error == 0); 267266933Sneel 268263432Sneel CPU_SET_ATOMIC(newcpu, &cpumask); 269221828Sgrehan 270221828Sgrehan /* 271221828Sgrehan * Set up the vmexit struct to allow execution to start 272221828Sgrehan * at the given RIP 273221828Sgrehan */ 274263432Sneel vmexit[newcpu].rip = rip; 275263432Sneel vmexit[newcpu].inst_length = 0; 276221828Sgrehan 277263432Sneel mt_vmm_info[newcpu].mt_ctx = ctx; 278263432Sneel mt_vmm_info[newcpu].mt_vcpu = newcpu; 279256072Sneel 280263432Sneel error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 281263432Sneel fbsdrun_start_thread, &mt_vmm_info[newcpu]); 282256072Sneel assert(error == 0); 283221828Sgrehan} 284221828Sgrehan 285221828Sgrehanstatic int 286259081Sneelfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 287259081Sneel{ 288259081Sneel 289263432Sneel if (!CPU_ISSET(vcpu, &cpumask)) { 290265366Sneel fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 291259081Sneel exit(1); 292259081Sneel } 293259081Sneel 294263432Sneel CPU_CLR_ATOMIC(vcpu, &cpumask); 295263432Sneel return (CPU_EMPTY(&cpumask)); 296259081Sneel} 297259081Sneel 298259081Sneelstatic int 299221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 300221828Sgrehan uint32_t eax) 301221828Sgrehan{ 302256062Sgrehan#if BHYVE_DEBUG 303256062Sgrehan /* 304256062Sgrehan * put guest-driven 
debug here 305256062Sgrehan */ 306221828Sgrehan#endif 307221828Sgrehan return (VMEXIT_CONTINUE); 308221828Sgrehan} 309221828Sgrehan 310221828Sgrehanstatic int 311221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 312221828Sgrehan{ 313221828Sgrehan int error; 314266573Sneel int bytes, port, in, out, string; 315221828Sgrehan int vcpu; 316221828Sgrehan 317221828Sgrehan vcpu = *pvcpu; 318221828Sgrehan 319221828Sgrehan port = vme->u.inout.port; 320221828Sgrehan bytes = vme->u.inout.bytes; 321266573Sneel string = vme->u.inout.string; 322221828Sgrehan in = vme->u.inout.in; 323221828Sgrehan out = !in; 324221828Sgrehan 325221828Sgrehan /* Extra-special case of host notifications */ 326266573Sneel if (out && port == GUEST_NIO_PORT) { 327266573Sneel error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 328266573Sneel return (error); 329266573Sneel } 330221828Sgrehan 331266573Sneel error = emulate_inout(ctx, vcpu, vme, strictio); 332269094Sneel if (!error && in && !string) { 333266573Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, 334266573Sneel vme->u.inout.eax); 335269094Sneel assert(error == 0); 336266573Sneel } 337221828Sgrehan 338269094Sneel if (error) { 339269094Sneel fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", 340269094Sneel bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), port); 341269094Sneel return (VMEXIT_ABORT); 342269094Sneel } else { 343221828Sgrehan return (VMEXIT_CONTINUE); 344221828Sgrehan } 345221828Sgrehan} 346221828Sgrehan 347221828Sgrehanstatic int 348221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 349221828Sgrehan{ 350259635Sneel uint64_t val; 351259635Sneel uint32_t eax, edx; 352259635Sneel int error; 353259635Sneel 354259635Sneel val = 0; 355259635Sneel error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 356259635Sneel if (error != 0) { 357259635Sneel fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 358259635Sneel vme->u.msr.code, *pvcpu); 359262506Sneel if (strictmsr) { 360269042Sneel vm_inject_gp(ctx, *pvcpu); 361262506Sneel return (VMEXIT_RESTART); 362262506Sneel } 363259635Sneel } 364259635Sneel 365259635Sneel eax = val; 366259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 367259635Sneel assert(error == 0); 368259635Sneel 369259635Sneel edx = val >> 32; 370259635Sneel error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 371259635Sneel assert(error == 0); 372259635Sneel 373259635Sneel return (VMEXIT_CONTINUE); 374221828Sgrehan} 375221828Sgrehan 376221828Sgrehanstatic int 377221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 378221828Sgrehan{ 379259635Sneel int error; 380221828Sgrehan 381259635Sneel error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 382259635Sneel if (error != 0) { 383259635Sneel fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 384259635Sneel vme->u.msr.code, vme->u.msr.wval, *pvcpu); 385262506Sneel if (strictmsr) { 386269042Sneel vm_inject_gp(ctx, *pvcpu); 387262506Sneel return (VMEXIT_RESTART); 388262506Sneel } 389259635Sneel } 390259635Sneel return (VMEXIT_CONTINUE); 391221828Sgrehan} 392221828Sgrehan 393221828Sgrehanstatic int 394240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 395240912Sneel{ 396240912Sneel int newcpu; 
397240912Sneel int retval = VMEXIT_CONTINUE; 398240912Sneel 399240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 400240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 401240912Sneel 402240912Sneel return (retval); 403240912Sneel} 404240912Sneel 405267966Sneel#define DEBUG_EPT_MISCONFIG 406267966Sneel#ifdef DEBUG_EPT_MISCONFIG 407267966Sneel#define EXIT_REASON_EPT_MISCONFIG 49 408267966Sneel#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 409267966Sneel#define VMCS_IDENT(x) ((x) | 0x80000000) 410267966Sneel 411267966Sneelstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 412267966Sneelstatic int ept_misconfig_ptenum; 413267966Sneel#endif 414267966Sneel 415240912Sneelstatic int 416221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 417221828Sgrehan{ 418221828Sgrehan 419242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 420242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 421242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 422242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 423260167Sneel fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 424242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 425242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 426242385Sgrehan vmexit->u.vmx.exit_qualification); 427260167Sneel fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 428260167Sneel fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 429267966Sneel#ifdef DEBUG_EPT_MISCONFIG 430267966Sneel if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 431267966Sneel vm_get_register(ctx, *pvcpu, 432267966Sneel VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 433267966Sneel &ept_misconfig_gpa); 434267966Sneel vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 435267966Sneel &ept_misconfig_ptenum); 436267966Sneel fprintf(stderr, "\tEPT misconfiguration:\n"); 437267966Sneel fprintf(stderr, "\t\tGPA: %#lx\n", 
ept_misconfig_gpa); 438267966Sneel fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 439267966Sneel ept_misconfig_ptenum, ept_misconfig_pte[0], 440267966Sneel ept_misconfig_pte[1], ept_misconfig_pte[2], 441267966Sneel ept_misconfig_pte[3]); 442267966Sneel } 443267966Sneel#endif /* DEBUG_EPT_MISCONFIG */ 444221828Sgrehan return (VMEXIT_ABORT); 445221828Sgrehan} 446221828Sgrehan 447221828Sgrehanstatic int 448221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 449221828Sgrehan{ 450256062Sgrehan 451221828Sgrehan stats.vmexit_bogus++; 452221828Sgrehan 453256062Sgrehan return (VMEXIT_RESTART); 454221828Sgrehan} 455221828Sgrehan 456221828Sgrehanstatic int 457221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 458221828Sgrehan{ 459256062Sgrehan 460221828Sgrehan stats.vmexit_hlt++; 461256062Sgrehan 462256062Sgrehan /* 463256062Sgrehan * Just continue execution with the next instruction. We use 464256062Sgrehan * the HLT VM exit as a way to be friendly with the host 465256062Sgrehan * scheduler. 
466256062Sgrehan */ 467256062Sgrehan return (VMEXIT_CONTINUE); 468221828Sgrehan} 469221828Sgrehan 470221828Sgrehanstatic int 471221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 472221828Sgrehan{ 473256062Sgrehan 474221828Sgrehan stats.vmexit_pause++; 475221828Sgrehan 476256062Sgrehan return (VMEXIT_CONTINUE); 477221828Sgrehan} 478221828Sgrehan 479221828Sgrehanstatic int 480221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 481221828Sgrehan{ 482256062Sgrehan 483221828Sgrehan stats.vmexit_mtrap++; 484221828Sgrehan 485221828Sgrehan return (VMEXIT_RESTART); 486221828Sgrehan} 487221828Sgrehan 488234761Sgrehanstatic int 489256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 490234761Sgrehan{ 491241744Sgrehan int err; 492256072Sneel stats.vmexit_inst_emul++; 493234761Sgrehan 494256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 495269008Sneel &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging); 496241744Sgrehan 497241744Sgrehan if (err) { 498241744Sgrehan if (err == EINVAL) { 499242385Sgrehan fprintf(stderr, 500242385Sgrehan "Failed to emulate instruction at 0x%lx\n", 501242385Sgrehan vmexit->rip); 502241744Sgrehan } else if (err == ESRCH) { 503242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 504256072Sneel vmexit->u.inst_emul.gpa); 505241744Sgrehan } 506241744Sgrehan 507234761Sgrehan return (VMEXIT_ABORT); 508234761Sgrehan } 509234761Sgrehan 510234761Sgrehan return (VMEXIT_CONTINUE); 511234761Sgrehan} 512234761Sgrehan 513263780Sneelstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 514263780Sneelstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 515263780Sneel 516263780Sneelstatic int 517263780Sneelvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 518263780Sneel{ 519265062Sneel enum vm_suspend_how how; 520263780Sneel 521265062Sneel how = vmexit->u.suspended.how; 522265062Sneel 523263780Sneel 
fbsdrun_deletecpu(ctx, *pvcpu); 524263780Sneel 525265062Sneel if (*pvcpu != BSP) { 526263780Sneel pthread_mutex_lock(&resetcpu_mtx); 527263780Sneel pthread_cond_signal(&resetcpu_cond); 528263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 529263780Sneel pthread_exit(NULL); 530263780Sneel } 531263780Sneel 532263780Sneel pthread_mutex_lock(&resetcpu_mtx); 533263780Sneel while (!CPU_EMPTY(&cpumask)) { 534263780Sneel pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 535263780Sneel } 536263780Sneel pthread_mutex_unlock(&resetcpu_mtx); 537265062Sneel 538265203Sneel switch (how) { 539265203Sneel case VM_SUSPEND_RESET: 540265062Sneel exit(0); 541265203Sneel case VM_SUSPEND_POWEROFF: 542265062Sneel exit(1); 543265203Sneel case VM_SUSPEND_HALT: 544265203Sneel exit(2); 545268889Sneel case VM_SUSPEND_TRIPLEFAULT: 546268889Sneel exit(3); 547265203Sneel default: 548265203Sneel fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 549265203Sneel exit(100); 550265203Sneel } 551265062Sneel return (0); /* NOTREACHED */ 552263780Sneel} 553263780Sneel 554221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 555234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 556266573Sneel [VM_EXITCODE_INOUT_STR] = vmexit_inout, 557234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 558234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 559234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 560234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 561234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 562256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 563240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 564268777Sneel [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 565268777Sneel [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 566221828Sgrehan}; 567221828Sgrehan 568221828Sgrehanstatic void 569221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 570221828Sgrehan{ 571221828Sgrehan int error, rc, prevcpu; 572253452Sgrehan enum vm_exitcode exitcode; 573266933Sneel cpuset_t active_cpus; 
574221828Sgrehan 575265376Sneel if (vcpumap[vcpu] != NULL) { 576246686Sneel error = pthread_setaffinity_np(pthread_self(), 577265376Sneel sizeof(cpuset_t), vcpumap[vcpu]); 578221828Sgrehan assert(error == 0); 579221828Sgrehan } 580221828Sgrehan 581266933Sneel error = vm_active_cpus(ctx, &active_cpus); 582266933Sneel assert(CPU_ISSET(vcpu, &active_cpus)); 583266933Sneel 584221828Sgrehan while (1) { 585221828Sgrehan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 586259737Sneel if (error != 0) 587259737Sneel break; 588221828Sgrehan 589221828Sgrehan prevcpu = vcpu; 590253452Sgrehan 591253452Sgrehan exitcode = vmexit[vcpu].exitcode; 592253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 593253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 594253452Sgrehan exitcode); 595253452Sgrehan exit(1); 596253452Sgrehan } 597253452Sgrehan 598253452Sgrehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 599253452Sgrehan 600221828Sgrehan switch (rc) { 601221828Sgrehan case VMEXIT_CONTINUE: 602221828Sgrehan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 603221828Sgrehan break; 604221828Sgrehan case VMEXIT_RESTART: 605221828Sgrehan rip = vmexit[vcpu].rip; 606221828Sgrehan break; 607265941Sneel case VMEXIT_ABORT: 608265941Sneel abort(); 609221828Sgrehan default: 610221828Sgrehan exit(1); 611221828Sgrehan } 612221828Sgrehan } 613221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 614221828Sgrehan} 615221828Sgrehan 616245020Sneelstatic int 617245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 618245020Sneel{ 619245020Sneel int tmp, error; 620221828Sgrehan 621245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 622245020Sneel 623245020Sneel /* 624245020Sneel * The guest is allowed to spinup more than one processor only if the 625245020Sneel * UNRESTRICTED_GUEST capability is available. 
626245020Sneel */ 627245020Sneel if (error == 0) 628245020Sneel return (VM_MAXCPU); 629245020Sneel else 630245020Sneel return (1); 631245020Sneel} 632245020Sneel 633256645Sneelvoid 634256645Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 635256645Sneel{ 636256645Sneel int err, tmp; 637256645Sneel 638256645Sneel if (fbsdrun_vmexit_on_hlt()) { 639256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 640256645Sneel if (err < 0) { 641256645Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 642256645Sneel exit(1); 643256645Sneel } 644256645Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 645256645Sneel if (cpu == BSP) 646256645Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 647256645Sneel } 648256645Sneel 649256645Sneel if (fbsdrun_vmexit_on_pause()) { 650256645Sneel /* 651256645Sneel * pause exit support required for this mode 652256645Sneel */ 653256645Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 654256645Sneel if (err < 0) { 655256645Sneel fprintf(stderr, 656256645Sneel "SMP mux requested, no pause support\n"); 657256645Sneel exit(1); 658256645Sneel } 659256645Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 660256645Sneel if (cpu == BSP) 661256645Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 662256645Sneel } 663256645Sneel 664262236Sneel if (x2apic_mode) 665262236Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 666262236Sneel else 667256645Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 668256645Sneel 669256645Sneel if (err) { 670256645Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 671256645Sneel exit(1); 672256645Sneel } 673256645Sneel 674256645Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 675256645Sneel} 676256645Sneel 677221828Sgrehanint 678221828Sgrehanmain(int argc, char *argv[]) 679221828Sgrehan{ 680257423Sneel int c, error, gdb_port, err, bvmcons; 681265951Sneel int dump_guest_memory, max_vcpus, mptgen; 682221828Sgrehan struct vmctx 
*ctx; 683221828Sgrehan uint64_t rip; 684248477Sneel size_t memsize; 685221828Sgrehan 686242192Sneel bvmcons = 0; 687265951Sneel dump_guest_memory = 0; 688221828Sgrehan progname = basename(argv[0]); 689256156Sneel gdb_port = 0; 690221828Sgrehan guest_ncpus = 1; 691248477Sneel memsize = 256 * MB; 692265211Sneel mptgen = 1; 693221828Sgrehan 694265951Sneel while ((c = getopt(argc, argv, "abehwxACHIPWYp:g:c:s:m:l:U:")) != -1) { 695221828Sgrehan switch (c) { 696240943Sneel case 'a': 697262236Sneel x2apic_mode = 0; 698240943Sneel break; 699243327Sgrehan case 'A': 700243327Sgrehan acpi = 1; 701243327Sgrehan break; 702242192Sneel case 'b': 703242192Sneel bvmcons = 1; 704242192Sneel break; 705221828Sgrehan case 'p': 706265376Sneel if (pincpu_parse(optarg) != 0) { 707265376Sneel errx(EX_USAGE, "invalid vcpu pinning " 708265376Sneel "configuration '%s'", optarg); 709265376Sneel } 710221828Sgrehan break; 711221828Sgrehan case 'c': 712221828Sgrehan guest_ncpus = atoi(optarg); 713221828Sgrehan break; 714265951Sneel case 'C': 715265951Sneel dump_guest_memory = 1; 716265951Sneel break; 717221828Sgrehan case 'g': 718221828Sgrehan gdb_port = atoi(optarg); 719221828Sgrehan break; 720257293Sneel case 'l': 721257293Sneel if (lpc_device_parse(optarg) != 0) { 722257293Sneel errx(EX_USAGE, "invalid lpc device " 723257293Sneel "configuration '%s'", optarg); 724257293Sneel } 725257293Sneel break; 726221828Sgrehan case 's': 727261217Sjhb if (pci_parse_slot(optarg) != 0) 728249916Sneel exit(1); 729249916Sneel else 730249916Sneel break; 731221828Sgrehan case 'm': 732256176Sneel error = vm_parse_memsize(optarg, &memsize); 733256176Sneel if (error) 734256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 735221828Sgrehan break; 736221828Sgrehan case 'H': 737221828Sgrehan guest_vmexit_on_hlt = 1; 738221828Sgrehan break; 739239043Sneel case 'I': 740257423Sneel /* 741257423Sneel * The "-I" option was used to add an ioapic to the 742257423Sneel * virtual machine. 
743257423Sneel * 744257423Sneel * An ioapic is now provided unconditionally for each 745257423Sneel * virtual machine and this option is now deprecated. 746257423Sneel */ 747239043Sneel break; 748221828Sgrehan case 'P': 749221828Sgrehan guest_vmexit_on_pause = 1; 750221828Sgrehan break; 751222105Sgrehan case 'e': 752222105Sgrehan strictio = 1; 753222105Sgrehan break; 754262744Stychon case 'U': 755262744Stychon guest_uuid_str = optarg; 756262744Stychon break; 757259635Sneel case 'w': 758259635Sneel strictmsr = 0; 759259635Sneel break; 760256711Sgrehan case 'W': 761256711Sgrehan virtio_msix = 0; 762256711Sgrehan break; 763262236Sneel case 'x': 764262236Sneel x2apic_mode = 1; 765262236Sneel break; 766265211Sneel case 'Y': 767265211Sneel mptgen = 0; 768265211Sneel break; 769221828Sgrehan case 'h': 770221828Sgrehan usage(0); 771221828Sgrehan default: 772221828Sgrehan usage(1); 773221828Sgrehan } 774221828Sgrehan } 775221828Sgrehan argc -= optind; 776221828Sgrehan argv += optind; 777221828Sgrehan 778221828Sgrehan if (argc != 1) 779221828Sgrehan usage(1); 780221828Sgrehan 781221828Sgrehan vmname = argv[0]; 782221828Sgrehan 783221828Sgrehan ctx = vm_open(vmname); 784221828Sgrehan if (ctx == NULL) { 785221828Sgrehan perror("vm_open"); 786221828Sgrehan exit(1); 787221828Sgrehan } 788221828Sgrehan 789245020Sneel max_vcpus = num_vcpus_allowed(ctx); 790245020Sneel if (guest_ncpus > max_vcpus) { 791245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 792245020Sneel guest_ncpus, max_vcpus); 793245020Sneel exit(1); 794245020Sneel } 795245020Sneel 796256645Sneel fbsdrun_set_capabilities(ctx, BSP); 797221828Sgrehan 798265951Sneel if (dump_guest_memory) 799265951Sneel vm_set_memflags(ctx, VM_MEM_F_INCORE); 800248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 801248477Sneel if (err) { 802248477Sneel fprintf(stderr, "Unable to setup memory (%d)\n", err); 803248477Sneel exit(1); 804221828Sgrehan } 805221828Sgrehan 806249343Sneel init_mem(); 
807221828Sgrehan init_inout(); 808266125Sjhb pci_irq_init(ctx); 809261268Sjhb ioapic_init(ctx); 810252682Sgrehan 811253181Sgrehan rtc_init(ctx); 812266125Sjhb sci_init(ctx); 813253181Sgrehan 814252682Sgrehan /* 815252682Sgrehan * Exit if a device emulation finds an error in it's initilization 816252682Sgrehan */ 817252682Sgrehan if (init_pci(ctx) != 0) 818252682Sgrehan exit(1); 819252682Sgrehan 820221828Sgrehan if (gdb_port != 0) 821221828Sgrehan init_dbgport(gdb_port); 822221828Sgrehan 823242192Sneel if (bvmcons) 824242192Sneel init_bvmcons(); 825242192Sneel 826221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 827221828Sgrehan assert(error == 0); 828221828Sgrehan 829221828Sgrehan /* 830221828Sgrehan * build the guest tables, MP etc. 831221828Sgrehan */ 832265211Sneel if (mptgen) { 833265211Sneel error = mptable_build(ctx, guest_ncpus); 834265211Sneel if (error) 835265211Sneel exit(1); 836265211Sneel } 837221828Sgrehan 838262744Stychon error = smbios_build(ctx); 839262744Stychon assert(error == 0); 840262744Stychon 841243327Sgrehan if (acpi) { 842257423Sneel error = acpi_build(ctx, guest_ncpus); 843243327Sgrehan assert(error == 0); 844243327Sgrehan } 845243327Sgrehan 846221828Sgrehan /* 847257729Sgrehan * Change the proc title to include the VM name. 848257729Sgrehan */ 849257729Sgrehan setproctitle("%s", vmname); 850257729Sgrehan 851257729Sgrehan /* 852221828Sgrehan * Add CPU 0 853221828Sgrehan */ 854263432Sneel fbsdrun_addcpu(ctx, BSP, BSP, rip); 855221828Sgrehan 856221828Sgrehan /* 857221828Sgrehan * Head off to the main event dispatch loop 858221828Sgrehan */ 859221828Sgrehan mevent_dispatch(); 860221828Sgrehan 861221828Sgrehan exit(1); 862221828Sgrehan} 863