/* bhyverun.c revision 246686 */
1169689Skan/*- 2169689Skan * Copyright (c) 2011 NetApp, Inc. 3132718Skan * All rights reserved. 4169689Skan * 590075Sobrien * Redistribution and use in source and binary forms, with or without 690075Sobrien * modification, are permitted provided that the following conditions 790075Sobrien * are met: 890075Sobrien * 1. Redistributions of source code must retain the above copyright 990075Sobrien * notice, this list of conditions and the following disclaimer. 1090075Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1190075Sobrien * notice, this list of conditions and the following disclaimer in the 1290075Sobrien * documentation and/or other materials provided with the distribution. 1390075Sobrien * 14169689Skan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15169689Skan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16169689Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17169689Skan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 1890075Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19169689Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20169689Skan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21169689Skan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22169689Skan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23169689Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24169689Skan * SUCH DAMAGE. 
25169689Skan * 2690075Sobrien * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 246686 2013-02-11 20:36:07Z neel $ 27169689Skan */ 28169689Skan 29169689Skan#include <sys/cdefs.h> 30169689Skan__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 246686 2013-02-11 20:36:07Z neel $"); 31169689Skan 32169689Skan#include <sys/types.h> 33169689Skan#include <sys/mman.h> 34169689Skan#include <sys/time.h> 35169689Skan 36169689Skan#include <machine/segments.h> 37169689Skan 38169689Skan#include <stdio.h> 39169689Skan#include <stdlib.h> 40169689Skan#include <libgen.h> 41169689Skan#include <unistd.h> 42169689Skan#include <assert.h> 43169689Skan#include <errno.h> 44169689Skan#include <signal.h> 45169689Skan#include <pthread.h> 46169689Skan#include <pthread_np.h> 47169689Skan 48169689Skan#include <machine/vmm.h> 49169689Skan#include <vmmapi.h> 50169689Skan 51169689Skan#include "bhyverun.h" 52169689Skan#include "acpi.h" 53169689Skan#include "inout.h" 54169689Skan#include "dbgport.h" 55169689Skan#include "mem.h" 56169689Skan#include "mevent.h" 57169689Skan#include "mptbl.h" 58169689Skan#include "pci_emul.h" 59169689Skan#include "xmsr.h" 60169689Skan#include "ioapic.h" 61169689Skan#include "spinup_ap.h" 62169689Skan 63169689Skan#define DEFAULT_GUEST_HZ 100 64169689Skan#define DEFAULT_GUEST_TSLICE 200 65169689Skan 66169689Skan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 67169689Skan 68169689Skan#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ 69169689Skan#define VMEXIT_CONTINUE 1 /* continue from next instruction */ 70169689Skan#define VMEXIT_RESTART 2 /* restart current instruction */ 71169689Skan#define VMEXIT_ABORT 3 /* abort the vm run loop */ 72169689Skan#define VMEXIT_RESET 4 /* guest machine has reset */ 73169689Skan 74169689Skan#define MB (1024UL * 1024) 75169689Skan#define GB (1024UL * MB) 76169689Skan 77169689Skantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 78169689Skan 79169689Skanint guest_tslice = DEFAULT_GUEST_TSLICE; 
80169689Skanint guest_hz = DEFAULT_GUEST_HZ; 81169689Skanchar *vmname; 82169689Skan 83169689Skanu_long lomem_sz; 84169689Skanu_long himem_sz; 85169689Skan 8690075Sobrienint guest_ncpus; 87169689Skan 88169689Skanstatic int pincpu = -1; 89169689Skanstatic int guest_vcpu_mux; 90169689Skanstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; 91169689Skan 92169689Skanstatic int foundcpus; 93169689Skan 9490075Sobrienstatic int strictio; 95169689Skan 96169689Skanstatic int acpi; 97169689Skan 98169689Skanstatic char *lomem_addr; 99169689Skanstatic char *himem_addr; 100169689Skan 101169689Skanstatic char *progname; 10290075Sobrienstatic const int BSP = 0; 103169689Skan 104169689Skanstatic int cpumask; 105169689Skan 106169689Skanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 107169689Skan 108169689Skanstruct vm_exit vmexit[VM_MAXCPU]; 109169689Skan 110169689Skanstruct fbsdstats { 111169689Skan uint64_t vmexit_bogus; 112169689Skan uint64_t vmexit_bogus_switch; 113169689Skan uint64_t vmexit_hlt; 114169689Skan uint64_t vmexit_pause; 115169689Skan uint64_t vmexit_mtrap; 116169689Skan uint64_t vmexit_paging; 117169689Skan uint64_t cpu_switch_rotate; 118169689Skan uint64_t cpu_switch_direct; 119169689Skan int io_reset; 120169689Skan} stats; 121169689Skan 122169689Skanstruct mt_vmm_info { 123169689Skan pthread_t mt_thr; 124169689Skan struct vmctx *mt_ctx; 125169689Skan int mt_vcpu; 126169689Skan} mt_vmm_info[VM_MAXCPU]; 127169689Skan 128169689Skanstatic void 129169689Skanusage(int code) 130169689Skan{ 131169689Skan 132169689Skan fprintf(stderr, 133169689Skan "Usage: %s [-aehABHIP][-g <gdb port>][-z <hz>][-s <pci>]" 134169689Skan "[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem]" 135169689Skan " <vmname>\n" 136169689Skan " -a: local apic is in XAPIC mode (default is X2APIC)\n" 137169689Skan " -A: create an ACPI table\n" 138169689Skan " -g: gdb port (default is %d and 0 means don't open)\n" 139169689Skan " -c: # cpus (default 1)\n" 
140169689Skan " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" 141169689Skan " -B: inject breakpoint exception on vm entry\n" 142169689Skan " -H: vmexit from the guest on hlt\n" 143169689Skan " -I: present an ioapic to the guest\n" 144169689Skan " -P: vmexit from the guest on pause\n" 145169689Skan " -e: exit on unhandled i/o access\n" 146169689Skan " -h: help\n" 147169689Skan " -z: guest hz (default is %d)\n" 148169689Skan " -s: <slot,driver,configinfo> PCI slot config\n" 149169689Skan " -S: <slot,driver,configinfo> legacy PCI slot config\n" 150169689Skan " -m: lowmem in MB\n" 151169689Skan " -M: highmem in MB\n" 152169689Skan " -x: mux vcpus to 1 hcpu\n" 153169689Skan " -t: mux vcpu timeslice hz (default %d)\n", 154169689Skan progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, 155169689Skan DEFAULT_GUEST_TSLICE); 156169689Skan exit(code); 157169689Skan} 158169689Skan 159169689Skanvoid * 160169689Skanpaddr_guest2host(uintptr_t gaddr) 161169689Skan{ 162169689Skan if (lomem_sz == 0) 163169689Skan return (NULL); 164169689Skan 165169689Skan if (gaddr < lomem_sz) { 166169689Skan return ((void *)(lomem_addr + gaddr)); 167169689Skan } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { 168169689Skan return ((void *)(himem_addr + gaddr - 4*GB)); 169169689Skan } else 170169689Skan return (NULL); 171169689Skan} 172169689Skan 173169689Skanint 174169689Skanfbsdrun_disable_x2apic(void) 175169689Skan{ 176169689Skan 177169689Skan return (disable_x2apic); 17890075Sobrien} 179169689Skan 18090075Sobrienint 181169689Skanfbsdrun_vmexit_on_pause(void) 182169689Skan{ 183169689Skan 184169689Skan return (guest_vmexit_on_pause); 185169689Skan} 186169689Skan 187169689Skanint 188169689Skanfbsdrun_vmexit_on_hlt(void) 189169689Skan{ 19090075Sobrien 191169689Skan return (guest_vmexit_on_hlt); 19290075Sobrien} 193169689Skan 194169689Skanint 195169689Skanfbsdrun_muxed(void) 196169689Skan{ 197169689Skan 198169689Skan return (guest_vcpu_mux); 199169689Skan} 200169689Skan 201169689Skanstatic void * 
202169689Skanfbsdrun_start_thread(void *param) 203169689Skan{ 20490075Sobrien char tname[MAXCOMLEN + 1]; 205169689Skan struct mt_vmm_info *mtp; 20690075Sobrien int vcpu; 207169689Skan 208169689Skan mtp = param; 20990075Sobrien vcpu = mtp->mt_vcpu; 210169689Skan 211169689Skan snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); 212169689Skan pthread_set_name_np(mtp->mt_thr, tname); 213169689Skan 214169689Skan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 215169689Skan 21690075Sobrien /* not reached */ 217169689Skan exit(1); 21890075Sobrien return (NULL); 219169689Skan} 220169689Skan 221169689Skanvoid 222169689Skanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) 22390075Sobrien{ 224169689Skan int error; 22590075Sobrien 226169689Skan if (cpumask & (1 << vcpu)) { 227169689Skan fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", 228169689Skan vcpu); 229169689Skan exit(1); 230169689Skan } 231169689Skan 232169689Skan cpumask |= 1 << vcpu; 233169689Skan foundcpus++; 23490075Sobrien 235169689Skan /* 23690075Sobrien * Set up the vmexit struct to allow execution to start 237169689Skan * at the given RIP 238169689Skan */ 239169689Skan vmexit[vcpu].rip = rip; 240169689Skan vmexit[vcpu].inst_length = 0; 241169689Skan 242169689Skan if (vcpu == BSP || !guest_vcpu_mux){ 243169689Skan mt_vmm_info[vcpu].mt_ctx = ctx; 244169689Skan mt_vmm_info[vcpu].mt_vcpu = vcpu; 245169689Skan 246169689Skan error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, 24790075Sobrien fbsdrun_start_thread, &mt_vmm_info[vcpu]); 248169689Skan assert(error == 0); 249169689Skan } 250169689Skan} 251169689Skan 252169689Skanstatic int 253169689Skanfbsdrun_get_next_cpu(int curcpu) 254169689Skan{ 255169689Skan 256169689Skan /* 257169689Skan * Get the next available CPU. Assumes they arrive 258169689Skan * in ascending order with no gaps. 
259169689Skan */ 260169689Skan return ((curcpu + 1) % foundcpus); 261169689Skan} 262169689Skan 263169689Skanstatic int 264169689Skanvmexit_catch_reset(void) 265169689Skan{ 266169689Skan stats.io_reset++; 267169689Skan return (VMEXIT_RESET); 268169689Skan} 269169689Skan 270169689Skanstatic int 271169689Skanvmexit_catch_inout(void) 272169689Skan{ 273169689Skan return (VMEXIT_ABORT); 274169689Skan} 275169689Skan 276169689Skanstatic int 277169689Skanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 278169689Skan uint32_t eax) 279169689Skan{ 280169689Skan#if PG_DEBUG /* put all types of debug here */ 281169689Skan if (eax == 0) { 282169689Skan pause_noswitch = 1; 283169689Skan } else if (eax == 1) { 284169689Skan pause_noswitch = 0; 285169689Skan } else { 286169689Skan pause_noswitch = 0; 287169689Skan if (eax == 5) { 288169689Skan vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); 289169689Skan } 290169689Skan } 291169689Skan#endif 292169689Skan return (VMEXIT_CONTINUE); 293169689Skan} 294169689Skan 295169689Skanstatic int 296169689Skanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 297169689Skan{ 298169689Skan int error; 299169689Skan int bytes, port, in, out; 300169689Skan uint32_t eax; 301169689Skan int vcpu; 302169689Skan 303169689Skan vcpu = *pvcpu; 304169689Skan 305169689Skan port = vme->u.inout.port; 306169689Skan bytes = vme->u.inout.bytes; 307169689Skan eax = vme->u.inout.eax; 308169689Skan in = vme->u.inout.in; 309169689Skan out = !in; 310169689Skan 311169689Skan /* We don't deal with these */ 312169689Skan if (vme->u.inout.string || vme->u.inout.rep) 313169689Skan return (VMEXIT_ABORT); 314169689Skan 315169689Skan /* Special case of guest reset */ 316169689Skan if (out && port == 0x64 && (uint8_t)eax == 0xFE) 317169689Skan return (vmexit_catch_reset()); 318169689Skan 319169689Skan /* Extra-special case of host notifications */ 320169689Skan if (out && port == GUEST_NIO_PORT) 321169689Skan return 
(vmexit_handle_notify(ctx, vme, pvcpu, eax)); 322169689Skan 323169689Skan error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); 324169689Skan if (error == 0 && in) 325169689Skan error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); 326169689Skan 327169689Skan if (error == 0) 328169689Skan return (VMEXIT_CONTINUE); 329169689Skan else { 330169689Skan fprintf(stderr, "Unhandled %s%c 0x%04x\n", 331169689Skan in ? "in" : "out", 332169689Skan bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 333169689Skan return (vmexit_catch_inout()); 334169689Skan } 335169689Skan} 336169689Skan 337169689Skanstatic int 338169689Skanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 339169689Skan{ 340169689Skan fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, 341169689Skan *pvcpu); 342169689Skan return (VMEXIT_ABORT); 343169689Skan} 344169689Skan 345169689Skanstatic int 346169689Skanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 347169689Skan{ 348169689Skan int newcpu; 349169689Skan int retval = VMEXIT_CONTINUE; 350169689Skan 351169689Skan newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); 352169689Skan 353169689Skan if (guest_vcpu_mux && *pvcpu != newcpu) { 354169689Skan retval = VMEXIT_SWITCH; 355169689Skan *pvcpu = newcpu; 356169689Skan } 357169689Skan 358169689Skan return (retval); 359169689Skan} 360169689Skan 361169689Skanstatic int 362169689Skanvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 363169689Skan{ 364169689Skan int newcpu; 365169689Skan int retval = VMEXIT_CONTINUE; 366169689Skan 367169689Skan newcpu = spinup_ap(ctx, *pvcpu, 368169689Skan vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 369169689Skan 370169689Skan if (guest_vcpu_mux && *pvcpu != newcpu) { 371169689Skan retval = VMEXIT_SWITCH; 372169689Skan *pvcpu = newcpu; 373169689Skan } 374169689Skan 375169689Skan return (retval); 376169689Skan} 377169689Skan 378169689Skanstatic int 379169689Skanvmexit_vmx(struct 
vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 380169689Skan{ 381169689Skan 382169689Skan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 383169689Skan fprintf(stderr, "\treason\t\tVMX\n"); 384169689Skan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 385169689Skan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 386169689Skan fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error); 387169689Skan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 388169689Skan fprintf(stderr, "\tqualification\t0x%016lx\n", 389169689Skan vmexit->u.vmx.exit_qualification); 390169689Skan 391169689Skan return (VMEXIT_ABORT); 392169689Skan} 393169689Skan 394169689Skanstatic int bogus_noswitch = 1; 395169689Skan 396169689Skanstatic int 397169689Skanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 398169689Skan{ 399169689Skan stats.vmexit_bogus++; 400169689Skan 401169689Skan if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { 402169689Skan return (VMEXIT_RESTART); 403169689Skan } else { 404169689Skan stats.vmexit_bogus_switch++; 405169689Skan vmexit->inst_length = 0; 406169689Skan *pvcpu = -1; 407169689Skan return (VMEXIT_SWITCH); 408169689Skan } 409169689Skan} 410169689Skan 411169689Skanstatic int 412169689Skanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 413169689Skan{ 414169689Skan stats.vmexit_hlt++; 415169689Skan if (fbsdrun_muxed()) { 416169689Skan *pvcpu = -1; 417169689Skan return (VMEXIT_SWITCH); 418169689Skan } else { 419169689Skan /* 420169689Skan * Just continue execution with the next instruction. We use 421169689Skan * the HLT VM exit as a way to be friendly with the host 422169689Skan * scheduler. 
423169689Skan */ 424169689Skan return (VMEXIT_CONTINUE); 425169689Skan } 426169689Skan} 427169689Skan 428169689Skanstatic int pause_noswitch; 429169689Skan 430169689Skanstatic int 431169689Skanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 432169689Skan{ 433169689Skan stats.vmexit_pause++; 434169689Skan 435169689Skan if (fbsdrun_muxed() && !pause_noswitch) { 436169689Skan *pvcpu = -1; 437169689Skan return (VMEXIT_SWITCH); 438169689Skan } else { 439169689Skan return (VMEXIT_CONTINUE); 440169689Skan } 441169689Skan} 442169689Skan 443169689Skanstatic int 444169689Skanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 445169689Skan{ 446169689Skan stats.vmexit_mtrap++; 447169689Skan 448169689Skan return (VMEXIT_RESTART); 449169689Skan} 450169689Skan 451169689Skanstatic int 452169689Skanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 453169689Skan{ 454169689Skan int err; 455169689Skan stats.vmexit_paging++; 456169689Skan 457169689Skan err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, 458169689Skan &vmexit->u.paging.vie); 459169689Skan 460169689Skan if (err) { 461169689Skan if (err == EINVAL) { 462169689Skan fprintf(stderr, 463169689Skan "Failed to emulate instruction at 0x%lx\n", 464169689Skan vmexit->rip); 465169689Skan } else if (err == ESRCH) { 466169689Skan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 467169689Skan vmexit->u.paging.gpa); 468169689Skan } 469169689Skan 470169689Skan return (VMEXIT_ABORT); 471169689Skan } 472169689Skan 473169689Skan return (VMEXIT_CONTINUE); 474169689Skan} 475169689Skan 476169689Skanstatic void 477169689Skansigalrm(int sig) 478169689Skan{ 479169689Skan return; 480169689Skan} 481169689Skan 482169689Skanstatic void 483169689Skansetup_timeslice(void) 484169689Skan{ 485169689Skan struct sigaction sa; 486169689Skan struct itimerval itv; 487169689Skan int error; 488169689Skan 489169689Skan /* 490169689Skan * Setup a realtime timer to generate a SIGALRM at a 491169689Skan * 
frequency of 'guest_tslice' ticks per second. 492169689Skan */ 493169689Skan sigemptyset(&sa.sa_mask); 494169689Skan sa.sa_flags = 0; 495169689Skan sa.sa_handler = sigalrm; 496169689Skan 497169689Skan error = sigaction(SIGALRM, &sa, NULL); 498169689Skan assert(error == 0); 499169689Skan 500169689Skan itv.it_interval.tv_sec = 0; 501169689Skan itv.it_interval.tv_usec = 1000000 / guest_tslice; 502169689Skan itv.it_value.tv_sec = 0; 503169689Skan itv.it_value.tv_usec = 1000000 / guest_tslice; 504169689Skan 505169689Skan error = setitimer(ITIMER_REAL, &itv, NULL); 506169689Skan assert(error == 0); 507169689Skan} 508169689Skan 509169689Skanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 510169689Skan [VM_EXITCODE_INOUT] = vmexit_inout, 511169689Skan [VM_EXITCODE_VMX] = vmexit_vmx, 512169689Skan [VM_EXITCODE_BOGUS] = vmexit_bogus, 513169689Skan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 514169689Skan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 515169689Skan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 516169689Skan [VM_EXITCODE_PAGING] = vmexit_paging, 517169689Skan [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 518169689Skan}; 519169689Skan 520169689Skanstatic void 521169689Skanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 522169689Skan{ 523169689Skan cpuset_t mask; 524169689Skan int error, rc, prevcpu; 525169689Skan 526169689Skan if (guest_vcpu_mux) 527169689Skan setup_timeslice(); 528169689Skan 529169689Skan if (pincpu >= 0) { 530169689Skan CPU_ZERO(&mask); 531169689Skan CPU_SET(pincpu + vcpu, &mask); 532169689Skan error = pthread_setaffinity_np(pthread_self(), 533169689Skan sizeof(mask), &mask); 534169689Skan assert(error == 0); 535169689Skan } 536169689Skan 537169689Skan while (1) { 538169689Skan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 539169689Skan if (error != 0) { 540169689Skan /* 541169689Skan * It is possible that 'vmmctl' or some other process 542169689Skan * has transitioned the vcpu to CANNOT_RUN state right 543169689Skan * before we tried to transition it to RUNNING. 
544169689Skan * 545169689Skan * This is expected to be temporary so just retry. 546169689Skan */ 547169689Skan if (errno == EBUSY) 548169689Skan continue; 549169689Skan else 550169689Skan break; 551169689Skan } 552169689Skan 553169689Skan prevcpu = vcpu; 554169689Skan rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], 555169689Skan &vcpu); 556169689Skan switch (rc) { 557169689Skan case VMEXIT_SWITCH: 558169689Skan assert(guest_vcpu_mux); 559169689Skan if (vcpu == -1) { 560169689Skan stats.cpu_switch_rotate++; 561169689Skan vcpu = fbsdrun_get_next_cpu(prevcpu); 562169689Skan } else { 563169689Skan stats.cpu_switch_direct++; 564169689Skan } 565169689Skan /* fall through */ 566169689Skan case VMEXIT_CONTINUE: 567169689Skan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 568169689Skan break; 569169689Skan case VMEXIT_RESTART: 570169689Skan rip = vmexit[vcpu].rip; 571169689Skan break; 572169689Skan case VMEXIT_RESET: 573169689Skan exit(0); 574169689Skan default: 575169689Skan exit(1); 576169689Skan } 577169689Skan } 578169689Skan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 579169689Skan} 580169689Skan 581169689Skanstatic int 582169689Skannum_vcpus_allowed(struct vmctx *ctx) 583169689Skan{ 584169689Skan int tmp, error; 585169689Skan 586169689Skan error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 587169689Skan 588169689Skan /* 589169689Skan * The guest is allowed to spinup more than one processor only if the 590169689Skan * UNRESTRICTED_GUEST capability is available. 
591169689Skan */ 592169689Skan if (error == 0) 593169689Skan return (VM_MAXCPU); 594169689Skan else 595169689Skan return (1); 596169689Skan} 597169689Skan 598169689Skanint 599169689Skanmain(int argc, char *argv[]) 600169689Skan{ 601169689Skan int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; 602169689Skan int max_vcpus; 603169689Skan struct vmctx *ctx; 604169689Skan uint64_t rip; 605169689Skan 606169689Skan bvmcons = 0; 607169689Skan inject_bkpt = 0; 608169689Skan progname = basename(argv[0]); 609169689Skan gdb_port = DEFAULT_GDB_PORT; 610169689Skan guest_ncpus = 1; 611169689Skan ioapic = 0; 612169689Skan 613169689Skan while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:n:m:M:")) != -1) { 614169689Skan switch (c) { 615169689Skan case 'a': 616169689Skan disable_x2apic = 1; 617169689Skan break; 618169689Skan case 'A': 619169689Skan acpi = 1; 620169689Skan break; 621169689Skan case 'b': 622169689Skan bvmcons = 1; 623169689Skan break; 624169689Skan case 'B': 625169689Skan inject_bkpt = 1; 626169689Skan break; 627169689Skan case 'x': 628169689Skan guest_vcpu_mux = 1; 629169689Skan break; 630169689Skan case 'p': 631169689Skan pincpu = atoi(optarg); 632169689Skan break; 633169689Skan case 'c': 634169689Skan guest_ncpus = atoi(optarg); 635169689Skan break; 636169689Skan case 'g': 637169689Skan gdb_port = atoi(optarg); 638169689Skan break; 639169689Skan case 'z': 640169689Skan guest_hz = atoi(optarg); 641169689Skan break; 642169689Skan case 't': 643169689Skan guest_tslice = atoi(optarg); 644169689Skan break; 645169689Skan case 's': 646169689Skan pci_parse_slot(optarg, 0); 647169689Skan break; 648169689Skan case 'S': 64990075Sobrien pci_parse_slot(optarg, 1); 65090075Sobrien break; 65190075Sobrien case 'm': 65290075Sobrien lomem_sz = strtoul(optarg, NULL, 0) * MB; 65390075Sobrien break; 65490075Sobrien case 'M': 65590075Sobrien himem_sz = strtoul(optarg, NULL, 0) * MB; 65690075Sobrien break; 65790075Sobrien case 'H': 65890075Sobrien guest_vmexit_on_hlt = 1; 
65990075Sobrien break; 66090075Sobrien case 'I': 66190075Sobrien ioapic = 1; 66290075Sobrien break; 66390075Sobrien case 'P': 66490075Sobrien guest_vmexit_on_pause = 1; 66590075Sobrien break; 66690075Sobrien case 'e': 66790075Sobrien strictio = 1; 66890075Sobrien break; 66990075Sobrien case 'h': 67090075Sobrien usage(0); 671169689Skan default: 67290075Sobrien usage(1); 673169689Skan } 674169689Skan } 675169689Skan argc -= optind; 676169689Skan argv += optind; 677169689Skan 67890075Sobrien if (argc != 1) 679169689Skan usage(1); 68090075Sobrien 681169689Skan /* No need to mux if guest is uni-processor */ 682169689Skan if (guest_ncpus <= 1) 683169689Skan guest_vcpu_mux = 0; 684169689Skan 685169689Skan /* vmexit on hlt if guest is muxed */ 686169689Skan if (guest_vcpu_mux) { 68790075Sobrien guest_vmexit_on_hlt = 1; 688169689Skan guest_vmexit_on_pause = 1; 68990075Sobrien } 690169689Skan 691169689Skan vmname = argv[0]; 692169689Skan 69390075Sobrien ctx = vm_open(vmname); 694169689Skan if (ctx == NULL) { 69590075Sobrien perror("vm_open"); 696169689Skan exit(1); 69790075Sobrien } 69890075Sobrien 69990075Sobrien max_vcpus = num_vcpus_allowed(ctx); 70090075Sobrien if (guest_ncpus > max_vcpus) { 70190075Sobrien fprintf(stderr, "%d vCPUs requested but only %d available\n", 70290075Sobrien guest_ncpus, max_vcpus); 70390075Sobrien exit(1); 70490075Sobrien } 70590075Sobrien 70690075Sobrien if (fbsdrun_vmexit_on_hlt()) { 70790075Sobrien err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); 70890075Sobrien if (err < 0) { 70990075Sobrien fprintf(stderr, "VM exit on HLT not supported\n"); 71090075Sobrien exit(1); 71190075Sobrien } 71290075Sobrien vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); 71390075Sobrien handler[VM_EXITCODE_HLT] = vmexit_hlt; 714169689Skan } 71590075Sobrien 716169689Skan if (fbsdrun_vmexit_on_pause()) { 717169689Skan /* 718169689Skan * pause exit support required for this mode 719169689Skan */ 720169689Skan err = vm_get_capability(ctx, BSP, 
VM_CAP_PAUSE_EXIT, &tmp); 721169689Skan if (err < 0) { 722169689Skan fprintf(stderr, 723169689Skan "SMP mux requested, no pause support\n"); 724169689Skan exit(1); 72590075Sobrien } 726169689Skan vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); 727132718Skan handler[VM_EXITCODE_PAUSE] = vmexit_pause; 728169689Skan } 729169689Skan 730169689Skan if (fbsdrun_disable_x2apic()) 731132718Skan err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); 732169689Skan else 733132718Skan err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); 734169689Skan 735169689Skan if (err) { 736169689Skan fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 737169689Skan exit(1); 738169689Skan } 739132718Skan 740169689Skan if (lomem_sz != 0) { 741132718Skan lomem_addr = vm_map_memory(ctx, 0, lomem_sz); 742169689Skan if (lomem_addr == (char *) MAP_FAILED) { 743169689Skan lomem_sz = 0; 744169689Skan } else if (himem_sz != 0) { 745132718Skan himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); 746169689Skan if (himem_addr == (char *) MAP_FAILED) { 74790075Sobrien lomem_sz = 0; 748169689Skan himem_sz = 0; 749169689Skan } 750169689Skan } 751169689Skan } 752169689Skan 753169689Skan init_inout(); 75490075Sobrien init_pci(ctx); 755169689Skan if (ioapic) 75690075Sobrien ioapic_init(0); 757169689Skan 758169689Skan if (gdb_port != 0) 759169689Skan init_dbgport(gdb_port); 760169689Skan 761169689Skan if (bvmcons) 76290075Sobrien init_bvmcons(); 763169689Skan 76490075Sobrien error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 765169689Skan assert(error == 0); 766169689Skan 767169689Skan if (inject_bkpt) { 768169689Skan error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); 769169689Skan assert(error == 0); 77090075Sobrien } 771169689Skan 77290075Sobrien /* 773169689Skan * build the guest tables, MP etc. 
774169689Skan */ 775169689Skan mptable_build(ctx, guest_ncpus, ioapic); 776169689Skan 777169689Skan if (acpi) { 77890075Sobrien error = acpi_build(ctx, guest_ncpus, ioapic); 779169689Skan assert(error == 0); 780169689Skan } 781169689Skan 78290075Sobrien /* 783169689Skan * Add CPU 0 784169689Skan */ 785169689Skan fbsdrun_addcpu(ctx, BSP, rip); 786169689Skan 78790075Sobrien /* 788169689Skan * Head off to the main event dispatch loop 78990075Sobrien */ 790169689Skan mevent_dispatch(); 791169689Skan 792169689Skan exit(1); 793169689Skan} 794169689Skan