/* bhyverun.c revision 249916 */
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 
25221828Sgrehan * 26221828Sgrehan * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 249916 2013-04-26 02:24:50Z neel $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 249916 2013-04-26 02:24:50Z neel $"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36221828Sgrehan#include <machine/segments.h> 37221828Sgrehan 38221828Sgrehan#include <stdio.h> 39221828Sgrehan#include <stdlib.h> 40221828Sgrehan#include <libgen.h> 41221828Sgrehan#include <unistd.h> 42221828Sgrehan#include <assert.h> 43221828Sgrehan#include <errno.h> 44221828Sgrehan#include <signal.h> 45221828Sgrehan#include <pthread.h> 46242404Sgrehan#include <pthread_np.h> 47221828Sgrehan 48221828Sgrehan#include <machine/vmm.h> 49221828Sgrehan#include <vmmapi.h> 50221828Sgrehan 51244167Sgrehan#include "bhyverun.h" 52243327Sgrehan#include "acpi.h" 53221828Sgrehan#include "inout.h" 54221828Sgrehan#include "dbgport.h" 55241744Sgrehan#include "mem.h" 56221828Sgrehan#include "mevent.h" 57242131Sgrehan#include "mptbl.h" 58221828Sgrehan#include "pci_emul.h" 59221828Sgrehan#include "xmsr.h" 60239045Sneel#include "ioapic.h" 61240912Sneel#include "spinup_ap.h" 62221828Sgrehan 63221828Sgrehan#define DEFAULT_GUEST_HZ 100 64221828Sgrehan#define DEFAULT_GUEST_TSLICE 200 65221828Sgrehan 66221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 67221828Sgrehan 68221828Sgrehan#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ 69221828Sgrehan#define VMEXIT_CONTINUE 1 /* continue from next instruction */ 70221828Sgrehan#define VMEXIT_RESTART 2 /* restart current instruction */ 71221828Sgrehan#define VMEXIT_ABORT 3 /* abort the vm run loop */ 72221828Sgrehan#define VMEXIT_RESET 4 /* guest machine has reset */ 73221828Sgrehan 74221828Sgrehan#define MB (1024UL * 1024) 75221828Sgrehan#define GB (1024UL * MB) 76221828Sgrehan 
77221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 78221828Sgrehan 79221828Sgrehanint guest_tslice = DEFAULT_GUEST_TSLICE; 80221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ; 81221828Sgrehanchar *vmname; 82221828Sgrehan 83221828Sgrehanint guest_ncpus; 84221828Sgrehan 85221828Sgrehanstatic int pincpu = -1; 86221828Sgrehanstatic int guest_vcpu_mux; 87240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; 88221828Sgrehan 89221828Sgrehanstatic int foundcpus; 90221828Sgrehan 91222105Sgrehanstatic int strictio; 92222105Sgrehan 93243327Sgrehanstatic int acpi; 94243327Sgrehan 95221828Sgrehanstatic char *progname; 96221828Sgrehanstatic const int BSP = 0; 97221828Sgrehan 98221828Sgrehanstatic int cpumask; 99221828Sgrehan 100221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 101221828Sgrehan 102221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU]; 103221828Sgrehan 104221828Sgrehanstruct fbsdstats { 105221828Sgrehan uint64_t vmexit_bogus; 106221828Sgrehan uint64_t vmexit_bogus_switch; 107221828Sgrehan uint64_t vmexit_hlt; 108221828Sgrehan uint64_t vmexit_pause; 109221828Sgrehan uint64_t vmexit_mtrap; 110234761Sgrehan uint64_t vmexit_paging; 111221828Sgrehan uint64_t cpu_switch_rotate; 112221828Sgrehan uint64_t cpu_switch_direct; 113221828Sgrehan int io_reset; 114221828Sgrehan} stats; 115221828Sgrehan 116221828Sgrehanstruct mt_vmm_info { 117221828Sgrehan pthread_t mt_thr; 118221828Sgrehan struct vmctx *mt_ctx; 119221828Sgrehan int mt_vcpu; 120221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 121221828Sgrehan 122221828Sgrehanstatic void 123221828Sgrehanusage(int code) 124221828Sgrehan{ 125221828Sgrehan 126221828Sgrehan fprintf(stderr, 127243327Sgrehan "Usage: %s [-aehABHIP][-g <gdb port>][-z <hz>][-s <pci>]" 128245679Sneel "[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem]" 129245679Sneel " <vmname>\n" 130240943Sneel " -a: local apic is in XAPIC mode (default is X2APIC)\n" 131243327Sgrehan " -A: create 
an ACPI table\n" 132221828Sgrehan " -g: gdb port (default is %d and 0 means don't open)\n" 133221828Sgrehan " -c: # cpus (default 1)\n" 134221828Sgrehan " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" 135221828Sgrehan " -B: inject breakpoint exception on vm entry\n" 136221828Sgrehan " -H: vmexit from the guest on hlt\n" 137239043Sneel " -I: present an ioapic to the guest\n" 138221828Sgrehan " -P: vmexit from the guest on pause\n" 139222105Sgrehan " -e: exit on unhandled i/o access\n" 140221828Sgrehan " -h: help\n" 141221828Sgrehan " -z: guest hz (default is %d)\n" 142221828Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 143234938Sgrehan " -S: <slot,driver,configinfo> legacy PCI slot config\n" 144248477Sneel " -m: memory size in MB\n" 145221828Sgrehan " -x: mux vcpus to 1 hcpu\n" 146221828Sgrehan " -t: mux vcpu timeslice hz (default %d)\n", 147221828Sgrehan progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, 148221828Sgrehan DEFAULT_GUEST_TSLICE); 149221828Sgrehan exit(code); 150221828Sgrehan} 151221828Sgrehan 152221828Sgrehanvoid * 153248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 154221828Sgrehan{ 155221828Sgrehan 156248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 157221828Sgrehan} 158221828Sgrehan 159221828Sgrehanint 160240943Sneelfbsdrun_disable_x2apic(void) 161240943Sneel{ 162240943Sneel 163240943Sneel return (disable_x2apic); 164240943Sneel} 165240943Sneel 166240943Sneelint 167221828Sgrehanfbsdrun_vmexit_on_pause(void) 168221828Sgrehan{ 169221828Sgrehan 170221828Sgrehan return (guest_vmexit_on_pause); 171221828Sgrehan} 172221828Sgrehan 173221828Sgrehanint 174221828Sgrehanfbsdrun_vmexit_on_hlt(void) 175221828Sgrehan{ 176221828Sgrehan 177221828Sgrehan return (guest_vmexit_on_hlt); 178221828Sgrehan} 179221828Sgrehan 180221828Sgrehanint 181221828Sgrehanfbsdrun_muxed(void) 182221828Sgrehan{ 183221828Sgrehan 184221828Sgrehan return (guest_vcpu_mux); 185221828Sgrehan} 186221828Sgrehan 187221942Sjhbstatic void * 
188221828Sgrehanfbsdrun_start_thread(void *param) 189221828Sgrehan{ 190242404Sgrehan char tname[MAXCOMLEN + 1]; 191242404Sgrehan struct mt_vmm_info *mtp; 192221828Sgrehan int vcpu; 193221828Sgrehan 194242404Sgrehan mtp = param; 195221828Sgrehan vcpu = mtp->mt_vcpu; 196242404Sgrehan 197242404Sgrehan snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); 198242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 199242404Sgrehan 200221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 201221828Sgrehan 202221828Sgrehan /* not reached */ 203221828Sgrehan exit(1); 204221828Sgrehan return (NULL); 205221828Sgrehan} 206221828Sgrehan 207221828Sgrehanvoid 208221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) 209221828Sgrehan{ 210221828Sgrehan int error; 211221828Sgrehan 212221828Sgrehan if (cpumask & (1 << vcpu)) { 213242385Sgrehan fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", 214242385Sgrehan vcpu); 215221828Sgrehan exit(1); 216221828Sgrehan } 217221828Sgrehan 218221828Sgrehan cpumask |= 1 << vcpu; 219221828Sgrehan foundcpus++; 220221828Sgrehan 221221828Sgrehan /* 222221828Sgrehan * Set up the vmexit struct to allow execution to start 223221828Sgrehan * at the given RIP 224221828Sgrehan */ 225221828Sgrehan vmexit[vcpu].rip = rip; 226221828Sgrehan vmexit[vcpu].inst_length = 0; 227221828Sgrehan 228221828Sgrehan if (vcpu == BSP || !guest_vcpu_mux){ 229221828Sgrehan mt_vmm_info[vcpu].mt_ctx = ctx; 230221828Sgrehan mt_vmm_info[vcpu].mt_vcpu = vcpu; 231221828Sgrehan 232221828Sgrehan error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, 233221828Sgrehan fbsdrun_start_thread, &mt_vmm_info[vcpu]); 234221828Sgrehan assert(error == 0); 235221828Sgrehan } 236221828Sgrehan} 237221828Sgrehan 238221828Sgrehanstatic int 239221828Sgrehanfbsdrun_get_next_cpu(int curcpu) 240221828Sgrehan{ 241221828Sgrehan 242221828Sgrehan /* 243221828Sgrehan * Get the next available CPU. Assumes they arrive 244221828Sgrehan * in ascending order with no gaps. 
245221828Sgrehan */ 246221828Sgrehan return ((curcpu + 1) % foundcpus); 247221828Sgrehan} 248221828Sgrehan 249221942Sjhbstatic int 250221828Sgrehanvmexit_catch_reset(void) 251221828Sgrehan{ 252221828Sgrehan stats.io_reset++; 253221828Sgrehan return (VMEXIT_RESET); 254221828Sgrehan} 255221828Sgrehan 256221942Sjhbstatic int 257221828Sgrehanvmexit_catch_inout(void) 258221828Sgrehan{ 259221828Sgrehan return (VMEXIT_ABORT); 260221828Sgrehan} 261221828Sgrehan 262221942Sjhbstatic int 263221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 264221828Sgrehan uint32_t eax) 265221828Sgrehan{ 266221828Sgrehan#if PG_DEBUG /* put all types of debug here */ 267221828Sgrehan if (eax == 0) { 268221828Sgrehan pause_noswitch = 1; 269221828Sgrehan } else if (eax == 1) { 270221828Sgrehan pause_noswitch = 0; 271221828Sgrehan } else { 272221828Sgrehan pause_noswitch = 0; 273221828Sgrehan if (eax == 5) { 274221828Sgrehan vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); 275221828Sgrehan } 276221828Sgrehan } 277221828Sgrehan#endif 278221828Sgrehan return (VMEXIT_CONTINUE); 279221828Sgrehan} 280221828Sgrehan 281221828Sgrehanstatic int 282221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 283221828Sgrehan{ 284221828Sgrehan int error; 285221828Sgrehan int bytes, port, in, out; 286221828Sgrehan uint32_t eax; 287221828Sgrehan int vcpu; 288221828Sgrehan 289221828Sgrehan vcpu = *pvcpu; 290221828Sgrehan 291221828Sgrehan port = vme->u.inout.port; 292221828Sgrehan bytes = vme->u.inout.bytes; 293221828Sgrehan eax = vme->u.inout.eax; 294221828Sgrehan in = vme->u.inout.in; 295221828Sgrehan out = !in; 296221828Sgrehan 297221828Sgrehan /* We don't deal with these */ 298221828Sgrehan if (vme->u.inout.string || vme->u.inout.rep) 299221828Sgrehan return (VMEXIT_ABORT); 300221828Sgrehan 301221828Sgrehan /* Special case of guest reset */ 302221828Sgrehan if (out && port == 0x64 && (uint8_t)eax == 0xFE) 303221828Sgrehan return 
(vmexit_catch_reset()); 304221828Sgrehan 305221828Sgrehan /* Extra-special case of host notifications */ 306221828Sgrehan if (out && port == GUEST_NIO_PORT) 307221828Sgrehan return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); 308221828Sgrehan 309222105Sgrehan error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); 310221828Sgrehan if (error == 0 && in) 311221828Sgrehan error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); 312221828Sgrehan 313221828Sgrehan if (error == 0) 314221828Sgrehan return (VMEXIT_CONTINUE); 315221828Sgrehan else { 316221828Sgrehan fprintf(stderr, "Unhandled %s%c 0x%04x\n", 317221828Sgrehan in ? "in" : "out", 318221828Sgrehan bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 319221828Sgrehan return (vmexit_catch_inout()); 320221828Sgrehan } 321221828Sgrehan} 322221828Sgrehan 323221828Sgrehanstatic int 324221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 325221828Sgrehan{ 326242385Sgrehan fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, 327242385Sgrehan *pvcpu); 328221828Sgrehan return (VMEXIT_ABORT); 329221828Sgrehan} 330221828Sgrehan 331221828Sgrehanstatic int 332221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 333221828Sgrehan{ 334221828Sgrehan int newcpu; 335221828Sgrehan int retval = VMEXIT_CONTINUE; 336221828Sgrehan 337221828Sgrehan newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); 338221828Sgrehan 339221828Sgrehan if (guest_vcpu_mux && *pvcpu != newcpu) { 340221828Sgrehan retval = VMEXIT_SWITCH; 341221828Sgrehan *pvcpu = newcpu; 342221828Sgrehan } 343221828Sgrehan 344221828Sgrehan return (retval); 345221828Sgrehan} 346221828Sgrehan 347221828Sgrehanstatic int 348240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 349240912Sneel{ 350240912Sneel int newcpu; 351240912Sneel int retval = VMEXIT_CONTINUE; 352240912Sneel 353240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 354240912Sneel vme->u.spinup_ap.vcpu, 
vme->u.spinup_ap.rip); 355240912Sneel 356240912Sneel if (guest_vcpu_mux && *pvcpu != newcpu) { 357240912Sneel retval = VMEXIT_SWITCH; 358240912Sneel *pvcpu = newcpu; 359240912Sneel } 360240912Sneel 361240912Sneel return (retval); 362240912Sneel} 363240912Sneel 364240912Sneelstatic int 365221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 366221828Sgrehan{ 367221828Sgrehan 368242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 369242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 370242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 371242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 372242385Sgrehan fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error); 373242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 374242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 375242385Sgrehan vmexit->u.vmx.exit_qualification); 376221828Sgrehan 377221828Sgrehan return (VMEXIT_ABORT); 378221828Sgrehan} 379221828Sgrehan 380221828Sgrehanstatic int bogus_noswitch = 1; 381221828Sgrehan 382221828Sgrehanstatic int 383221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 384221828Sgrehan{ 385221828Sgrehan stats.vmexit_bogus++; 386221828Sgrehan 387221828Sgrehan if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { 388221828Sgrehan return (VMEXIT_RESTART); 389221828Sgrehan } else { 390221828Sgrehan stats.vmexit_bogus_switch++; 391221828Sgrehan vmexit->inst_length = 0; 392221828Sgrehan *pvcpu = -1; 393221828Sgrehan return (VMEXIT_SWITCH); 394221828Sgrehan } 395221828Sgrehan} 396221828Sgrehan 397221828Sgrehanstatic int 398221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 399221828Sgrehan{ 400221828Sgrehan stats.vmexit_hlt++; 401221828Sgrehan if (fbsdrun_muxed()) { 402221828Sgrehan *pvcpu = -1; 403221828Sgrehan return (VMEXIT_SWITCH); 404221828Sgrehan } else { 405221828Sgrehan /* 406221828Sgrehan * Just continue 
execution with the next instruction. We use 407221828Sgrehan * the HLT VM exit as a way to be friendly with the host 408221828Sgrehan * scheduler. 409221828Sgrehan */ 410221828Sgrehan return (VMEXIT_CONTINUE); 411221828Sgrehan } 412221828Sgrehan} 413221828Sgrehan 414221828Sgrehanstatic int pause_noswitch; 415221828Sgrehan 416221828Sgrehanstatic int 417221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 418221828Sgrehan{ 419221828Sgrehan stats.vmexit_pause++; 420221828Sgrehan 421221828Sgrehan if (fbsdrun_muxed() && !pause_noswitch) { 422221828Sgrehan *pvcpu = -1; 423221828Sgrehan return (VMEXIT_SWITCH); 424221828Sgrehan } else { 425221828Sgrehan return (VMEXIT_CONTINUE); 426221828Sgrehan } 427221828Sgrehan} 428221828Sgrehan 429221828Sgrehanstatic int 430221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 431221828Sgrehan{ 432221828Sgrehan stats.vmexit_mtrap++; 433221828Sgrehan 434221828Sgrehan return (VMEXIT_RESTART); 435221828Sgrehan} 436221828Sgrehan 437234761Sgrehanstatic int 438234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 439234761Sgrehan{ 440241744Sgrehan int err; 441234761Sgrehan stats.vmexit_paging++; 442234761Sgrehan 443243651Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, 444243640Sneel &vmexit->u.paging.vie); 445241744Sgrehan 446241744Sgrehan if (err) { 447241744Sgrehan if (err == EINVAL) { 448242385Sgrehan fprintf(stderr, 449242385Sgrehan "Failed to emulate instruction at 0x%lx\n", 450242385Sgrehan vmexit->rip); 451241744Sgrehan } else if (err == ESRCH) { 452242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 453242385Sgrehan vmexit->u.paging.gpa); 454241744Sgrehan } 455241744Sgrehan 456234761Sgrehan return (VMEXIT_ABORT); 457234761Sgrehan } 458234761Sgrehan 459234761Sgrehan return (VMEXIT_CONTINUE); 460234761Sgrehan} 461234761Sgrehan 462221828Sgrehanstatic void 463221828Sgrehansigalrm(int sig) 464221828Sgrehan{ 465221828Sgrehan 
return; 466221828Sgrehan} 467221828Sgrehan 468221828Sgrehanstatic void 469221828Sgrehansetup_timeslice(void) 470221828Sgrehan{ 471221828Sgrehan struct sigaction sa; 472221828Sgrehan struct itimerval itv; 473221828Sgrehan int error; 474221828Sgrehan 475221828Sgrehan /* 476221828Sgrehan * Setup a realtime timer to generate a SIGALRM at a 477221828Sgrehan * frequency of 'guest_tslice' ticks per second. 478221828Sgrehan */ 479221828Sgrehan sigemptyset(&sa.sa_mask); 480221828Sgrehan sa.sa_flags = 0; 481221828Sgrehan sa.sa_handler = sigalrm; 482221828Sgrehan 483221828Sgrehan error = sigaction(SIGALRM, &sa, NULL); 484221828Sgrehan assert(error == 0); 485221828Sgrehan 486221828Sgrehan itv.it_interval.tv_sec = 0; 487221828Sgrehan itv.it_interval.tv_usec = 1000000 / guest_tslice; 488221828Sgrehan itv.it_value.tv_sec = 0; 489221828Sgrehan itv.it_value.tv_usec = 1000000 / guest_tslice; 490221828Sgrehan 491221828Sgrehan error = setitimer(ITIMER_REAL, &itv, NULL); 492221828Sgrehan assert(error == 0); 493221828Sgrehan} 494221828Sgrehan 495221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 496234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 497234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 498234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 499234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 500234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 501234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 502240912Sneel [VM_EXITCODE_PAGING] = vmexit_paging, 503240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 504221828Sgrehan}; 505221828Sgrehan 506221828Sgrehanstatic void 507221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 508221828Sgrehan{ 509246686Sneel cpuset_t mask; 510221828Sgrehan int error, rc, prevcpu; 511221828Sgrehan 512221828Sgrehan if (guest_vcpu_mux) 513221828Sgrehan setup_timeslice(); 514221828Sgrehan 515221828Sgrehan if (pincpu >= 0) { 516246686Sneel CPU_ZERO(&mask); 517246686Sneel CPU_SET(pincpu + vcpu, &mask); 518246686Sneel error = 
pthread_setaffinity_np(pthread_self(), 519246686Sneel sizeof(mask), &mask); 520221828Sgrehan assert(error == 0); 521221828Sgrehan } 522221828Sgrehan 523221828Sgrehan while (1) { 524221828Sgrehan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 525241490Sneel if (error != 0) { 526241490Sneel /* 527241490Sneel * It is possible that 'vmmctl' or some other process 528241490Sneel * has transitioned the vcpu to CANNOT_RUN state right 529241490Sneel * before we tried to transition it to RUNNING. 530241490Sneel * 531241490Sneel * This is expected to be temporary so just retry. 532241490Sneel */ 533241490Sneel if (errno == EBUSY) 534241490Sneel continue; 535241490Sneel else 536241490Sneel break; 537241490Sneel } 538221828Sgrehan 539221828Sgrehan prevcpu = vcpu; 540221828Sgrehan rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], 541221828Sgrehan &vcpu); 542221828Sgrehan switch (rc) { 543221828Sgrehan case VMEXIT_SWITCH: 544221828Sgrehan assert(guest_vcpu_mux); 545221828Sgrehan if (vcpu == -1) { 546221828Sgrehan stats.cpu_switch_rotate++; 547221828Sgrehan vcpu = fbsdrun_get_next_cpu(prevcpu); 548221828Sgrehan } else { 549221828Sgrehan stats.cpu_switch_direct++; 550221828Sgrehan } 551221828Sgrehan /* fall through */ 552221828Sgrehan case VMEXIT_CONTINUE: 553221828Sgrehan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 554221828Sgrehan break; 555221828Sgrehan case VMEXIT_RESTART: 556221828Sgrehan rip = vmexit[vcpu].rip; 557221828Sgrehan break; 558221828Sgrehan case VMEXIT_RESET: 559221828Sgrehan exit(0); 560221828Sgrehan default: 561221828Sgrehan exit(1); 562221828Sgrehan } 563221828Sgrehan } 564221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 565221828Sgrehan} 566221828Sgrehan 567245020Sneelstatic int 568245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 569245020Sneel{ 570245020Sneel int tmp, error; 571221828Sgrehan 572245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 573245020Sneel 574245020Sneel /* 
575245020Sneel * The guest is allowed to spinup more than one processor only if the 576245020Sneel * UNRESTRICTED_GUEST capability is available. 577245020Sneel */ 578245020Sneel if (error == 0) 579245020Sneel return (VM_MAXCPU); 580245020Sneel else 581245020Sneel return (1); 582245020Sneel} 583245020Sneel 584221828Sgrehanint 585221828Sgrehanmain(int argc, char *argv[]) 586221828Sgrehan{ 587242192Sneel int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; 588245020Sneel int max_vcpus; 589221828Sgrehan struct vmctx *ctx; 590221828Sgrehan uint64_t rip; 591248477Sneel size_t memsize; 592221828Sgrehan 593242192Sneel bvmcons = 0; 594221828Sgrehan inject_bkpt = 0; 595221828Sgrehan progname = basename(argv[0]); 596221828Sgrehan gdb_port = DEFAULT_GDB_PORT; 597221828Sgrehan guest_ncpus = 1; 598239043Sneel ioapic = 0; 599248477Sneel memsize = 256 * MB; 600221828Sgrehan 601248477Sneel while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:n:m:")) != -1) { 602221828Sgrehan switch (c) { 603240943Sneel case 'a': 604240943Sneel disable_x2apic = 1; 605240943Sneel break; 606243327Sgrehan case 'A': 607243327Sgrehan acpi = 1; 608243327Sgrehan break; 609242192Sneel case 'b': 610242192Sneel bvmcons = 1; 611242192Sneel break; 612221828Sgrehan case 'B': 613221828Sgrehan inject_bkpt = 1; 614221828Sgrehan break; 615221828Sgrehan case 'x': 616221828Sgrehan guest_vcpu_mux = 1; 617221828Sgrehan break; 618221828Sgrehan case 'p': 619221828Sgrehan pincpu = atoi(optarg); 620221828Sgrehan break; 621221828Sgrehan case 'c': 622221828Sgrehan guest_ncpus = atoi(optarg); 623221828Sgrehan break; 624221828Sgrehan case 'g': 625221828Sgrehan gdb_port = atoi(optarg); 626221828Sgrehan break; 627221828Sgrehan case 'z': 628221828Sgrehan guest_hz = atoi(optarg); 629221828Sgrehan break; 630221828Sgrehan case 't': 631221828Sgrehan guest_tslice = atoi(optarg); 632221828Sgrehan break; 633221828Sgrehan case 's': 634249916Sneel if (pci_parse_slot(optarg, 0) != 0) 635249916Sneel exit(1); 636249916Sneel else 
637249916Sneel break; 638234938Sgrehan case 'S': 639249916Sneel if (pci_parse_slot(optarg, 1) != 0) 640249916Sneel exit(1); 641249916Sneel else 642249916Sneel break; 643221828Sgrehan case 'm': 644248477Sneel memsize = strtoul(optarg, NULL, 0) * MB; 645221828Sgrehan break; 646221828Sgrehan case 'H': 647221828Sgrehan guest_vmexit_on_hlt = 1; 648221828Sgrehan break; 649239043Sneel case 'I': 650239043Sneel ioapic = 1; 651239043Sneel break; 652221828Sgrehan case 'P': 653221828Sgrehan guest_vmexit_on_pause = 1; 654221828Sgrehan break; 655222105Sgrehan case 'e': 656222105Sgrehan strictio = 1; 657222105Sgrehan break; 658221828Sgrehan case 'h': 659221828Sgrehan usage(0); 660221828Sgrehan default: 661221828Sgrehan usage(1); 662221828Sgrehan } 663221828Sgrehan } 664221828Sgrehan argc -= optind; 665221828Sgrehan argv += optind; 666221828Sgrehan 667221828Sgrehan if (argc != 1) 668221828Sgrehan usage(1); 669221828Sgrehan 670221828Sgrehan /* No need to mux if guest is uni-processor */ 671221828Sgrehan if (guest_ncpus <= 1) 672221828Sgrehan guest_vcpu_mux = 0; 673221828Sgrehan 674221828Sgrehan /* vmexit on hlt if guest is muxed */ 675221828Sgrehan if (guest_vcpu_mux) { 676221828Sgrehan guest_vmexit_on_hlt = 1; 677221828Sgrehan guest_vmexit_on_pause = 1; 678221828Sgrehan } 679221828Sgrehan 680221828Sgrehan vmname = argv[0]; 681221828Sgrehan 682221828Sgrehan ctx = vm_open(vmname); 683221828Sgrehan if (ctx == NULL) { 684221828Sgrehan perror("vm_open"); 685221828Sgrehan exit(1); 686221828Sgrehan } 687221828Sgrehan 688245020Sneel max_vcpus = num_vcpus_allowed(ctx); 689245020Sneel if (guest_ncpus > max_vcpus) { 690245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 691245020Sneel guest_ncpus, max_vcpus); 692245020Sneel exit(1); 693245020Sneel } 694245020Sneel 695221828Sgrehan if (fbsdrun_vmexit_on_hlt()) { 696221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); 697221828Sgrehan if (err < 0) { 698242385Sgrehan fprintf(stderr, "VM exit on HLT not 
supported\n"); 699221828Sgrehan exit(1); 700221828Sgrehan } 701221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); 702221828Sgrehan handler[VM_EXITCODE_HLT] = vmexit_hlt; 703221828Sgrehan } 704221828Sgrehan 705221828Sgrehan if (fbsdrun_vmexit_on_pause()) { 706221828Sgrehan /* 707221828Sgrehan * pause exit support required for this mode 708221828Sgrehan */ 709221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); 710221828Sgrehan if (err < 0) { 711242385Sgrehan fprintf(stderr, 712242385Sgrehan "SMP mux requested, no pause support\n"); 713221828Sgrehan exit(1); 714221828Sgrehan } 715221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); 716221828Sgrehan handler[VM_EXITCODE_PAUSE] = vmexit_pause; 717221828Sgrehan } 718221828Sgrehan 719240943Sneel if (fbsdrun_disable_x2apic()) 720240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); 721240943Sneel else 722240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); 723240943Sneel 724240943Sneel if (err) { 725242385Sgrehan fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 726240943Sneel exit(1); 727240943Sneel } 728240943Sneel 729248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 730248477Sneel if (err) { 731248477Sneel fprintf(stderr, "Unable to setup memory (%d)\n", err); 732248477Sneel exit(1); 733221828Sgrehan } 734221828Sgrehan 735249343Sneel init_mem(); 736221828Sgrehan init_inout(); 737221828Sgrehan init_pci(ctx); 738239045Sneel if (ioapic) 739239045Sneel ioapic_init(0); 740221828Sgrehan 741221828Sgrehan if (gdb_port != 0) 742221828Sgrehan init_dbgport(gdb_port); 743221828Sgrehan 744242192Sneel if (bvmcons) 745242192Sneel init_bvmcons(); 746242192Sneel 747221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 748221828Sgrehan assert(error == 0); 749221828Sgrehan 750221828Sgrehan if (inject_bkpt) { 751221828Sgrehan error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); 752221828Sgrehan assert(error == 0); 
753221828Sgrehan } 754221828Sgrehan 755221828Sgrehan /* 756221828Sgrehan * build the guest tables, MP etc. 757221828Sgrehan */ 758242131Sgrehan mptable_build(ctx, guest_ncpus, ioapic); 759221828Sgrehan 760243327Sgrehan if (acpi) { 761243327Sgrehan error = acpi_build(ctx, guest_ncpus, ioapic); 762243327Sgrehan assert(error == 0); 763243327Sgrehan } 764243327Sgrehan 765221828Sgrehan /* 766221828Sgrehan * Add CPU 0 767221828Sgrehan */ 768221828Sgrehan fbsdrun_addcpu(ctx, BSP, rip); 769221828Sgrehan 770221828Sgrehan /* 771221828Sgrehan * Head off to the main event dispatch loop 772221828Sgrehan */ 773221828Sgrehan mevent_dispatch(); 774221828Sgrehan 775221828Sgrehan exit(1); 776221828Sgrehan} 777