/* bhyverun.c revision 240943 */
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 
25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36221828Sgrehan#include <machine/segments.h> 37221828Sgrehan 38221828Sgrehan#include <stdio.h> 39221828Sgrehan#include <stdlib.h> 40221828Sgrehan#include <libgen.h> 41221828Sgrehan#include <unistd.h> 42221828Sgrehan#include <assert.h> 43221828Sgrehan#include <errno.h> 44221828Sgrehan#include <signal.h> 45221828Sgrehan#include <pthread.h> 46221828Sgrehan 47221828Sgrehan#include <machine/vmm.h> 48221828Sgrehan#include <vmmapi.h> 49221828Sgrehan 50221828Sgrehan#include "fbsdrun.h" 51221828Sgrehan#include "inout.h" 52221828Sgrehan#include "dbgport.h" 53221828Sgrehan#include "mevent.h" 54221828Sgrehan#include "pci_emul.h" 55221828Sgrehan#include "xmsr.h" 56234761Sgrehan#include "instruction_emul.h" 57239045Sneel#include "ioapic.h" 58240912Sneel#include "spinup_ap.h" 59221828Sgrehan 60221828Sgrehan#define DEFAULT_GUEST_HZ 100 61221828Sgrehan#define DEFAULT_GUEST_TSLICE 200 62221828Sgrehan 63221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 64221828Sgrehan 65221828Sgrehan#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ 66221828Sgrehan#define VMEXIT_CONTINUE 1 /* continue from next instruction */ 67221828Sgrehan#define VMEXIT_RESTART 2 /* restart current instruction */ 68221828Sgrehan#define VMEXIT_ABORT 3 /* abort the vm run loop */ 69221828Sgrehan#define VMEXIT_RESET 4 /* guest machine has reset */ 70221828Sgrehan 71221828Sgrehan#define MB (1024UL * 1024) 72221828Sgrehan#define GB (1024UL * MB) 73221828Sgrehan 74221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 75221828Sgrehan 76221828Sgrehanint guest_tslice = DEFAULT_GUEST_TSLICE; 77221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ; 78221828Sgrehanchar 
*vmname; 79221828Sgrehan 80221828Sgrehanu_long lomem_sz; 81221828Sgrehanu_long himem_sz; 82221828Sgrehan 83221828Sgrehanint guest_ncpus; 84221828Sgrehan 85221828Sgrehanstatic int pincpu = -1; 86221828Sgrehanstatic int guest_vcpu_mux; 87240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; 88221828Sgrehan 89221828Sgrehanstatic int foundcpus; 90221828Sgrehan 91222105Sgrehanstatic int strictio; 92222105Sgrehan 93221828Sgrehanstatic char *lomem_addr; 94221828Sgrehanstatic char *himem_addr; 95221828Sgrehan 96221828Sgrehanstatic char *progname; 97221828Sgrehanstatic const int BSP = 0; 98221828Sgrehan 99221828Sgrehanstatic int cpumask; 100221828Sgrehan 101221828Sgrehanstatic void *oem_tbl_start; 102221828Sgrehanstatic int oem_tbl_size; 103221828Sgrehan 104221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 105221828Sgrehan 106221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU]; 107221828Sgrehan 108221828Sgrehanstruct fbsdstats { 109221828Sgrehan uint64_t vmexit_bogus; 110221828Sgrehan uint64_t vmexit_bogus_switch; 111221828Sgrehan uint64_t vmexit_hlt; 112221828Sgrehan uint64_t vmexit_pause; 113221828Sgrehan uint64_t vmexit_mtrap; 114234761Sgrehan uint64_t vmexit_paging; 115221828Sgrehan uint64_t cpu_switch_rotate; 116221828Sgrehan uint64_t cpu_switch_direct; 117221828Sgrehan int io_reset; 118221828Sgrehan} stats; 119221828Sgrehan 120221828Sgrehanstruct mt_vmm_info { 121221828Sgrehan pthread_t mt_thr; 122221828Sgrehan struct vmctx *mt_ctx; 123221828Sgrehan int mt_vcpu; 124221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 125221828Sgrehan 126221828Sgrehanstatic void 127221828Sgrehanusage(int code) 128221828Sgrehan{ 129221828Sgrehan 130221828Sgrehan fprintf(stderr, 131240943Sneel "Usage: %s [-aehBHIP][-g <gdb port>][-z <hz>][-s <pci>]" 132239043Sneel "[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem] <vm>\n" 133240943Sneel " -a: local apic is in XAPIC mode (default is X2APIC)\n" 134221828Sgrehan " -g: gdb port (default is %d and 0 
means don't open)\n" 135221828Sgrehan " -c: # cpus (default 1)\n" 136221828Sgrehan " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" 137221828Sgrehan " -B: inject breakpoint exception on vm entry\n" 138221828Sgrehan " -H: vmexit from the guest on hlt\n" 139239043Sneel " -I: present an ioapic to the guest\n" 140221828Sgrehan " -P: vmexit from the guest on pause\n" 141222105Sgrehan " -e: exit on unhandled i/o access\n" 142221828Sgrehan " -h: help\n" 143221828Sgrehan " -z: guest hz (default is %d)\n" 144221828Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 145234938Sgrehan " -S: <slot,driver,configinfo> legacy PCI slot config\n" 146221828Sgrehan " -n: <slot,name> PCI slot naming\n" 147221828Sgrehan " -m: lowmem in MB\n" 148221828Sgrehan " -M: highmem in MB\n" 149221828Sgrehan " -x: mux vcpus to 1 hcpu\n" 150221828Sgrehan " -t: mux vcpu timeslice hz (default %d)\n", 151221828Sgrehan progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, 152221828Sgrehan DEFAULT_GUEST_TSLICE); 153221828Sgrehan exit(code); 154221828Sgrehan} 155221828Sgrehan 156221828Sgrehanvoid * 157221828Sgrehanpaddr_guest2host(uintptr_t gaddr) 158221828Sgrehan{ 159221828Sgrehan if (lomem_sz == 0) 160221828Sgrehan return (NULL); 161221828Sgrehan 162221828Sgrehan if (gaddr < lomem_sz) { 163221828Sgrehan return ((void *)(lomem_addr + gaddr)); 164221828Sgrehan } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { 165221828Sgrehan return ((void *)(himem_addr + gaddr - 4*GB)); 166221828Sgrehan } else 167221828Sgrehan return (NULL); 168221828Sgrehan} 169221828Sgrehan 170221828Sgrehanvoid 171221828Sgrehanfbsdrun_add_oemtbl(void *tbl, int tblsz) 172221828Sgrehan{ 173221828Sgrehan oem_tbl_start = tbl; 174221828Sgrehan oem_tbl_size = tblsz; 175221828Sgrehan} 176221828Sgrehan 177221828Sgrehanint 178240943Sneelfbsdrun_disable_x2apic(void) 179240943Sneel{ 180240943Sneel 181240943Sneel return (disable_x2apic); 182240943Sneel} 183240943Sneel 184240943Sneelint 185221828Sgrehanfbsdrun_vmexit_on_pause(void) 
186221828Sgrehan{ 187221828Sgrehan 188221828Sgrehan return (guest_vmexit_on_pause); 189221828Sgrehan} 190221828Sgrehan 191221828Sgrehanint 192221828Sgrehanfbsdrun_vmexit_on_hlt(void) 193221828Sgrehan{ 194221828Sgrehan 195221828Sgrehan return (guest_vmexit_on_hlt); 196221828Sgrehan} 197221828Sgrehan 198221828Sgrehanint 199221828Sgrehanfbsdrun_muxed(void) 200221828Sgrehan{ 201221828Sgrehan 202221828Sgrehan return (guest_vcpu_mux); 203221828Sgrehan} 204221828Sgrehan 205221942Sjhbstatic void * 206221828Sgrehanfbsdrun_start_thread(void *param) 207221828Sgrehan{ 208221828Sgrehan int vcpu; 209221828Sgrehan struct mt_vmm_info *mtp = param; 210221828Sgrehan 211221828Sgrehan vcpu = mtp->mt_vcpu; 212221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 213221828Sgrehan 214221828Sgrehan /* not reached */ 215221828Sgrehan exit(1); 216221828Sgrehan return (NULL); 217221828Sgrehan} 218221828Sgrehan 219221828Sgrehanvoid 220221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) 221221828Sgrehan{ 222221828Sgrehan int error; 223221828Sgrehan 224221828Sgrehan if (cpumask & (1 << vcpu)) { 225221828Sgrehan printf("addcpu: attempting to add existing cpu %d\n", vcpu); 226221828Sgrehan exit(1); 227221828Sgrehan } 228221828Sgrehan 229221828Sgrehan cpumask |= 1 << vcpu; 230221828Sgrehan foundcpus++; 231221828Sgrehan 232221828Sgrehan /* 233221828Sgrehan * Set up the vmexit struct to allow execution to start 234221828Sgrehan * at the given RIP 235221828Sgrehan */ 236221828Sgrehan vmexit[vcpu].rip = rip; 237221828Sgrehan vmexit[vcpu].inst_length = 0; 238221828Sgrehan 239221828Sgrehan if (vcpu == BSP || !guest_vcpu_mux){ 240221828Sgrehan mt_vmm_info[vcpu].mt_ctx = ctx; 241221828Sgrehan mt_vmm_info[vcpu].mt_vcpu = vcpu; 242221828Sgrehan 243221828Sgrehan error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, 244221828Sgrehan fbsdrun_start_thread, &mt_vmm_info[vcpu]); 245221828Sgrehan assert(error == 0); 246221828Sgrehan } 247221828Sgrehan} 248221828Sgrehan 
249221828Sgrehanstatic int 250221828Sgrehanfbsdrun_get_next_cpu(int curcpu) 251221828Sgrehan{ 252221828Sgrehan 253221828Sgrehan /* 254221828Sgrehan * Get the next available CPU. Assumes they arrive 255221828Sgrehan * in ascending order with no gaps. 256221828Sgrehan */ 257221828Sgrehan return ((curcpu + 1) % foundcpus); 258221828Sgrehan} 259221828Sgrehan 260221942Sjhbstatic int 261221828Sgrehanvmexit_catch_reset(void) 262221828Sgrehan{ 263221828Sgrehan stats.io_reset++; 264221828Sgrehan return (VMEXIT_RESET); 265221828Sgrehan} 266221828Sgrehan 267221942Sjhbstatic int 268221828Sgrehanvmexit_catch_inout(void) 269221828Sgrehan{ 270221828Sgrehan return (VMEXIT_ABORT); 271221828Sgrehan} 272221828Sgrehan 273221942Sjhbstatic int 274221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 275221828Sgrehan uint32_t eax) 276221828Sgrehan{ 277221828Sgrehan#if PG_DEBUG /* put all types of debug here */ 278221828Sgrehan if (eax == 0) { 279221828Sgrehan pause_noswitch = 1; 280221828Sgrehan } else if (eax == 1) { 281221828Sgrehan pause_noswitch = 0; 282221828Sgrehan } else { 283221828Sgrehan pause_noswitch = 0; 284221828Sgrehan if (eax == 5) { 285221828Sgrehan vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); 286221828Sgrehan } 287221828Sgrehan } 288221828Sgrehan#endif 289221828Sgrehan return (VMEXIT_CONTINUE); 290221828Sgrehan} 291221828Sgrehan 292221828Sgrehanstatic int 293221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 294221828Sgrehan{ 295221828Sgrehan int error; 296221828Sgrehan int bytes, port, in, out; 297221828Sgrehan uint32_t eax; 298221828Sgrehan int vcpu; 299221828Sgrehan 300221828Sgrehan vcpu = *pvcpu; 301221828Sgrehan 302221828Sgrehan port = vme->u.inout.port; 303221828Sgrehan bytes = vme->u.inout.bytes; 304221828Sgrehan eax = vme->u.inout.eax; 305221828Sgrehan in = vme->u.inout.in; 306221828Sgrehan out = !in; 307221828Sgrehan 308221828Sgrehan /* We don't deal with these */ 309221828Sgrehan if 
(vme->u.inout.string || vme->u.inout.rep) 310221828Sgrehan return (VMEXIT_ABORT); 311221828Sgrehan 312221828Sgrehan /* Special case of guest reset */ 313221828Sgrehan if (out && port == 0x64 && (uint8_t)eax == 0xFE) 314221828Sgrehan return (vmexit_catch_reset()); 315221828Sgrehan 316221828Sgrehan /* Extra-special case of host notifications */ 317221828Sgrehan if (out && port == GUEST_NIO_PORT) 318221828Sgrehan return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); 319221828Sgrehan 320222105Sgrehan error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); 321221828Sgrehan if (error == 0 && in) 322221828Sgrehan error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); 323221828Sgrehan 324221828Sgrehan if (error == 0) 325221828Sgrehan return (VMEXIT_CONTINUE); 326221828Sgrehan else { 327221828Sgrehan fprintf(stderr, "Unhandled %s%c 0x%04x\n", 328221828Sgrehan in ? "in" : "out", 329221828Sgrehan bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 330221828Sgrehan return (vmexit_catch_inout()); 331221828Sgrehan } 332221828Sgrehan} 333221828Sgrehan 334221828Sgrehanstatic int 335221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 336221828Sgrehan{ 337221828Sgrehan printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu); 338221828Sgrehan return (VMEXIT_ABORT); 339221828Sgrehan} 340221828Sgrehan 341221828Sgrehanstatic int 342221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 343221828Sgrehan{ 344221828Sgrehan int newcpu; 345221828Sgrehan int retval = VMEXIT_CONTINUE; 346221828Sgrehan 347221828Sgrehan newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); 348221828Sgrehan 349221828Sgrehan if (guest_vcpu_mux && *pvcpu != newcpu) { 350221828Sgrehan retval = VMEXIT_SWITCH; 351221828Sgrehan *pvcpu = newcpu; 352221828Sgrehan } 353221828Sgrehan 354221828Sgrehan return (retval); 355221828Sgrehan} 356221828Sgrehan 357221828Sgrehanstatic int 358240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct 
vm_exit *vme, int *pvcpu) 359240912Sneel{ 360240912Sneel int newcpu; 361240912Sneel int retval = VMEXIT_CONTINUE; 362240912Sneel 363240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 364240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 365240912Sneel 366240912Sneel if (guest_vcpu_mux && *pvcpu != newcpu) { 367240912Sneel retval = VMEXIT_SWITCH; 368240912Sneel *pvcpu = newcpu; 369240912Sneel } 370240912Sneel 371240912Sneel return (retval); 372240912Sneel} 373240912Sneel 374240912Sneelstatic int 375221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 376221828Sgrehan{ 377221828Sgrehan 378221828Sgrehan printf("vm exit[%d]\n", *pvcpu); 379221828Sgrehan printf("\treason\t\tVMX\n"); 380221828Sgrehan printf("\trip\t\t0x%016lx\n", vmexit->rip); 381221828Sgrehan printf("\tinst_length\t%d\n", vmexit->inst_length); 382221828Sgrehan printf("\terror\t\t%d\n", vmexit->u.vmx.error); 383221828Sgrehan printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 384221828Sgrehan printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); 385221828Sgrehan 386221828Sgrehan return (VMEXIT_ABORT); 387221828Sgrehan} 388221828Sgrehan 389221828Sgrehanstatic int bogus_noswitch = 1; 390221828Sgrehan 391221828Sgrehanstatic int 392221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 393221828Sgrehan{ 394221828Sgrehan stats.vmexit_bogus++; 395221828Sgrehan 396221828Sgrehan if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { 397221828Sgrehan return (VMEXIT_RESTART); 398221828Sgrehan } else { 399221828Sgrehan stats.vmexit_bogus_switch++; 400221828Sgrehan vmexit->inst_length = 0; 401221828Sgrehan *pvcpu = -1; 402221828Sgrehan return (VMEXIT_SWITCH); 403221828Sgrehan } 404221828Sgrehan} 405221828Sgrehan 406221828Sgrehanstatic int 407221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 408221828Sgrehan{ 409221828Sgrehan stats.vmexit_hlt++; 410221828Sgrehan if (fbsdrun_muxed()) { 411221828Sgrehan 
*pvcpu = -1; 412221828Sgrehan return (VMEXIT_SWITCH); 413221828Sgrehan } else { 414221828Sgrehan /* 415221828Sgrehan * Just continue execution with the next instruction. We use 416221828Sgrehan * the HLT VM exit as a way to be friendly with the host 417221828Sgrehan * scheduler. 418221828Sgrehan */ 419221828Sgrehan return (VMEXIT_CONTINUE); 420221828Sgrehan } 421221828Sgrehan} 422221828Sgrehan 423221828Sgrehanstatic int pause_noswitch; 424221828Sgrehan 425221828Sgrehanstatic int 426221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 427221828Sgrehan{ 428221828Sgrehan stats.vmexit_pause++; 429221828Sgrehan 430221828Sgrehan if (fbsdrun_muxed() && !pause_noswitch) { 431221828Sgrehan *pvcpu = -1; 432221828Sgrehan return (VMEXIT_SWITCH); 433221828Sgrehan } else { 434221828Sgrehan return (VMEXIT_CONTINUE); 435221828Sgrehan } 436221828Sgrehan} 437221828Sgrehan 438221828Sgrehanstatic int 439221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 440221828Sgrehan{ 441221828Sgrehan stats.vmexit_mtrap++; 442221828Sgrehan 443221828Sgrehan return (VMEXIT_RESTART); 444221828Sgrehan} 445221828Sgrehan 446234761Sgrehanstatic int 447234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 448234761Sgrehan{ 449234761Sgrehan 450234761Sgrehan stats.vmexit_paging++; 451234761Sgrehan 452234761Sgrehan if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) { 453234761Sgrehan printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip); 454234761Sgrehan return (VMEXIT_ABORT); 455234761Sgrehan } 456234761Sgrehan 457234761Sgrehan return (VMEXIT_CONTINUE); 458234761Sgrehan} 459234761Sgrehan 460221828Sgrehanstatic void 461221828Sgrehansigalrm(int sig) 462221828Sgrehan{ 463221828Sgrehan return; 464221828Sgrehan} 465221828Sgrehan 466221828Sgrehanstatic void 467221828Sgrehansetup_timeslice(void) 468221828Sgrehan{ 469221828Sgrehan struct sigaction sa; 470221828Sgrehan struct itimerval itv; 
471221828Sgrehan int error; 472221828Sgrehan 473221828Sgrehan /* 474221828Sgrehan * Setup a realtime timer to generate a SIGALRM at a 475221828Sgrehan * frequency of 'guest_tslice' ticks per second. 476221828Sgrehan */ 477221828Sgrehan sigemptyset(&sa.sa_mask); 478221828Sgrehan sa.sa_flags = 0; 479221828Sgrehan sa.sa_handler = sigalrm; 480221828Sgrehan 481221828Sgrehan error = sigaction(SIGALRM, &sa, NULL); 482221828Sgrehan assert(error == 0); 483221828Sgrehan 484221828Sgrehan itv.it_interval.tv_sec = 0; 485221828Sgrehan itv.it_interval.tv_usec = 1000000 / guest_tslice; 486221828Sgrehan itv.it_value.tv_sec = 0; 487221828Sgrehan itv.it_value.tv_usec = 1000000 / guest_tslice; 488221828Sgrehan 489221828Sgrehan error = setitimer(ITIMER_REAL, &itv, NULL); 490221828Sgrehan assert(error == 0); 491221828Sgrehan} 492221828Sgrehan 493221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 494234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 495234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 496234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 497234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 498234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 499234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 500240912Sneel [VM_EXITCODE_PAGING] = vmexit_paging, 501240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 502221828Sgrehan}; 503221828Sgrehan 504221828Sgrehanstatic void 505221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 506221828Sgrehan{ 507221828Sgrehan int error, rc, prevcpu; 508221828Sgrehan 509221828Sgrehan if (guest_vcpu_mux) 510221828Sgrehan setup_timeslice(); 511221828Sgrehan 512221828Sgrehan if (pincpu >= 0) { 513221828Sgrehan error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); 514221828Sgrehan assert(error == 0); 515221828Sgrehan } 516221828Sgrehan 517221828Sgrehan while (1) { 518221828Sgrehan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 519221828Sgrehan if (error != 0) 520221828Sgrehan break; 521221828Sgrehan 522221828Sgrehan prevcpu = 
vcpu; 523221828Sgrehan rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], 524221828Sgrehan &vcpu); 525221828Sgrehan switch (rc) { 526221828Sgrehan case VMEXIT_SWITCH: 527221828Sgrehan assert(guest_vcpu_mux); 528221828Sgrehan if (vcpu == -1) { 529221828Sgrehan stats.cpu_switch_rotate++; 530221828Sgrehan vcpu = fbsdrun_get_next_cpu(prevcpu); 531221828Sgrehan } else { 532221828Sgrehan stats.cpu_switch_direct++; 533221828Sgrehan } 534221828Sgrehan /* fall through */ 535221828Sgrehan case VMEXIT_CONTINUE: 536221828Sgrehan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 537221828Sgrehan break; 538221828Sgrehan case VMEXIT_RESTART: 539221828Sgrehan rip = vmexit[vcpu].rip; 540221828Sgrehan break; 541221828Sgrehan case VMEXIT_RESET: 542221828Sgrehan exit(0); 543221828Sgrehan default: 544221828Sgrehan exit(1); 545221828Sgrehan } 546221828Sgrehan } 547221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 548221828Sgrehan} 549221828Sgrehan 550221828Sgrehan 551221828Sgrehanint 552221828Sgrehanmain(int argc, char *argv[]) 553221828Sgrehan{ 554239043Sneel int c, error, gdb_port, inject_bkpt, tmp, err, ioapic; 555221828Sgrehan struct vmctx *ctx; 556221828Sgrehan uint64_t rip; 557221828Sgrehan 558221828Sgrehan inject_bkpt = 0; 559221828Sgrehan progname = basename(argv[0]); 560221828Sgrehan gdb_port = DEFAULT_GDB_PORT; 561221828Sgrehan guest_ncpus = 1; 562239043Sneel ioapic = 0; 563221828Sgrehan 564240943Sneel while ((c = getopt(argc, argv, "aehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { 565221828Sgrehan switch (c) { 566240943Sneel case 'a': 567240943Sneel disable_x2apic = 1; 568240943Sneel break; 569221828Sgrehan case 'B': 570221828Sgrehan inject_bkpt = 1; 571221828Sgrehan break; 572221828Sgrehan case 'x': 573221828Sgrehan guest_vcpu_mux = 1; 574221828Sgrehan break; 575221828Sgrehan case 'p': 576221828Sgrehan pincpu = atoi(optarg); 577221828Sgrehan break; 578221828Sgrehan case 'c': 579221828Sgrehan guest_ncpus = atoi(optarg); 580221828Sgrehan break; 
581221828Sgrehan case 'g': 582221828Sgrehan gdb_port = atoi(optarg); 583221828Sgrehan break; 584221828Sgrehan case 'z': 585221828Sgrehan guest_hz = atoi(optarg); 586221828Sgrehan break; 587221828Sgrehan case 't': 588221828Sgrehan guest_tslice = atoi(optarg); 589221828Sgrehan break; 590221828Sgrehan case 's': 591234938Sgrehan pci_parse_slot(optarg, 0); 592221828Sgrehan break; 593234938Sgrehan case 'S': 594234938Sgrehan pci_parse_slot(optarg, 1); 595234938Sgrehan break; 596221828Sgrehan case 'n': 597221828Sgrehan pci_parse_name(optarg); 598221828Sgrehan break; 599221828Sgrehan case 'm': 600221828Sgrehan lomem_sz = strtoul(optarg, NULL, 0) * MB; 601221828Sgrehan break; 602221828Sgrehan case 'M': 603221828Sgrehan himem_sz = strtoul(optarg, NULL, 0) * MB; 604221828Sgrehan break; 605221828Sgrehan case 'H': 606221828Sgrehan guest_vmexit_on_hlt = 1; 607221828Sgrehan break; 608239043Sneel case 'I': 609239043Sneel ioapic = 1; 610239043Sneel break; 611221828Sgrehan case 'P': 612221828Sgrehan guest_vmexit_on_pause = 1; 613221828Sgrehan break; 614222105Sgrehan case 'e': 615222105Sgrehan strictio = 1; 616222105Sgrehan break; 617221828Sgrehan case 'h': 618221828Sgrehan usage(0); 619221828Sgrehan default: 620221828Sgrehan usage(1); 621221828Sgrehan } 622221828Sgrehan } 623221828Sgrehan argc -= optind; 624221828Sgrehan argv += optind; 625221828Sgrehan 626221828Sgrehan if (argc != 1) 627221828Sgrehan usage(1); 628221828Sgrehan 629221828Sgrehan /* No need to mux if guest is uni-processor */ 630221828Sgrehan if (guest_ncpus <= 1) 631221828Sgrehan guest_vcpu_mux = 0; 632221828Sgrehan 633221828Sgrehan /* vmexit on hlt if guest is muxed */ 634221828Sgrehan if (guest_vcpu_mux) { 635221828Sgrehan guest_vmexit_on_hlt = 1; 636221828Sgrehan guest_vmexit_on_pause = 1; 637221828Sgrehan } 638221828Sgrehan 639221828Sgrehan vmname = argv[0]; 640221828Sgrehan 641221828Sgrehan ctx = vm_open(vmname); 642221828Sgrehan if (ctx == NULL) { 643221828Sgrehan perror("vm_open"); 644221828Sgrehan exit(1); 
645221828Sgrehan } 646221828Sgrehan 647221828Sgrehan if (fbsdrun_vmexit_on_hlt()) { 648221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); 649221828Sgrehan if (err < 0) { 650221828Sgrehan printf("VM exit on HLT not supported\n"); 651221828Sgrehan exit(1); 652221828Sgrehan } 653221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); 654221828Sgrehan handler[VM_EXITCODE_HLT] = vmexit_hlt; 655221828Sgrehan } 656221828Sgrehan 657221828Sgrehan if (fbsdrun_vmexit_on_pause()) { 658221828Sgrehan /* 659221828Sgrehan * pause exit support required for this mode 660221828Sgrehan */ 661221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); 662221828Sgrehan if (err < 0) { 663221828Sgrehan printf("SMP mux requested, no pause support\n"); 664221828Sgrehan exit(1); 665221828Sgrehan } 666221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); 667221828Sgrehan handler[VM_EXITCODE_PAUSE] = vmexit_pause; 668221828Sgrehan } 669221828Sgrehan 670240943Sneel if (fbsdrun_disable_x2apic()) 671240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); 672240943Sneel else 673240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); 674240943Sneel 675240943Sneel if (err) { 676240943Sneel printf("Unable to set x2apic state (%d)\n", err); 677240943Sneel exit(1); 678240943Sneel } 679240943Sneel 680221828Sgrehan if (lomem_sz != 0) { 681221828Sgrehan lomem_addr = vm_map_memory(ctx, 0, lomem_sz); 682221828Sgrehan if (lomem_addr == (char *) MAP_FAILED) { 683221828Sgrehan lomem_sz = 0; 684221828Sgrehan } else if (himem_sz != 0) { 685221828Sgrehan himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); 686221828Sgrehan if (himem_addr == (char *) MAP_FAILED) { 687221828Sgrehan lomem_sz = 0; 688221828Sgrehan himem_sz = 0; 689221828Sgrehan } 690221828Sgrehan } 691221828Sgrehan } 692221828Sgrehan 693221828Sgrehan init_inout(); 694221828Sgrehan init_pci(ctx); 695239045Sneel if (ioapic) 696239045Sneel ioapic_init(0); 697221828Sgrehan 
698221828Sgrehan if (gdb_port != 0) 699221828Sgrehan init_dbgport(gdb_port); 700221828Sgrehan 701221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 702221828Sgrehan assert(error == 0); 703221828Sgrehan 704221828Sgrehan if (inject_bkpt) { 705221828Sgrehan error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); 706221828Sgrehan assert(error == 0); 707221828Sgrehan } 708221828Sgrehan 709221828Sgrehan /* 710221828Sgrehan * build the guest tables, MP etc. 711221828Sgrehan */ 712239043Sneel vm_build_tables(ctx, guest_ncpus, ioapic, oem_tbl_start, oem_tbl_size); 713221828Sgrehan 714221828Sgrehan /* 715221828Sgrehan * Add CPU 0 716221828Sgrehan */ 717221828Sgrehan fbsdrun_addcpu(ctx, BSP, rip); 718221828Sgrehan 719221828Sgrehan /* 720221828Sgrehan * Head off to the main event dispatch loop 721221828Sgrehan */ 722221828Sgrehan mevent_dispatch(); 723221828Sgrehan 724221828Sgrehan exit(1); 725221828Sgrehan} 726