/* bhyverun.c revision 242131 */
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 
25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36221828Sgrehan#include <machine/segments.h> 37221828Sgrehan 38221828Sgrehan#include <stdio.h> 39221828Sgrehan#include <stdlib.h> 40221828Sgrehan#include <libgen.h> 41221828Sgrehan#include <unistd.h> 42221828Sgrehan#include <assert.h> 43221828Sgrehan#include <errno.h> 44221828Sgrehan#include <signal.h> 45221828Sgrehan#include <pthread.h> 46221828Sgrehan 47221828Sgrehan#include <machine/vmm.h> 48221828Sgrehan#include <vmmapi.h> 49221828Sgrehan 50221828Sgrehan#include "fbsdrun.h" 51221828Sgrehan#include "inout.h" 52221828Sgrehan#include "dbgport.h" 53241744Sgrehan#include "mem.h" 54221828Sgrehan#include "mevent.h" 55242131Sgrehan#include "mptbl.h" 56221828Sgrehan#include "pci_emul.h" 57221828Sgrehan#include "xmsr.h" 58234761Sgrehan#include "instruction_emul.h" 59239045Sneel#include "ioapic.h" 60240912Sneel#include "spinup_ap.h" 61221828Sgrehan 62221828Sgrehan#define DEFAULT_GUEST_HZ 100 63221828Sgrehan#define DEFAULT_GUEST_TSLICE 200 64221828Sgrehan 65221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 66221828Sgrehan 67221828Sgrehan#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ 68221828Sgrehan#define VMEXIT_CONTINUE 1 /* continue from next instruction */ 69221828Sgrehan#define VMEXIT_RESTART 2 /* restart current instruction */ 70221828Sgrehan#define VMEXIT_ABORT 3 /* abort the vm run loop */ 71221828Sgrehan#define VMEXIT_RESET 4 /* guest machine has reset */ 72221828Sgrehan 73221828Sgrehan#define MB (1024UL * 1024) 74221828Sgrehan#define GB (1024UL * MB) 75221828Sgrehan 76221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 77221828Sgrehan 78221828Sgrehanint guest_tslice = DEFAULT_GUEST_TSLICE; 
79221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ; 80221828Sgrehanchar *vmname; 81221828Sgrehan 82221828Sgrehanu_long lomem_sz; 83221828Sgrehanu_long himem_sz; 84221828Sgrehan 85221828Sgrehanint guest_ncpus; 86221828Sgrehan 87221828Sgrehanstatic int pincpu = -1; 88221828Sgrehanstatic int guest_vcpu_mux; 89240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; 90221828Sgrehan 91221828Sgrehanstatic int foundcpus; 92221828Sgrehan 93222105Sgrehanstatic int strictio; 94222105Sgrehan 95221828Sgrehanstatic char *lomem_addr; 96221828Sgrehanstatic char *himem_addr; 97221828Sgrehan 98221828Sgrehanstatic char *progname; 99221828Sgrehanstatic const int BSP = 0; 100221828Sgrehan 101221828Sgrehanstatic int cpumask; 102221828Sgrehan 103221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 104221828Sgrehan 105221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU]; 106221828Sgrehan 107221828Sgrehanstruct fbsdstats { 108221828Sgrehan uint64_t vmexit_bogus; 109221828Sgrehan uint64_t vmexit_bogus_switch; 110221828Sgrehan uint64_t vmexit_hlt; 111221828Sgrehan uint64_t vmexit_pause; 112221828Sgrehan uint64_t vmexit_mtrap; 113234761Sgrehan uint64_t vmexit_paging; 114221828Sgrehan uint64_t cpu_switch_rotate; 115221828Sgrehan uint64_t cpu_switch_direct; 116221828Sgrehan int io_reset; 117221828Sgrehan} stats; 118221828Sgrehan 119221828Sgrehanstruct mt_vmm_info { 120221828Sgrehan pthread_t mt_thr; 121221828Sgrehan struct vmctx *mt_ctx; 122221828Sgrehan int mt_vcpu; 123221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 124221828Sgrehan 125221828Sgrehanstatic void 126221828Sgrehanusage(int code) 127221828Sgrehan{ 128221828Sgrehan 129221828Sgrehan fprintf(stderr, 130240943Sneel "Usage: %s [-aehBHIP][-g <gdb port>][-z <hz>][-s <pci>]" 131239043Sneel "[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem] <vm>\n" 132240943Sneel " -a: local apic is in XAPIC mode (default is X2APIC)\n" 133221828Sgrehan " -g: gdb port (default is %d and 0 means don't open)\n" 
134221828Sgrehan " -c: # cpus (default 1)\n" 135221828Sgrehan " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" 136221828Sgrehan " -B: inject breakpoint exception on vm entry\n" 137221828Sgrehan " -H: vmexit from the guest on hlt\n" 138239043Sneel " -I: present an ioapic to the guest\n" 139221828Sgrehan " -P: vmexit from the guest on pause\n" 140222105Sgrehan " -e: exit on unhandled i/o access\n" 141221828Sgrehan " -h: help\n" 142221828Sgrehan " -z: guest hz (default is %d)\n" 143221828Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 144234938Sgrehan " -S: <slot,driver,configinfo> legacy PCI slot config\n" 145221828Sgrehan " -m: lowmem in MB\n" 146221828Sgrehan " -M: highmem in MB\n" 147221828Sgrehan " -x: mux vcpus to 1 hcpu\n" 148221828Sgrehan " -t: mux vcpu timeslice hz (default %d)\n", 149221828Sgrehan progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, 150221828Sgrehan DEFAULT_GUEST_TSLICE); 151221828Sgrehan exit(code); 152221828Sgrehan} 153221828Sgrehan 154221828Sgrehanvoid * 155221828Sgrehanpaddr_guest2host(uintptr_t gaddr) 156221828Sgrehan{ 157221828Sgrehan if (lomem_sz == 0) 158221828Sgrehan return (NULL); 159221828Sgrehan 160221828Sgrehan if (gaddr < lomem_sz) { 161221828Sgrehan return ((void *)(lomem_addr + gaddr)); 162221828Sgrehan } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { 163221828Sgrehan return ((void *)(himem_addr + gaddr - 4*GB)); 164221828Sgrehan } else 165221828Sgrehan return (NULL); 166221828Sgrehan} 167221828Sgrehan 168221828Sgrehanint 169240943Sneelfbsdrun_disable_x2apic(void) 170240943Sneel{ 171240943Sneel 172240943Sneel return (disable_x2apic); 173240943Sneel} 174240943Sneel 175240943Sneelint 176221828Sgrehanfbsdrun_vmexit_on_pause(void) 177221828Sgrehan{ 178221828Sgrehan 179221828Sgrehan return (guest_vmexit_on_pause); 180221828Sgrehan} 181221828Sgrehan 182221828Sgrehanint 183221828Sgrehanfbsdrun_vmexit_on_hlt(void) 184221828Sgrehan{ 185221828Sgrehan 186221828Sgrehan return (guest_vmexit_on_hlt); 187221828Sgrehan} 
188221828Sgrehan 189221828Sgrehanint 190221828Sgrehanfbsdrun_muxed(void) 191221828Sgrehan{ 192221828Sgrehan 193221828Sgrehan return (guest_vcpu_mux); 194221828Sgrehan} 195221828Sgrehan 196221942Sjhbstatic void * 197221828Sgrehanfbsdrun_start_thread(void *param) 198221828Sgrehan{ 199221828Sgrehan int vcpu; 200221828Sgrehan struct mt_vmm_info *mtp = param; 201221828Sgrehan 202221828Sgrehan vcpu = mtp->mt_vcpu; 203221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 204221828Sgrehan 205221828Sgrehan /* not reached */ 206221828Sgrehan exit(1); 207221828Sgrehan return (NULL); 208221828Sgrehan} 209221828Sgrehan 210221828Sgrehanvoid 211221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) 212221828Sgrehan{ 213221828Sgrehan int error; 214221828Sgrehan 215221828Sgrehan if (cpumask & (1 << vcpu)) { 216221828Sgrehan printf("addcpu: attempting to add existing cpu %d\n", vcpu); 217221828Sgrehan exit(1); 218221828Sgrehan } 219221828Sgrehan 220221828Sgrehan cpumask |= 1 << vcpu; 221221828Sgrehan foundcpus++; 222221828Sgrehan 223221828Sgrehan /* 224221828Sgrehan * Set up the vmexit struct to allow execution to start 225221828Sgrehan * at the given RIP 226221828Sgrehan */ 227221828Sgrehan vmexit[vcpu].rip = rip; 228221828Sgrehan vmexit[vcpu].inst_length = 0; 229221828Sgrehan 230221828Sgrehan if (vcpu == BSP || !guest_vcpu_mux){ 231221828Sgrehan mt_vmm_info[vcpu].mt_ctx = ctx; 232221828Sgrehan mt_vmm_info[vcpu].mt_vcpu = vcpu; 233221828Sgrehan 234221828Sgrehan error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, 235221828Sgrehan fbsdrun_start_thread, &mt_vmm_info[vcpu]); 236221828Sgrehan assert(error == 0); 237221828Sgrehan } 238221828Sgrehan} 239221828Sgrehan 240221828Sgrehanstatic int 241221828Sgrehanfbsdrun_get_next_cpu(int curcpu) 242221828Sgrehan{ 243221828Sgrehan 244221828Sgrehan /* 245221828Sgrehan * Get the next available CPU. Assumes they arrive 246221828Sgrehan * in ascending order with no gaps. 
247221828Sgrehan */ 248221828Sgrehan return ((curcpu + 1) % foundcpus); 249221828Sgrehan} 250221828Sgrehan 251221942Sjhbstatic int 252221828Sgrehanvmexit_catch_reset(void) 253221828Sgrehan{ 254221828Sgrehan stats.io_reset++; 255221828Sgrehan return (VMEXIT_RESET); 256221828Sgrehan} 257221828Sgrehan 258221942Sjhbstatic int 259221828Sgrehanvmexit_catch_inout(void) 260221828Sgrehan{ 261221828Sgrehan return (VMEXIT_ABORT); 262221828Sgrehan} 263221828Sgrehan 264221942Sjhbstatic int 265221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 266221828Sgrehan uint32_t eax) 267221828Sgrehan{ 268221828Sgrehan#if PG_DEBUG /* put all types of debug here */ 269221828Sgrehan if (eax == 0) { 270221828Sgrehan pause_noswitch = 1; 271221828Sgrehan } else if (eax == 1) { 272221828Sgrehan pause_noswitch = 0; 273221828Sgrehan } else { 274221828Sgrehan pause_noswitch = 0; 275221828Sgrehan if (eax == 5) { 276221828Sgrehan vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); 277221828Sgrehan } 278221828Sgrehan } 279221828Sgrehan#endif 280221828Sgrehan return (VMEXIT_CONTINUE); 281221828Sgrehan} 282221828Sgrehan 283221828Sgrehanstatic int 284221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 285221828Sgrehan{ 286221828Sgrehan int error; 287221828Sgrehan int bytes, port, in, out; 288221828Sgrehan uint32_t eax; 289221828Sgrehan int vcpu; 290221828Sgrehan 291221828Sgrehan vcpu = *pvcpu; 292221828Sgrehan 293221828Sgrehan port = vme->u.inout.port; 294221828Sgrehan bytes = vme->u.inout.bytes; 295221828Sgrehan eax = vme->u.inout.eax; 296221828Sgrehan in = vme->u.inout.in; 297221828Sgrehan out = !in; 298221828Sgrehan 299221828Sgrehan /* We don't deal with these */ 300221828Sgrehan if (vme->u.inout.string || vme->u.inout.rep) 301221828Sgrehan return (VMEXIT_ABORT); 302221828Sgrehan 303221828Sgrehan /* Special case of guest reset */ 304221828Sgrehan if (out && port == 0x64 && (uint8_t)eax == 0xFE) 305221828Sgrehan return 
(vmexit_catch_reset()); 306221828Sgrehan 307221828Sgrehan /* Extra-special case of host notifications */ 308221828Sgrehan if (out && port == GUEST_NIO_PORT) 309221828Sgrehan return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); 310221828Sgrehan 311222105Sgrehan error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); 312221828Sgrehan if (error == 0 && in) 313221828Sgrehan error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); 314221828Sgrehan 315221828Sgrehan if (error == 0) 316221828Sgrehan return (VMEXIT_CONTINUE); 317221828Sgrehan else { 318221828Sgrehan fprintf(stderr, "Unhandled %s%c 0x%04x\n", 319221828Sgrehan in ? "in" : "out", 320221828Sgrehan bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 321221828Sgrehan return (vmexit_catch_inout()); 322221828Sgrehan } 323221828Sgrehan} 324221828Sgrehan 325221828Sgrehanstatic int 326221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 327221828Sgrehan{ 328221828Sgrehan printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu); 329221828Sgrehan return (VMEXIT_ABORT); 330221828Sgrehan} 331221828Sgrehan 332221828Sgrehanstatic int 333221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 334221828Sgrehan{ 335221828Sgrehan int newcpu; 336221828Sgrehan int retval = VMEXIT_CONTINUE; 337221828Sgrehan 338221828Sgrehan newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); 339221828Sgrehan 340221828Sgrehan if (guest_vcpu_mux && *pvcpu != newcpu) { 341221828Sgrehan retval = VMEXIT_SWITCH; 342221828Sgrehan *pvcpu = newcpu; 343221828Sgrehan } 344221828Sgrehan 345221828Sgrehan return (retval); 346221828Sgrehan} 347221828Sgrehan 348221828Sgrehanstatic int 349240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 350240912Sneel{ 351240912Sneel int newcpu; 352240912Sneel int retval = VMEXIT_CONTINUE; 353240912Sneel 354240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 355240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 
356240912Sneel 357240912Sneel if (guest_vcpu_mux && *pvcpu != newcpu) { 358240912Sneel retval = VMEXIT_SWITCH; 359240912Sneel *pvcpu = newcpu; 360240912Sneel } 361240912Sneel 362240912Sneel return (retval); 363240912Sneel} 364240912Sneel 365240912Sneelstatic int 366221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 367221828Sgrehan{ 368221828Sgrehan 369221828Sgrehan printf("vm exit[%d]\n", *pvcpu); 370221828Sgrehan printf("\treason\t\tVMX\n"); 371221828Sgrehan printf("\trip\t\t0x%016lx\n", vmexit->rip); 372221828Sgrehan printf("\tinst_length\t%d\n", vmexit->inst_length); 373221828Sgrehan printf("\terror\t\t%d\n", vmexit->u.vmx.error); 374221828Sgrehan printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 375221828Sgrehan printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); 376221828Sgrehan 377221828Sgrehan return (VMEXIT_ABORT); 378221828Sgrehan} 379221828Sgrehan 380221828Sgrehanstatic int bogus_noswitch = 1; 381221828Sgrehan 382221828Sgrehanstatic int 383221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 384221828Sgrehan{ 385221828Sgrehan stats.vmexit_bogus++; 386221828Sgrehan 387221828Sgrehan if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { 388221828Sgrehan return (VMEXIT_RESTART); 389221828Sgrehan } else { 390221828Sgrehan stats.vmexit_bogus_switch++; 391221828Sgrehan vmexit->inst_length = 0; 392221828Sgrehan *pvcpu = -1; 393221828Sgrehan return (VMEXIT_SWITCH); 394221828Sgrehan } 395221828Sgrehan} 396221828Sgrehan 397221828Sgrehanstatic int 398221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 399221828Sgrehan{ 400221828Sgrehan stats.vmexit_hlt++; 401221828Sgrehan if (fbsdrun_muxed()) { 402221828Sgrehan *pvcpu = -1; 403221828Sgrehan return (VMEXIT_SWITCH); 404221828Sgrehan } else { 405221828Sgrehan /* 406221828Sgrehan * Just continue execution with the next instruction. 
We use 407221828Sgrehan * the HLT VM exit as a way to be friendly with the host 408221828Sgrehan * scheduler. 409221828Sgrehan */ 410221828Sgrehan return (VMEXIT_CONTINUE); 411221828Sgrehan } 412221828Sgrehan} 413221828Sgrehan 414221828Sgrehanstatic int pause_noswitch; 415221828Sgrehan 416221828Sgrehanstatic int 417221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 418221828Sgrehan{ 419221828Sgrehan stats.vmexit_pause++; 420221828Sgrehan 421221828Sgrehan if (fbsdrun_muxed() && !pause_noswitch) { 422221828Sgrehan *pvcpu = -1; 423221828Sgrehan return (VMEXIT_SWITCH); 424221828Sgrehan } else { 425221828Sgrehan return (VMEXIT_CONTINUE); 426221828Sgrehan } 427221828Sgrehan} 428221828Sgrehan 429221828Sgrehanstatic int 430221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 431221828Sgrehan{ 432221828Sgrehan stats.vmexit_mtrap++; 433221828Sgrehan 434221828Sgrehan return (VMEXIT_RESTART); 435221828Sgrehan} 436221828Sgrehan 437234761Sgrehanstatic int 438234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 439234761Sgrehan{ 440241744Sgrehan int err; 441234761Sgrehan stats.vmexit_paging++; 442234761Sgrehan 443241744Sgrehan err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip, 444241744Sgrehan vmexit->u.paging.cr3, vmexit->u.paging.rwx); 445241744Sgrehan 446241744Sgrehan if (err) { 447241744Sgrehan if (err == EINVAL) { 448241744Sgrehan printf("Failed to emulate instruction at 0x%lx\n", 449241744Sgrehan vmexit->rip); 450241744Sgrehan } else if (err == ESRCH) { 451241744Sgrehan printf("Unhandled memory access to 0x%lx\n", 452241744Sgrehan vmexit->u.paging.gpa); 453241744Sgrehan } 454241744Sgrehan 455234761Sgrehan return (VMEXIT_ABORT); 456234761Sgrehan } 457234761Sgrehan 458234761Sgrehan return (VMEXIT_CONTINUE); 459234761Sgrehan} 460234761Sgrehan 461221828Sgrehanstatic void 462221828Sgrehansigalrm(int sig) 463221828Sgrehan{ 464221828Sgrehan return; 465221828Sgrehan} 
466221828Sgrehan 467221828Sgrehanstatic void 468221828Sgrehansetup_timeslice(void) 469221828Sgrehan{ 470221828Sgrehan struct sigaction sa; 471221828Sgrehan struct itimerval itv; 472221828Sgrehan int error; 473221828Sgrehan 474221828Sgrehan /* 475221828Sgrehan * Setup a realtime timer to generate a SIGALRM at a 476221828Sgrehan * frequency of 'guest_tslice' ticks per second. 477221828Sgrehan */ 478221828Sgrehan sigemptyset(&sa.sa_mask); 479221828Sgrehan sa.sa_flags = 0; 480221828Sgrehan sa.sa_handler = sigalrm; 481221828Sgrehan 482221828Sgrehan error = sigaction(SIGALRM, &sa, NULL); 483221828Sgrehan assert(error == 0); 484221828Sgrehan 485221828Sgrehan itv.it_interval.tv_sec = 0; 486221828Sgrehan itv.it_interval.tv_usec = 1000000 / guest_tslice; 487221828Sgrehan itv.it_value.tv_sec = 0; 488221828Sgrehan itv.it_value.tv_usec = 1000000 / guest_tslice; 489221828Sgrehan 490221828Sgrehan error = setitimer(ITIMER_REAL, &itv, NULL); 491221828Sgrehan assert(error == 0); 492221828Sgrehan} 493221828Sgrehan 494221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 495234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 496234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 497234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 498234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 499234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 500234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 501240912Sneel [VM_EXITCODE_PAGING] = vmexit_paging, 502240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 503221828Sgrehan}; 504221828Sgrehan 505221828Sgrehanstatic void 506221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 507221828Sgrehan{ 508221828Sgrehan int error, rc, prevcpu; 509221828Sgrehan 510221828Sgrehan if (guest_vcpu_mux) 511221828Sgrehan setup_timeslice(); 512221828Sgrehan 513221828Sgrehan if (pincpu >= 0) { 514221828Sgrehan error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); 515221828Sgrehan assert(error == 0); 516221828Sgrehan } 517221828Sgrehan 518221828Sgrehan 
while (1) { 519221828Sgrehan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 520241490Sneel if (error != 0) { 521241490Sneel /* 522241490Sneel * It is possible that 'vmmctl' or some other process 523241490Sneel * has transitioned the vcpu to CANNOT_RUN state right 524241490Sneel * before we tried to transition it to RUNNING. 525241490Sneel * 526241490Sneel * This is expected to be temporary so just retry. 527241490Sneel */ 528241490Sneel if (errno == EBUSY) 529241490Sneel continue; 530241490Sneel else 531241490Sneel break; 532241490Sneel } 533221828Sgrehan 534221828Sgrehan prevcpu = vcpu; 535221828Sgrehan rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], 536221828Sgrehan &vcpu); 537221828Sgrehan switch (rc) { 538221828Sgrehan case VMEXIT_SWITCH: 539221828Sgrehan assert(guest_vcpu_mux); 540221828Sgrehan if (vcpu == -1) { 541221828Sgrehan stats.cpu_switch_rotate++; 542221828Sgrehan vcpu = fbsdrun_get_next_cpu(prevcpu); 543221828Sgrehan } else { 544221828Sgrehan stats.cpu_switch_direct++; 545221828Sgrehan } 546221828Sgrehan /* fall through */ 547221828Sgrehan case VMEXIT_CONTINUE: 548221828Sgrehan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 549221828Sgrehan break; 550221828Sgrehan case VMEXIT_RESTART: 551221828Sgrehan rip = vmexit[vcpu].rip; 552221828Sgrehan break; 553221828Sgrehan case VMEXIT_RESET: 554221828Sgrehan exit(0); 555221828Sgrehan default: 556221828Sgrehan exit(1); 557221828Sgrehan } 558221828Sgrehan } 559221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 560221828Sgrehan} 561221828Sgrehan 562221828Sgrehan 563221828Sgrehanint 564221828Sgrehanmain(int argc, char *argv[]) 565221828Sgrehan{ 566239043Sneel int c, error, gdb_port, inject_bkpt, tmp, err, ioapic; 567221828Sgrehan struct vmctx *ctx; 568221828Sgrehan uint64_t rip; 569221828Sgrehan 570221828Sgrehan inject_bkpt = 0; 571221828Sgrehan progname = basename(argv[0]); 572221828Sgrehan gdb_port = DEFAULT_GDB_PORT; 573221828Sgrehan guest_ncpus = 1; 574239043Sneel 
ioapic = 0; 575221828Sgrehan 576240943Sneel while ((c = getopt(argc, argv, "aehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { 577221828Sgrehan switch (c) { 578240943Sneel case 'a': 579240943Sneel disable_x2apic = 1; 580240943Sneel break; 581221828Sgrehan case 'B': 582221828Sgrehan inject_bkpt = 1; 583221828Sgrehan break; 584221828Sgrehan case 'x': 585221828Sgrehan guest_vcpu_mux = 1; 586221828Sgrehan break; 587221828Sgrehan case 'p': 588221828Sgrehan pincpu = atoi(optarg); 589221828Sgrehan break; 590221828Sgrehan case 'c': 591221828Sgrehan guest_ncpus = atoi(optarg); 592221828Sgrehan break; 593221828Sgrehan case 'g': 594221828Sgrehan gdb_port = atoi(optarg); 595221828Sgrehan break; 596221828Sgrehan case 'z': 597221828Sgrehan guest_hz = atoi(optarg); 598221828Sgrehan break; 599221828Sgrehan case 't': 600221828Sgrehan guest_tslice = atoi(optarg); 601221828Sgrehan break; 602221828Sgrehan case 's': 603234938Sgrehan pci_parse_slot(optarg, 0); 604221828Sgrehan break; 605234938Sgrehan case 'S': 606234938Sgrehan pci_parse_slot(optarg, 1); 607234938Sgrehan break; 608221828Sgrehan case 'm': 609221828Sgrehan lomem_sz = strtoul(optarg, NULL, 0) * MB; 610221828Sgrehan break; 611221828Sgrehan case 'M': 612221828Sgrehan himem_sz = strtoul(optarg, NULL, 0) * MB; 613221828Sgrehan break; 614221828Sgrehan case 'H': 615221828Sgrehan guest_vmexit_on_hlt = 1; 616221828Sgrehan break; 617239043Sneel case 'I': 618239043Sneel ioapic = 1; 619239043Sneel break; 620221828Sgrehan case 'P': 621221828Sgrehan guest_vmexit_on_pause = 1; 622221828Sgrehan break; 623222105Sgrehan case 'e': 624222105Sgrehan strictio = 1; 625222105Sgrehan break; 626221828Sgrehan case 'h': 627221828Sgrehan usage(0); 628221828Sgrehan default: 629221828Sgrehan usage(1); 630221828Sgrehan } 631221828Sgrehan } 632221828Sgrehan argc -= optind; 633221828Sgrehan argv += optind; 634221828Sgrehan 635221828Sgrehan if (argc != 1) 636221828Sgrehan usage(1); 637221828Sgrehan 638221828Sgrehan /* No need to mux if guest is uni-processor */ 
639221828Sgrehan if (guest_ncpus <= 1) 640221828Sgrehan guest_vcpu_mux = 0; 641221828Sgrehan 642221828Sgrehan /* vmexit on hlt if guest is muxed */ 643221828Sgrehan if (guest_vcpu_mux) { 644221828Sgrehan guest_vmexit_on_hlt = 1; 645221828Sgrehan guest_vmexit_on_pause = 1; 646221828Sgrehan } 647221828Sgrehan 648221828Sgrehan vmname = argv[0]; 649221828Sgrehan 650221828Sgrehan ctx = vm_open(vmname); 651221828Sgrehan if (ctx == NULL) { 652221828Sgrehan perror("vm_open"); 653221828Sgrehan exit(1); 654221828Sgrehan } 655221828Sgrehan 656221828Sgrehan if (fbsdrun_vmexit_on_hlt()) { 657221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); 658221828Sgrehan if (err < 0) { 659221828Sgrehan printf("VM exit on HLT not supported\n"); 660221828Sgrehan exit(1); 661221828Sgrehan } 662221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); 663221828Sgrehan handler[VM_EXITCODE_HLT] = vmexit_hlt; 664221828Sgrehan } 665221828Sgrehan 666221828Sgrehan if (fbsdrun_vmexit_on_pause()) { 667221828Sgrehan /* 668221828Sgrehan * pause exit support required for this mode 669221828Sgrehan */ 670221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); 671221828Sgrehan if (err < 0) { 672221828Sgrehan printf("SMP mux requested, no pause support\n"); 673221828Sgrehan exit(1); 674221828Sgrehan } 675221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); 676221828Sgrehan handler[VM_EXITCODE_PAUSE] = vmexit_pause; 677221828Sgrehan } 678221828Sgrehan 679240943Sneel if (fbsdrun_disable_x2apic()) 680240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); 681240943Sneel else 682240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); 683240943Sneel 684240943Sneel if (err) { 685240943Sneel printf("Unable to set x2apic state (%d)\n", err); 686240943Sneel exit(1); 687240943Sneel } 688240943Sneel 689221828Sgrehan if (lomem_sz != 0) { 690221828Sgrehan lomem_addr = vm_map_memory(ctx, 0, lomem_sz); 691221828Sgrehan if (lomem_addr == (char *) 
MAP_FAILED) { 692221828Sgrehan lomem_sz = 0; 693221828Sgrehan } else if (himem_sz != 0) { 694221828Sgrehan himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); 695221828Sgrehan if (himem_addr == (char *) MAP_FAILED) { 696221828Sgrehan lomem_sz = 0; 697221828Sgrehan himem_sz = 0; 698221828Sgrehan } 699221828Sgrehan } 700221828Sgrehan } 701221828Sgrehan 702221828Sgrehan init_inout(); 703221828Sgrehan init_pci(ctx); 704239045Sneel if (ioapic) 705239045Sneel ioapic_init(0); 706221828Sgrehan 707221828Sgrehan if (gdb_port != 0) 708221828Sgrehan init_dbgport(gdb_port); 709221828Sgrehan 710221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 711221828Sgrehan assert(error == 0); 712221828Sgrehan 713221828Sgrehan if (inject_bkpt) { 714221828Sgrehan error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); 715221828Sgrehan assert(error == 0); 716221828Sgrehan } 717221828Sgrehan 718221828Sgrehan /* 719221828Sgrehan * build the guest tables, MP etc. 720221828Sgrehan */ 721242131Sgrehan mptable_build(ctx, guest_ncpus, ioapic); 722221828Sgrehan 723221828Sgrehan /* 724221828Sgrehan * Add CPU 0 725221828Sgrehan */ 726221828Sgrehan fbsdrun_addcpu(ctx, BSP, rip); 727221828Sgrehan 728221828Sgrehan /* 729221828Sgrehan * Head off to the main event dispatch loop 730221828Sgrehan */ 731221828Sgrehan mevent_dispatch(); 732221828Sgrehan 733221828Sgrehan exit(1); 734221828Sgrehan} 735