/* bhyverun.c revision 242385 */
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 
25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36221828Sgrehan#include <machine/segments.h> 37221828Sgrehan 38221828Sgrehan#include <stdio.h> 39221828Sgrehan#include <stdlib.h> 40221828Sgrehan#include <libgen.h> 41221828Sgrehan#include <unistd.h> 42221828Sgrehan#include <assert.h> 43221828Sgrehan#include <errno.h> 44221828Sgrehan#include <signal.h> 45221828Sgrehan#include <pthread.h> 46221828Sgrehan 47221828Sgrehan#include <machine/vmm.h> 48221828Sgrehan#include <vmmapi.h> 49221828Sgrehan 50221828Sgrehan#include "fbsdrun.h" 51221828Sgrehan#include "inout.h" 52221828Sgrehan#include "dbgport.h" 53241744Sgrehan#include "mem.h" 54221828Sgrehan#include "mevent.h" 55242131Sgrehan#include "mptbl.h" 56221828Sgrehan#include "pci_emul.h" 57221828Sgrehan#include "xmsr.h" 58234761Sgrehan#include "instruction_emul.h" 59239045Sneel#include "ioapic.h" 60240912Sneel#include "spinup_ap.h" 61221828Sgrehan 62221828Sgrehan#define DEFAULT_GUEST_HZ 100 63221828Sgrehan#define DEFAULT_GUEST_TSLICE 200 64221828Sgrehan 65221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 66221828Sgrehan 67221828Sgrehan#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ 68221828Sgrehan#define VMEXIT_CONTINUE 1 /* continue from next instruction */ 69221828Sgrehan#define VMEXIT_RESTART 2 /* restart current instruction */ 70221828Sgrehan#define VMEXIT_ABORT 3 /* abort the vm run loop */ 71221828Sgrehan#define VMEXIT_RESET 4 /* guest machine has reset */ 72221828Sgrehan 73221828Sgrehan#define MB (1024UL * 1024) 74221828Sgrehan#define GB (1024UL * MB) 75221828Sgrehan 76221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 77221828Sgrehan 78221828Sgrehanint guest_tslice = DEFAULT_GUEST_TSLICE; 
79221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ; 80221828Sgrehanchar *vmname; 81221828Sgrehan 82221828Sgrehanu_long lomem_sz; 83221828Sgrehanu_long himem_sz; 84221828Sgrehan 85221828Sgrehanint guest_ncpus; 86221828Sgrehan 87221828Sgrehanstatic int pincpu = -1; 88221828Sgrehanstatic int guest_vcpu_mux; 89240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; 90221828Sgrehan 91221828Sgrehanstatic int foundcpus; 92221828Sgrehan 93222105Sgrehanstatic int strictio; 94222105Sgrehan 95221828Sgrehanstatic char *lomem_addr; 96221828Sgrehanstatic char *himem_addr; 97221828Sgrehan 98221828Sgrehanstatic char *progname; 99221828Sgrehanstatic const int BSP = 0; 100221828Sgrehan 101221828Sgrehanstatic int cpumask; 102221828Sgrehan 103221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 104221828Sgrehan 105221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU]; 106221828Sgrehan 107221828Sgrehanstruct fbsdstats { 108221828Sgrehan uint64_t vmexit_bogus; 109221828Sgrehan uint64_t vmexit_bogus_switch; 110221828Sgrehan uint64_t vmexit_hlt; 111221828Sgrehan uint64_t vmexit_pause; 112221828Sgrehan uint64_t vmexit_mtrap; 113234761Sgrehan uint64_t vmexit_paging; 114221828Sgrehan uint64_t cpu_switch_rotate; 115221828Sgrehan uint64_t cpu_switch_direct; 116221828Sgrehan int io_reset; 117221828Sgrehan} stats; 118221828Sgrehan 119221828Sgrehanstruct mt_vmm_info { 120221828Sgrehan pthread_t mt_thr; 121221828Sgrehan struct vmctx *mt_ctx; 122221828Sgrehan int mt_vcpu; 123221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 124221828Sgrehan 125221828Sgrehanstatic void 126221828Sgrehanusage(int code) 127221828Sgrehan{ 128221828Sgrehan 129221828Sgrehan fprintf(stderr, 130240943Sneel "Usage: %s [-aehBHIP][-g <gdb port>][-z <hz>][-s <pci>]" 131239043Sneel "[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem] <vm>\n" 132240943Sneel " -a: local apic is in XAPIC mode (default is X2APIC)\n" 133221828Sgrehan " -g: gdb port (default is %d and 0 means don't open)\n" 
134221828Sgrehan " -c: # cpus (default 1)\n" 135221828Sgrehan " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" 136221828Sgrehan " -B: inject breakpoint exception on vm entry\n" 137221828Sgrehan " -H: vmexit from the guest on hlt\n" 138239043Sneel " -I: present an ioapic to the guest\n" 139221828Sgrehan " -P: vmexit from the guest on pause\n" 140222105Sgrehan " -e: exit on unhandled i/o access\n" 141221828Sgrehan " -h: help\n" 142221828Sgrehan " -z: guest hz (default is %d)\n" 143221828Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 144234938Sgrehan " -S: <slot,driver,configinfo> legacy PCI slot config\n" 145221828Sgrehan " -m: lowmem in MB\n" 146221828Sgrehan " -M: highmem in MB\n" 147221828Sgrehan " -x: mux vcpus to 1 hcpu\n" 148221828Sgrehan " -t: mux vcpu timeslice hz (default %d)\n", 149221828Sgrehan progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, 150221828Sgrehan DEFAULT_GUEST_TSLICE); 151221828Sgrehan exit(code); 152221828Sgrehan} 153221828Sgrehan 154221828Sgrehanvoid * 155221828Sgrehanpaddr_guest2host(uintptr_t gaddr) 156221828Sgrehan{ 157221828Sgrehan if (lomem_sz == 0) 158221828Sgrehan return (NULL); 159221828Sgrehan 160221828Sgrehan if (gaddr < lomem_sz) { 161221828Sgrehan return ((void *)(lomem_addr + gaddr)); 162221828Sgrehan } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { 163221828Sgrehan return ((void *)(himem_addr + gaddr - 4*GB)); 164221828Sgrehan } else 165221828Sgrehan return (NULL); 166221828Sgrehan} 167221828Sgrehan 168221828Sgrehanint 169240943Sneelfbsdrun_disable_x2apic(void) 170240943Sneel{ 171240943Sneel 172240943Sneel return (disable_x2apic); 173240943Sneel} 174240943Sneel 175240943Sneelint 176221828Sgrehanfbsdrun_vmexit_on_pause(void) 177221828Sgrehan{ 178221828Sgrehan 179221828Sgrehan return (guest_vmexit_on_pause); 180221828Sgrehan} 181221828Sgrehan 182221828Sgrehanint 183221828Sgrehanfbsdrun_vmexit_on_hlt(void) 184221828Sgrehan{ 185221828Sgrehan 186221828Sgrehan return (guest_vmexit_on_hlt); 187221828Sgrehan} 
188221828Sgrehan 189221828Sgrehanint 190221828Sgrehanfbsdrun_muxed(void) 191221828Sgrehan{ 192221828Sgrehan 193221828Sgrehan return (guest_vcpu_mux); 194221828Sgrehan} 195221828Sgrehan 196221942Sjhbstatic void * 197221828Sgrehanfbsdrun_start_thread(void *param) 198221828Sgrehan{ 199221828Sgrehan int vcpu; 200221828Sgrehan struct mt_vmm_info *mtp = param; 201221828Sgrehan 202221828Sgrehan vcpu = mtp->mt_vcpu; 203221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 204221828Sgrehan 205221828Sgrehan /* not reached */ 206221828Sgrehan exit(1); 207221828Sgrehan return (NULL); 208221828Sgrehan} 209221828Sgrehan 210221828Sgrehanvoid 211221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) 212221828Sgrehan{ 213221828Sgrehan int error; 214221828Sgrehan 215221828Sgrehan if (cpumask & (1 << vcpu)) { 216242385Sgrehan fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", 217242385Sgrehan vcpu); 218221828Sgrehan exit(1); 219221828Sgrehan } 220221828Sgrehan 221221828Sgrehan cpumask |= 1 << vcpu; 222221828Sgrehan foundcpus++; 223221828Sgrehan 224221828Sgrehan /* 225221828Sgrehan * Set up the vmexit struct to allow execution to start 226221828Sgrehan * at the given RIP 227221828Sgrehan */ 228221828Sgrehan vmexit[vcpu].rip = rip; 229221828Sgrehan vmexit[vcpu].inst_length = 0; 230221828Sgrehan 231221828Sgrehan if (vcpu == BSP || !guest_vcpu_mux){ 232221828Sgrehan mt_vmm_info[vcpu].mt_ctx = ctx; 233221828Sgrehan mt_vmm_info[vcpu].mt_vcpu = vcpu; 234221828Sgrehan 235221828Sgrehan error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, 236221828Sgrehan fbsdrun_start_thread, &mt_vmm_info[vcpu]); 237221828Sgrehan assert(error == 0); 238221828Sgrehan } 239221828Sgrehan} 240221828Sgrehan 241221828Sgrehanstatic int 242221828Sgrehanfbsdrun_get_next_cpu(int curcpu) 243221828Sgrehan{ 244221828Sgrehan 245221828Sgrehan /* 246221828Sgrehan * Get the next available CPU. Assumes they arrive 247221828Sgrehan * in ascending order with no gaps. 
248221828Sgrehan */ 249221828Sgrehan return ((curcpu + 1) % foundcpus); 250221828Sgrehan} 251221828Sgrehan 252221942Sjhbstatic int 253221828Sgrehanvmexit_catch_reset(void) 254221828Sgrehan{ 255221828Sgrehan stats.io_reset++; 256221828Sgrehan return (VMEXIT_RESET); 257221828Sgrehan} 258221828Sgrehan 259221942Sjhbstatic int 260221828Sgrehanvmexit_catch_inout(void) 261221828Sgrehan{ 262221828Sgrehan return (VMEXIT_ABORT); 263221828Sgrehan} 264221828Sgrehan 265221942Sjhbstatic int 266221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 267221828Sgrehan uint32_t eax) 268221828Sgrehan{ 269221828Sgrehan#if PG_DEBUG /* put all types of debug here */ 270221828Sgrehan if (eax == 0) { 271221828Sgrehan pause_noswitch = 1; 272221828Sgrehan } else if (eax == 1) { 273221828Sgrehan pause_noswitch = 0; 274221828Sgrehan } else { 275221828Sgrehan pause_noswitch = 0; 276221828Sgrehan if (eax == 5) { 277221828Sgrehan vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); 278221828Sgrehan } 279221828Sgrehan } 280221828Sgrehan#endif 281221828Sgrehan return (VMEXIT_CONTINUE); 282221828Sgrehan} 283221828Sgrehan 284221828Sgrehanstatic int 285221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 286221828Sgrehan{ 287221828Sgrehan int error; 288221828Sgrehan int bytes, port, in, out; 289221828Sgrehan uint32_t eax; 290221828Sgrehan int vcpu; 291221828Sgrehan 292221828Sgrehan vcpu = *pvcpu; 293221828Sgrehan 294221828Sgrehan port = vme->u.inout.port; 295221828Sgrehan bytes = vme->u.inout.bytes; 296221828Sgrehan eax = vme->u.inout.eax; 297221828Sgrehan in = vme->u.inout.in; 298221828Sgrehan out = !in; 299221828Sgrehan 300221828Sgrehan /* We don't deal with these */ 301221828Sgrehan if (vme->u.inout.string || vme->u.inout.rep) 302221828Sgrehan return (VMEXIT_ABORT); 303221828Sgrehan 304221828Sgrehan /* Special case of guest reset */ 305221828Sgrehan if (out && port == 0x64 && (uint8_t)eax == 0xFE) 306221828Sgrehan return 
(vmexit_catch_reset()); 307221828Sgrehan 308221828Sgrehan /* Extra-special case of host notifications */ 309221828Sgrehan if (out && port == GUEST_NIO_PORT) 310221828Sgrehan return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); 311221828Sgrehan 312222105Sgrehan error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); 313221828Sgrehan if (error == 0 && in) 314221828Sgrehan error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); 315221828Sgrehan 316221828Sgrehan if (error == 0) 317221828Sgrehan return (VMEXIT_CONTINUE); 318221828Sgrehan else { 319221828Sgrehan fprintf(stderr, "Unhandled %s%c 0x%04x\n", 320221828Sgrehan in ? "in" : "out", 321221828Sgrehan bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); 322221828Sgrehan return (vmexit_catch_inout()); 323221828Sgrehan } 324221828Sgrehan} 325221828Sgrehan 326221828Sgrehanstatic int 327221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 328221828Sgrehan{ 329242385Sgrehan fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, 330242385Sgrehan *pvcpu); 331221828Sgrehan return (VMEXIT_ABORT); 332221828Sgrehan} 333221828Sgrehan 334221828Sgrehanstatic int 335221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 336221828Sgrehan{ 337221828Sgrehan int newcpu; 338221828Sgrehan int retval = VMEXIT_CONTINUE; 339221828Sgrehan 340221828Sgrehan newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); 341221828Sgrehan 342221828Sgrehan if (guest_vcpu_mux && *pvcpu != newcpu) { 343221828Sgrehan retval = VMEXIT_SWITCH; 344221828Sgrehan *pvcpu = newcpu; 345221828Sgrehan } 346221828Sgrehan 347221828Sgrehan return (retval); 348221828Sgrehan} 349221828Sgrehan 350221828Sgrehanstatic int 351240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 352240912Sneel{ 353240912Sneel int newcpu; 354240912Sneel int retval = VMEXIT_CONTINUE; 355240912Sneel 356240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 357240912Sneel vme->u.spinup_ap.vcpu, 
vme->u.spinup_ap.rip); 358240912Sneel 359240912Sneel if (guest_vcpu_mux && *pvcpu != newcpu) { 360240912Sneel retval = VMEXIT_SWITCH; 361240912Sneel *pvcpu = newcpu; 362240912Sneel } 363240912Sneel 364240912Sneel return (retval); 365240912Sneel} 366240912Sneel 367240912Sneelstatic int 368221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 369221828Sgrehan{ 370221828Sgrehan 371242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 372242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 373242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 374242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 375242385Sgrehan fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error); 376242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 377242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 378242385Sgrehan vmexit->u.vmx.exit_qualification); 379221828Sgrehan 380221828Sgrehan return (VMEXIT_ABORT); 381221828Sgrehan} 382221828Sgrehan 383221828Sgrehanstatic int bogus_noswitch = 1; 384221828Sgrehan 385221828Sgrehanstatic int 386221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 387221828Sgrehan{ 388221828Sgrehan stats.vmexit_bogus++; 389221828Sgrehan 390221828Sgrehan if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { 391221828Sgrehan return (VMEXIT_RESTART); 392221828Sgrehan } else { 393221828Sgrehan stats.vmexit_bogus_switch++; 394221828Sgrehan vmexit->inst_length = 0; 395221828Sgrehan *pvcpu = -1; 396221828Sgrehan return (VMEXIT_SWITCH); 397221828Sgrehan } 398221828Sgrehan} 399221828Sgrehan 400221828Sgrehanstatic int 401221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 402221828Sgrehan{ 403221828Sgrehan stats.vmexit_hlt++; 404221828Sgrehan if (fbsdrun_muxed()) { 405221828Sgrehan *pvcpu = -1; 406221828Sgrehan return (VMEXIT_SWITCH); 407221828Sgrehan } else { 408221828Sgrehan /* 409221828Sgrehan * Just continue 
execution with the next instruction. We use 410221828Sgrehan * the HLT VM exit as a way to be friendly with the host 411221828Sgrehan * scheduler. 412221828Sgrehan */ 413221828Sgrehan return (VMEXIT_CONTINUE); 414221828Sgrehan } 415221828Sgrehan} 416221828Sgrehan 417221828Sgrehanstatic int pause_noswitch; 418221828Sgrehan 419221828Sgrehanstatic int 420221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 421221828Sgrehan{ 422221828Sgrehan stats.vmexit_pause++; 423221828Sgrehan 424221828Sgrehan if (fbsdrun_muxed() && !pause_noswitch) { 425221828Sgrehan *pvcpu = -1; 426221828Sgrehan return (VMEXIT_SWITCH); 427221828Sgrehan } else { 428221828Sgrehan return (VMEXIT_CONTINUE); 429221828Sgrehan } 430221828Sgrehan} 431221828Sgrehan 432221828Sgrehanstatic int 433221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 434221828Sgrehan{ 435221828Sgrehan stats.vmexit_mtrap++; 436221828Sgrehan 437221828Sgrehan return (VMEXIT_RESTART); 438221828Sgrehan} 439221828Sgrehan 440234761Sgrehanstatic int 441234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 442234761Sgrehan{ 443241744Sgrehan int err; 444234761Sgrehan stats.vmexit_paging++; 445234761Sgrehan 446241744Sgrehan err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip, 447241744Sgrehan vmexit->u.paging.cr3, vmexit->u.paging.rwx); 448241744Sgrehan 449241744Sgrehan if (err) { 450241744Sgrehan if (err == EINVAL) { 451242385Sgrehan fprintf(stderr, 452242385Sgrehan "Failed to emulate instruction at 0x%lx\n", 453242385Sgrehan vmexit->rip); 454241744Sgrehan } else if (err == ESRCH) { 455242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 456242385Sgrehan vmexit->u.paging.gpa); 457241744Sgrehan } 458241744Sgrehan 459234761Sgrehan return (VMEXIT_ABORT); 460234761Sgrehan } 461234761Sgrehan 462234761Sgrehan return (VMEXIT_CONTINUE); 463234761Sgrehan} 464234761Sgrehan 465221828Sgrehanstatic void 466221828Sgrehansigalrm(int sig) 
467221828Sgrehan{ 468221828Sgrehan return; 469221828Sgrehan} 470221828Sgrehan 471221828Sgrehanstatic void 472221828Sgrehansetup_timeslice(void) 473221828Sgrehan{ 474221828Sgrehan struct sigaction sa; 475221828Sgrehan struct itimerval itv; 476221828Sgrehan int error; 477221828Sgrehan 478221828Sgrehan /* 479221828Sgrehan * Setup a realtime timer to generate a SIGALRM at a 480221828Sgrehan * frequency of 'guest_tslice' ticks per second. 481221828Sgrehan */ 482221828Sgrehan sigemptyset(&sa.sa_mask); 483221828Sgrehan sa.sa_flags = 0; 484221828Sgrehan sa.sa_handler = sigalrm; 485221828Sgrehan 486221828Sgrehan error = sigaction(SIGALRM, &sa, NULL); 487221828Sgrehan assert(error == 0); 488221828Sgrehan 489221828Sgrehan itv.it_interval.tv_sec = 0; 490221828Sgrehan itv.it_interval.tv_usec = 1000000 / guest_tslice; 491221828Sgrehan itv.it_value.tv_sec = 0; 492221828Sgrehan itv.it_value.tv_usec = 1000000 / guest_tslice; 493221828Sgrehan 494221828Sgrehan error = setitimer(ITIMER_REAL, &itv, NULL); 495221828Sgrehan assert(error == 0); 496221828Sgrehan} 497221828Sgrehan 498221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 499234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 500234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 501234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 502234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 503234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 504234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 505240912Sneel [VM_EXITCODE_PAGING] = vmexit_paging, 506240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 507221828Sgrehan}; 508221828Sgrehan 509221828Sgrehanstatic void 510221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 511221828Sgrehan{ 512221828Sgrehan int error, rc, prevcpu; 513221828Sgrehan 514221828Sgrehan if (guest_vcpu_mux) 515221828Sgrehan setup_timeslice(); 516221828Sgrehan 517221828Sgrehan if (pincpu >= 0) { 518221828Sgrehan error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); 519221828Sgrehan assert(error == 
0); 520221828Sgrehan } 521221828Sgrehan 522221828Sgrehan while (1) { 523221828Sgrehan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 524241490Sneel if (error != 0) { 525241490Sneel /* 526241490Sneel * It is possible that 'vmmctl' or some other process 527241490Sneel * has transitioned the vcpu to CANNOT_RUN state right 528241490Sneel * before we tried to transition it to RUNNING. 529241490Sneel * 530241490Sneel * This is expected to be temporary so just retry. 531241490Sneel */ 532241490Sneel if (errno == EBUSY) 533241490Sneel continue; 534241490Sneel else 535241490Sneel break; 536241490Sneel } 537221828Sgrehan 538221828Sgrehan prevcpu = vcpu; 539221828Sgrehan rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], 540221828Sgrehan &vcpu); 541221828Sgrehan switch (rc) { 542221828Sgrehan case VMEXIT_SWITCH: 543221828Sgrehan assert(guest_vcpu_mux); 544221828Sgrehan if (vcpu == -1) { 545221828Sgrehan stats.cpu_switch_rotate++; 546221828Sgrehan vcpu = fbsdrun_get_next_cpu(prevcpu); 547221828Sgrehan } else { 548221828Sgrehan stats.cpu_switch_direct++; 549221828Sgrehan } 550221828Sgrehan /* fall through */ 551221828Sgrehan case VMEXIT_CONTINUE: 552221828Sgrehan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 553221828Sgrehan break; 554221828Sgrehan case VMEXIT_RESTART: 555221828Sgrehan rip = vmexit[vcpu].rip; 556221828Sgrehan break; 557221828Sgrehan case VMEXIT_RESET: 558221828Sgrehan exit(0); 559221828Sgrehan default: 560221828Sgrehan exit(1); 561221828Sgrehan } 562221828Sgrehan } 563221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 564221828Sgrehan} 565221828Sgrehan 566221828Sgrehan 567221828Sgrehanint 568221828Sgrehanmain(int argc, char *argv[]) 569221828Sgrehan{ 570242192Sneel int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; 571221828Sgrehan struct vmctx *ctx; 572221828Sgrehan uint64_t rip; 573221828Sgrehan 574242192Sneel bvmcons = 0; 575221828Sgrehan inject_bkpt = 0; 576221828Sgrehan progname = basename(argv[0]); 
577221828Sgrehan gdb_port = DEFAULT_GDB_PORT; 578221828Sgrehan guest_ncpus = 1; 579239043Sneel ioapic = 0; 580221828Sgrehan 581242192Sneel while ((c = getopt(argc, argv, "abehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { 582221828Sgrehan switch (c) { 583240943Sneel case 'a': 584240943Sneel disable_x2apic = 1; 585240943Sneel break; 586242192Sneel case 'b': 587242192Sneel bvmcons = 1; 588242192Sneel break; 589221828Sgrehan case 'B': 590221828Sgrehan inject_bkpt = 1; 591221828Sgrehan break; 592221828Sgrehan case 'x': 593221828Sgrehan guest_vcpu_mux = 1; 594221828Sgrehan break; 595221828Sgrehan case 'p': 596221828Sgrehan pincpu = atoi(optarg); 597221828Sgrehan break; 598221828Sgrehan case 'c': 599221828Sgrehan guest_ncpus = atoi(optarg); 600221828Sgrehan break; 601221828Sgrehan case 'g': 602221828Sgrehan gdb_port = atoi(optarg); 603221828Sgrehan break; 604221828Sgrehan case 'z': 605221828Sgrehan guest_hz = atoi(optarg); 606221828Sgrehan break; 607221828Sgrehan case 't': 608221828Sgrehan guest_tslice = atoi(optarg); 609221828Sgrehan break; 610221828Sgrehan case 's': 611234938Sgrehan pci_parse_slot(optarg, 0); 612221828Sgrehan break; 613234938Sgrehan case 'S': 614234938Sgrehan pci_parse_slot(optarg, 1); 615234938Sgrehan break; 616221828Sgrehan case 'm': 617221828Sgrehan lomem_sz = strtoul(optarg, NULL, 0) * MB; 618221828Sgrehan break; 619221828Sgrehan case 'M': 620221828Sgrehan himem_sz = strtoul(optarg, NULL, 0) * MB; 621221828Sgrehan break; 622221828Sgrehan case 'H': 623221828Sgrehan guest_vmexit_on_hlt = 1; 624221828Sgrehan break; 625239043Sneel case 'I': 626239043Sneel ioapic = 1; 627239043Sneel break; 628221828Sgrehan case 'P': 629221828Sgrehan guest_vmexit_on_pause = 1; 630221828Sgrehan break; 631222105Sgrehan case 'e': 632222105Sgrehan strictio = 1; 633222105Sgrehan break; 634221828Sgrehan case 'h': 635221828Sgrehan usage(0); 636221828Sgrehan default: 637221828Sgrehan usage(1); 638221828Sgrehan } 639221828Sgrehan } 640221828Sgrehan argc -= optind; 641221828Sgrehan argv += 
optind; 642221828Sgrehan 643221828Sgrehan if (argc != 1) 644221828Sgrehan usage(1); 645221828Sgrehan 646221828Sgrehan /* No need to mux if guest is uni-processor */ 647221828Sgrehan if (guest_ncpus <= 1) 648221828Sgrehan guest_vcpu_mux = 0; 649221828Sgrehan 650242385Sgrehan if (guest_ncpus > VM_MAXCPU) { 651242385Sgrehan fprintf(stderr, "%d vCPUs requested, max %d\n", 652242385Sgrehan guest_ncpus, VM_MAXCPU); 653242385Sgrehan exit(1); 654242385Sgrehan } 655242385Sgrehan 656221828Sgrehan /* vmexit on hlt if guest is muxed */ 657221828Sgrehan if (guest_vcpu_mux) { 658221828Sgrehan guest_vmexit_on_hlt = 1; 659221828Sgrehan guest_vmexit_on_pause = 1; 660221828Sgrehan } 661221828Sgrehan 662221828Sgrehan vmname = argv[0]; 663221828Sgrehan 664221828Sgrehan ctx = vm_open(vmname); 665221828Sgrehan if (ctx == NULL) { 666221828Sgrehan perror("vm_open"); 667221828Sgrehan exit(1); 668221828Sgrehan } 669221828Sgrehan 670221828Sgrehan if (fbsdrun_vmexit_on_hlt()) { 671221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); 672221828Sgrehan if (err < 0) { 673242385Sgrehan fprintf(stderr, "VM exit on HLT not supported\n"); 674221828Sgrehan exit(1); 675221828Sgrehan } 676221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); 677221828Sgrehan handler[VM_EXITCODE_HLT] = vmexit_hlt; 678221828Sgrehan } 679221828Sgrehan 680221828Sgrehan if (fbsdrun_vmexit_on_pause()) { 681221828Sgrehan /* 682221828Sgrehan * pause exit support required for this mode 683221828Sgrehan */ 684221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); 685221828Sgrehan if (err < 0) { 686242385Sgrehan fprintf(stderr, 687242385Sgrehan "SMP mux requested, no pause support\n"); 688221828Sgrehan exit(1); 689221828Sgrehan } 690221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); 691221828Sgrehan handler[VM_EXITCODE_PAUSE] = vmexit_pause; 692221828Sgrehan } 693221828Sgrehan 694240943Sneel if (fbsdrun_disable_x2apic()) 695240943Sneel err = vm_set_x2apic_state(ctx, 
BSP, X2APIC_DISABLED); 696240943Sneel else 697240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); 698240943Sneel 699240943Sneel if (err) { 700242385Sgrehan fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 701240943Sneel exit(1); 702240943Sneel } 703240943Sneel 704221828Sgrehan if (lomem_sz != 0) { 705221828Sgrehan lomem_addr = vm_map_memory(ctx, 0, lomem_sz); 706221828Sgrehan if (lomem_addr == (char *) MAP_FAILED) { 707221828Sgrehan lomem_sz = 0; 708221828Sgrehan } else if (himem_sz != 0) { 709221828Sgrehan himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); 710221828Sgrehan if (himem_addr == (char *) MAP_FAILED) { 711221828Sgrehan lomem_sz = 0; 712221828Sgrehan himem_sz = 0; 713221828Sgrehan } 714221828Sgrehan } 715221828Sgrehan } 716221828Sgrehan 717221828Sgrehan init_inout(); 718221828Sgrehan init_pci(ctx); 719239045Sneel if (ioapic) 720239045Sneel ioapic_init(0); 721221828Sgrehan 722221828Sgrehan if (gdb_port != 0) 723221828Sgrehan init_dbgport(gdb_port); 724221828Sgrehan 725242192Sneel if (bvmcons) 726242192Sneel init_bvmcons(); 727242192Sneel 728221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 729221828Sgrehan assert(error == 0); 730221828Sgrehan 731221828Sgrehan if (inject_bkpt) { 732221828Sgrehan error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); 733221828Sgrehan assert(error == 0); 734221828Sgrehan } 735221828Sgrehan 736221828Sgrehan /* 737221828Sgrehan * build the guest tables, MP etc. 738221828Sgrehan */ 739242131Sgrehan mptable_build(ctx, guest_ncpus, ioapic); 740221828Sgrehan 741221828Sgrehan /* 742221828Sgrehan * Add CPU 0 743221828Sgrehan */ 744221828Sgrehan fbsdrun_addcpu(ctx, BSP, rip); 745221828Sgrehan 746221828Sgrehan /* 747221828Sgrehan * Head off to the main event dispatch loop 748221828Sgrehan */ 749221828Sgrehan mevent_dispatch(); 750221828Sgrehan 751221828Sgrehan exit(1); 752221828Sgrehan} 753