/* bhyverun.c revision 244167 */
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 
25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36221828Sgrehan#include <machine/segments.h> 37221828Sgrehan 38221828Sgrehan#include <stdio.h> 39221828Sgrehan#include <stdlib.h> 40221828Sgrehan#include <libgen.h> 41221828Sgrehan#include <unistd.h> 42221828Sgrehan#include <assert.h> 43221828Sgrehan#include <errno.h> 44221828Sgrehan#include <signal.h> 45221828Sgrehan#include <pthread.h> 46242404Sgrehan#include <pthread_np.h> 47221828Sgrehan 48221828Sgrehan#include <machine/vmm.h> 49221828Sgrehan#include <vmmapi.h> 50221828Sgrehan 51244167Sgrehan#include "bhyverun.h" 52243327Sgrehan#include "acpi.h" 53221828Sgrehan#include "inout.h" 54221828Sgrehan#include "dbgport.h" 55241744Sgrehan#include "mem.h" 56221828Sgrehan#include "mevent.h" 57242131Sgrehan#include "mptbl.h" 58221828Sgrehan#include "pci_emul.h" 59221828Sgrehan#include "xmsr.h" 60239045Sneel#include "ioapic.h" 61240912Sneel#include "spinup_ap.h" 62221828Sgrehan 63221828Sgrehan#define DEFAULT_GUEST_HZ 100 64221828Sgrehan#define DEFAULT_GUEST_TSLICE 200 65221828Sgrehan 66221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 67221828Sgrehan 68221828Sgrehan#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ 69221828Sgrehan#define VMEXIT_CONTINUE 1 /* continue from next instruction */ 70221828Sgrehan#define VMEXIT_RESTART 2 /* restart current instruction */ 71221828Sgrehan#define VMEXIT_ABORT 3 /* abort the vm run loop */ 72221828Sgrehan#define VMEXIT_RESET 4 /* guest machine has reset */ 73221828Sgrehan 74221828Sgrehan#define MB (1024UL * 1024) 75221828Sgrehan#define GB (1024UL * MB) 76221828Sgrehan 77221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 78221828Sgrehan 79221828Sgrehanint guest_tslice 
= DEFAULT_GUEST_TSLICE; 80221828Sgrehanint guest_hz = DEFAULT_GUEST_HZ; 81221828Sgrehanchar *vmname; 82221828Sgrehan 83221828Sgrehanu_long lomem_sz; 84221828Sgrehanu_long himem_sz; 85221828Sgrehan 86221828Sgrehanint guest_ncpus; 87221828Sgrehan 88221828Sgrehanstatic int pincpu = -1; 89221828Sgrehanstatic int guest_vcpu_mux; 90240943Sneelstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; 91221828Sgrehan 92221828Sgrehanstatic int foundcpus; 93221828Sgrehan 94222105Sgrehanstatic int strictio; 95222105Sgrehan 96243327Sgrehanstatic int acpi; 97243327Sgrehan 98221828Sgrehanstatic char *lomem_addr; 99221828Sgrehanstatic char *himem_addr; 100221828Sgrehan 101221828Sgrehanstatic char *progname; 102221828Sgrehanstatic const int BSP = 0; 103221828Sgrehan 104221828Sgrehanstatic int cpumask; 105221828Sgrehan 106221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 107221828Sgrehan 108221828Sgrehanstruct vm_exit vmexit[VM_MAXCPU]; 109221828Sgrehan 110221828Sgrehanstruct fbsdstats { 111221828Sgrehan uint64_t vmexit_bogus; 112221828Sgrehan uint64_t vmexit_bogus_switch; 113221828Sgrehan uint64_t vmexit_hlt; 114221828Sgrehan uint64_t vmexit_pause; 115221828Sgrehan uint64_t vmexit_mtrap; 116234761Sgrehan uint64_t vmexit_paging; 117221828Sgrehan uint64_t cpu_switch_rotate; 118221828Sgrehan uint64_t cpu_switch_direct; 119221828Sgrehan int io_reset; 120221828Sgrehan} stats; 121221828Sgrehan 122221828Sgrehanstruct mt_vmm_info { 123221828Sgrehan pthread_t mt_thr; 124221828Sgrehan struct vmctx *mt_ctx; 125221828Sgrehan int mt_vcpu; 126221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 127221828Sgrehan 128221828Sgrehanstatic void 129221828Sgrehanusage(int code) 130221828Sgrehan{ 131221828Sgrehan 132221828Sgrehan fprintf(stderr, 133243327Sgrehan "Usage: %s [-aehABHIP][-g <gdb port>][-z <hz>][-s <pci>]" 134239043Sneel "[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem] <vm>\n" 135240943Sneel " -a: local apic is in XAPIC mode (default is X2APIC)\n" 
136243327Sgrehan " -A: create an ACPI table\n" 137221828Sgrehan " -g: gdb port (default is %d and 0 means don't open)\n" 138221828Sgrehan " -c: # cpus (default 1)\n" 139221828Sgrehan " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" 140221828Sgrehan " -B: inject breakpoint exception on vm entry\n" 141221828Sgrehan " -H: vmexit from the guest on hlt\n" 142239043Sneel " -I: present an ioapic to the guest\n" 143221828Sgrehan " -P: vmexit from the guest on pause\n" 144222105Sgrehan " -e: exit on unhandled i/o access\n" 145221828Sgrehan " -h: help\n" 146221828Sgrehan " -z: guest hz (default is %d)\n" 147221828Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 148234938Sgrehan " -S: <slot,driver,configinfo> legacy PCI slot config\n" 149221828Sgrehan " -m: lowmem in MB\n" 150221828Sgrehan " -M: highmem in MB\n" 151221828Sgrehan " -x: mux vcpus to 1 hcpu\n" 152221828Sgrehan " -t: mux vcpu timeslice hz (default %d)\n", 153221828Sgrehan progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, 154221828Sgrehan DEFAULT_GUEST_TSLICE); 155221828Sgrehan exit(code); 156221828Sgrehan} 157221828Sgrehan 158221828Sgrehanvoid * 159221828Sgrehanpaddr_guest2host(uintptr_t gaddr) 160221828Sgrehan{ 161221828Sgrehan if (lomem_sz == 0) 162221828Sgrehan return (NULL); 163221828Sgrehan 164221828Sgrehan if (gaddr < lomem_sz) { 165221828Sgrehan return ((void *)(lomem_addr + gaddr)); 166221828Sgrehan } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { 167221828Sgrehan return ((void *)(himem_addr + gaddr - 4*GB)); 168221828Sgrehan } else 169221828Sgrehan return (NULL); 170221828Sgrehan} 171221828Sgrehan 172221828Sgrehanint 173240943Sneelfbsdrun_disable_x2apic(void) 174240943Sneel{ 175240943Sneel 176240943Sneel return (disable_x2apic); 177240943Sneel} 178240943Sneel 179240943Sneelint 180221828Sgrehanfbsdrun_vmexit_on_pause(void) 181221828Sgrehan{ 182221828Sgrehan 183221828Sgrehan return (guest_vmexit_on_pause); 184221828Sgrehan} 185221828Sgrehan 186221828Sgrehanint 
187221828Sgrehanfbsdrun_vmexit_on_hlt(void) 188221828Sgrehan{ 189221828Sgrehan 190221828Sgrehan return (guest_vmexit_on_hlt); 191221828Sgrehan} 192221828Sgrehan 193221828Sgrehanint 194221828Sgrehanfbsdrun_muxed(void) 195221828Sgrehan{ 196221828Sgrehan 197221828Sgrehan return (guest_vcpu_mux); 198221828Sgrehan} 199221828Sgrehan 200221942Sjhbstatic void * 201221828Sgrehanfbsdrun_start_thread(void *param) 202221828Sgrehan{ 203242404Sgrehan char tname[MAXCOMLEN + 1]; 204242404Sgrehan struct mt_vmm_info *mtp; 205221828Sgrehan int vcpu; 206221828Sgrehan 207242404Sgrehan mtp = param; 208221828Sgrehan vcpu = mtp->mt_vcpu; 209242404Sgrehan 210242404Sgrehan snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); 211242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 212242404Sgrehan 213221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 214221828Sgrehan 215221828Sgrehan /* not reached */ 216221828Sgrehan exit(1); 217221828Sgrehan return (NULL); 218221828Sgrehan} 219221828Sgrehan 220221828Sgrehanvoid 221221828Sgrehanfbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) 222221828Sgrehan{ 223221828Sgrehan int error; 224221828Sgrehan 225221828Sgrehan if (cpumask & (1 << vcpu)) { 226242385Sgrehan fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", 227242385Sgrehan vcpu); 228221828Sgrehan exit(1); 229221828Sgrehan } 230221828Sgrehan 231221828Sgrehan cpumask |= 1 << vcpu; 232221828Sgrehan foundcpus++; 233221828Sgrehan 234221828Sgrehan /* 235221828Sgrehan * Set up the vmexit struct to allow execution to start 236221828Sgrehan * at the given RIP 237221828Sgrehan */ 238221828Sgrehan vmexit[vcpu].rip = rip; 239221828Sgrehan vmexit[vcpu].inst_length = 0; 240221828Sgrehan 241221828Sgrehan if (vcpu == BSP || !guest_vcpu_mux){ 242221828Sgrehan mt_vmm_info[vcpu].mt_ctx = ctx; 243221828Sgrehan mt_vmm_info[vcpu].mt_vcpu = vcpu; 244221828Sgrehan 245221828Sgrehan error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, 246221828Sgrehan fbsdrun_start_thread, 
&mt_vmm_info[vcpu]); 247221828Sgrehan assert(error == 0); 248221828Sgrehan } 249221828Sgrehan} 250221828Sgrehan 251221828Sgrehanstatic int 252221828Sgrehanfbsdrun_get_next_cpu(int curcpu) 253221828Sgrehan{ 254221828Sgrehan 255221828Sgrehan /* 256221828Sgrehan * Get the next available CPU. Assumes they arrive 257221828Sgrehan * in ascending order with no gaps. 258221828Sgrehan */ 259221828Sgrehan return ((curcpu + 1) % foundcpus); 260221828Sgrehan} 261221828Sgrehan 262221942Sjhbstatic int 263221828Sgrehanvmexit_catch_reset(void) 264221828Sgrehan{ 265221828Sgrehan stats.io_reset++; 266221828Sgrehan return (VMEXIT_RESET); 267221828Sgrehan} 268221828Sgrehan 269221942Sjhbstatic int 270221828Sgrehanvmexit_catch_inout(void) 271221828Sgrehan{ 272221828Sgrehan return (VMEXIT_ABORT); 273221828Sgrehan} 274221828Sgrehan 275221942Sjhbstatic int 276221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 277221828Sgrehan uint32_t eax) 278221828Sgrehan{ 279221828Sgrehan#if PG_DEBUG /* put all types of debug here */ 280221828Sgrehan if (eax == 0) { 281221828Sgrehan pause_noswitch = 1; 282221828Sgrehan } else if (eax == 1) { 283221828Sgrehan pause_noswitch = 0; 284221828Sgrehan } else { 285221828Sgrehan pause_noswitch = 0; 286221828Sgrehan if (eax == 5) { 287221828Sgrehan vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); 288221828Sgrehan } 289221828Sgrehan } 290221828Sgrehan#endif 291221828Sgrehan return (VMEXIT_CONTINUE); 292221828Sgrehan} 293221828Sgrehan 294221828Sgrehanstatic int 295221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 296221828Sgrehan{ 297221828Sgrehan int error; 298221828Sgrehan int bytes, port, in, out; 299221828Sgrehan uint32_t eax; 300221828Sgrehan int vcpu; 301221828Sgrehan 302221828Sgrehan vcpu = *pvcpu; 303221828Sgrehan 304221828Sgrehan port = vme->u.inout.port; 305221828Sgrehan bytes = vme->u.inout.bytes; 306221828Sgrehan eax = vme->u.inout.eax; 307221828Sgrehan in = vme->u.inout.in; 
308221828Sgrehan out = !in; 309221828Sgrehan 310221828Sgrehan /* We don't deal with these */ 311221828Sgrehan if (vme->u.inout.string || vme->u.inout.rep) 312221828Sgrehan return (VMEXIT_ABORT); 313221828Sgrehan 314221828Sgrehan /* Special case of guest reset */ 315221828Sgrehan if (out && port == 0x64 && (uint8_t)eax == 0xFE) 316221828Sgrehan return (vmexit_catch_reset()); 317221828Sgrehan 318221828Sgrehan /* Extra-special case of host notifications */ 319221828Sgrehan if (out && port == GUEST_NIO_PORT) 320221828Sgrehan return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); 321221828Sgrehan 322222105Sgrehan error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); 323221828Sgrehan if (error == 0 && in) 324221828Sgrehan error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); 325221828Sgrehan 326221828Sgrehan if (error == 0) 327221828Sgrehan return (VMEXIT_CONTINUE); 328221828Sgrehan else { 329221828Sgrehan fprintf(stderr, "Unhandled %s%c 0x%04x\n", 330221828Sgrehan in ? "in" : "out", 331221828Sgrehan bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), port); 332221828Sgrehan return (vmexit_catch_inout()); 333221828Sgrehan } 334221828Sgrehan} 335221828Sgrehan 336221828Sgrehanstatic int 337221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 338221828Sgrehan{ 339242385Sgrehan fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, 340242385Sgrehan *pvcpu); 341221828Sgrehan return (VMEXIT_ABORT); 342221828Sgrehan} 343221828Sgrehan 344221828Sgrehanstatic int 345221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 346221828Sgrehan{ 347221828Sgrehan int newcpu; 348221828Sgrehan int retval = VMEXIT_CONTINUE; 349221828Sgrehan 350221828Sgrehan newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); 351221828Sgrehan 352221828Sgrehan if (guest_vcpu_mux && *pvcpu != newcpu) { 353221828Sgrehan retval = VMEXIT_SWITCH; 354221828Sgrehan *pvcpu = newcpu; 355221828Sgrehan } 356221828Sgrehan 357221828Sgrehan return (retval); 358221828Sgrehan} 359221828Sgrehan 360221828Sgrehanstatic int 361240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 362240912Sneel{ 363240912Sneel int newcpu; 364240912Sneel int retval = VMEXIT_CONTINUE; 365240912Sneel 366240912Sneel newcpu = spinup_ap(ctx, *pvcpu, 367240912Sneel vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 368240912Sneel 369240912Sneel if (guest_vcpu_mux && *pvcpu != newcpu) { 370240912Sneel retval = VMEXIT_SWITCH; 371240912Sneel *pvcpu = newcpu; 372240912Sneel } 373240912Sneel 374240912Sneel return (retval); 375240912Sneel} 376240912Sneel 377240912Sneelstatic int 378221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 379221828Sgrehan{ 380221828Sgrehan 381242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 382242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 383242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 384242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 385242385Sgrehan fprintf(stderr, 
"\terror\t\t%d\n", vmexit->u.vmx.error); 386242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 387242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 388242385Sgrehan vmexit->u.vmx.exit_qualification); 389221828Sgrehan 390221828Sgrehan return (VMEXIT_ABORT); 391221828Sgrehan} 392221828Sgrehan 393221828Sgrehanstatic int bogus_noswitch = 1; 394221828Sgrehan 395221828Sgrehanstatic int 396221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 397221828Sgrehan{ 398221828Sgrehan stats.vmexit_bogus++; 399221828Sgrehan 400221828Sgrehan if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { 401221828Sgrehan return (VMEXIT_RESTART); 402221828Sgrehan } else { 403221828Sgrehan stats.vmexit_bogus_switch++; 404221828Sgrehan vmexit->inst_length = 0; 405221828Sgrehan *pvcpu = -1; 406221828Sgrehan return (VMEXIT_SWITCH); 407221828Sgrehan } 408221828Sgrehan} 409221828Sgrehan 410221828Sgrehanstatic int 411221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 412221828Sgrehan{ 413221828Sgrehan stats.vmexit_hlt++; 414221828Sgrehan if (fbsdrun_muxed()) { 415221828Sgrehan *pvcpu = -1; 416221828Sgrehan return (VMEXIT_SWITCH); 417221828Sgrehan } else { 418221828Sgrehan /* 419221828Sgrehan * Just continue execution with the next instruction. We use 420221828Sgrehan * the HLT VM exit as a way to be friendly with the host 421221828Sgrehan * scheduler. 
422221828Sgrehan */ 423221828Sgrehan return (VMEXIT_CONTINUE); 424221828Sgrehan } 425221828Sgrehan} 426221828Sgrehan 427221828Sgrehanstatic int pause_noswitch; 428221828Sgrehan 429221828Sgrehanstatic int 430221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 431221828Sgrehan{ 432221828Sgrehan stats.vmexit_pause++; 433221828Sgrehan 434221828Sgrehan if (fbsdrun_muxed() && !pause_noswitch) { 435221828Sgrehan *pvcpu = -1; 436221828Sgrehan return (VMEXIT_SWITCH); 437221828Sgrehan } else { 438221828Sgrehan return (VMEXIT_CONTINUE); 439221828Sgrehan } 440221828Sgrehan} 441221828Sgrehan 442221828Sgrehanstatic int 443221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 444221828Sgrehan{ 445221828Sgrehan stats.vmexit_mtrap++; 446221828Sgrehan 447221828Sgrehan return (VMEXIT_RESTART); 448221828Sgrehan} 449221828Sgrehan 450234761Sgrehanstatic int 451234761Sgrehanvmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 452234761Sgrehan{ 453241744Sgrehan int err; 454234761Sgrehan stats.vmexit_paging++; 455234761Sgrehan 456243651Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, 457243640Sneel &vmexit->u.paging.vie); 458241744Sgrehan 459241744Sgrehan if (err) { 460241744Sgrehan if (err == EINVAL) { 461242385Sgrehan fprintf(stderr, 462242385Sgrehan "Failed to emulate instruction at 0x%lx\n", 463242385Sgrehan vmexit->rip); 464241744Sgrehan } else if (err == ESRCH) { 465242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 466242385Sgrehan vmexit->u.paging.gpa); 467241744Sgrehan } 468241744Sgrehan 469234761Sgrehan return (VMEXIT_ABORT); 470234761Sgrehan } 471234761Sgrehan 472234761Sgrehan return (VMEXIT_CONTINUE); 473234761Sgrehan} 474234761Sgrehan 475221828Sgrehanstatic void 476221828Sgrehansigalrm(int sig) 477221828Sgrehan{ 478221828Sgrehan return; 479221828Sgrehan} 480221828Sgrehan 481221828Sgrehanstatic void 482221828Sgrehansetup_timeslice(void) 483221828Sgrehan{ 484221828Sgrehan struct 
sigaction sa; 485221828Sgrehan struct itimerval itv; 486221828Sgrehan int error; 487221828Sgrehan 488221828Sgrehan /* 489221828Sgrehan * Setup a realtime timer to generate a SIGALRM at a 490221828Sgrehan * frequency of 'guest_tslice' ticks per second. 491221828Sgrehan */ 492221828Sgrehan sigemptyset(&sa.sa_mask); 493221828Sgrehan sa.sa_flags = 0; 494221828Sgrehan sa.sa_handler = sigalrm; 495221828Sgrehan 496221828Sgrehan error = sigaction(SIGALRM, &sa, NULL); 497221828Sgrehan assert(error == 0); 498221828Sgrehan 499221828Sgrehan itv.it_interval.tv_sec = 0; 500221828Sgrehan itv.it_interval.tv_usec = 1000000 / guest_tslice; 501221828Sgrehan itv.it_value.tv_sec = 0; 502221828Sgrehan itv.it_value.tv_usec = 1000000 / guest_tslice; 503221828Sgrehan 504221828Sgrehan error = setitimer(ITIMER_REAL, &itv, NULL); 505221828Sgrehan assert(error == 0); 506221828Sgrehan} 507221828Sgrehan 508221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 509234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 510234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 511234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 512234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 513234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 514234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 515240912Sneel [VM_EXITCODE_PAGING] = vmexit_paging, 516240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 517221828Sgrehan}; 518221828Sgrehan 519221828Sgrehanstatic void 520221828Sgrehanvm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) 521221828Sgrehan{ 522221828Sgrehan int error, rc, prevcpu; 523221828Sgrehan 524221828Sgrehan if (guest_vcpu_mux) 525221828Sgrehan setup_timeslice(); 526221828Sgrehan 527221828Sgrehan if (pincpu >= 0) { 528221828Sgrehan error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); 529221828Sgrehan assert(error == 0); 530221828Sgrehan } 531221828Sgrehan 532221828Sgrehan while (1) { 533221828Sgrehan error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); 534241490Sneel if (error != 0) { 535241490Sneel /* 
536241490Sneel * It is possible that 'vmmctl' or some other process 537241490Sneel * has transitioned the vcpu to CANNOT_RUN state right 538241490Sneel * before we tried to transition it to RUNNING. 539241490Sneel * 540241490Sneel * This is expected to be temporary so just retry. 541241490Sneel */ 542241490Sneel if (errno == EBUSY) 543241490Sneel continue; 544241490Sneel else 545241490Sneel break; 546241490Sneel } 547221828Sgrehan 548221828Sgrehan prevcpu = vcpu; 549221828Sgrehan rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], 550221828Sgrehan &vcpu); 551221828Sgrehan switch (rc) { 552221828Sgrehan case VMEXIT_SWITCH: 553221828Sgrehan assert(guest_vcpu_mux); 554221828Sgrehan if (vcpu == -1) { 555221828Sgrehan stats.cpu_switch_rotate++; 556221828Sgrehan vcpu = fbsdrun_get_next_cpu(prevcpu); 557221828Sgrehan } else { 558221828Sgrehan stats.cpu_switch_direct++; 559221828Sgrehan } 560221828Sgrehan /* fall through */ 561221828Sgrehan case VMEXIT_CONTINUE: 562221828Sgrehan rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; 563221828Sgrehan break; 564221828Sgrehan case VMEXIT_RESTART: 565221828Sgrehan rip = vmexit[vcpu].rip; 566221828Sgrehan break; 567221828Sgrehan case VMEXIT_RESET: 568221828Sgrehan exit(0); 569221828Sgrehan default: 570221828Sgrehan exit(1); 571221828Sgrehan } 572221828Sgrehan } 573221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 574221828Sgrehan} 575221828Sgrehan 576221828Sgrehan 577221828Sgrehanint 578221828Sgrehanmain(int argc, char *argv[]) 579221828Sgrehan{ 580242192Sneel int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; 581221828Sgrehan struct vmctx *ctx; 582221828Sgrehan uint64_t rip; 583221828Sgrehan 584242192Sneel bvmcons = 0; 585221828Sgrehan inject_bkpt = 0; 586221828Sgrehan progname = basename(argv[0]); 587221828Sgrehan gdb_port = DEFAULT_GDB_PORT; 588221828Sgrehan guest_ncpus = 1; 589239043Sneel ioapic = 0; 590221828Sgrehan 591243327Sgrehan while ((c = getopt(argc, argv, 
"abehABHIPxp:g:c:z:s:S:n:m:M:")) != -1) { 592221828Sgrehan switch (c) { 593240943Sneel case 'a': 594240943Sneel disable_x2apic = 1; 595240943Sneel break; 596243327Sgrehan case 'A': 597243327Sgrehan acpi = 1; 598243327Sgrehan break; 599242192Sneel case 'b': 600242192Sneel bvmcons = 1; 601242192Sneel break; 602221828Sgrehan case 'B': 603221828Sgrehan inject_bkpt = 1; 604221828Sgrehan break; 605221828Sgrehan case 'x': 606221828Sgrehan guest_vcpu_mux = 1; 607221828Sgrehan break; 608221828Sgrehan case 'p': 609221828Sgrehan pincpu = atoi(optarg); 610221828Sgrehan break; 611221828Sgrehan case 'c': 612221828Sgrehan guest_ncpus = atoi(optarg); 613221828Sgrehan break; 614221828Sgrehan case 'g': 615221828Sgrehan gdb_port = atoi(optarg); 616221828Sgrehan break; 617221828Sgrehan case 'z': 618221828Sgrehan guest_hz = atoi(optarg); 619221828Sgrehan break; 620221828Sgrehan case 't': 621221828Sgrehan guest_tslice = atoi(optarg); 622221828Sgrehan break; 623221828Sgrehan case 's': 624234938Sgrehan pci_parse_slot(optarg, 0); 625221828Sgrehan break; 626234938Sgrehan case 'S': 627234938Sgrehan pci_parse_slot(optarg, 1); 628234938Sgrehan break; 629221828Sgrehan case 'm': 630221828Sgrehan lomem_sz = strtoul(optarg, NULL, 0) * MB; 631221828Sgrehan break; 632221828Sgrehan case 'M': 633221828Sgrehan himem_sz = strtoul(optarg, NULL, 0) * MB; 634221828Sgrehan break; 635221828Sgrehan case 'H': 636221828Sgrehan guest_vmexit_on_hlt = 1; 637221828Sgrehan break; 638239043Sneel case 'I': 639239043Sneel ioapic = 1; 640239043Sneel break; 641221828Sgrehan case 'P': 642221828Sgrehan guest_vmexit_on_pause = 1; 643221828Sgrehan break; 644222105Sgrehan case 'e': 645222105Sgrehan strictio = 1; 646222105Sgrehan break; 647221828Sgrehan case 'h': 648221828Sgrehan usage(0); 649221828Sgrehan default: 650221828Sgrehan usage(1); 651221828Sgrehan } 652221828Sgrehan } 653221828Sgrehan argc -= optind; 654221828Sgrehan argv += optind; 655221828Sgrehan 656221828Sgrehan if (argc != 1) 657221828Sgrehan usage(1); 
658221828Sgrehan 659221828Sgrehan /* No need to mux if guest is uni-processor */ 660221828Sgrehan if (guest_ncpus <= 1) 661221828Sgrehan guest_vcpu_mux = 0; 662221828Sgrehan 663242385Sgrehan if (guest_ncpus > VM_MAXCPU) { 664242385Sgrehan fprintf(stderr, "%d vCPUs requested, max %d\n", 665242385Sgrehan guest_ncpus, VM_MAXCPU); 666242385Sgrehan exit(1); 667242385Sgrehan } 668242385Sgrehan 669221828Sgrehan /* vmexit on hlt if guest is muxed */ 670221828Sgrehan if (guest_vcpu_mux) { 671221828Sgrehan guest_vmexit_on_hlt = 1; 672221828Sgrehan guest_vmexit_on_pause = 1; 673221828Sgrehan } 674221828Sgrehan 675221828Sgrehan vmname = argv[0]; 676221828Sgrehan 677221828Sgrehan ctx = vm_open(vmname); 678221828Sgrehan if (ctx == NULL) { 679221828Sgrehan perror("vm_open"); 680221828Sgrehan exit(1); 681221828Sgrehan } 682221828Sgrehan 683221828Sgrehan if (fbsdrun_vmexit_on_hlt()) { 684221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); 685221828Sgrehan if (err < 0) { 686242385Sgrehan fprintf(stderr, "VM exit on HLT not supported\n"); 687221828Sgrehan exit(1); 688221828Sgrehan } 689221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); 690221828Sgrehan handler[VM_EXITCODE_HLT] = vmexit_hlt; 691221828Sgrehan } 692221828Sgrehan 693221828Sgrehan if (fbsdrun_vmexit_on_pause()) { 694221828Sgrehan /* 695221828Sgrehan * pause exit support required for this mode 696221828Sgrehan */ 697221828Sgrehan err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); 698221828Sgrehan if (err < 0) { 699242385Sgrehan fprintf(stderr, 700242385Sgrehan "SMP mux requested, no pause support\n"); 701221828Sgrehan exit(1); 702221828Sgrehan } 703221828Sgrehan vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); 704221828Sgrehan handler[VM_EXITCODE_PAUSE] = vmexit_pause; 705221828Sgrehan } 706221828Sgrehan 707240943Sneel if (fbsdrun_disable_x2apic()) 708240943Sneel err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); 709240943Sneel else 710240943Sneel err = 
vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); 711240943Sneel 712240943Sneel if (err) { 713242385Sgrehan fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 714240943Sneel exit(1); 715240943Sneel } 716240943Sneel 717221828Sgrehan if (lomem_sz != 0) { 718221828Sgrehan lomem_addr = vm_map_memory(ctx, 0, lomem_sz); 719221828Sgrehan if (lomem_addr == (char *) MAP_FAILED) { 720221828Sgrehan lomem_sz = 0; 721221828Sgrehan } else if (himem_sz != 0) { 722221828Sgrehan himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); 723221828Sgrehan if (himem_addr == (char *) MAP_FAILED) { 724221828Sgrehan lomem_sz = 0; 725221828Sgrehan himem_sz = 0; 726221828Sgrehan } 727221828Sgrehan } 728221828Sgrehan } 729221828Sgrehan 730221828Sgrehan init_inout(); 731221828Sgrehan init_pci(ctx); 732239045Sneel if (ioapic) 733239045Sneel ioapic_init(0); 734221828Sgrehan 735221828Sgrehan if (gdb_port != 0) 736221828Sgrehan init_dbgport(gdb_port); 737221828Sgrehan 738242192Sneel if (bvmcons) 739242192Sneel init_bvmcons(); 740242192Sneel 741221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 742221828Sgrehan assert(error == 0); 743221828Sgrehan 744221828Sgrehan if (inject_bkpt) { 745221828Sgrehan error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); 746221828Sgrehan assert(error == 0); 747221828Sgrehan } 748221828Sgrehan 749221828Sgrehan /* 750221828Sgrehan * build the guest tables, MP etc. 
751221828Sgrehan */ 752242131Sgrehan mptable_build(ctx, guest_ncpus, ioapic); 753221828Sgrehan 754243327Sgrehan if (acpi) { 755243327Sgrehan error = acpi_build(ctx, guest_ncpus, ioapic); 756243327Sgrehan assert(error == 0); 757243327Sgrehan } 758243327Sgrehan 759221828Sgrehan /* 760221828Sgrehan * Add CPU 0 761221828Sgrehan */ 762221828Sgrehan fbsdrun_addcpu(ctx, BSP, rip); 763221828Sgrehan 764221828Sgrehan /* 765221828Sgrehan * Head off to the main event dispatch loop 766221828Sgrehan */ 767221828Sgrehan mevent_dispatch(); 768221828Sgrehan 769221828Sgrehan exit(1); 770221828Sgrehan} 771