vmmapi.c revision 239025
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/types.h> 33#include <sys/sysctl.h> 34#include <sys/ioctl.h> 35#include <sys/mman.h> 36 37#include <machine/specialreg.h> 38 39#include <stdio.h> 40#include <stdlib.h> 41#include <assert.h> 42#include <string.h> 43#include <fcntl.h> 44#include <unistd.h> 45 46#include <machine/vmm.h> 47#include <machine/vmm_dev.h> 48 49#include "vmmapi.h" 50#include "mptable.h" 51 52#define BIOS_ROM_BASE (0xf0000) 53#define BIOS_ROM_SIZE (0x10000) 54 55struct vmctx { 56 int fd; 57 char *name; 58}; 59 60#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) 61#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) 62 63static int 64vm_device_open(const char *name) 65{ 66 int fd, len; 67 char *vmfile; 68 69 len = strlen("/dev/vmm/") + strlen(name) + 1; 70 vmfile = malloc(len); 71 assert(vmfile != NULL); 72 snprintf(vmfile, len, "/dev/vmm/%s", name); 73 74 /* Open the device file */ 75 fd = open(vmfile, O_RDWR, 0); 76 77 free(vmfile); 78 return (fd); 79} 80 81int 82vm_create(const char *name) 83{ 84 85 return (CREATE((char *)name)); 86} 87 88struct vmctx * 89vm_open(const char *name) 90{ 91 struct vmctx *vm; 92 93 vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); 94 assert(vm != NULL); 95 96 vm->fd = -1; 97 vm->name = (char *)(vm + 1); 98 strcpy(vm->name, name); 99 100 if ((vm->fd = vm_device_open(vm->name)) < 0) 101 goto err; 102 103 return (vm); 104err: 105 vm_destroy(vm); 106 return (NULL); 107} 108 109void 110vm_destroy(struct vmctx *vm) 111{ 112 assert(vm != NULL); 113 114 DESTROY(vm->name); 115 if (vm->fd >= 0) 116 close(vm->fd); 117 free(vm); 118} 119 120int 121vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, 122 vm_paddr_t *ret_hpa, size_t *ret_len) 123{ 124 int error; 125 struct vm_memory_segment seg; 126 127 bzero(&seg, sizeof(seg)); 128 seg.gpa = gpa; 129 error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); 130 *ret_hpa = seg.hpa; 131 *ret_len = seg.len; 132 return (error); 133} 134 135int 136vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr) 137{ 138 int error; 139 struct vm_memory_segment seg; 140 141 /* 142 * Create and optionally map 'len' bytes of memory at guest 143 * physical address 'gpa' 144 */ 145 bzero(&seg, sizeof(seg)); 146 seg.gpa = gpa; 147 seg.len = len; 148 error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); 149 if (error == 0 && mapaddr != NULL) { 150 *mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 151 ctx->fd, gpa); 152 } 153 return (error); 154} 155 156char * 157vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len) 158{ 159 160 /* Map 'len' bytes of memory at guest physical address 'gpa' */ 161 return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 162 ctx->fd, gpa)); 163} 164 165int 166vm_set_desc(struct vmctx *ctx, int vcpu, int reg, 167 uint64_t base, uint32_t limit, uint32_t access) 168{ 169 int error; 170 struct vm_seg_desc vmsegdesc; 171 172 bzero(&vmsegdesc, sizeof(vmsegdesc)); 173 vmsegdesc.cpuid = vcpu; 174 vmsegdesc.regnum = reg; 175 vmsegdesc.desc.base = base; 176 vmsegdesc.desc.limit = limit; 177 vmsegdesc.desc.access = access; 178 179 error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); 180 return (error); 181} 182 183int 184vm_get_desc(struct vmctx *ctx, int vcpu, int reg, 185 uint64_t *base, uint32_t *limit, uint32_t *access) 186{ 187 int error; 188 struct vm_seg_desc vmsegdesc; 189 190 bzero(&vmsegdesc, sizeof(vmsegdesc)); 191 vmsegdesc.cpuid = vcpu; 192 vmsegdesc.regnum = reg; 193 194 error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); 195 if (error == 0) { 196 *base = vmsegdesc.desc.base; 197 *limit = vmsegdesc.desc.limit; 198 *access = vmsegdesc.desc.access; 199 } 200 return (error); 201} 202 203int 204vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) 205{ 206 int error; 207 struct vm_register vmreg; 208 209 bzero(&vmreg, sizeof(vmreg)); 210 vmreg.cpuid = vcpu; 211 vmreg.regnum = reg; 212 vmreg.regval = val; 213 214 error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); 215 return (error); 216} 217 218int 219vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) 220{ 221 int error; 222 struct vm_register vmreg; 223 224 bzero(&vmreg, sizeof(vmreg)); 225 vmreg.cpuid = vcpu; 226 vmreg.regnum = reg; 227 228 error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); 229 *ret_val = vmreg.regval; 230 return (error); 231} 232 233int 234vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid) 235{ 236 int error; 237 struct vm_pin vmpin; 238 239 bzero(&vmpin, sizeof(vmpin)); 240 vmpin.vm_cpuid = vcpu; 241 242 error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin); 243 *host_cpuid = vmpin.host_cpuid; 244 return (error); 245} 246 247int 248vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid) 249{ 250 int error; 251 struct vm_pin vmpin; 252 253 bzero(&vmpin, sizeof(vmpin)); 254 vmpin.vm_cpuid = vcpu; 255 vmpin.host_cpuid = host_cpuid; 256 257 error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin); 258 return (error); 259} 260 261int 262vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) 263{ 264 int error; 265 struct vm_run vmrun; 266 267 bzero(&vmrun, sizeof(vmrun)); 268 vmrun.cpuid = vcpu; 269 vmrun.rip = rip; 270 271 error = ioctl(ctx->fd, VM_RUN, &vmrun); 272 bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); 273 return (error); 274} 275 276static int 277vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, 278 int vector, int error_code, int error_code_valid) 279{ 280 struct vm_event ev; 281 282 bzero(&ev, sizeof(ev)); 283 ev.cpuid = vcpu; 284 ev.type = type; 285 ev.vector = vector; 286 ev.error_code = error_code; 287 ev.error_code_valid = error_code_valid; 288 289 return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); 290} 291 292int 293vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, 294 int vector) 295{ 296 297 return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); 298} 299 300int 301vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, 302 int vector, int error_code) 303{ 304 305 return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); 306} 307 308int 309vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz) 310{ 311 312 return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu, 313 oemtbl, oemtblsz)); 314} 315 316int 317vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) 318{ 319 struct vm_lapic_irq vmirq; 320 321 bzero(&vmirq, sizeof(vmirq)); 322 vmirq.cpuid = vcpu; 323 vmirq.vector = vector; 324 325 return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); 326} 327 328int 329vm_inject_nmi(struct vmctx *ctx, int vcpu) 330{ 331 struct vm_nmi vmnmi; 332 333 bzero(&vmnmi, sizeof(vmnmi)); 334 vmnmi.cpuid = vcpu; 335 336 return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); 337} 338 339int 340vm_capability_name2type(const char *capname) 341{ 342 int i; 343 344 static struct { 345 const char *name; 346 int type; 347 } capstrmap[] = { 348 { "hlt_exit", VM_CAP_HALT_EXIT }, 349 { "mtrap_exit", VM_CAP_MTRAP_EXIT }, 350 { "pause_exit", VM_CAP_PAUSE_EXIT }, 351 { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, 352 { 0 } 353 }; 354 355 for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { 356 if (strcmp(capstrmap[i].name, capname) == 0) 357 return (capstrmap[i].type); 358 } 359 360 return (-1); 361} 362 363int 364vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, 365 int *retval) 366{ 367 int error; 368 struct vm_capability vmcap; 369 370 bzero(&vmcap, sizeof(vmcap)); 371 vmcap.cpuid = vcpu; 372 vmcap.captype = cap; 373 374 error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); 375 *retval = vmcap.capval; 376 return (error); 377} 378 379int 380vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) 381{ 382 struct vm_capability vmcap; 383 384 bzero(&vmcap, sizeof(vmcap)); 385 vmcap.cpuid = vcpu; 386 vmcap.captype = cap; 387 vmcap.capval = val; 388 389 return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); 390} 391 392int 393vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 394{ 395 struct vm_pptdev pptdev; 396 397 bzero(&pptdev, sizeof(pptdev)); 398 pptdev.bus = bus; 399 pptdev.slot = slot; 400 pptdev.func = func; 401 402 return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); 403} 404 405int 406vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 407{ 408 struct vm_pptdev pptdev; 409 410 bzero(&pptdev, sizeof(pptdev)); 411 pptdev.bus = bus; 412 pptdev.slot = slot; 413 pptdev.func = func; 414 415 return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); 416} 417 418int 419vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, 420 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 421{ 422 struct vm_pptdev_mmio pptmmio; 423 424 bzero(&pptmmio, sizeof(pptmmio)); 425 pptmmio.bus = bus; 426 pptmmio.slot = slot; 427 pptmmio.func = func; 428 pptmmio.gpa = gpa; 429 pptmmio.len = len; 430 pptmmio.hpa = hpa; 431 432 return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); 433} 434 435int 436vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 437 int destcpu, int vector, int numvec) 438{ 439 struct vm_pptdev_msi pptmsi; 440 441 bzero(&pptmsi, sizeof(pptmsi)); 442 pptmsi.vcpu = vcpu; 443 pptmsi.bus = bus; 444 pptmsi.slot = slot; 445 pptmsi.func = func; 446 pptmsi.destcpu = destcpu; 447 pptmsi.vector = vector; 448 pptmsi.numvec = numvec; 449 450 return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); 451} 452 453int 454vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 455 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) 456{ 457 struct vm_pptdev_msix pptmsix; 458 459 bzero(&pptmsix, sizeof(pptmsix)); 460 pptmsix.vcpu = vcpu; 461 pptmsix.bus = bus; 462 pptmsix.slot = slot; 463 pptmsix.func = func; 464 pptmsix.idx = idx; 465 pptmsix.msg = msg; 466 pptmsix.addr = addr; 467 pptmsix.vector_control = vector_control; 468 469 return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); 470} 471 472uint64_t * 473vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, 474 int *ret_entries) 475{ 476 int error; 477 478 static struct vm_stats vmstats; 479 480 vmstats.cpuid = vcpu; 481 482 error = ioctl(ctx->fd, VM_STATS, &vmstats); 483 if (error == 0) { 484 if (ret_entries) 485 *ret_entries = vmstats.num_entries; 486 if (ret_tv) 487 *ret_tv = vmstats.tv; 488 return (vmstats.statbuf); 489 } else 490 return (NULL); 491} 492 493const char * 494vm_get_stat_desc(struct vmctx *ctx, int index) 495{ 496 static struct vm_stat_desc statdesc; 497 498 statdesc.index = index; 499 if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) 500 return (statdesc.desc); 501 else 502 return (NULL); 503} 504 505/* 506 * From Intel Vol 3a: 507 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT 508 */ 509int 510vcpu_reset(struct vmctx *vmctx, int vcpu) 511{ 512 int error; 513 uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; 514 uint32_t desc_access, desc_limit; 515 uint16_t sel; 516 517 zero = 0; 518 519 rflags = 0x2; 520 error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); 521 if (error) 522 goto done; 523 524 rip = 0xfff0; 525 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) 526 goto done; 527 528 cr0 = CR0_NE; 529 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) 530 goto done; 531 532 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) 533 goto done; 534 535 cr4 = 0; 536 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) 537 goto done; 538 539 /* 540 * CS: present, r/w, accessed, 16-bit, byte granularity, usable 541 */ 542 desc_base = 0xffff0000; 543 desc_limit = 0xffff; 544 desc_access = 0x0093; 545 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, 546 desc_base, desc_limit, desc_access); 547 if (error) 548 goto done; 549 550 sel = 0xf000; 551 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) 552 goto done; 553 554 /* 555 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity 556 */ 557 desc_base = 0; 558 desc_limit = 0xffff; 559 desc_access = 0x0093; 560 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, 561 desc_base, desc_limit, desc_access); 562 if (error) 563 goto done; 564 565 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, 566 desc_base, desc_limit, desc_access); 567 if (error) 568 goto done; 569 570 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, 571 desc_base, desc_limit, desc_access); 572 if (error) 573 goto done; 574 575 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, 576 desc_base, desc_limit, desc_access); 577 if (error) 578 goto done; 579 580 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, 581 desc_base, desc_limit, desc_access); 582 if (error) 583 goto done; 584 585 sel = 0; 586 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) 587 goto done; 588 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) 589 goto done; 590 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) 591 goto done; 592 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) 593 goto done; 594 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) 595 goto done; 596 597 /* General purpose registers */ 598 rdx = 0xf00; 599 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) 600 goto done; 601 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) 602 goto done; 603 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) 604 goto done; 605 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) 606 goto done; 607 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) 608 goto done; 609 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) 610 goto done; 611 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) 612 goto done; 613 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) 614 goto done; 615 616 /* GDTR, IDTR */ 617 desc_base = 0; 618 desc_limit = 0xffff; 619 desc_access = 0; 620 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, 621 desc_base, desc_limit, desc_access); 622 if (error != 0) 623 goto done; 624 625 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, 626 desc_base, desc_limit, desc_access); 627 if (error != 0) 628 goto done; 629 630 /* TR */ 631 desc_base = 0; 632 desc_limit = 0xffff; 633 desc_access = 0x0000008b; 634 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); 635 if (error) 636 goto done; 637 638 sel = 0; 639 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) 640 goto done; 641 642 /* LDTR */ 643 desc_base = 0; 644 desc_limit = 0xffff; 645 desc_access = 0x00000082; 646 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, 647 desc_limit, desc_access); 648 if (error) 649 goto done; 650 651 sel = 0; 652 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) 653 goto done; 654 655 /* XXX cr2, debug registers */ 656 657 error = 0; 658done: 659 return (error); 660} 661