vmmapi.c revision 245652
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/types.h> 33#include <sys/sysctl.h> 34#include <sys/ioctl.h> 35#include <sys/mman.h> 36 37#include <machine/specialreg.h> 38 39#include <stdio.h> 40#include <stdlib.h> 41#include <assert.h> 42#include <string.h> 43#include <fcntl.h> 44#include <unistd.h> 45 46#include <machine/vmm.h> 47#include <machine/vmm_dev.h> 48 49#include "vmmapi.h" 50 51struct vmctx { 52 int fd; 53 char *name; 54}; 55 56#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) 57#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) 58 59static int 60vm_device_open(const char *name) 61{ 62 int fd, len; 63 char *vmfile; 64 65 len = strlen("/dev/vmm/") + strlen(name) + 1; 66 vmfile = malloc(len); 67 assert(vmfile != NULL); 68 snprintf(vmfile, len, "/dev/vmm/%s", name); 69 70 /* Open the device file */ 71 fd = open(vmfile, O_RDWR, 0); 72 73 free(vmfile); 74 return (fd); 75} 76 77int 78vm_create(const char *name) 79{ 80 81 return (CREATE((char *)name)); 82} 83 84struct vmctx * 85vm_open(const char *name) 86{ 87 struct vmctx *vm; 88 89 vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); 90 assert(vm != NULL); 91 92 vm->fd = -1; 93 vm->name = (char *)(vm + 1); 94 strcpy(vm->name, name); 95 96 if ((vm->fd = vm_device_open(vm->name)) < 0) 97 goto err; 98 99 return (vm); 100err: 101 vm_destroy(vm); 102 return (NULL); 103} 104 105void 106vm_destroy(struct vmctx *vm) 107{ 108 assert(vm != NULL); 109 110 if (vm->fd >= 0) 111 close(vm->fd); 112 DESTROY(vm->name); 113 114 free(vm); 115} 116 117size_t 118vmm_get_mem_total(void) 119{ 120 size_t mem_total = 0; 121 size_t oldlen = sizeof(mem_total); 122 int error; 123 error = sysctlbyname("hw.vmm.mem_total", &mem_total, &oldlen, NULL, 0); 124 if (error) 125 return -1; 126 return mem_total; 127} 128 129size_t 130vmm_get_mem_free(void) 131{ 132 size_t mem_free = 0; 133 size_t oldlen = sizeof(mem_free); 134 int error; 135 error = sysctlbyname("hw.vmm.mem_free", &mem_free, &oldlen, NULL, 0); 136 if (error) 137 return -1; 138 return mem_free; 139} 140 141int 142vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len) 143{ 144 int error; 145 struct vm_memory_segment seg; 146 147 bzero(&seg, sizeof(seg)); 148 seg.gpa = gpa; 149 error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); 150 *ret_len = seg.len; 151 return (error); 152} 153 154int 155vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr) 156{ 157 int error; 158 struct vm_memory_segment seg; 159 160 /* 161 * Create and optionally map 'len' bytes of memory at guest 162 * physical address 'gpa' 163 */ 164 bzero(&seg, sizeof(seg)); 165 seg.gpa = gpa; 166 seg.len = len; 167 error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); 168 if (error == 0 && mapaddr != NULL) { 169 *mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 170 ctx->fd, gpa); 171 } 172 return (error); 173} 174 175char * 176vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len) 177{ 178 179 /* Map 'len' bytes of memory at guest physical address 'gpa' */ 180 return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 181 ctx->fd, gpa)); 182} 183 184int 185vm_set_desc(struct vmctx *ctx, int vcpu, int reg, 186 uint64_t base, uint32_t limit, uint32_t access) 187{ 188 int error; 189 struct vm_seg_desc vmsegdesc; 190 191 bzero(&vmsegdesc, sizeof(vmsegdesc)); 192 vmsegdesc.cpuid = vcpu; 193 vmsegdesc.regnum = reg; 194 vmsegdesc.desc.base = base; 195 vmsegdesc.desc.limit = limit; 196 vmsegdesc.desc.access = access; 197 198 error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); 199 return (error); 200} 201 202int 203vm_get_desc(struct vmctx *ctx, int vcpu, int reg, 204 uint64_t *base, uint32_t *limit, uint32_t *access) 205{ 206 int error; 207 struct vm_seg_desc vmsegdesc; 208 209 bzero(&vmsegdesc, sizeof(vmsegdesc)); 210 vmsegdesc.cpuid = vcpu; 211 vmsegdesc.regnum = reg; 212 213 error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); 214 if (error == 0) { 215 *base = vmsegdesc.desc.base; 216 *limit = vmsegdesc.desc.limit; 217 *access = vmsegdesc.desc.access; 218 } 219 return (error); 220} 221 222int 223vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) 224{ 225 int error; 226 struct vm_register vmreg; 227 228 bzero(&vmreg, sizeof(vmreg)); 229 vmreg.cpuid = vcpu; 230 vmreg.regnum = reg; 231 vmreg.regval = val; 232 233 error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); 234 return (error); 235} 236 237int 238vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) 239{ 240 int error; 241 struct vm_register vmreg; 242 243 bzero(&vmreg, sizeof(vmreg)); 244 vmreg.cpuid = vcpu; 245 vmreg.regnum = reg; 246 247 error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); 248 *ret_val = vmreg.regval; 249 return (error); 250} 251 252int 253vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid) 254{ 255 int error; 256 struct vm_pin vmpin; 257 258 bzero(&vmpin, sizeof(vmpin)); 259 vmpin.vm_cpuid = vcpu; 260 261 error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin); 262 *host_cpuid = vmpin.host_cpuid; 263 return (error); 264} 265 266int 267vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid) 268{ 269 int error; 270 struct vm_pin vmpin; 271 272 bzero(&vmpin, sizeof(vmpin)); 273 vmpin.vm_cpuid = vcpu; 274 vmpin.host_cpuid = host_cpuid; 275 276 error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin); 277 return (error); 278} 279 280int 281vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) 282{ 283 int error; 284 struct vm_run vmrun; 285 286 bzero(&vmrun, sizeof(vmrun)); 287 vmrun.cpuid = vcpu; 288 vmrun.rip = rip; 289 290 error = ioctl(ctx->fd, VM_RUN, &vmrun); 291 bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); 292 return (error); 293} 294 295static int 296vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, 297 int vector, int error_code, int error_code_valid) 298{ 299 struct vm_event ev; 300 301 bzero(&ev, sizeof(ev)); 302 ev.cpuid = vcpu; 303 ev.type = type; 304 ev.vector = vector; 305 ev.error_code = error_code; 306 ev.error_code_valid = error_code_valid; 307 308 return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); 309} 310 311int 312vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, 313 int vector) 314{ 315 316 return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); 317} 318 319int 320vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, 321 int vector, int error_code) 322{ 323 324 return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); 325} 326 327int 328vm_apicid2vcpu(struct vmctx *ctx, int apicid) 329{ 330 /* 331 * The apic id associated with the 'vcpu' has the same numerical value 332 * as the 'vcpu' itself. 333 */ 334 return (apicid); 335} 336 337int 338vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) 339{ 340 struct vm_lapic_irq vmirq; 341 342 bzero(&vmirq, sizeof(vmirq)); 343 vmirq.cpuid = vcpu; 344 vmirq.vector = vector; 345 346 return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); 347} 348 349int 350vm_inject_nmi(struct vmctx *ctx, int vcpu) 351{ 352 struct vm_nmi vmnmi; 353 354 bzero(&vmnmi, sizeof(vmnmi)); 355 vmnmi.cpuid = vcpu; 356 357 return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); 358} 359 360static struct { 361 const char *name; 362 int type; 363} capstrmap[] = { 364 { "hlt_exit", VM_CAP_HALT_EXIT }, 365 { "mtrap_exit", VM_CAP_MTRAP_EXIT }, 366 { "pause_exit", VM_CAP_PAUSE_EXIT }, 367 { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, 368 { 0 } 369}; 370 371int 372vm_capability_name2type(const char *capname) 373{ 374 int i; 375 376 for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { 377 if (strcmp(capstrmap[i].name, capname) == 0) 378 return (capstrmap[i].type); 379 } 380 381 return (-1); 382} 383 384const char * 385vm_capability_type2name(int type) 386{ 387 int i; 388 389 for (i = 0; capstrmap[i].name != NULL; i++) { 390 if (capstrmap[i].type == type) 391 return (capstrmap[i].name); 392 } 393 394 return (NULL); 395} 396 397int 398vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, 399 int *retval) 400{ 401 int error; 402 struct vm_capability vmcap; 403 404 bzero(&vmcap, sizeof(vmcap)); 405 vmcap.cpuid = vcpu; 406 vmcap.captype = cap; 407 408 error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); 409 *retval = vmcap.capval; 410 return (error); 411} 412 413int 414vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) 415{ 416 struct vm_capability vmcap; 417 418 bzero(&vmcap, sizeof(vmcap)); 419 vmcap.cpuid = vcpu; 420 vmcap.captype = cap; 421 vmcap.capval = val; 422 423 return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); 424} 425 426int 427vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 428{ 429 struct vm_pptdev pptdev; 430 431 bzero(&pptdev, sizeof(pptdev)); 432 pptdev.bus = bus; 433 pptdev.slot = slot; 434 pptdev.func = func; 435 436 return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); 437} 438 439int 440vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 441{ 442 struct vm_pptdev pptdev; 443 444 bzero(&pptdev, sizeof(pptdev)); 445 pptdev.bus = bus; 446 pptdev.slot = slot; 447 pptdev.func = func; 448 449 return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); 450} 451 452int 453vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, 454 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 455{ 456 struct vm_pptdev_mmio pptmmio; 457 458 bzero(&pptmmio, sizeof(pptmmio)); 459 pptmmio.bus = bus; 460 pptmmio.slot = slot; 461 pptmmio.func = func; 462 pptmmio.gpa = gpa; 463 pptmmio.len = len; 464 pptmmio.hpa = hpa; 465 466 return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); 467} 468 469int 470vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 471 int destcpu, int vector, int numvec) 472{ 473 struct vm_pptdev_msi pptmsi; 474 475 bzero(&pptmsi, sizeof(pptmsi)); 476 pptmsi.vcpu = vcpu; 477 pptmsi.bus = bus; 478 pptmsi.slot = slot; 479 pptmsi.func = func; 480 pptmsi.destcpu = destcpu; 481 pptmsi.vector = vector; 482 pptmsi.numvec = numvec; 483 484 return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); 485} 486 487int 488vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 489 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) 490{ 491 struct vm_pptdev_msix pptmsix; 492 493 bzero(&pptmsix, sizeof(pptmsix)); 494 pptmsix.vcpu = vcpu; 495 pptmsix.bus = bus; 496 pptmsix.slot = slot; 497 pptmsix.func = func; 498 pptmsix.idx = idx; 499 pptmsix.msg = msg; 500 pptmsix.addr = addr; 501 pptmsix.vector_control = vector_control; 502 503 return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); 504} 505 506uint64_t * 507vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, 508 int *ret_entries) 509{ 510 int error; 511 512 static struct vm_stats vmstats; 513 514 vmstats.cpuid = vcpu; 515 516 error = ioctl(ctx->fd, VM_STATS, &vmstats); 517 if (error == 0) { 518 if (ret_entries) 519 *ret_entries = vmstats.num_entries; 520 if (ret_tv) 521 *ret_tv = vmstats.tv; 522 return (vmstats.statbuf); 523 } else 524 return (NULL); 525} 526 527const char * 528vm_get_stat_desc(struct vmctx *ctx, int index) 529{ 530 static struct vm_stat_desc statdesc; 531 532 statdesc.index = index; 533 if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) 534 return (statdesc.desc); 535 else 536 return (NULL); 537} 538 539int 540vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) 541{ 542 int error; 543 struct vm_x2apic x2apic; 544 545 bzero(&x2apic, sizeof(x2apic)); 546 x2apic.cpuid = vcpu; 547 548 error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); 549 *state = x2apic.state; 550 return (error); 551} 552 553int 554vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) 555{ 556 int error; 557 struct vm_x2apic x2apic; 558 559 bzero(&x2apic, sizeof(x2apic)); 560 x2apic.cpuid = vcpu; 561 x2apic.state = state; 562 563 error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); 564 565 return (error); 566} 567 568/* 569 * From Intel Vol 3a: 570 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT 571 */ 572int 573vcpu_reset(struct vmctx *vmctx, int vcpu) 574{ 575 int error; 576 uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; 577 uint32_t desc_access, desc_limit; 578 uint16_t sel; 579 580 zero = 0; 581 582 rflags = 0x2; 583 error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); 584 if (error) 585 goto done; 586 587 rip = 0xfff0; 588 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) 589 goto done; 590 591 cr0 = CR0_NE; 592 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) 593 goto done; 594 595 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) 596 goto done; 597 598 cr4 = 0; 599 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) 600 goto done; 601 602 /* 603 * CS: present, r/w, accessed, 16-bit, byte granularity, usable 604 */ 605 desc_base = 0xffff0000; 606 desc_limit = 0xffff; 607 desc_access = 0x0093; 608 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, 609 desc_base, desc_limit, desc_access); 610 if (error) 611 goto done; 612 613 sel = 0xf000; 614 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) 615 goto done; 616 617 /* 618 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity 619 */ 620 desc_base = 0; 621 desc_limit = 0xffff; 622 desc_access = 0x0093; 623 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, 624 desc_base, desc_limit, desc_access); 625 if (error) 626 goto done; 627 628 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, 629 desc_base, desc_limit, desc_access); 630 if (error) 631 goto done; 632 633 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, 634 desc_base, desc_limit, desc_access); 635 if (error) 636 goto done; 637 638 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, 639 desc_base, desc_limit, desc_access); 640 if (error) 641 goto done; 642 643 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, 644 desc_base, desc_limit, desc_access); 645 if (error) 646 goto done; 647 648 sel = 0; 649 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) 650 goto done; 651 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) 652 goto done; 653 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) 654 goto done; 655 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) 656 goto done; 657 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) 658 goto done; 659 660 /* General purpose registers */ 661 rdx = 0xf00; 662 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) 663 goto done; 664 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) 665 goto done; 666 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) 667 goto done; 668 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) 669 goto done; 670 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) 671 goto done; 672 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) 673 goto done; 674 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) 675 goto done; 676 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) 677 goto done; 678 679 /* GDTR, IDTR */ 680 desc_base = 0; 681 desc_limit = 0xffff; 682 desc_access = 0; 683 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, 684 desc_base, desc_limit, desc_access); 685 if (error != 0) 686 goto done; 687 688 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, 689 desc_base, desc_limit, desc_access); 690 if (error != 0) 691 goto done; 692 693 /* TR */ 694 desc_base = 0; 695 desc_limit = 0xffff; 696 desc_access = 0x0000008b; 697 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); 698 if (error) 699 goto done; 700 701 sel = 0; 702 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) 703 goto done; 704 705 /* LDTR */ 706 desc_base = 0; 707 desc_limit = 0xffff; 708 desc_access = 0x00000082; 709 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, 710 desc_limit, desc_access); 711 if (error) 712 goto done; 713 714 sel = 0; 715 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) 716 goto done; 717 718 /* XXX cr2, debug registers */ 719 720 error = 0; 721done: 722 return (error); 723} 724