vmmapi.c revision 246686
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/lib/libvmmapi/vmmapi.c 246686 2013-02-11 20:36:07Z neel $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/lib/libvmmapi/vmmapi.c 246686 2013-02-11 20:36:07Z neel $"); 31 32#include <sys/types.h> 33#include <sys/sysctl.h> 34#include <sys/ioctl.h> 35#include <sys/mman.h> 36 37#include <machine/specialreg.h> 38 39#include <stdio.h> 40#include <stdlib.h> 41#include <assert.h> 42#include <string.h> 43#include <fcntl.h> 44#include <unistd.h> 45 46#include <machine/vmm.h> 47#include <machine/vmm_dev.h> 48 49#include "vmmapi.h" 50 51struct vmctx { 52 int fd; 53 char *name; 54}; 55 56#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) 57#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) 58 59static int 60vm_device_open(const char *name) 61{ 62 int fd, len; 63 char *vmfile; 64 65 len = strlen("/dev/vmm/") + strlen(name) + 1; 66 vmfile = malloc(len); 67 assert(vmfile != NULL); 68 snprintf(vmfile, len, "/dev/vmm/%s", name); 69 70 /* Open the device file */ 71 fd = open(vmfile, O_RDWR, 0); 72 73 free(vmfile); 74 return (fd); 75} 76 77int 78vm_create(const char *name) 79{ 80 81 return (CREATE((char *)name)); 82} 83 84struct vmctx * 85vm_open(const char *name) 86{ 87 struct vmctx *vm; 88 89 vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); 90 assert(vm != NULL); 91 92 vm->fd = -1; 93 vm->name = (char *)(vm + 1); 94 strcpy(vm->name, name); 95 96 if ((vm->fd = vm_device_open(vm->name)) < 0) 97 goto err; 98 99 return (vm); 100err: 101 vm_destroy(vm); 102 return (NULL); 103} 104 105void 106vm_destroy(struct vmctx *vm) 107{ 108 assert(vm != NULL); 109 110 if (vm->fd >= 0) 111 close(vm->fd); 112 DESTROY(vm->name); 113 114 free(vm); 115} 116 117size_t 118vmm_get_mem_total(void) 119{ 120 size_t mem_total = 0; 121 size_t oldlen = sizeof(mem_total); 122 int error; 123 error = sysctlbyname("hw.vmm.mem_total", &mem_total, &oldlen, NULL, 0); 124 if (error) 125 return -1; 126 return mem_total; 127} 128 129size_t 130vmm_get_mem_free(void) 131{ 132 size_t mem_free = 0; 133 size_t oldlen = sizeof(mem_free); 134 int error; 135 error = sysctlbyname("hw.vmm.mem_free", &mem_free, &oldlen, NULL, 0); 136 if (error) 137 return -1; 138 return mem_free; 139} 140 141int 142vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len) 143{ 144 int error; 145 struct vm_memory_segment seg; 146 147 bzero(&seg, sizeof(seg)); 148 seg.gpa = gpa; 149 error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); 150 *ret_len = seg.len; 151 return (error); 152} 153 154int 155vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr) 156{ 157 int error; 158 struct vm_memory_segment seg; 159 160 /* 161 * Create and optionally map 'len' bytes of memory at guest 162 * physical address 'gpa' 163 */ 164 bzero(&seg, sizeof(seg)); 165 seg.gpa = gpa; 166 seg.len = len; 167 error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); 168 if (error == 0 && mapaddr != NULL) { 169 *mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 170 ctx->fd, gpa); 171 } 172 return (error); 173} 174 175char * 176vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len) 177{ 178 179 /* Map 'len' bytes of memory at guest physical address 'gpa' */ 180 return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 181 ctx->fd, gpa)); 182} 183 184int 185vm_set_desc(struct vmctx *ctx, int vcpu, int reg, 186 uint64_t base, uint32_t limit, uint32_t access) 187{ 188 int error; 189 struct vm_seg_desc vmsegdesc; 190 191 bzero(&vmsegdesc, sizeof(vmsegdesc)); 192 vmsegdesc.cpuid = vcpu; 193 vmsegdesc.regnum = reg; 194 vmsegdesc.desc.base = base; 195 vmsegdesc.desc.limit = limit; 196 vmsegdesc.desc.access = access; 197 198 error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); 199 return (error); 200} 201 202int 203vm_get_desc(struct vmctx *ctx, int vcpu, int reg, 204 uint64_t *base, uint32_t *limit, uint32_t *access) 205{ 206 int error; 207 struct vm_seg_desc vmsegdesc; 208 209 bzero(&vmsegdesc, sizeof(vmsegdesc)); 210 vmsegdesc.cpuid = vcpu; 211 vmsegdesc.regnum = reg; 212 213 error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); 214 if (error == 0) { 215 *base = vmsegdesc.desc.base; 216 *limit = vmsegdesc.desc.limit; 217 *access = vmsegdesc.desc.access; 218 } 219 return (error); 220} 221 222int 223vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) 224{ 225 int error; 226 struct vm_register vmreg; 227 228 bzero(&vmreg, sizeof(vmreg)); 229 vmreg.cpuid = vcpu; 230 vmreg.regnum = reg; 231 vmreg.regval = val; 232 233 error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); 234 return (error); 235} 236 237int 238vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) 239{ 240 int error; 241 struct vm_register vmreg; 242 243 bzero(&vmreg, sizeof(vmreg)); 244 vmreg.cpuid = vcpu; 245 vmreg.regnum = reg; 246 247 error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); 248 *ret_val = vmreg.regval; 249 return (error); 250} 251 252int 253vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) 254{ 255 int error; 256 struct vm_run vmrun; 257 258 bzero(&vmrun, sizeof(vmrun)); 259 vmrun.cpuid = vcpu; 260 vmrun.rip = rip; 261 262 error = ioctl(ctx->fd, VM_RUN, &vmrun); 263 bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); 264 return (error); 265} 266 267static int 268vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, 269 int vector, int error_code, int error_code_valid) 270{ 271 struct vm_event ev; 272 273 bzero(&ev, sizeof(ev)); 274 ev.cpuid = vcpu; 275 ev.type = type; 276 ev.vector = vector; 277 ev.error_code = error_code; 278 ev.error_code_valid = error_code_valid; 279 280 return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); 281} 282 283int 284vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, 285 int vector) 286{ 287 288 return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); 289} 290 291int 292vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, 293 int vector, int error_code) 294{ 295 296 return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); 297} 298 299int 300vm_apicid2vcpu(struct vmctx *ctx, int apicid) 301{ 302 /* 303 * The apic id associated with the 'vcpu' has the same numerical value 304 * as the 'vcpu' itself. 305 */ 306 return (apicid); 307} 308 309int 310vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) 311{ 312 struct vm_lapic_irq vmirq; 313 314 bzero(&vmirq, sizeof(vmirq)); 315 vmirq.cpuid = vcpu; 316 vmirq.vector = vector; 317 318 return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); 319} 320 321int 322vm_inject_nmi(struct vmctx *ctx, int vcpu) 323{ 324 struct vm_nmi vmnmi; 325 326 bzero(&vmnmi, sizeof(vmnmi)); 327 vmnmi.cpuid = vcpu; 328 329 return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); 330} 331 332static struct { 333 const char *name; 334 int type; 335} capstrmap[] = { 336 { "hlt_exit", VM_CAP_HALT_EXIT }, 337 { "mtrap_exit", VM_CAP_MTRAP_EXIT }, 338 { "pause_exit", VM_CAP_PAUSE_EXIT }, 339 { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, 340 { 0 } 341}; 342 343int 344vm_capability_name2type(const char *capname) 345{ 346 int i; 347 348 for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { 349 if (strcmp(capstrmap[i].name, capname) == 0) 350 return (capstrmap[i].type); 351 } 352 353 return (-1); 354} 355 356const char * 357vm_capability_type2name(int type) 358{ 359 int i; 360 361 for (i = 0; capstrmap[i].name != NULL; i++) { 362 if (capstrmap[i].type == type) 363 return (capstrmap[i].name); 364 } 365 366 return (NULL); 367} 368 369int 370vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, 371 int *retval) 372{ 373 int error; 374 struct vm_capability vmcap; 375 376 bzero(&vmcap, sizeof(vmcap)); 377 vmcap.cpuid = vcpu; 378 vmcap.captype = cap; 379 380 error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); 381 *retval = vmcap.capval; 382 return (error); 383} 384 385int 386vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) 387{ 388 struct vm_capability vmcap; 389 390 bzero(&vmcap, sizeof(vmcap)); 391 vmcap.cpuid = vcpu; 392 vmcap.captype = cap; 393 vmcap.capval = val; 394 395 return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); 396} 397 398int 399vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 400{ 401 struct vm_pptdev pptdev; 402 403 bzero(&pptdev, sizeof(pptdev)); 404 pptdev.bus = bus; 405 pptdev.slot = slot; 406 pptdev.func = func; 407 408 return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); 409} 410 411int 412vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 413{ 414 struct vm_pptdev pptdev; 415 416 bzero(&pptdev, sizeof(pptdev)); 417 pptdev.bus = bus; 418 pptdev.slot = slot; 419 pptdev.func = func; 420 421 return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); 422} 423 424int 425vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, 426 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 427{ 428 struct vm_pptdev_mmio pptmmio; 429 430 bzero(&pptmmio, sizeof(pptmmio)); 431 pptmmio.bus = bus; 432 pptmmio.slot = slot; 433 pptmmio.func = func; 434 pptmmio.gpa = gpa; 435 pptmmio.len = len; 436 pptmmio.hpa = hpa; 437 438 return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); 439} 440 441int 442vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 443 int destcpu, int vector, int numvec) 444{ 445 struct vm_pptdev_msi pptmsi; 446 447 bzero(&pptmsi, sizeof(pptmsi)); 448 pptmsi.vcpu = vcpu; 449 pptmsi.bus = bus; 450 pptmsi.slot = slot; 451 pptmsi.func = func; 452 pptmsi.destcpu = destcpu; 453 pptmsi.vector = vector; 454 pptmsi.numvec = numvec; 455 456 return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); 457} 458 459int 460vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 461 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) 462{ 463 struct vm_pptdev_msix pptmsix; 464 465 bzero(&pptmsix, sizeof(pptmsix)); 466 pptmsix.vcpu = vcpu; 467 pptmsix.bus = bus; 468 pptmsix.slot = slot; 469 pptmsix.func = func; 470 pptmsix.idx = idx; 471 pptmsix.msg = msg; 472 pptmsix.addr = addr; 473 pptmsix.vector_control = vector_control; 474 475 return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); 476} 477 478uint64_t * 479vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, 480 int *ret_entries) 481{ 482 int error; 483 484 static struct vm_stats vmstats; 485 486 vmstats.cpuid = vcpu; 487 488 error = ioctl(ctx->fd, VM_STATS, &vmstats); 489 if (error == 0) { 490 if (ret_entries) 491 *ret_entries = vmstats.num_entries; 492 if (ret_tv) 493 *ret_tv = vmstats.tv; 494 return (vmstats.statbuf); 495 } else 496 return (NULL); 497} 498 499const char * 500vm_get_stat_desc(struct vmctx *ctx, int index) 501{ 502 static struct vm_stat_desc statdesc; 503 504 statdesc.index = index; 505 if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) 506 return (statdesc.desc); 507 else 508 return (NULL); 509} 510 511int 512vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) 513{ 514 int error; 515 struct vm_x2apic x2apic; 516 517 bzero(&x2apic, sizeof(x2apic)); 518 x2apic.cpuid = vcpu; 519 520 error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); 521 *state = x2apic.state; 522 return (error); 523} 524 525int 526vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) 527{ 528 int error; 529 struct vm_x2apic x2apic; 530 531 bzero(&x2apic, sizeof(x2apic)); 532 x2apic.cpuid = vcpu; 533 x2apic.state = state; 534 535 error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); 536 537 return (error); 538} 539 540/* 541 * From Intel Vol 3a: 542 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT 543 */ 544int 545vcpu_reset(struct vmctx *vmctx, int vcpu) 546{ 547 int error; 548 uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; 549 uint32_t desc_access, desc_limit; 550 uint16_t sel; 551 552 zero = 0; 553 554 rflags = 0x2; 555 error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); 556 if (error) 557 goto done; 558 559 rip = 0xfff0; 560 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) 561 goto done; 562 563 cr0 = CR0_NE; 564 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) 565 goto done; 566 567 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) 568 goto done; 569 570 cr4 = 0; 571 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) 572 goto done; 573 574 /* 575 * CS: present, r/w, accessed, 16-bit, byte granularity, usable 576 */ 577 desc_base = 0xffff0000; 578 desc_limit = 0xffff; 579 desc_access = 0x0093; 580 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, 581 desc_base, desc_limit, desc_access); 582 if (error) 583 goto done; 584 585 sel = 0xf000; 586 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) 587 goto done; 588 589 /* 590 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity 591 */ 592 desc_base = 0; 593 desc_limit = 0xffff; 594 desc_access = 0x0093; 595 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, 596 desc_base, desc_limit, desc_access); 597 if (error) 598 goto done; 599 600 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, 601 desc_base, desc_limit, desc_access); 602 if (error) 603 goto done; 604 605 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, 606 desc_base, desc_limit, desc_access); 607 if (error) 608 goto done; 609 610 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, 611 desc_base, desc_limit, desc_access); 612 if (error) 613 goto done; 614 615 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, 616 desc_base, desc_limit, desc_access); 617 if (error) 618 goto done; 619 620 sel = 0; 621 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) 622 goto done; 623 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) 624 goto done; 625 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) 626 goto done; 627 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) 628 goto done; 629 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) 630 goto done; 631 632 /* General purpose registers */ 633 rdx = 0xf00; 634 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) 635 goto done; 636 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) 637 goto done; 638 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) 639 goto done; 640 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) 641 goto done; 642 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) 643 goto done; 644 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) 645 goto done; 646 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) 647 goto done; 648 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) 649 goto done; 650 651 /* GDTR, IDTR */ 652 desc_base = 0; 653 desc_limit = 0xffff; 654 desc_access = 0; 655 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, 656 desc_base, desc_limit, desc_access); 657 if (error != 0) 658 goto done; 659 660 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, 661 desc_base, desc_limit, desc_access); 662 if (error != 0) 663 goto done; 664 665 /* TR */ 666 desc_base = 0; 667 desc_limit = 0xffff; 668 desc_access = 0x0000008b; 669 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); 670 if (error) 671 goto done; 672 673 sel = 0; 674 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) 675 goto done; 676 677 /* LDTR */ 678 desc_base = 0; 679 desc_limit = 0xffff; 680 desc_access = 0x00000082; 681 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, 682 desc_limit, desc_access); 683 if (error) 684 goto done; 685 686 sel = 0; 687 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) 688 goto done; 689 690 /* XXX cr2, debug registers */ 691 692 error = 0; 693done: 694 return (error); 695} 696