vmd.c revision 1.3
1/* $OpenBSD: vmd.c,v 1.3 2015/11/22 22:29:48 deraadt Exp $ */ 2 3/* 4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19/* 20 * vmd(8) - virtual machine daemon 21 */ 22 23#include <sys/types.h> 24#include <sys/ioctl.h> 25#include <sys/queue.h> 26#include <sys/uio.h> 27#include <sys/socket.h> 28#include <sys/stat.h> 29#include <sys/un.h> 30#include <sys/wait.h> 31#include <sys/mman.h> 32#include <sys/time.h> 33 34#include <dev/ic/comreg.h> 35#include <dev/ic/i8253reg.h> 36#include <dev/isa/isareg.h> 37#include <dev/pci/pcireg.h> 38 39#include <machine/param.h> 40#include <machine/vmmvar.h> 41 42#include <err.h> 43#include <errno.h> 44#include <fcntl.h> 45#include <imsg.h> 46#include <limits.h> 47#include <pthread.h> 48#include <pwd.h> 49#include <signal.h> 50#include <stddef.h> 51#include <stdio.h> 52#include <stdlib.h> 53#include <string.h> 54#include <termios.h> 55#include <unistd.h> 56#include <util.h> 57 58#include "vmd.h" 59#include "loadfile.h" 60#include "pci.h" 61#include "virtio.h" 62 63#define NR_BACKLOG 5 64 65#define MAX_TAP 256 66 67/* 68 * Emulated 8250 UART 69 * 70 */ 71#define COM1_DATA 0x3f8 72#define COM1_IER 0x3f9 73#define COM1_IIR 0x3fa 74#define COM1_LCR 0x3fb 75#define COM1_MCR 0x3fc 76#define COM1_LSR 0x3fd 77#define COM1_MSR 0x3fe 78#define COM1_SCR 0x3ff 79 80/* 81 * Emulated i8253 PIT (counter) 82 */ 83#define TIMER_BASE 0x40 84#define TIMER_CTRL 0x43 /* 8253 Timer #1 */ 85#define NS_PER_TICK (1000000000 / TIMER_FREQ) 86 87/* i8253 registers */ 88struct i8253_counter { 89 struct timeval tv; /* timer start time */ 90 uint16_t start; /* starting value */ 91 uint16_t olatch; /* output latch */ 92 uint16_t ilatch; /* input latch */ 93 uint8_t last_r; /* last read byte (MSB/LSB) */ 94 uint8_t last_w; /* last written byte (MSB/LSB) */ 95}; 96 97/* ns8250 UART registers */ 98struct ns8250_regs { 99 uint8_t lcr; /* Line Control Register */ 100 uint8_t fcr; /* FIFO Control Register */ 101 uint8_t iir; /* Interrupt ID Register */ 102 uint8_t ier; /* Interrupt Enable Register */ 103 uint8_t divlo; /* Baud rate divisor low byte */ 104 uint8_t divhi; /* Baud rate divisor high byte */ 105 uint8_t msr; /* Modem Status Register */ 106 uint8_t lsr; /* Line Status Register */ 107 uint8_t mcr; /* Modem Control Register */ 108 uint8_t scr; /* Scratch Register */ 109 uint8_t data; /* Unread input data */ 110}; 111 112struct i8253_counter i8253_counter[3]; 113struct ns8250_regs com1_regs; 114 115void sighdlr(int); 116int main(int, char **); 117int control_run(void); 118int disable_vmm(void); 119int enable_vmm(void); 120int start_vm(struct imsg *); 121int terminate_vm(struct imsg *); 122int get_info_vm(struct imsgbuf *); 123int start_client_vmd(void); 124int opentap(void); 125int run_vm(int *, int *, struct vm_create_params *); 126void *vcpu_run_loop(void *); 127int vcpu_exit(struct vm_run_params *); 128int vmm_create_vm(struct vm_create_params *); 129void init_emulated_hw(struct vm_create_params *, int *, int *); 130void vcpu_exit_inout(struct vm_run_params *); 131uint8_t vcpu_exit_pci(struct vm_run_params *); 132void vcpu_exit_i8253(union vm_exit *); 133void vcpu_exit_com(struct vm_run_params *); 134void vcpu_process_com_data(union vm_exit *); 135void vcpu_process_com_lcr(union vm_exit *); 136void vcpu_process_com_lsr(union vm_exit *); 137void vcpu_process_com_ier(union vm_exit *); 138void vcpu_process_com_mcr(union vm_exit *); 139void vcpu_process_com_iir(union vm_exit *); 140void vcpu_process_com_msr(union vm_exit *); 141void vcpu_process_com_scr(union vm_exit *); 142 143int vmm_fd, con_fd, vm_id; 144volatile sig_atomic_t quit; 145 146SLIST_HEAD(vmstate_head, vmstate); 147struct vmstate_head vmstate; 148 149extern char *__progname; 150 151/* 152 * sighdlr 153 * 154 * Signal handler for TERM/INT/CHLD signals used during daemon shutdown 155 * 156 * Parameters: 157 * sig: signal caught 158 */ 159void 160sighdlr(int sig) 161{ 162 switch (sig) { 163 case SIGTERM: 164 case SIGINT: 165 /* Tell main imsg loop to exit */ 166 quit = 1; 167 break; 168 case SIGCHLD: 169 while (waitpid(WAIT_ANY, 0, WNOHANG) > 0) {} 170 break; 171 } 172} 173 174int 175main(int argc, char **argv) 176{ 177 int res; 178 179 /* Open /dev/vmm */ 180 vmm_fd = open(VMM_NODE, O_RDONLY); 181 if (vmm_fd == -1) 182 errx(1, "can't open vmm device node %s", VMM_NODE); 183 184 setproctitle("control"); 185 186 SLIST_INIT(&vmstate); 187 188 signal(SIGTERM, sighdlr); 189 signal(SIGINT, sighdlr); 190 signal(SIGCHLD, sighdlr); 191 192 if (daemon(0, 1) == -1) 193 errx(1, "can't daemonize\n"); 194 195 res = control_run(); 196 197 if (res == -1) 198 errx(1, "control socket error\n"); 199 200 return (0); 201} 202 203/* 204 * control_run 205 * 206 * Main control loop - establishes listening socket for incoming vmmctl(8) 207 * requests and dispatches appropriate calls to vmm(4). Replies to 208 * vmmctl(8) using imsg. 209 * 210 * Return values: 211 * 0: normal exit (signal to quit received) 212 * -1: abnormal exit (various causes) 213 */ 214int 215control_run(void) 216{ 217 struct sockaddr_un sun, c_sun; 218 socklen_t len; 219 int fd, connfd, n, res; 220 mode_t mode, old_umask; 221 char *socketpath; 222 struct imsgbuf *ibuf; 223 struct imsg imsg; 224 225 /* Establish and start listening on control socket */ 226 socketpath = SOCKET_NAME; 227 if ((fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)) == -1) { 228 fprintf(stderr, "%s: socket error\n", __progname); 229 return (-1); 230 } 231 232 bzero(&sun, sizeof(sun)); 233 sun.sun_family = AF_UNIX; 234 if (strlcpy(sun.sun_path, socketpath, sizeof(sun.sun_path)) >= 235 sizeof(sun.sun_path)) { 236 fprintf(stderr, "%s: socket name too long\n", __progname); 237 close(fd); 238 return (-1); 239 } 240 241 if (unlink(socketpath) == -1) 242 if (errno != ENOENT) { 243 fprintf(stderr, "%s: unlink of %s failed\n", 244 __progname, socketpath); 245 close(fd); 246 return (-1); 247 } 248 249 old_umask = umask(S_IXUSR|S_IXGRP|S_IWOTH|S_IROTH|S_IXOTH); 250 mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP; 251 252 if (bind(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1) { 253 fprintf(stderr, "%s: control_init: bind of %s failed\n", 254 __progname, socketpath); 255 close(fd); 256 umask(old_umask); 257 return (-1); 258 } 259 260 umask(old_umask); 261 262 if (chmod(socketpath, mode) == -1) { 263 fprintf(stderr, "%s: control_init: chmod of %s failed\n", 264 __progname, socketpath); 265 close(fd); 266 unlink(socketpath); 267 return (-1); 268 } 269 270 if ((ibuf = malloc(sizeof(struct imsgbuf))) == NULL) { 271 fprintf(stderr, "%s: out of memory\n", __progname); 272 close(fd); 273 unlink(socketpath); 274 return (-1); 275 } 276 277 if (listen(fd, NR_BACKLOG) == -1) { 278 fprintf(stderr, "%s: listen failed\n", __progname); 279 close(fd); 280 unlink(socketpath); 281 return (-1); 282 } 283 284 while (!quit) { 285 if ((connfd = accept4(fd, (struct sockaddr *)&c_sun, &len, 286 SOCK_CLOEXEC)) == -1) { 287 fprintf(stderr, "%s: accept4 error\n", __progname); 288 close(fd); 289 unlink(socketpath); 290 return (-1); 291 } 292 293 imsg_init(ibuf, connfd); 294 if ((n = imsg_read(ibuf)) == -1 || n == 0) { 295 fprintf(stderr, "%s: imsg_read error, n=%d\n", 296 __progname, n); 297 continue; 298 } 299 300 for (;;) { 301 if ((n = imsg_get(ibuf, &imsg)) == -1) 302 return (-1); 303 304 if (n == 0) 305 break; 306 307 /* Process incoming message (from vmmctl(8)) */ 308 switch (imsg.hdr.type) { 309 case IMSG_VMDOP_DISABLE_VMM_REQUEST: 310 res = disable_vmm(); 311 imsg_compose(ibuf, 312 IMSG_VMDOP_DISABLE_VMM_RESPONSE, 0, 0, -1, 313 &res, sizeof(res)); 314 break; 315 case IMSG_VMDOP_ENABLE_VMM_REQUEST: 316 res = enable_vmm(); 317 imsg_compose(ibuf, 318 IMSG_VMDOP_ENABLE_VMM_RESPONSE, 0, 0, -1, 319 &res, sizeof(res)); 320 break; 321 case IMSG_VMDOP_START_VM_REQUEST: 322 res = start_vm(&imsg); 323 imsg_compose(ibuf, 324 IMSG_VMDOP_START_VM_RESPONSE, 0, 0, -1, 325 &res, sizeof(res)); 326 break; 327 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 328 res = terminate_vm(&imsg); 329 imsg_compose(ibuf, 330 IMSG_VMDOP_TERMINATE_VM_RESPONSE, 0, 0, -1, 331 &res, sizeof(res)); 332 break; 333 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 334 res = get_info_vm(ibuf); 335 imsg_compose(ibuf, 336 IMSG_VMDOP_GET_INFO_VM_END_DATA, 0, 0, -1, 337 &res, sizeof(res)); 338 break; 339 } 340 341 while (ibuf->w.queued) 342 if (msgbuf_write(&ibuf->w) <= 0 && errno != 343 EAGAIN) { 344 fprintf(stderr, "%s: msgbuf_write " 345 "error %d\n", __progname, 346 errno); 347 close(fd); 348 close(connfd); 349 unlink(socketpath); 350 return (-1); 351 } 352 imsg_free(&imsg); 353 } 354 close(connfd); 355 } 356 357 signal(SIGCHLD, SIG_IGN); 358 359 return (0); 360} 361 362/* 363 * disable_vmm 364 * 365 * Disables VMM mode on all CPUs 366 * 367 * Return values: 368 * 0: success 369 * !0 : ioctl to vmm(4) failed 370 */ 371int 372disable_vmm(void) 373{ 374 if (ioctl(vmm_fd, VMM_IOC_STOP, NULL) < 0) 375 return (errno); 376 377 return (0); 378} 379 380/* 381 * enable_vmm 382 * 383 * Enables VMM mode on all CPUs 384 * 385 * Return values: 386 * 0: success 387 * !0 : ioctl to vmm(4) failed 388 */ 389int 390enable_vmm(void) 391{ 392 if (ioctl(vmm_fd, VMM_IOC_START, NULL) < 0) 393 return (errno); 394 395 return (0); 396} 397 398/* 399 * terminate_vm 400 * 401 * Requests vmm(4) to terminate the VM whose ID is provided in the 402 * supplied vm_terminate_params structure (vtp->vtp_vm_id) 403 * 404 * Parameters 405 * imsg: The incoming imsg body whose 'data' field contains the 406 * vm_terminate_params struct 407 * 408 * Return values: 409 * 0: success 410 * !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not 411 * valid) 412 */ 413int 414terminate_vm(struct imsg *imsg) 415{ 416 struct vm_terminate_params *vtp; 417 418 vtp = (struct vm_terminate_params *)imsg->data; 419 420 if (ioctl(vmm_fd, VMM_IOC_TERM, vtp) < 0) 421 return (errno); 422 423 return (0); 424} 425 426/* 427 * opentap 428 * 429 * Opens the next available tap device, up to MAX_TAP. 430 * 431 * Returns a file descriptor to the tap node opened, or -1 if no tap 432 * devices were available. 433 */ 434int 435opentap(void) 436{ 437 int i, fd; 438 char path[PATH_MAX]; 439 440 for (i = 0; i < MAX_TAP; i++) { 441 snprintf(path, PATH_MAX, "/dev/tap%d", i); 442 fd = open(path, O_RDWR | O_NONBLOCK); 443 if (fd != -1) 444 return (fd); 445 } 446 447 return (-1); 448} 449 450/* 451 * start_vm 452 * 453 * Starts a new VM with the creation parameters supplied (in the incoming 454 * imsg->data field). This function performs a basic sanity check on the 455 * incoming parameters and then performs the following steps to complete 456 * the creation of the VM: 457 * 458 * 1. opens the VM disk image files specified in the VM creation parameters 459 * 2. opens the specified VM kernel 460 * 3. creates a VM console tty pair using openpty 461 * 4. forks, passing the file descriptors opened in steps 1-3 to the child 462 * vmd responsible for dropping privilege and running the VM's VCPU 463 * loops. 464 * 465 * Parameters: 466 * imsg: The incoming imsg body whose 'data' field is a vm_create_params 467 * struct containing the VM creation parameters. 468 * 469 * Return values: 470 * 0: success 471 * !0 : failure - typically an errno indicating the source of the failure 472 */ 473int 474start_vm(struct imsg *imsg) 475{ 476 struct vm_create_params *vcp; 477 size_t i; 478 off_t kernel_size; 479 struct stat sb; 480 int child_disks[VMM_MAX_DISKS_PER_VM], kernel_fd, ret, ttym_fd; 481 int child_taps[VMM_MAX_NICS_PER_VM]; 482 int ttys_fd; 483 char ptyn[32]; 484 485 vcp = (struct vm_create_params *)imsg->data; 486 487 for (i = 0 ; i < VMM_MAX_DISKS_PER_VM; i++) 488 child_disks[i] = -1; 489 for (i = 0 ; i < VMM_MAX_NICS_PER_VM; i++) 490 child_taps[i] = -1; 491 492 /* 493 * XXX kernel_fd can't be global (possible race if multiple VMs 494 * being created at the same time). Probably need to move this 495 * into the child before dropping privs, or just make it local 496 * to this function? 497 */ 498 kernel_fd = -1; 499 500 ttym_fd = -1; 501 ttys_fd = -1; 502 503 /* Open disk images for child */ 504 for (i = 0 ; i < vcp->vcp_ndisks; i++) { 505 child_disks[i] = open(vcp->vcp_disks[i], O_RDWR); 506 if (child_disks[i] == -1) { 507 ret = errno; 508 fprintf(stderr, "%s: can't open %s (%d)\n", __progname, 509 vcp->vcp_disks[i], errno); 510 goto err; 511 } 512 } 513 514 bzero(&sb, sizeof(sb)); 515 if (stat(vcp->vcp_kernel, &sb) == -1) { 516 ret = errno; 517 fprintf(stderr, "%s: can't stat kernel image %s (%d)\n", 518 __progname, vcp->vcp_kernel, errno); 519 goto err; 520 } 521 522 kernel_size = sb.st_size; 523 524 /* Open kernel image */ 525 kernel_fd = open(vcp->vcp_kernel, O_RDONLY); 526 if (kernel_fd == -1) { 527 ret = errno; 528 fprintf(stderr, "%s: can't open kernel image %s (%d)\n", 529 __progname, vcp->vcp_kernel, errno); 530 goto err; 531 } 532 533 if (openpty(&ttym_fd, &ttys_fd, ptyn, NULL, NULL) == -1) { 534 ret = errno; 535 fprintf(stderr, "%s: openpty failed: %d\n", 536 __progname, errno); 537 goto err; 538 } 539 540 if (close(ttys_fd)) { 541 ret = errno; 542 fprintf(stderr, "%s: close tty failed: %d\n", 543 __progname, errno); 544 goto err; 545 } 546 547 /* Open tap devices for child */ 548 for (i = 0 ; i < vcp->vcp_nnics; i++) { 549 child_taps[i] = opentap(); 550 if (child_taps[i] == -1) { 551 ret = errno; 552 fprintf(stderr, "%s: can't open tap for nic %zd (%d)\n", 553 __progname, i, errno); 554 goto err; 555 } 556 } 557 558 /* Start child vmd for this VM (fork, chroot, drop privs) */ 559 ret = start_client_vmd(); 560 561 /* Start child failed? - cleanup and leave */ 562 if (ret == -1) { 563 ret = EIO; 564 goto err; 565 } 566 567 if (ret > 0) { 568 /* Parent */ 569 for (i = 0 ; i < vcp->vcp_ndisks; i++) 570 close(child_disks[i]); 571 572 for (i = 0 ; i < vcp->vcp_nnics; i++) 573 close(child_taps[i]); 574 575 close(kernel_fd); 576 close(ttym_fd); 577 578 return (0); 579 } 580 else { 581 /* Child */ 582 fprintf(stderr, "%s: vm console: %s\n", __progname, ptyn); 583 ret = vmm_create_vm(vcp); 584 setproctitle(vcp->vcp_name); 585 if (ret) { 586 fprintf(stderr, "%s: create vmm ioctl failed - " 587 "exiting (%d)\n", __progname, ret); 588 _exit(1); 589 } 590 591 /* Load kernel image */ 592 ret = loadelf_main(kernel_fd, vcp->vcp_id, vcp->vcp_memory_size); 593 if (ret) { 594 fprintf(stderr, "%s: failed to load kernel - " 595 "exiting (%d)\n", __progname, ret); 596 _exit(1); 597 } 598 599 close(kernel_fd); 600 601 con_fd = ttym_fd; 602 if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1) { 603 fprintf(stderr, "%s: failed to set nonblocking mode " 604 "on console\n", __progname); 605 _exit(1); 606 } 607 608 /* Execute the vcpu run loop(s) for this VM */ 609 ret = run_vm(child_disks, child_taps, vcp); 610 _exit(ret != 0); 611 } 612 613 return (ret); 614 615err: 616 for (i = 0 ; i < vcp->vcp_ndisks; i++) 617 if (child_disks[i] != -1) 618 close(child_disks[i]); 619 620 for (i = 0 ; i < vcp->vcp_nnics; i++) 621 if (child_taps[i] != -1) 622 close(child_taps[i]); 623 624 if (kernel_fd != -1) 625 close(kernel_fd); 626 627 if (ttym_fd != -1) 628 close(ttym_fd); 629 630 return (ret); 631} 632 633/* 634 * get_info_vm 635 * 636 * Returns a list of VMs known to vmm(4). 637 * 638 * Parameters: 639 * ibuf: the imsg ibuf in which to place the results. A new imsg will 640 * be created using this ibuf. 641 * 642 * Return values: 643 * 0: success 644 * !0 : failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl) 645 */ 646int 647get_info_vm(struct imsgbuf *ibuf) 648{ 649 int ret; 650 size_t ct, i; 651 struct ibuf *obuf; 652 struct vm_info_params vip; 653 struct vm_info_result *info; 654 655 /* 656 * We issue the VMM_IOC_INFO ioctl twice, once with an input 657 * buffer size of 0, which results in vmm(4) returning the 658 * number of bytes required back to us in vip.vip_size, 659 * and then we call it again after malloc'ing the required 660 * number of bytes. 661 * 662 * It is possible that we could fail a second time (eg, if 663 * another VM was created in the instant between the two 664 * ioctls, but in that case the caller can just try again 665 * as vmm(4) will return a zero-sized list in that case. 666 */ 667 vip.vip_size = 0; 668 info = NULL; 669 ret = 0; 670 671 /* First ioctl to see how many bytes needed (vip.vip_size) */ 672 if (ioctl(vmm_fd, VMM_IOC_INFO, &vip) < 0) 673 return (errno); 674 675 if (vip.vip_info_ct != 0) 676 return (EIO); 677 678 info = malloc(vip.vip_size); 679 if (info == NULL) 680 return (ENOMEM); 681 682 /* Second ioctl to get the actual list */ 683 vip.vip_info = info; 684 if (ioctl(vmm_fd, VMM_IOC_INFO, &vip) < 0) { 685 ret = errno; 686 free(info); 687 return (ret); 688 } 689 690 /* Return info to vmmctl(4) */ 691 ct = vip.vip_size / sizeof(struct vm_info_result); 692 for (i = 0; i < ct; i++) { 693 obuf = imsg_create(ibuf, IMSG_VMDOP_GET_INFO_VM_DATA, 0, 0, 694 sizeof(struct vm_info_result)); 695 imsg_add(obuf, &info[i], sizeof(struct vm_info_result)); 696 imsg_close(ibuf, obuf); 697 } 698 free(info); 699 return (0); 700} 701 702 703/* 704 * start_client_vmd 705 * 706 * forks a copy of the parent vmd, chroots to VMD_USER's home, drops 707 * privileges (changes to user VMD_USER), and returns. 708 * Should the fork operation succeed, but later chroot/privsep 709 * fail, the child exits. 710 * 711 * Return values (returns to both child and parent on success): 712 * -1 : failure 713 * 0: return to child vmd returns 0 714 * !0 : return to parent vmd returns the child's pid 715 */ 716int 717start_client_vmd(void) 718{ 719 int child_pid; 720 struct passwd *pw; 721 722 pw = getpwnam(VMD_USER); 723 if (pw == NULL) { 724 fprintf(stderr, "%s: no such user %s\n", __progname, VMD_USER); 725 return (-1); 726 } 727 728 child_pid = fork(); 729 if (child_pid < 0) 730 return (-1); 731 732 if (!child_pid) { 733 /* Child */ 734 if (chroot(pw->pw_dir) != 0) 735 err(1, "unable to chroot"); 736 if (chdir("/") != 0) 737 err(1, "unable to chdir"); 738 739 if (setgroups(1, &pw->pw_gid) == -1) 740 err(1, "setgroups() failed"); 741 if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) == -1) 742 err(1, "setresgid() failed"); 743 if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) == -1) 744 err(1, "setresuid() failed"); 745 746 return (0); 747 } 748 749 /* Parent */ 750 return (child_pid); 751} 752 753/* 754 * vmm_create_vm 755 * 756 * Requests vmm(4) to create a new VM using the supplied creation 757 * parameters. This operation results in the creation of the in-kernel 758 * structures for the VM, but does not start the VM's vcpu(s). 759 * 760 * Parameters: 761 * vcp: vm_create_params struct containing the VM's desired creation 762 * configuration 763 * 764 * Return values: 765 * 0: success 766 * !0 : ioctl to vmm(4) failed 767 */ 768int 769vmm_create_vm(struct vm_create_params *vcp) 770{ 771 /* Sanity check arguments */ 772 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) 773 return (EINVAL); 774 775 if (vcp->vcp_memory_size > VMM_MAX_VM_MEM_SIZE) 776 return (EINVAL); 777 778 if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) 779 return (EINVAL); 780 781 if (ioctl(vmm_fd, VMM_IOC_CREATE, vcp) < 0) 782 return (errno); 783 784 return (0); 785} 786 787/* 788 * init_emulated_hw 789 * 790 * Initializes the userspace hardware emulation 791 */ 792void 793init_emulated_hw(struct vm_create_params *vcp, int *child_disks, 794 int *child_taps) 795{ 796 /* Init the i8253 PIT's 3 counters */ 797 bzero(&i8253_counter, sizeof(struct i8253_counter) * 3); 798 gettimeofday(&i8253_counter[0].tv, NULL); 799 gettimeofday(&i8253_counter[1].tv, NULL); 800 gettimeofday(&i8253_counter[2].tv, NULL); 801 i8253_counter[0].start = TIMER_DIV(100); 802 i8253_counter[1].start = TIMER_DIV(100); 803 i8253_counter[2].start = TIMER_DIV(100); 804 805 /* Init ns8250 UART */ 806 bzero(&com1_regs, sizeof(struct ns8250_regs)); 807 808 /* Initialize PCI */ 809 pci_init(); 810 811 /* Initialize virtio devices */ 812 virtio_init(vcp, child_disks, child_taps); 813} 814 815/* 816 * run_vm 817 * 818 * Runs the VM whose creation parameters are specified in vcp 819 * 820 * Parameters: 821 * vcp: vm_create_params struct containing the VM's desired creation 822 * configuration 823 * child_disks: previously-opened child VM disk file file descriptors 824 * child_taps: previously-opened child tap file descriptors 825 * 826 * Return values: 827 * 0: the VM exited normally 828 * !0 : the VM exited abnormally or failed to start 829 */ 830int 831run_vm(int *child_disks, int *child_taps, struct vm_create_params *vcp) 832{ 833 size_t i; 834 int ret; 835 pthread_t *tid; 836 void *exit_status; 837 struct vm_run_params **vrp; 838 839 ret = 0; 840 841 /* XXX cap vcp_ncpus to avoid overflow here */ 842 /* 843 * XXX ensure nvcpus in vcp is same as vm, or fix vmm to return einval 844 * on bad vcpu id 845 */ 846 tid = malloc(sizeof(pthread_t) * vcp->vcp_ncpus); 847 vrp = malloc(sizeof(struct vm_run_params *) * vcp->vcp_ncpus); 848 if (tid == NULL || vrp == NULL) { 849 fprintf(stderr, "%s: memory allocation error - exiting.\n", 850 __progname); 851 return (ENOMEM); 852 } 853 854 init_emulated_hw(vcp, child_disks, child_taps); 855 856 /* 857 * Create and launch one thread for each VCPU. These threads may 858 * migrate between PCPUs over time; the need to reload CPU state 859 * in such situations is detected and performed by vmm(4) in the 860 * kernel. 861 */ 862 for (i = 0 ; i < vcp->vcp_ncpus; i++) { 863 vrp[i] = malloc(sizeof(struct vm_run_params)); 864 if (vrp[i] == NULL) { 865 fprintf(stderr, "%s: memory allocation error - " 866 "exiting.\n", __progname); 867 /* caller will exit, so skip free'ing */ 868 return (ENOMEM); 869 } 870 vrp[i]->vrp_exit = malloc(sizeof(union vm_exit)); 871 if (vrp[i]->vrp_exit == NULL) { 872 fprintf(stderr, "%s: memory allocation error - " 873 "exiting.\n", __progname); 874 /* caller will exit, so skip free'ing */ 875 return (ENOMEM); 876 } 877 vrp[i]->vrp_vm_id = vcp->vcp_id; 878 vrp[i]->vrp_vcpu_id = i; 879 880 /* Start each VCPU run thread at vcpu_run_loop */ 881 ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]); 882 if (ret) { 883 /* caller will _exit after this return */ 884 return (ret); 885 } 886 } 887 888 /* Wait for all the threads to exit */ 889 for (i = 0; i < vcp->vcp_ncpus; i++) { 890 if (pthread_join(tid[i], &exit_status)) { 891 fprintf(stderr, "%s: failed to join thread %zd - " 892 "exiting\n", __progname, i); 893 return (EIO); 894 } 895 896 if (exit_status != NULL) { 897 fprintf(stderr, "%s: vm %d vcpu run thread %zd exited " 898 "abnormally\n", __progname, vcp->vcp_id, i); 899 ret = EIO; 900 } 901 } 902 903 return (ret); 904} 905 906/* 907 * vcpu_run_loop 908 * 909 * Runs a single VCPU until vmm(4) requires help handling an exit, 910 * or the VM terminates. 911 * 912 * Parameters: 913 * arg: vcpu_run_params for the VCPU being run by this thread 914 * 915 * Return values: 916 * NULL: the VCPU shutdown properly 917 * !NULL: error processing VCPU run, or the VCPU shutdown abnormally 918 */ 919void * 920vcpu_run_loop(void *arg) 921{ 922 struct vm_run_params *vrp = (struct vm_run_params *)arg; 923 intptr_t ret; 924 925 vrp->vrp_continue = 0; 926 vrp->vrp_injint = -1; 927 928 for (;;) { 929 if (ioctl(vmm_fd, VMM_IOC_RUN, vrp) < 0) { 930 /* If run ioctl failed, exit */ 931 ret = errno; 932 return ((void *)ret); 933 } 934 935 /* If the VM is terminating, exit normally */ 936 if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED) 937 return (NULL); 938 939 if (vrp->vrp_exit_reason != VM_EXIT_NONE) { 940 /* 941 * vmm(4) needs help handling an exit, handle in 942 * vcpu_exit. 943 */ 944 if (vcpu_exit(vrp)) 945 return ((void *)EIO); 946 } 947 } 948 949 return (NULL); 950} 951 952/* 953 * vcpu_exit_i8253 954 * 955 * Handles emulated i8253 PIT access (in/out instruction to PIT ports). 956 * We don't emulate all the modes of the i8253, just the basic squarewave 957 * clock. 958 * 959 * Parameters: 960 * vei: VM exit information from vmm(4) containing information on the in/out 961 * instruction being performed 962 */ 963void 964vcpu_exit_i8253(union vm_exit *vei) 965{ 966 uint32_t out_data; 967 uint8_t sel, rw, data; 968 uint64_t ns, ticks; 969 struct timeval now, delta; 970 971 if (vei->vei.vei_port == TIMER_CTRL) { 972 if (vei->vei.vei_dir == 0) { /* OUT instruction */ 973 out_data = vei->vei.vei_data; 974 sel = out_data & 975 (TIMER_SEL0 | TIMER_SEL1 | TIMER_SEL2); 976 sel = sel >> 6; 977 if (sel > 2) { 978 fprintf(stderr, "%s: i8253 PIT: invalid " 979 "timer selected (%d)\n", 980 __progname, sel); 981 return; 982 } 983 984 rw = vei->vei.vei_data & 985 (TIMER_LATCH | TIMER_LSB | 986 TIMER_MSB | TIMER_16BIT); 987 988 if (rw == TIMER_16BIT) { 989 /* 990 * XXX this seems to be used on occasion, needs 991 * to be implemented 992 */ 993 fprintf(stderr, "%s: i8253 PIT: 16 bit " 994 "counter I/O not supported\n", 995 __progname); 996 return; 997 } 998 999 /* 1000 * Since we don't truly emulate each tick of the PIT 1001 * clock, when the guest asks for the timer to be 1002 * latched, simulate what the counter would have been 1003 * had we performed full emulation. We do this by 1004 * calculating when the counter was reset vs how much 1005 * time has elapsed, then bias by the counter tick 1006 * rate. 1007 */ 1008 if (rw == TIMER_LATCH) { 1009 gettimeofday(&now, NULL); 1010 delta.tv_sec = now.tv_sec - 1011 i8253_counter[sel].tv.tv_sec; 1012 delta.tv_usec = now.tv_usec - 1013 i8253_counter[sel].tv.tv_usec; 1014 if (delta.tv_usec < 0) { 1015 delta.tv_sec--; 1016 delta.tv_usec += 1000000; 1017 } 1018 if (delta.tv_usec > 1000000) { 1019 delta.tv_sec++; 1020 delta.tv_usec -= 1000000; 1021 } 1022 ns = delta.tv_usec * 1000 + 1023 delta.tv_sec * 1000000000; 1024 ticks = ns / NS_PER_TICK; 1025 i8253_counter[sel].olatch = 1026 i8253_counter[sel].start - 1027 ticks % i8253_counter[sel].start; 1028 return; 1029 } 1030 1031 fprintf(stderr, "%s: i8253 PIT: unsupported rw mode " 1032 "%d\n", __progname, rw); 1033 return; 1034 } else { 1035 /* XXX should this return 0xff? */ 1036 fprintf(stderr, "%s: i8253 PIT: read from control " 1037 "port unsupported\n", __progname); 1038 } 1039 } else { 1040 sel = vei->vei.vei_port - (TIMER_CNTR0 + TIMER_BASE); 1041 if (vei->vei.vei_dir == 0) { /* OUT instruction */ 1042 if (i8253_counter[sel].last_w == 0) { 1043 out_data = vei->vei.vei_data; 1044 i8253_counter[sel].ilatch |= (out_data << 8); 1045 i8253_counter[sel].last_w = 1; 1046 } else { 1047 out_data = vei->vei.vei_data; 1048 i8253_counter[sel].ilatch |= out_data; 1049 i8253_counter[sel].start = 1050 i8253_counter[sel].ilatch; 1051 i8253_counter[sel].last_w = 0; 1052 } 1053 } else { 1054 if (i8253_counter[sel].last_r == 0) { 1055 data = i8253_counter[sel].olatch >> 8; 1056 vei->vei.vei_data = data; 1057 i8253_counter[sel].last_w = 1; 1058 } else { 1059 data = i8253_counter[sel].olatch & 0xFF; 1060 vei->vei.vei_data = data; 1061 i8253_counter[sel].last_w = 0; 1062 } 1063 } 1064 } 1065} 1066 1067/* 1068 * vcpu_process_com_data 1069 * 1070 * Emulate in/out instructions to the com1 (ns8250) UART data register 1071 * 1072 * Parameters: 1073 * vei: vm exit information from vmm(4) containing information on the in/out 1074 * instruction being performed 1075 */ 1076void 1077vcpu_process_com_data(union vm_exit *vei) 1078{ 1079 /* 1080 * vei_dir == 0 : out instruction 1081 * 1082 * The guest wrote to the data register. Since we are emulating a 1083 * no-fifo chip, write the character immediately to the pty and 1084 * assert TXRDY in IIR (if the guest has requested TXRDY interrupt 1085 * reporting) 1086 */ 1087 if (vei->vei.vei_dir == 0) { 1088 write(con_fd, &vei->vei.vei_data, 1); 1089 if (com1_regs.ier & 0x2) { 1090 /* Set TXRDY */ 1091 com1_regs.iir |= IIR_TXRDY; 1092 /* Set "interrupt pending" (IIR low bit cleared) */ 1093 com1_regs.iir &= ~0x1; 1094 } 1095 } else { 1096 /* 1097 * vei_dir == 1 : in instruction 1098 * 1099 * The guest read from the data register. Check to see if 1100 * there is data available (RXRDY) and if so, consume the 1101 * input data and return to the guest. Also clear the 1102 * interrupt info register regardless. 1103 */ 1104 if (com1_regs.lsr & LSR_RXRDY) { 1105 vei->vei.vei_data = com1_regs.data; 1106 com1_regs.data = 0x0; 1107 com1_regs.lsr &= ~LSR_RXRDY; 1108 } else { 1109 /* XXX should this be com1_regs.data or 0xff? */ 1110 vei->vei.vei_data = com1_regs.data; 1111 fprintf(stderr, "guest reading com1 when not ready\n"); 1112 } 1113 1114 /* Reading the data register always clears RXRDY from IIR */ 1115 com1_regs.iir &= ~IIR_RXRDY; 1116 1117 /* 1118 * Clear "interrupt pending" by setting IIR low bit to 1 1119 * if no interrupt are pending 1120 */ 1121 if (com1_regs.iir == 0x0) 1122 com1_regs.iir = 0x1; 1123 } 1124} 1125 1126/* 1127 * vcpu_process_com_lcr 1128 * 1129 * Emulate in/out instructions to the com1 (ns8250) UART line control register 1130 * 1131 * Paramters: 1132 * vei: vm exit information from vmm(4) containing information on the in/out 1133 * instruction being performed 1134 */ 1135void 1136vcpu_process_com_lcr(union vm_exit *vei) 1137{ 1138 /* 1139 * vei_dir == 0 : out instruction 1140 * 1141 * Write content to line control register 1142 */ 1143 if (vei->vei.vei_dir == 0) { 1144 com1_regs.lcr = (uint8_t)vei->vei.vei_data; 1145 } else { 1146 /* 1147 * vei_dir == 1 : in instruction 1148 * 1149 * Read line control register 1150 */ 1151 vei->vei.vei_data = com1_regs.lcr; 1152 } 1153} 1154 1155/* 1156 * vcpu_process_com_iir 1157 * 1158 * Emulate in/out instructions to the com1 (ns8250) UART interrupt information 1159 * register. Note that writes to this register actually are to a different 1160 * register, the FCR (FIFO control register) that we don't emulate but still 1161 * consume the data provided. 1162 * 1163 * Parameters: 1164 * vei: vm exit information from vmm(4) containing information on the in/out 1165 * instruction being performed 1166 */ 1167void 1168vcpu_process_com_iir(union vm_exit *vei) 1169{ 1170 /* 1171 * vei_dir == 0 : out instruction 1172 * 1173 * Write to FCR 1174 */ 1175 if (vei->vei.vei_dir == 0) { 1176 com1_regs.fcr = vei->vei.vei_data; 1177 } else { 1178 /* 1179 * vei_dir == 1 : in instruction 1180 * 1181 * Read IIR. Reading the IIR resets the TXRDY bit in the IIR 1182 * after the data is read. 1183 */ 1184 vei->vei.vei_data = com1_regs.iir; 1185 com1_regs.iir &= ~IIR_TXRDY; 1186 1187 /* 1188 * Clear "interrupt pending" by setting IIR low bit to 1 1189 * if no interrupts are pending 1190 */ 1191 if (com1_regs.iir == 0x0) 1192 com1_regs.iir = 0x1; 1193 } 1194} 1195 1196/* 1197 * vcpu_process_com_mcr 1198 * 1199 * Emulate in/out instructions to the com1 (ns8250) UART modem control 1200 * register. 1201 * 1202 * Parameters: 1203 * vei: vm exit information from vmm(4) containing information on the in/out 1204 * instruction being performed 1205 */ 1206void 1207vcpu_process_com_mcr(union vm_exit *vei) 1208{ 1209 /* 1210 * vei_dir == 0 : out instruction 1211 * 1212 * Write to MCR 1213 */ 1214 if (vei->vei.vei_dir == 0) { 1215 com1_regs.mcr = vei->vei.vei_data; 1216 } else { 1217 /* 1218 * vei_dir == 1 : in instruction 1219 * 1220 * Read from MCR 1221 */ 1222 vei->vei.vei_data = com1_regs.mcr; 1223 } 1224} 1225 1226/* 1227 * vcpu_process_com_lsr 1228 * 1229 * Emulate in/out instructions to the com1 (ns8250) UART line status register. 1230 * 1231 * Parameters: 1232 * vei: vm exit information from vmm(4) containing information on the in/out 1233 * instruction being performed 1234 */ 1235void 1236vcpu_process_com_lsr(union vm_exit *vei) 1237{ 1238 /* 1239 * vei_dir == 0 : out instruction 1240 * 1241 * Write to LSR. This is an illegal operation, so we just log it and 1242 * continue. 1243 */ 1244 if (vei->vei.vei_dir == 0) { 1245 fprintf(stderr, "%s: LSR UART write 0x%x unsupported\n", 1246 __progname, vei->vei.vei_data); 1247 } else { 1248 /* 1249 * vei_dir == 1 : in instruction 1250 * 1251 * Read from LSR. We always report TXRDY and TSRE since we 1252 * can process output characters immediately (at any time). 1253 */ 1254 vei->vei.vei_data = com1_regs.lsr | LSR_TSRE | LSR_TXRDY; 1255 } 1256} 1257 1258/* 1259 * vcpu_process_com_msr 1260 * 1261 * Emulate in/out instructions to the com1 (ns8250) UART modem status register. 1262 * 1263 * Parameters: 1264 * vei: vm exit information from vmm(4) containing information on the in/out 1265 * instruction being performed 1266 */ 1267void 1268vcpu_process_com_msr(union vm_exit *vei) 1269{ 1270 /* 1271 * vei_dir == 0 : out instruction 1272 * 1273 * Write to MSR. This is an illegal operation, so we just log it and 1274 * continue. 1275 */ 1276 if (vei->vei.vei_dir == 0) { 1277 fprintf(stderr, "%s: MSR UART write 0x%x unsupported\n", 1278 __progname, vei->vei.vei_data); 1279 } else { 1280 /* 1281 * vei_dir == 1 : in instruction 1282 * 1283 * Read from MSR. We always report DCD, DSR, and CTS. 1284 */ 1285 vei->vei.vei_data = com1_regs.lsr | MSR_DCD | MSR_DSR | MSR_CTS; 1286 } 1287} 1288 1289/* 1290 * vcpu_process_com_scr 1291 * 1292 * Emulate in/out instructions to the com1 (ns8250) UART scratch register. The 1293 * scratch register is sometimes used to distinguish an 8250 from a 16450, 1294 * and/or used to distinguish submodels of the 8250 (eg 8250A, 8250B). We 1295 * simulate an "original" 8250 by forcing the scratch register to return data 1296 * on read that is different from what was written. 1297 * 1298 * Parameters: 1299 * vei: vm exit information from vmm(4) containing information on the in/out 1300 * instruction being performed 1301 */ 1302void 1303vcpu_process_com_scr(union vm_exit *vei) 1304{ 1305 /* 1306 * vei_dir == 0 : out instruction 1307 * 1308 * Write to SCR 1309 */ 1310 if (vei->vei.vei_dir == 0) { 1311 com1_regs.scr = vei->vei.vei_data; 1312 } else { 1313 /* 1314 * vei_dir == 1 : in instruction 1315 * 1316 * Read from SCR. To make sure we don't accidentally simulate 1317 * a real scratch register, we negate what was written on 1318 * subsequent readback. 1319 */ 1320 vei->vei.vei_data = ~com1_regs.scr; 1321 } 1322} 1323 1324/* 1325 * vcpu_process_com_ier 1326 * 1327 * Emulate in/out instructions to the com1 (ns8250) UART interrupt enable 1328 * register. 1329 * 1330 * Parameters: 1331 * vei: vm exit information from vmm(4) containing information on the in/out 1332 * instruction being performed 1333 */ 1334void 1335vcpu_process_com_ier(union vm_exit *vei) 1336{ 1337 /* 1338 * vei_dir == 0 : out instruction 1339 * 1340 * Write to IER 1341 */ 1342 if (vei->vei.vei_dir == 0) { 1343 com1_regs.ier = vei->vei.vei_data; 1344 } else { 1345 /* 1346 * vei_dir == 1 : in instruction 1347 * 1348 * Read from IER 1349 */ 1350 vei->vei.vei_data = com1_regs.ier; 1351 } 1352} 1353 1354/* 1355 * vcpu_exit_com 1356 * 1357 * Process com1 (ns8250) UART exits. vmd handles most basic 8250 1358 * features with the exception of the divisor latch (eg, no baud 1359 * rate support) 1360 * 1361 * Parameters: 1362 * vrp: vcpu run parameters containing guest state for this exit 1363 */ 1364void 1365vcpu_exit_com(struct vm_run_params *vrp) 1366{ 1367 union vm_exit *vei = vrp->vrp_exit; 1368 1369 switch(vei->vei.vei_port) { 1370 case COM1_LCR: 1371 vcpu_process_com_lcr(vei); 1372 break; 1373 case COM1_IER: 1374 vcpu_process_com_ier(vei); 1375 break; 1376 case COM1_IIR: 1377 vcpu_process_com_iir(vei); 1378 break; 1379 case COM1_MCR: 1380 vcpu_process_com_mcr(vei); 1381 break; 1382 case COM1_LSR: 1383 vcpu_process_com_lsr(vei); 1384 break; 1385 case COM1_MSR: 1386 vcpu_process_com_msr(vei); 1387 break; 1388 case COM1_SCR: 1389 vcpu_process_com_scr(vei); 1390 break; 1391 case COM1_DATA: 1392 vcpu_process_com_data(vei); 1393 break; 1394 } 1395} 1396 1397/* 1398 * vcpu_exit_pci 1399 * 1400 * Handle all I/O to the emulated PCI subsystem. 1401 * 1402 * Parameters: 1403 * vrp: vcpu run paramters containing guest state for this exit 1404 * 1405 * Return values: 1406 * 0xff if no interrupt is required after this pci exit, 1407 * or an interrupt vector otherwise 1408 */ 1409uint8_t 1410vcpu_exit_pci(struct vm_run_params *vrp) 1411{ 1412 union vm_exit *vei = vrp->vrp_exit; 1413 uint8_t intr; 1414 1415 intr = 0xFF; 1416 1417 switch(vei->vei.vei_port) { 1418 case PCI_MODE1_ADDRESS_REG: 1419 pci_handle_address_reg(vrp); 1420 break; 1421 case PCI_MODE1_DATA_REG: 1422 pci_handle_data_reg(vrp); 1423 break; 1424 case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END: 1425 intr = pci_handle_io(vrp); 1426 break; 1427 default: 1428 fprintf(stderr, "%s: unknown PCI register 0x%llx\n", 1429 __progname, (uint64_t)vei->vei.vei_port); 1430 break; 1431 } 1432 1433 return (intr); 1434} 1435 1436/* 1437 * vcpu_exit_inout 1438 * 1439 * Handle all I/O exits that need to be emulated in vmd. This includes the 1440 * i8253 PIT and the com1 ns8250 UART. 1441 * 1442 * Parameters: 1443 * vrp: vcpu run parameters containing guest state for this exit 1444 */ 1445void 1446vcpu_exit_inout(struct vm_run_params *vrp) 1447{ 1448 union vm_exit *vei = vrp->vrp_exit; 1449 uint8_t intr; 1450 1451 switch(vei->vei.vei_port) { 1452 case TIMER_CTRL: 1453 case (TIMER_CNTR0 + TIMER_BASE): 1454 case (TIMER_CNTR1 + TIMER_BASE): 1455 case (TIMER_CNTR2 + TIMER_BASE): 1456 vcpu_exit_i8253(vei); 1457 break; 1458 case COM1_DATA ... COM1_SCR: 1459 vcpu_exit_com(vrp); 1460 break; 1461 case PCI_MODE1_ADDRESS_REG: 1462 case PCI_MODE1_DATA_REG: 1463 case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END: 1464 intr = vcpu_exit_pci(vrp); 1465 if (intr != 0xFF) 1466 vrp->vrp_injint = intr; 1467 else 1468 vrp->vrp_injint = -1; 1469 break; 1470 default: 1471 /* IN from unsupported port gives FFs */ 1472 if (vei->vei.vei_dir == 1) 1473 vei->vei.vei_data = 0xFFFFFFFF; 1474 break; 1475 } 1476} 1477 1478/* 1479 * vcpu_exit 1480 * 1481 * Handle a vcpu exit. This function is called when it is determined that 1482 * vmm(4) requires the assistance of vmd to support a particular guest 1483 * exit type (eg, accessing an I/O port or device). Guest state is contained 1484 * in 'vrp', and will be resent to vmm(4) on exit completion. 1485 * 1486 * Upon conclusion of handling the exit, the function determines if any 1487 * interrupts should be injected into the guest, and sets vrp->vrp_injint 1488 * to the IRQ line whose interrupt should be vectored (or -1 if no interrupt 1489 * is to be injected). 1490 * 1491 * Parameters: 1492 * vrp: vcpu run parameters containing guest state for this exit 1493 * 1494 * Return values: 1495 * 0: the exit was handled successfully 1496 * 1: an error occurred (exit not handled) 1497 */ 1498int 1499vcpu_exit(struct vm_run_params *vrp) 1500{ 1501 ssize_t sz; 1502 char ch; 1503 1504 switch (vrp->vrp_exit_reason) { 1505 case VMX_EXIT_IO: 1506 vcpu_exit_inout(vrp); 1507 break; 1508 case VMX_EXIT_HLT: 1509 /* 1510 * XXX handle halted state, no reason to run this vcpu again 1511 * until a vm interrupt is to be injected 1512 */ 1513 break; 1514 default: 1515 fprintf(stderr, "%s: unknown exit reason %d\n", 1516 __progname, vrp->vrp_exit_reason); 1517 return (1); 1518 } 1519 1520 /* XXX interrupt priority */ 1521 if (vionet_process_rx()) 1522 vrp->vrp_injint = 9; 1523 1524 /* 1525 * Is there a new character available on com1? 1526 * If so, consume the character, buffer it into the com1 data register 1527 * assert IRQ4, and set the line status register RXRDY bit. 1528 * 1529 * XXX - move all this com intr checking to another function 1530 */ 1531 sz = read(con_fd, &ch, sizeof(char)); 1532 if (sz == 1) { 1533 com1_regs.lsr |= LSR_RXRDY; 1534 com1_regs.data = ch; 1535 /* XXX these ier and iir bits should be IER_x and IIR_x */ 1536 if (com1_regs.ier & 0x1) { 1537 com1_regs.iir |= (2 << 1); 1538 com1_regs.iir &= ~0x1; 1539 } 1540 } 1541 1542 /* 1543 * Clear "interrupt pending" by setting IIR low bit to 1 if no 1544 * interrupts are pending 1545 */ 1546 /* XXX these iir magic numbers should be IIR_x */ 1547 if ((com1_regs.iir & ~0x1) == 0x0) 1548 com1_regs.iir = 0x1; 1549 1550 /* If pending interrupt and nothing waiting to be injected, inject */ 1551 if ((com1_regs.iir & 0x1) == 0) 1552 if (vrp->vrp_injint == -1) 1553 vrp->vrp_injint = 0x4; 1554 vrp->vrp_continue = 1; 1555 1556 return (0); 1557} 1558 1559/* 1560 * write_page 1561 * 1562 * Pushes a page of data from 'buf' into the guest VM's memory 1563 * at paddr 'dst'. 1564 * 1565 * Parameters: 1566 * dst: the destination paddr_t in the guest VM to push into. 1567 * If there is no guest paddr mapping at 'dst', a new page will be 1568 * faulted in by the VMM (provided 'dst' represents a valid paddr 1569 * in the guest's address space) 1570 * buf: page of data to push 1571 * len: size of 'buf' 1572 * do_mask: 1 to mask the destination address (for kernel load), 0 to 1573 * leave 'dst' unmasked 1574 * 1575 * Return values: 1576 * various return values from ioctl(VMM_IOC_WRITEPAGE), or 0 if no error 1577 * occurred. 1578 * 1579 * Note - this function only handles GPAs < 4GB. 1580 */ 1581int 1582write_page(uint32_t dst, void *buf, uint32_t len, int do_mask) 1583{ 1584 int ret; 1585 struct vm_writepage_params vwp; 1586 1587 /* 1588 * Mask kernel load addresses to avoid uint32_t -> uint64_t cast 1589 * errors 1590 */ 1591 if (do_mask) 1592 dst &= 0xFFFFFFF; 1593 1594 vwp.vwp_paddr = (paddr_t)dst; 1595 vwp.vwp_data = buf; 1596 vwp.vwp_vm_id = vm_id; 1597 vwp.vwp_len = len; 1598 if (ioctl(vmm_fd, VMM_IOC_WRITEPAGE, &vwp) < 0) { 1599 ret = errno; 1600 fprintf(stderr, "writepage ioctl failed: %d\n", ret); 1601 return (ret); 1602 } 1603 return (0); 1604} 1605 1606/* 1607 * read_page 1608 * 1609 * Reads a page of memory at guest paddr 'src' into 'buf'. 1610 * 1611 * Parameters: 1612 * src: the source paddr_t in the guest VM to read from. 1613 * buf: destination (local) buffer 1614 * len: size of 'buf' 1615 * do_mask: 1 to mask the source address (for kernel load), 0 to 1616 * leave 'src' unmasked 1617 * 1618 * Return values: 1619 * various return values from ioctl(VMM_IOC_READPAGE), or 0 if no error 1620 * occurred. 1621 * 1622 * Note - this function only handles GPAs < 4GB. 1623 */ 1624int 1625read_page(uint32_t src, void *buf, uint32_t len, int do_mask) 1626{ 1627 int ret; 1628 struct vm_readpage_params vrp; 1629 1630 /* 1631 * Mask kernel load addresses to avoid uint32_t -> uint64_t cast 1632 * errors 1633 */ 1634 if (do_mask) 1635 src &= 0xFFFFFFF; 1636 1637 vrp.vrp_paddr = (paddr_t)src; 1638 vrp.vrp_data = buf; 1639 vrp.vrp_vm_id = vm_id; 1640 vrp.vrp_len = len; 1641 if (ioctl(vmm_fd, VMM_IOC_READPAGE, &vrp) < 0) { 1642 ret = errno; 1643 fprintf(stderr, "readpage ioctl failed: %d\n", ret); 1644 return (ret); 1645 } 1646 return (0); 1647} 1648