vmd.c revision 1.4
1/* $OpenBSD: vmd.c,v 1.4 2015/11/23 13:04:49 reyk Exp $ */ 2 3/* 4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19/* 20 * vmd(8) - virtual machine daemon 21 */ 22 23#include <sys/types.h> 24#include <sys/ioctl.h> 25#include <sys/queue.h> 26#include <sys/uio.h> 27#include <sys/socket.h> 28#include <sys/stat.h> 29#include <sys/un.h> 30#include <sys/wait.h> 31#include <sys/mman.h> 32#include <sys/time.h> 33 34#include <dev/ic/comreg.h> 35#include <dev/ic/i8253reg.h> 36#include <dev/isa/isareg.h> 37#include <dev/pci/pcireg.h> 38 39#include <machine/param.h> 40#include <machine/vmmvar.h> 41 42#include <errno.h> 43#include <fcntl.h> 44#include <imsg.h> 45#include <limits.h> 46#include <pthread.h> 47#include <pwd.h> 48#include <signal.h> 49#include <stddef.h> 50#include <stdio.h> 51#include <stdlib.h> 52#include <string.h> 53#include <syslog.h> 54#include <termios.h> 55#include <unistd.h> 56#include <util.h> 57 58#include "vmd.h" 59#include "loadfile.h" 60#include "pci.h" 61#include "virtio.h" 62 63#define NR_BACKLOG 5 64 65#define MAX_TAP 256 66 67/* 68 * Emulated 8250 UART 69 * 70 */ 71#define COM1_DATA 0x3f8 72#define COM1_IER 0x3f9 73#define COM1_IIR 0x3fa 74#define COM1_LCR 0x3fb 75#define COM1_MCR 0x3fc 76#define COM1_LSR 0x3fd 77#define COM1_MSR 0x3fe 78#define COM1_SCR 0x3ff 79 80/* 81 * Emulated i8253 PIT (counter) 82 */ 83#define TIMER_BASE 0x40 84#define TIMER_CTRL 0x43 /* 8253 Timer #1 */ 85#define NS_PER_TICK (1000000000 / TIMER_FREQ) 86 87/* i8253 registers */ 88struct i8253_counter { 89 struct timeval tv; /* timer start time */ 90 uint16_t start; /* starting value */ 91 uint16_t olatch; /* output latch */ 92 uint16_t ilatch; /* input latch */ 93 uint8_t last_r; /* last read byte (MSB/LSB) */ 94 uint8_t last_w; /* last written byte (MSB/LSB) */ 95}; 96 97/* ns8250 UART registers */ 98struct ns8250_regs { 99 uint8_t lcr; /* Line Control Register */ 100 uint8_t fcr; /* FIFO Control Register */ 101 uint8_t iir; /* Interrupt ID Register */ 102 uint8_t ier; /* Interrupt Enable Register */ 103 uint8_t divlo; /* Baud rate divisor low byte */ 104 uint8_t divhi; /* Baud rate divisor high byte */ 105 uint8_t msr; /* Modem Status Register */ 106 uint8_t lsr; /* Line Status Register */ 107 uint8_t mcr; /* Modem Control Register */ 108 uint8_t scr; /* Scratch Register */ 109 uint8_t data; /* Unread input data */ 110}; 111 112struct i8253_counter i8253_counter[3]; 113struct ns8250_regs com1_regs; 114 115__dead void usage(void); 116 117void sighdlr(int); 118int main(int, char **); 119int control_run(void); 120int disable_vmm(void); 121int enable_vmm(void); 122int start_vm(struct imsg *); 123int terminate_vm(struct imsg *); 124int get_info_vm(struct imsgbuf *); 125int start_client_vmd(void); 126int opentap(void); 127int run_vm(int *, int *, struct vm_create_params *); 128void *vcpu_run_loop(void *); 129int vcpu_exit(struct vm_run_params *); 130int vmm_create_vm(struct vm_create_params *); 131void init_emulated_hw(struct vm_create_params *, int *, int *); 132void vcpu_exit_inout(struct vm_run_params *); 133uint8_t vcpu_exit_pci(struct vm_run_params *); 134void vcpu_exit_i8253(union vm_exit *); 135void vcpu_exit_com(struct vm_run_params *); 136void vcpu_process_com_data(union vm_exit *); 137void vcpu_process_com_lcr(union vm_exit *); 138void vcpu_process_com_lsr(union vm_exit *); 139void vcpu_process_com_ier(union vm_exit *); 140void vcpu_process_com_mcr(union vm_exit *); 141void vcpu_process_com_iir(union vm_exit *); 142void vcpu_process_com_msr(union vm_exit *); 143void vcpu_process_com_scr(union vm_exit *); 144 145int vmm_fd, con_fd, vm_id; 146volatile sig_atomic_t quit; 147 148SLIST_HEAD(vmstate_head, vmstate); 149struct vmstate_head vmstate; 150 151extern char *__progname; 152 153/* 154 * sighdlr 155 * 156 * Signal handler for TERM/INT/CHLD signals used during daemon shutdown 157 * 158 * Parameters: 159 * sig: signal caught 160 */ 161void 162sighdlr(int sig) 163{ 164 switch (sig) { 165 case SIGTERM: 166 case SIGINT: 167 /* Tell main imsg loop to exit */ 168 quit = 1; 169 break; 170 case SIGCHLD: 171 while (waitpid(WAIT_ANY, 0, WNOHANG) > 0) {} 172 break; 173 } 174} 175 176__dead void 177usage(void) 178{ 179 extern char *__progname; 180 fprintf(stderr, "usage: %s [-dv]", __progname); 181 exit(1); 182} 183 184int 185main(int argc, char **argv) 186{ 187 int debug = 0, verbose = 0, c, res; 188 189 while ((c = getopt(argc, argv, "dv")) != -1) { 190 switch (c) { 191 case 'd': 192 debug = 2; 193 break; 194 case 'v': 195 verbose++; 196 break; 197 default: 198 usage(); 199 } 200 } 201 202 /* log to stderr until daemonized */ 203 log_init(debug ? debug : 1, LOG_DAEMON); 204 205 /* Open /dev/vmm */ 206 vmm_fd = open(VMM_NODE, O_RDONLY); 207 if (vmm_fd == -1) 208 fatal("can't open vmm device node %s", VMM_NODE); 209 210 setproctitle("control"); 211 212 SLIST_INIT(&vmstate); 213 214 signal(SIGTERM, sighdlr); 215 signal(SIGINT, sighdlr); 216 signal(SIGCHLD, sighdlr); 217 218 log_init(debug, LOG_DAEMON); 219 log_verbose(verbose); 220 log_procinit("control"); 221 222 if (!debug && daemon(1, 0) == -1) 223 fatal("can't daemonize"); 224 225 res = control_run(); 226 227 if (res == -1) 228 fatalx("control socket error"); 229 230 return (0); 231} 232 233/* 234 * control_run 235 * 236 * Main control loop - establishes listening socket for incoming vmmctl(8) 237 * requests and dispatches appropriate calls to vmm(4). Replies to 238 * vmmctl(8) using imsg. 239 * 240 * Return values: 241 * 0: normal exit (signal to quit received) 242 * -1: abnormal exit (various causes) 243 */ 244int 245control_run(void) 246{ 247 struct sockaddr_un sun, c_sun; 248 socklen_t len; 249 int fd, connfd, n, res; 250 mode_t mode, old_umask; 251 char *socketpath; 252 struct imsgbuf *ibuf; 253 struct imsg imsg; 254 255 /* Establish and start listening on control socket */ 256 socketpath = SOCKET_NAME; 257 if ((fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)) == -1) { 258 log_warn("%s: socket error", __progname); 259 return (-1); 260 } 261 262 bzero(&sun, sizeof(sun)); 263 sun.sun_family = AF_UNIX; 264 if (strlcpy(sun.sun_path, socketpath, sizeof(sun.sun_path)) >= 265 sizeof(sun.sun_path)) { 266 log_warnx("%s: socket name too long", __progname); 267 close(fd); 268 return (-1); 269 } 270 271 if (unlink(socketpath) == -1) 272 if (errno != ENOENT) { 273 log_warn("%s: unlink of %s failed", 274 __progname, socketpath); 275 close(fd); 276 return (-1); 277 } 278 279 old_umask = umask(S_IXUSR|S_IXGRP|S_IWOTH|S_IROTH|S_IXOTH); 280 mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP; 281 282 if (bind(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1) { 283 log_warn("%s: control_init: bind of %s failed", 284 __progname, socketpath); 285 close(fd); 286 umask(old_umask); 287 return (-1); 288 } 289 290 umask(old_umask); 291 292 if (chmod(socketpath, mode) == -1) { 293 log_warn("%s: control_init: chmod of %s failed", 294 __progname, socketpath); 295 close(fd); 296 unlink(socketpath); 297 return (-1); 298 } 299 300 if ((ibuf = malloc(sizeof(struct imsgbuf))) == NULL) { 301 log_warn("%s: out of memory", __progname); 302 close(fd); 303 unlink(socketpath); 304 return (-1); 305 } 306 307 if (listen(fd, NR_BACKLOG) == -1) { 308 log_warn("%s: listen failed", __progname); 309 close(fd); 310 unlink(socketpath); 311 return (-1); 312 } 313 314 while (!quit) { 315 if ((connfd = accept4(fd, (struct sockaddr *)&c_sun, &len, 316 SOCK_CLOEXEC)) == -1) { 317 log_warn("%s: accept4 error", __progname); 318 close(fd); 319 unlink(socketpath); 320 return (-1); 321 } 322 323 imsg_init(ibuf, connfd); 324 if ((n = imsg_read(ibuf)) == -1 || n == 0) { 325 log_warnx("%s: imsg_read error, n=%d", 326 __progname, n); 327 continue; 328 } 329 330 for (;;) { 331 if ((n = imsg_get(ibuf, &imsg)) == -1) 332 return (-1); 333 334 if (n == 0) 335 break; 336 337 /* Process incoming message (from vmmctl(8)) */ 338 switch (imsg.hdr.type) { 339 case IMSG_VMDOP_DISABLE_VMM_REQUEST: 340 res = disable_vmm(); 341 imsg_compose(ibuf, 342 IMSG_VMDOP_DISABLE_VMM_RESPONSE, 0, 0, -1, 343 &res, sizeof(res)); 344 break; 345 case IMSG_VMDOP_ENABLE_VMM_REQUEST: 346 res = enable_vmm(); 347 imsg_compose(ibuf, 348 IMSG_VMDOP_ENABLE_VMM_RESPONSE, 0, 0, -1, 349 &res, sizeof(res)); 350 break; 351 case IMSG_VMDOP_START_VM_REQUEST: 352 res = start_vm(&imsg); 353 imsg_compose(ibuf, 354 IMSG_VMDOP_START_VM_RESPONSE, 0, 0, -1, 355 &res, sizeof(res)); 356 break; 357 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 358 res = terminate_vm(&imsg); 359 imsg_compose(ibuf, 360 IMSG_VMDOP_TERMINATE_VM_RESPONSE, 0, 0, -1, 361 &res, sizeof(res)); 362 break; 363 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 364 res = get_info_vm(ibuf); 365 imsg_compose(ibuf, 366 IMSG_VMDOP_GET_INFO_VM_END_DATA, 0, 0, -1, 367 &res, sizeof(res)); 368 break; 369 } 370 371 while (ibuf->w.queued) 372 if (msgbuf_write(&ibuf->w) <= 0 && errno != 373 EAGAIN) { 374 log_warn("%s: msgbuf_write error", 375 __progname); 376 close(fd); 377 close(connfd); 378 unlink(socketpath); 379 return (-1); 380 } 381 imsg_free(&imsg); 382 } 383 close(connfd); 384 } 385 386 signal(SIGCHLD, SIG_IGN); 387 388 return (0); 389} 390 391/* 392 * disable_vmm 393 * 394 * Disables VMM mode on all CPUs 395 * 396 * Return values: 397 * 0: success 398 * !0 : ioctl to vmm(4) failed 399 */ 400int 401disable_vmm(void) 402{ 403 if (ioctl(vmm_fd, VMM_IOC_STOP, NULL) < 0) 404 return (errno); 405 406 return (0); 407} 408 409/* 410 * enable_vmm 411 * 412 * Enables VMM mode on all CPUs 413 * 414 * Return values: 415 * 0: success 416 * !0 : ioctl to vmm(4) failed 417 */ 418int 419enable_vmm(void) 420{ 421 if (ioctl(vmm_fd, VMM_IOC_START, NULL) < 0) 422 return (errno); 423 424 return (0); 425} 426 427/* 428 * terminate_vm 429 * 430 * Requests vmm(4) to terminate the VM whose ID is provided in the 431 * supplied vm_terminate_params structure (vtp->vtp_vm_id) 432 * 433 * Parameters 434 * imsg: The incoming imsg body whose 'data' field contains the 435 * vm_terminate_params struct 436 * 437 * Return values: 438 * 0: success 439 * !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not 440 * valid) 441 */ 442int 443terminate_vm(struct imsg *imsg) 444{ 445 struct vm_terminate_params *vtp; 446 447 vtp = (struct vm_terminate_params *)imsg->data; 448 449 if (ioctl(vmm_fd, VMM_IOC_TERM, vtp) < 0) 450 return (errno); 451 452 return (0); 453} 454 455/* 456 * opentap 457 * 458 * Opens the next available tap device, up to MAX_TAP. 459 * 460 * Returns a file descriptor to the tap node opened, or -1 if no tap 461 * devices were available. 462 */ 463int 464opentap(void) 465{ 466 int i, fd; 467 char path[PATH_MAX]; 468 469 for (i = 0; i < MAX_TAP; i++) { 470 snprintf(path, PATH_MAX, "/dev/tap%d", i); 471 fd = open(path, O_RDWR | O_NONBLOCK); 472 if (fd != -1) 473 return (fd); 474 } 475 476 return (-1); 477} 478 479/* 480 * start_vm 481 * 482 * Starts a new VM with the creation parameters supplied (in the incoming 483 * imsg->data field). This function performs a basic sanity check on the 484 * incoming parameters and then performs the following steps to complete 485 * the creation of the VM: 486 * 487 * 1. opens the VM disk image files specified in the VM creation parameters 488 * 2. opens the specified VM kernel 489 * 3. creates a VM console tty pair using openpty 490 * 4. forks, passing the file descriptors opened in steps 1-3 to the child 491 * vmd responsible for dropping privilege and running the VM's VCPU 492 * loops. 493 * 494 * Parameters: 495 * imsg: The incoming imsg body whose 'data' field is a vm_create_params 496 * struct containing the VM creation parameters. 497 * 498 * Return values: 499 * 0: success 500 * !0 : failure - typically an errno indicating the source of the failure 501 */ 502int 503start_vm(struct imsg *imsg) 504{ 505 struct vm_create_params *vcp; 506 size_t i; 507 off_t kernel_size; 508 struct stat sb; 509 int child_disks[VMM_MAX_DISKS_PER_VM], kernel_fd, ret, ttym_fd; 510 int child_taps[VMM_MAX_NICS_PER_VM]; 511 int ttys_fd; 512 char ptyn[32]; 513 514 vcp = (struct vm_create_params *)imsg->data; 515 516 for (i = 0 ; i < VMM_MAX_DISKS_PER_VM; i++) 517 child_disks[i] = -1; 518 for (i = 0 ; i < VMM_MAX_NICS_PER_VM; i++) 519 child_taps[i] = -1; 520 521 /* 522 * XXX kernel_fd can't be global (possible race if multiple VMs 523 * being created at the same time). Probably need to move this 524 * into the child before dropping privs, or just make it local 525 * to this function? 526 */ 527 kernel_fd = -1; 528 529 ttym_fd = -1; 530 ttys_fd = -1; 531 532 /* Open disk images for child */ 533 for (i = 0 ; i < vcp->vcp_ndisks; i++) { 534 child_disks[i] = open(vcp->vcp_disks[i], O_RDWR); 535 if (child_disks[i] == -1) { 536 ret = errno; 537 log_warn("%s: can't open %s", __progname, 538 vcp->vcp_disks[i]); 539 goto err; 540 } 541 } 542 543 bzero(&sb, sizeof(sb)); 544 if (stat(vcp->vcp_kernel, &sb) == -1) { 545 ret = errno; 546 log_warn("%s: can't stat kernel image %s", 547 __progname, vcp->vcp_kernel); 548 goto err; 549 } 550 551 kernel_size = sb.st_size; 552 553 /* Open kernel image */ 554 kernel_fd = open(vcp->vcp_kernel, O_RDONLY); 555 if (kernel_fd == -1) { 556 ret = errno; 557 log_warn("%s: can't open kernel image %s", 558 __progname, vcp->vcp_kernel); 559 goto err; 560 } 561 562 if (openpty(&ttym_fd, &ttys_fd, ptyn, NULL, NULL) == -1) { 563 ret = errno; 564 log_warn("%s: openpty failed", __progname); 565 goto err; 566 } 567 568 if (close(ttys_fd)) { 569 ret = errno; 570 log_warn("%s: close tty failed", __progname); 571 goto err; 572 } 573 574 /* Open tap devices for child */ 575 for (i = 0 ; i < vcp->vcp_nnics; i++) { 576 child_taps[i] = opentap(); 577 if (child_taps[i] == -1) { 578 ret = errno; 579 log_warn("%s: can't open tap for nic %zd", 580 __progname, i); 581 goto err; 582 } 583 } 584 585 /* Start child vmd for this VM (fork, chroot, drop privs) */ 586 ret = start_client_vmd(); 587 588 /* Start child failed? - cleanup and leave */ 589 if (ret == -1) { 590 ret = EIO; 591 goto err; 592 } 593 594 if (ret > 0) { 595 /* Parent */ 596 for (i = 0 ; i < vcp->vcp_ndisks; i++) 597 close(child_disks[i]); 598 599 for (i = 0 ; i < vcp->vcp_nnics; i++) 600 close(child_taps[i]); 601 602 close(kernel_fd); 603 close(ttym_fd); 604 605 return (0); 606 } 607 else { 608 /* Child */ 609 setproctitle(vcp->vcp_name); 610 log_procinit(vcp->vcp_name); 611 612 log_info("%s: vm console: %s", __progname, ptyn); 613 ret = vmm_create_vm(vcp); 614 if (ret) { 615 errno = ret; 616 fatal("create vmm ioctl failed - exiting"); 617 } 618 619 /* Load kernel image */ 620 ret = loadelf_main(kernel_fd, vcp->vcp_id, vcp->vcp_memory_size); 621 if (ret) { 622 errno = ret; 623 fatal("failed to load kernel - exiting"); 624 } 625 626 close(kernel_fd); 627 628 con_fd = ttym_fd; 629 if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1) 630 fatal("failed to set nonblocking mode on console"); 631 632 /* Execute the vcpu run loop(s) for this VM */ 633 ret = run_vm(child_disks, child_taps, vcp); 634 _exit(ret != 0); 635 } 636 637 return (ret); 638 639err: 640 for (i = 0 ; i < vcp->vcp_ndisks; i++) 641 if (child_disks[i] != -1) 642 close(child_disks[i]); 643 644 for (i = 0 ; i < vcp->vcp_nnics; i++) 645 if (child_taps[i] != -1) 646 close(child_taps[i]); 647 648 if (kernel_fd != -1) 649 close(kernel_fd); 650 651 if (ttym_fd != -1) 652 close(ttym_fd); 653 654 return (ret); 655} 656 657/* 658 * get_info_vm 659 * 660 * Returns a list of VMs known to vmm(4). 661 * 662 * Parameters: 663 * ibuf: the imsg ibuf in which to place the results. A new imsg will 664 * be created using this ibuf. 665 * 666 * Return values: 667 * 0: success 668 * !0 : failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl) 669 */ 670int 671get_info_vm(struct imsgbuf *ibuf) 672{ 673 int ret; 674 size_t ct, i; 675 struct ibuf *obuf; 676 struct vm_info_params vip; 677 struct vm_info_result *info; 678 679 /* 680 * We issue the VMM_IOC_INFO ioctl twice, once with an input 681 * buffer size of 0, which results in vmm(4) returning the 682 * number of bytes required back to us in vip.vip_size, 683 * and then we call it again after malloc'ing the required 684 * number of bytes. 685 * 686 * It is possible that we could fail a second time (eg, if 687 * another VM was created in the instant between the two 688 * ioctls, but in that case the caller can just try again 689 * as vmm(4) will return a zero-sized list in that case. 690 */ 691 vip.vip_size = 0; 692 info = NULL; 693 ret = 0; 694 695 /* First ioctl to see how many bytes needed (vip.vip_size) */ 696 if (ioctl(vmm_fd, VMM_IOC_INFO, &vip) < 0) 697 return (errno); 698 699 if (vip.vip_info_ct != 0) 700 return (EIO); 701 702 info = malloc(vip.vip_size); 703 if (info == NULL) 704 return (ENOMEM); 705 706 /* Second ioctl to get the actual list */ 707 vip.vip_info = info; 708 if (ioctl(vmm_fd, VMM_IOC_INFO, &vip) < 0) { 709 ret = errno; 710 free(info); 711 return (ret); 712 } 713 714 /* Return info to vmmctl(4) */ 715 ct = vip.vip_size / sizeof(struct vm_info_result); 716 for (i = 0; i < ct; i++) { 717 obuf = imsg_create(ibuf, IMSG_VMDOP_GET_INFO_VM_DATA, 0, 0, 718 sizeof(struct vm_info_result)); 719 imsg_add(obuf, &info[i], sizeof(struct vm_info_result)); 720 imsg_close(ibuf, obuf); 721 } 722 free(info); 723 return (0); 724} 725 726 727/* 728 * start_client_vmd 729 * 730 * forks a copy of the parent vmd, chroots to VMD_USER's home, drops 731 * privileges (changes to user VMD_USER), and returns. 732 * Should the fork operation succeed, but later chroot/privsep 733 * fail, the child exits. 734 * 735 * Return values (returns to both child and parent on success): 736 * -1 : failure 737 * 0: return to child vmd returns 0 738 * !0 : return to parent vmd returns the child's pid 739 */ 740int 741start_client_vmd(void) 742{ 743 int child_pid; 744 struct passwd *pw; 745 746 pw = getpwnam(VMD_USER); 747 if (pw == NULL) { 748 log_warnx("%s: no such user %s", __progname, VMD_USER); 749 return (-1); 750 } 751 752 child_pid = fork(); 753 if (child_pid < 0) 754 return (-1); 755 756 if (!child_pid) { 757 /* Child */ 758 if (chroot(pw->pw_dir) != 0) 759 fatal("unable to chroot"); 760 if (chdir("/") != 0) 761 fatal("unable to chdir"); 762 763 if (setgroups(1, &pw->pw_gid) == -1) 764 fatal("setgroups() failed"); 765 if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) == -1) 766 fatal("setresgid() failed"); 767 if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) == -1) 768 fatal("setresuid() failed"); 769 770 return (0); 771 } 772 773 /* Parent */ 774 return (child_pid); 775} 776 777/* 778 * vmm_create_vm 779 * 780 * Requests vmm(4) to create a new VM using the supplied creation 781 * parameters. This operation results in the creation of the in-kernel 782 * structures for the VM, but does not start the VM's vcpu(s). 783 * 784 * Parameters: 785 * vcp: vm_create_params struct containing the VM's desired creation 786 * configuration 787 * 788 * Return values: 789 * 0: success 790 * !0 : ioctl to vmm(4) failed 791 */ 792int 793vmm_create_vm(struct vm_create_params *vcp) 794{ 795 /* Sanity check arguments */ 796 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) 797 return (EINVAL); 798 799 if (vcp->vcp_memory_size > VMM_MAX_VM_MEM_SIZE) 800 return (EINVAL); 801 802 if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) 803 return (EINVAL); 804 805 if (ioctl(vmm_fd, VMM_IOC_CREATE, vcp) < 0) 806 return (errno); 807 808 return (0); 809} 810 811/* 812 * init_emulated_hw 813 * 814 * Initializes the userspace hardware emulation 815 */ 816void 817init_emulated_hw(struct vm_create_params *vcp, int *child_disks, 818 int *child_taps) 819{ 820 /* Init the i8253 PIT's 3 counters */ 821 bzero(&i8253_counter, sizeof(struct i8253_counter) * 3); 822 gettimeofday(&i8253_counter[0].tv, NULL); 823 gettimeofday(&i8253_counter[1].tv, NULL); 824 gettimeofday(&i8253_counter[2].tv, NULL); 825 i8253_counter[0].start = TIMER_DIV(100); 826 i8253_counter[1].start = TIMER_DIV(100); 827 i8253_counter[2].start = TIMER_DIV(100); 828 829 /* Init ns8250 UART */ 830 bzero(&com1_regs, sizeof(struct ns8250_regs)); 831 832 /* Initialize PCI */ 833 pci_init(); 834 835 /* Initialize virtio devices */ 836 virtio_init(vcp, child_disks, child_taps); 837} 838 839/* 840 * run_vm 841 * 842 * Runs the VM whose creation parameters are specified in vcp 843 * 844 * Parameters: 845 * vcp: vm_create_params struct containing the VM's desired creation 846 * configuration 847 * child_disks: previously-opened child VM disk file file descriptors 848 * child_taps: previously-opened child tap file descriptors 849 * 850 * Return values: 851 * 0: the VM exited normally 852 * !0 : the VM exited abnormally or failed to start 853 */ 854int 855run_vm(int *child_disks, int *child_taps, struct vm_create_params *vcp) 856{ 857 size_t i; 858 int ret; 859 pthread_t *tid; 860 void *exit_status; 861 struct vm_run_params **vrp; 862 863 ret = 0; 864 865 /* XXX cap vcp_ncpus to avoid overflow here */ 866 /* 867 * XXX ensure nvcpus in vcp is same as vm, or fix vmm to return einval 868 * on bad vcpu id 869 */ 870 tid = malloc(sizeof(pthread_t) * vcp->vcp_ncpus); 871 vrp = malloc(sizeof(struct vm_run_params *) * vcp->vcp_ncpus); 872 if (tid == NULL || vrp == NULL) { 873 log_warn("%s: memory allocation error - exiting.", 874 __progname); 875 return (ENOMEM); 876 } 877 878 init_emulated_hw(vcp, child_disks, child_taps); 879 880 /* 881 * Create and launch one thread for each VCPU. These threads may 882 * migrate between PCPUs over time; the need to reload CPU state 883 * in such situations is detected and performed by vmm(4) in the 884 * kernel. 885 */ 886 for (i = 0 ; i < vcp->vcp_ncpus; i++) { 887 vrp[i] = malloc(sizeof(struct vm_run_params)); 888 if (vrp[i] == NULL) { 889 log_warn("%s: memory allocation error - " 890 "exiting.", __progname); 891 /* caller will exit, so skip free'ing */ 892 return (ENOMEM); 893 } 894 vrp[i]->vrp_exit = malloc(sizeof(union vm_exit)); 895 if (vrp[i]->vrp_exit == NULL) { 896 log_warn("%s: memory allocation error - " 897 "exiting.", __progname); 898 /* caller will exit, so skip free'ing */ 899 return (ENOMEM); 900 } 901 vrp[i]->vrp_vm_id = vcp->vcp_id; 902 vrp[i]->vrp_vcpu_id = i; 903 904 /* Start each VCPU run thread at vcpu_run_loop */ 905 ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]); 906 if (ret) { 907 /* caller will _exit after this return */ 908 return (ret); 909 } 910 } 911 912 /* Wait for all the threads to exit */ 913 for (i = 0; i < vcp->vcp_ncpus; i++) { 914 if (pthread_join(tid[i], &exit_status)) { 915 log_warn("%s: failed to join thread %zd - " 916 "exiting", __progname, i); 917 return (EIO); 918 } 919 920 if (exit_status != NULL) { 921 log_warnx("%s: vm %d vcpu run thread %zd exited " 922 "abnormally", __progname, vcp->vcp_id, i); 923 ret = EIO; 924 } 925 } 926 927 return (ret); 928} 929 930/* 931 * vcpu_run_loop 932 * 933 * Runs a single VCPU until vmm(4) requires help handling an exit, 934 * or the VM terminates. 935 * 936 * Parameters: 937 * arg: vcpu_run_params for the VCPU being run by this thread 938 * 939 * Return values: 940 * NULL: the VCPU shutdown properly 941 * !NULL: error processing VCPU run, or the VCPU shutdown abnormally 942 */ 943void * 944vcpu_run_loop(void *arg) 945{ 946 struct vm_run_params *vrp = (struct vm_run_params *)arg; 947 intptr_t ret; 948 949 vrp->vrp_continue = 0; 950 vrp->vrp_injint = -1; 951 952 for (;;) { 953 if (ioctl(vmm_fd, VMM_IOC_RUN, vrp) < 0) { 954 /* If run ioctl failed, exit */ 955 ret = errno; 956 return ((void *)ret); 957 } 958 959 /* If the VM is terminating, exit normally */ 960 if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED) 961 return (NULL); 962 963 if (vrp->vrp_exit_reason != VM_EXIT_NONE) { 964 /* 965 * vmm(4) needs help handling an exit, handle in 966 * vcpu_exit. 967 */ 968 if (vcpu_exit(vrp)) 969 return ((void *)EIO); 970 } 971 } 972 973 return (NULL); 974} 975 976/* 977 * vcpu_exit_i8253 978 * 979 * Handles emulated i8253 PIT access (in/out instruction to PIT ports). 980 * We don't emulate all the modes of the i8253, just the basic squarewave 981 * clock. 982 * 983 * Parameters: 984 * vei: VM exit information from vmm(4) containing information on the in/out 985 * instruction being performed 986 */ 987void 988vcpu_exit_i8253(union vm_exit *vei) 989{ 990 uint32_t out_data; 991 uint8_t sel, rw, data; 992 uint64_t ns, ticks; 993 struct timeval now, delta; 994 995 if (vei->vei.vei_port == TIMER_CTRL) { 996 if (vei->vei.vei_dir == 0) { /* OUT instruction */ 997 out_data = vei->vei.vei_data; 998 sel = out_data & 999 (TIMER_SEL0 | TIMER_SEL1 | TIMER_SEL2); 1000 sel = sel >> 6; 1001 if (sel > 2) { 1002 log_warnx("%s: i8253 PIT: invalid " 1003 "timer selected (%d)", 1004 __progname, sel); 1005 return; 1006 } 1007 1008 rw = vei->vei.vei_data & 1009 (TIMER_LATCH | TIMER_LSB | 1010 TIMER_MSB | TIMER_16BIT); 1011 1012 if (rw == TIMER_16BIT) { 1013 /* 1014 * XXX this seems to be used on occasion, needs 1015 * to be implemented 1016 */ 1017 log_warnx("%s: i8253 PIT: 16 bit " 1018 "counter I/O not supported", 1019 __progname); 1020 return; 1021 } 1022 1023 /* 1024 * Since we don't truly emulate each tick of the PIT 1025 * clock, when the guest asks for the timer to be 1026 * latched, simulate what the counter would have been 1027 * had we performed full emulation. We do this by 1028 * calculating when the counter was reset vs how much 1029 * time has elapsed, then bias by the counter tick 1030 * rate. 1031 */ 1032 if (rw == TIMER_LATCH) { 1033 gettimeofday(&now, NULL); 1034 delta.tv_sec = now.tv_sec - 1035 i8253_counter[sel].tv.tv_sec; 1036 delta.tv_usec = now.tv_usec - 1037 i8253_counter[sel].tv.tv_usec; 1038 if (delta.tv_usec < 0) { 1039 delta.tv_sec--; 1040 delta.tv_usec += 1000000; 1041 } 1042 if (delta.tv_usec > 1000000) { 1043 delta.tv_sec++; 1044 delta.tv_usec -= 1000000; 1045 } 1046 ns = delta.tv_usec * 1000 + 1047 delta.tv_sec * 1000000000; 1048 ticks = ns / NS_PER_TICK; 1049 i8253_counter[sel].olatch = 1050 i8253_counter[sel].start - 1051 ticks % i8253_counter[sel].start; 1052 return; 1053 } 1054 1055 log_warnx("%s: i8253 PIT: unsupported rw mode " 1056 "%d", __progname, rw); 1057 return; 1058 } else { 1059 /* XXX should this return 0xff? */ 1060 log_warnx("%s: i8253 PIT: read from control " 1061 "port unsupported", __progname); 1062 } 1063 } else { 1064 sel = vei->vei.vei_port - (TIMER_CNTR0 + TIMER_BASE); 1065 if (vei->vei.vei_dir == 0) { /* OUT instruction */ 1066 if (i8253_counter[sel].last_w == 0) { 1067 out_data = vei->vei.vei_data; 1068 i8253_counter[sel].ilatch |= (out_data << 8); 1069 i8253_counter[sel].last_w = 1; 1070 } else { 1071 out_data = vei->vei.vei_data; 1072 i8253_counter[sel].ilatch |= out_data; 1073 i8253_counter[sel].start = 1074 i8253_counter[sel].ilatch; 1075 i8253_counter[sel].last_w = 0; 1076 } 1077 } else { 1078 if (i8253_counter[sel].last_r == 0) { 1079 data = i8253_counter[sel].olatch >> 8; 1080 vei->vei.vei_data = data; 1081 i8253_counter[sel].last_w = 1; 1082 } else { 1083 data = i8253_counter[sel].olatch & 0xFF; 1084 vei->vei.vei_data = data; 1085 i8253_counter[sel].last_w = 0; 1086 } 1087 } 1088 } 1089} 1090 1091/* 1092 * vcpu_process_com_data 1093 * 1094 * Emulate in/out instructions to the com1 (ns8250) UART data register 1095 * 1096 * Parameters: 1097 * vei: vm exit information from vmm(4) containing information on the in/out 1098 * instruction being performed 1099 */ 1100void 1101vcpu_process_com_data(union vm_exit *vei) 1102{ 1103 /* 1104 * vei_dir == 0 : out instruction 1105 * 1106 * The guest wrote to the data register. Since we are emulating a 1107 * no-fifo chip, write the character immediately to the pty and 1108 * assert TXRDY in IIR (if the guest has requested TXRDY interrupt 1109 * reporting) 1110 */ 1111 if (vei->vei.vei_dir == 0) { 1112 write(con_fd, &vei->vei.vei_data, 1); 1113 if (com1_regs.ier & 0x2) { 1114 /* Set TXRDY */ 1115 com1_regs.iir |= IIR_TXRDY; 1116 /* Set "interrupt pending" (IIR low bit cleared) */ 1117 com1_regs.iir &= ~0x1; 1118 } 1119 } else { 1120 /* 1121 * vei_dir == 1 : in instruction 1122 * 1123 * The guest read from the data register. Check to see if 1124 * there is data available (RXRDY) and if so, consume the 1125 * input data and return to the guest. Also clear the 1126 * interrupt info register regardless. 1127 */ 1128 if (com1_regs.lsr & LSR_RXRDY) { 1129 vei->vei.vei_data = com1_regs.data; 1130 com1_regs.data = 0x0; 1131 com1_regs.lsr &= ~LSR_RXRDY; 1132 } else { 1133 /* XXX should this be com1_regs.data or 0xff? */ 1134 vei->vei.vei_data = com1_regs.data; 1135 log_warnx("guest reading com1 when not ready"); 1136 } 1137 1138 /* Reading the data register always clears RXRDY from IIR */ 1139 com1_regs.iir &= ~IIR_RXRDY; 1140 1141 /* 1142 * Clear "interrupt pending" by setting IIR low bit to 1 1143 * if no interrupt are pending 1144 */ 1145 if (com1_regs.iir == 0x0) 1146 com1_regs.iir = 0x1; 1147 } 1148} 1149 1150/* 1151 * vcpu_process_com_lcr 1152 * 1153 * Emulate in/out instructions to the com1 (ns8250) UART line control register 1154 * 1155 * Paramters: 1156 * vei: vm exit information from vmm(4) containing information on the in/out 1157 * instruction being performed 1158 */ 1159void 1160vcpu_process_com_lcr(union vm_exit *vei) 1161{ 1162 /* 1163 * vei_dir == 0 : out instruction 1164 * 1165 * Write content to line control register 1166 */ 1167 if (vei->vei.vei_dir == 0) { 1168 com1_regs.lcr = (uint8_t)vei->vei.vei_data; 1169 } else { 1170 /* 1171 * vei_dir == 1 : in instruction 1172 * 1173 * Read line control register 1174 */ 1175 vei->vei.vei_data = com1_regs.lcr; 1176 } 1177} 1178 1179/* 1180 * vcpu_process_com_iir 1181 * 1182 * Emulate in/out instructions to the com1 (ns8250) UART interrupt information 1183 * register. Note that writes to this register actually are to a different 1184 * register, the FCR (FIFO control register) that we don't emulate but still 1185 * consume the data provided. 1186 * 1187 * Parameters: 1188 * vei: vm exit information from vmm(4) containing information on the in/out 1189 * instruction being performed 1190 */ 1191void 1192vcpu_process_com_iir(union vm_exit *vei) 1193{ 1194 /* 1195 * vei_dir == 0 : out instruction 1196 * 1197 * Write to FCR 1198 */ 1199 if (vei->vei.vei_dir == 0) { 1200 com1_regs.fcr = vei->vei.vei_data; 1201 } else { 1202 /* 1203 * vei_dir == 1 : in instruction 1204 * 1205 * Read IIR. Reading the IIR resets the TXRDY bit in the IIR 1206 * after the data is read. 1207 */ 1208 vei->vei.vei_data = com1_regs.iir; 1209 com1_regs.iir &= ~IIR_TXRDY; 1210 1211 /* 1212 * Clear "interrupt pending" by setting IIR low bit to 1 1213 * if no interrupts are pending 1214 */ 1215 if (com1_regs.iir == 0x0) 1216 com1_regs.iir = 0x1; 1217 } 1218} 1219 1220/* 1221 * vcpu_process_com_mcr 1222 * 1223 * Emulate in/out instructions to the com1 (ns8250) UART modem control 1224 * register. 1225 * 1226 * Parameters: 1227 * vei: vm exit information from vmm(4) containing information on the in/out 1228 * instruction being performed 1229 */ 1230void 1231vcpu_process_com_mcr(union vm_exit *vei) 1232{ 1233 /* 1234 * vei_dir == 0 : out instruction 1235 * 1236 * Write to MCR 1237 */ 1238 if (vei->vei.vei_dir == 0) { 1239 com1_regs.mcr = vei->vei.vei_data; 1240 } else { 1241 /* 1242 * vei_dir == 1 : in instruction 1243 * 1244 * Read from MCR 1245 */ 1246 vei->vei.vei_data = com1_regs.mcr; 1247 } 1248} 1249 1250/* 1251 * vcpu_process_com_lsr 1252 * 1253 * Emulate in/out instructions to the com1 (ns8250) UART line status register. 1254 * 1255 * Parameters: 1256 * vei: vm exit information from vmm(4) containing information on the in/out 1257 * instruction being performed 1258 */ 1259void 1260vcpu_process_com_lsr(union vm_exit *vei) 1261{ 1262 /* 1263 * vei_dir == 0 : out instruction 1264 * 1265 * Write to LSR. This is an illegal operation, so we just log it and 1266 * continue. 1267 */ 1268 if (vei->vei.vei_dir == 0) { 1269 log_warnx("%s: LSR UART write 0x%x unsupported", 1270 __progname, vei->vei.vei_data); 1271 } else { 1272 /* 1273 * vei_dir == 1 : in instruction 1274 * 1275 * Read from LSR. We always report TXRDY and TSRE since we 1276 * can process output characters immediately (at any time). 1277 */ 1278 vei->vei.vei_data = com1_regs.lsr | LSR_TSRE | LSR_TXRDY; 1279 } 1280} 1281 1282/* 1283 * vcpu_process_com_msr 1284 * 1285 * Emulate in/out instructions to the com1 (ns8250) UART modem status register. 1286 * 1287 * Parameters: 1288 * vei: vm exit information from vmm(4) containing information on the in/out 1289 * instruction being performed 1290 */ 1291void 1292vcpu_process_com_msr(union vm_exit *vei) 1293{ 1294 /* 1295 * vei_dir == 0 : out instruction 1296 * 1297 * Write to MSR. This is an illegal operation, so we just log it and 1298 * continue. 1299 */ 1300 if (vei->vei.vei_dir == 0) { 1301 log_warnx("%s: MSR UART write 0x%x unsupported", 1302 __progname, vei->vei.vei_data); 1303 } else { 1304 /* 1305 * vei_dir == 1 : in instruction 1306 * 1307 * Read from MSR. We always report DCD, DSR, and CTS. 1308 */ 1309 vei->vei.vei_data = com1_regs.lsr | MSR_DCD | MSR_DSR | MSR_CTS; 1310 } 1311} 1312 1313/* 1314 * vcpu_process_com_scr 1315 * 1316 * Emulate in/out instructions to the com1 (ns8250) UART scratch register. The 1317 * scratch register is sometimes used to distinguish an 8250 from a 16450, 1318 * and/or used to distinguish submodels of the 8250 (eg 8250A, 8250B). We 1319 * simulate an "original" 8250 by forcing the scratch register to return data 1320 * on read that is different from what was written. 1321 * 1322 * Parameters: 1323 * vei: vm exit information from vmm(4) containing information on the in/out 1324 * instruction being performed 1325 */ 1326void 1327vcpu_process_com_scr(union vm_exit *vei) 1328{ 1329 /* 1330 * vei_dir == 0 : out instruction 1331 * 1332 * Write to SCR 1333 */ 1334 if (vei->vei.vei_dir == 0) { 1335 com1_regs.scr = vei->vei.vei_data; 1336 } else { 1337 /* 1338 * vei_dir == 1 : in instruction 1339 * 1340 * Read from SCR. To make sure we don't accidentally simulate 1341 * a real scratch register, we negate what was written on 1342 * subsequent readback. 1343 */ 1344 vei->vei.vei_data = ~com1_regs.scr; 1345 } 1346} 1347 1348/* 1349 * vcpu_process_com_ier 1350 * 1351 * Emulate in/out instructions to the com1 (ns8250) UART interrupt enable 1352 * register. 1353 * 1354 * Parameters: 1355 * vei: vm exit information from vmm(4) containing information on the in/out 1356 * instruction being performed 1357 */ 1358void 1359vcpu_process_com_ier(union vm_exit *vei) 1360{ 1361 /* 1362 * vei_dir == 0 : out instruction 1363 * 1364 * Write to IER 1365 */ 1366 if (vei->vei.vei_dir == 0) { 1367 com1_regs.ier = vei->vei.vei_data; 1368 } else { 1369 /* 1370 * vei_dir == 1 : in instruction 1371 * 1372 * Read from IER 1373 */ 1374 vei->vei.vei_data = com1_regs.ier; 1375 } 1376} 1377 1378/* 1379 * vcpu_exit_com 1380 * 1381 * Process com1 (ns8250) UART exits. vmd handles most basic 8250 1382 * features with the exception of the divisor latch (eg, no baud 1383 * rate support) 1384 * 1385 * Parameters: 1386 * vrp: vcpu run parameters containing guest state for this exit 1387 */ 1388void 1389vcpu_exit_com(struct vm_run_params *vrp) 1390{ 1391 union vm_exit *vei = vrp->vrp_exit; 1392 1393 switch(vei->vei.vei_port) { 1394 case COM1_LCR: 1395 vcpu_process_com_lcr(vei); 1396 break; 1397 case COM1_IER: 1398 vcpu_process_com_ier(vei); 1399 break; 1400 case COM1_IIR: 1401 vcpu_process_com_iir(vei); 1402 break; 1403 case COM1_MCR: 1404 vcpu_process_com_mcr(vei); 1405 break; 1406 case COM1_LSR: 1407 vcpu_process_com_lsr(vei); 1408 break; 1409 case COM1_MSR: 1410 vcpu_process_com_msr(vei); 1411 break; 1412 case COM1_SCR: 1413 vcpu_process_com_scr(vei); 1414 break; 1415 case COM1_DATA: 1416 vcpu_process_com_data(vei); 1417 break; 1418 } 1419} 1420 1421/* 1422 * vcpu_exit_pci 1423 * 1424 * Handle all I/O to the emulated PCI subsystem. 1425 * 1426 * Parameters: 1427 * vrp: vcpu run paramters containing guest state for this exit 1428 * 1429 * Return values: 1430 * 0xff if no interrupt is required after this pci exit, 1431 * or an interrupt vector otherwise 1432 */ 1433uint8_t 1434vcpu_exit_pci(struct vm_run_params *vrp) 1435{ 1436 union vm_exit *vei = vrp->vrp_exit; 1437 uint8_t intr; 1438 1439 intr = 0xFF; 1440 1441 switch(vei->vei.vei_port) { 1442 case PCI_MODE1_ADDRESS_REG: 1443 pci_handle_address_reg(vrp); 1444 break; 1445 case PCI_MODE1_DATA_REG: 1446 pci_handle_data_reg(vrp); 1447 break; 1448 case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END: 1449 intr = pci_handle_io(vrp); 1450 break; 1451 default: 1452 log_warnx("%s: unknown PCI register 0x%llx", 1453 __progname, (uint64_t)vei->vei.vei_port); 1454 break; 1455 } 1456 1457 return (intr); 1458} 1459 1460/* 1461 * vcpu_exit_inout 1462 * 1463 * Handle all I/O exits that need to be emulated in vmd. This includes the 1464 * i8253 PIT and the com1 ns8250 UART. 1465 * 1466 * Parameters: 1467 * vrp: vcpu run parameters containing guest state for this exit 1468 */ 1469void 1470vcpu_exit_inout(struct vm_run_params *vrp) 1471{ 1472 union vm_exit *vei = vrp->vrp_exit; 1473 uint8_t intr; 1474 1475 switch(vei->vei.vei_port) { 1476 case TIMER_CTRL: 1477 case (TIMER_CNTR0 + TIMER_BASE): 1478 case (TIMER_CNTR1 + TIMER_BASE): 1479 case (TIMER_CNTR2 + TIMER_BASE): 1480 vcpu_exit_i8253(vei); 1481 break; 1482 case COM1_DATA ... COM1_SCR: 1483 vcpu_exit_com(vrp); 1484 break; 1485 case PCI_MODE1_ADDRESS_REG: 1486 case PCI_MODE1_DATA_REG: 1487 case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END: 1488 intr = vcpu_exit_pci(vrp); 1489 if (intr != 0xFF) 1490 vrp->vrp_injint = intr; 1491 else 1492 vrp->vrp_injint = -1; 1493 break; 1494 default: 1495 /* IN from unsupported port gives FFs */ 1496 if (vei->vei.vei_dir == 1) 1497 vei->vei.vei_data = 0xFFFFFFFF; 1498 break; 1499 } 1500} 1501 1502/* 1503 * vcpu_exit 1504 * 1505 * Handle a vcpu exit. This function is called when it is determined that 1506 * vmm(4) requires the assistance of vmd to support a particular guest 1507 * exit type (eg, accessing an I/O port or device). Guest state is contained 1508 * in 'vrp', and will be resent to vmm(4) on exit completion. 1509 * 1510 * Upon conclusion of handling the exit, the function determines if any 1511 * interrupts should be injected into the guest, and sets vrp->vrp_injint 1512 * to the IRQ line whose interrupt should be vectored (or -1 if no interrupt 1513 * is to be injected). 1514 * 1515 * Parameters: 1516 * vrp: vcpu run parameters containing guest state for this exit 1517 * 1518 * Return values: 1519 * 0: the exit was handled successfully 1520 * 1: an error occurred (exit not handled) 1521 */ 1522int 1523vcpu_exit(struct vm_run_params *vrp) 1524{ 1525 ssize_t sz; 1526 char ch; 1527 1528 switch (vrp->vrp_exit_reason) { 1529 case VMX_EXIT_IO: 1530 vcpu_exit_inout(vrp); 1531 break; 1532 case VMX_EXIT_HLT: 1533 /* 1534 * XXX handle halted state, no reason to run this vcpu again 1535 * until a vm interrupt is to be injected 1536 */ 1537 break; 1538 default: 1539 log_warnx("%s: unknown exit reason %d", 1540 __progname, vrp->vrp_exit_reason); 1541 return (1); 1542 } 1543 1544 /* XXX interrupt priority */ 1545 if (vionet_process_rx()) 1546 vrp->vrp_injint = 9; 1547 1548 /* 1549 * Is there a new character available on com1? 1550 * If so, consume the character, buffer it into the com1 data register 1551 * assert IRQ4, and set the line status register RXRDY bit. 1552 * 1553 * XXX - move all this com intr checking to another function 1554 */ 1555 sz = read(con_fd, &ch, sizeof(char)); 1556 if (sz == 1) { 1557 com1_regs.lsr |= LSR_RXRDY; 1558 com1_regs.data = ch; 1559 /* XXX these ier and iir bits should be IER_x and IIR_x */ 1560 if (com1_regs.ier & 0x1) { 1561 com1_regs.iir |= (2 << 1); 1562 com1_regs.iir &= ~0x1; 1563 } 1564 } 1565 1566 /* 1567 * Clear "interrupt pending" by setting IIR low bit to 1 if no 1568 * interrupts are pending 1569 */ 1570 /* XXX these iir magic numbers should be IIR_x */ 1571 if ((com1_regs.iir & ~0x1) == 0x0) 1572 com1_regs.iir = 0x1; 1573 1574 /* If pending interrupt and nothing waiting to be injected, inject */ 1575 if ((com1_regs.iir & 0x1) == 0) 1576 if (vrp->vrp_injint == -1) 1577 vrp->vrp_injint = 0x4; 1578 vrp->vrp_continue = 1; 1579 1580 return (0); 1581} 1582 1583/* 1584 * write_page 1585 * 1586 * Pushes a page of data from 'buf' into the guest VM's memory 1587 * at paddr 'dst'. 1588 * 1589 * Parameters: 1590 * dst: the destination paddr_t in the guest VM to push into. 1591 * If there is no guest paddr mapping at 'dst', a new page will be 1592 * faulted in by the VMM (provided 'dst' represents a valid paddr 1593 * in the guest's address space) 1594 * buf: page of data to push 1595 * len: size of 'buf' 1596 * do_mask: 1 to mask the destination address (for kernel load), 0 to 1597 * leave 'dst' unmasked 1598 * 1599 * Return values: 1600 * various return values from ioctl(VMM_IOC_WRITEPAGE), or 0 if no error 1601 * occurred. 1602 * 1603 * Note - this function only handles GPAs < 4GB. 1604 */ 1605int 1606write_page(uint32_t dst, void *buf, uint32_t len, int do_mask) 1607{ 1608 struct vm_writepage_params vwp; 1609 1610 /* 1611 * Mask kernel load addresses to avoid uint32_t -> uint64_t cast 1612 * errors 1613 */ 1614 if (do_mask) 1615 dst &= 0xFFFFFFF; 1616 1617 vwp.vwp_paddr = (paddr_t)dst; 1618 vwp.vwp_data = buf; 1619 vwp.vwp_vm_id = vm_id; 1620 vwp.vwp_len = len; 1621 if (ioctl(vmm_fd, VMM_IOC_WRITEPAGE, &vwp) < 0) { 1622 log_warn("writepage ioctl failed"); 1623 return (errno); 1624 } 1625 return (0); 1626} 1627 1628/* 1629 * read_page 1630 * 1631 * Reads a page of memory at guest paddr 'src' into 'buf'. 1632 * 1633 * Parameters: 1634 * src: the source paddr_t in the guest VM to read from. 1635 * buf: destination (local) buffer 1636 * len: size of 'buf' 1637 * do_mask: 1 to mask the source address (for kernel load), 0 to 1638 * leave 'src' unmasked 1639 * 1640 * Return values: 1641 * various return values from ioctl(VMM_IOC_READPAGE), or 0 if no error 1642 * occurred. 1643 * 1644 * Note - this function only handles GPAs < 4GB. 1645 */ 1646int 1647read_page(uint32_t src, void *buf, uint32_t len, int do_mask) 1648{ 1649 struct vm_readpage_params vrp; 1650 1651 /* 1652 * Mask kernel load addresses to avoid uint32_t -> uint64_t cast 1653 * errors 1654 */ 1655 if (do_mask) 1656 src &= 0xFFFFFFF; 1657 1658 vrp.vrp_paddr = (paddr_t)src; 1659 vrp.vrp_data = buf; 1660 vrp.vrp_vm_id = vm_id; 1661 vrp.vrp_len = len; 1662 if (ioctl(vmm_fd, VMM_IOC_READPAGE, &vrp) < 0) { 1663 log_warn("readpage ioctl failed"); 1664 return (errno); 1665 } 1666 return (0); 1667} 1668