1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 323739 2017-09-19 08:19:20Z avg $ 27 */ 28 29/*- 30 * Copyright (c) 2011 Google, Inc. 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 * 54 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 323739 2017-09-19 08:19:20Z avg $ 55 */ 56 57#include <sys/cdefs.h> 58__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 323739 2017-09-19 08:19:20Z avg $"); 59 60#include <sys/ioctl.h> 61#include <sys/stat.h> 62#include <sys/disk.h> 63#include <sys/queue.h> 64 65#include <machine/specialreg.h> 66#include <machine/vmm.h> 67 68#include <dirent.h> 69#include <dlfcn.h> 70#include <errno.h> 71#include <err.h> 72#include <fcntl.h> 73#include <getopt.h> 74#include <libgen.h> 75#include <limits.h> 76#include <stdio.h> 77#include <stdlib.h> 78#include <string.h> 79#include <sysexits.h> 80#include <termios.h> 81#include <unistd.h> 82 83#include <vmmapi.h> 84 85#include "userboot.h" 86 87#define MB (1024 * 1024UL) 88#define GB (1024 * 1024 * 1024UL) 89#define BSP 0 90 91#define NDISKS 32 92 93static char *host_base; 94static struct termios term, oldterm; 95static int disk_fd[NDISKS]; 96static int ndisks; 97static int consin_fd, consout_fd; 98 99static char *vmname, *progname; 100static struct vmctx *ctx; 101 102static uint64_t gdtbase, cr3, rsp; 103 104static void cb_exit(void *arg, int v); 105 106/* 107 * Console i/o callbacks 108 */ 109 110static void 111cb_putc(void *arg, int ch) 112{ 113 char c = ch; 114 115 (void) write(consout_fd, &c, 1); 116} 117 118static int 119cb_getc(void *arg) 120{ 121 char c; 122 123 if (read(consin_fd, &c, 1) == 1) 124 return (c); 125 return (-1); 126} 127 128static int 129cb_poll(void *arg) 130{ 131 int n; 132 133 if (ioctl(consin_fd, FIONREAD, &n) >= 0) 134 return (n > 0); 135 return (0); 136} 137 138/* 139 * Host filesystem i/o callbacks 140 */ 141 142struct cb_file { 143 int cf_isdir; 144 size_t cf_size; 145 struct stat cf_stat; 146 union { 147 int fd; 148 DIR *dir; 149 } cf_u; 150}; 151 152static int 153cb_open(void *arg, const char *filename, void **hp) 154{ 155 struct stat st; 156 struct cb_file *cf; 157 char path[PATH_MAX]; 158 159 if (!host_base) 160 return (ENOENT); 161 162 strlcpy(path, host_base, PATH_MAX); 163 if (path[strlen(path) - 1] == '/') 164 path[strlen(path) - 1] = 0; 165 strlcat(path, filename, PATH_MAX); 166 cf = malloc(sizeof(struct cb_file)); 167 if (stat(path, &cf->cf_stat) < 0) { 168 free(cf); 169 return (errno); 170 } 171 172 cf->cf_size = st.st_size; 173 if (S_ISDIR(cf->cf_stat.st_mode)) { 174 cf->cf_isdir = 1; 175 cf->cf_u.dir = opendir(path); 176 if (!cf->cf_u.dir) 177 goto out; 178 *hp = cf; 179 return (0); 180 } 181 if (S_ISREG(cf->cf_stat.st_mode)) { 182 cf->cf_isdir = 0; 183 cf->cf_u.fd = open(path, O_RDONLY); 184 if (cf->cf_u.fd < 0) 185 goto out; 186 *hp = cf; 187 return (0); 188 } 189 190out: 191 free(cf); 192 return (EINVAL); 193} 194 195static int 196cb_close(void *arg, void *h) 197{ 198 struct cb_file *cf = h; 199 200 if (cf->cf_isdir) 201 closedir(cf->cf_u.dir); 202 else 203 close(cf->cf_u.fd); 204 free(cf); 205 206 return (0); 207} 208 209static int 210cb_isdir(void *arg, void *h) 211{ 212 struct cb_file *cf = h; 213 214 return (cf->cf_isdir); 215} 216 217static int 218cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) 219{ 220 struct cb_file *cf = h; 221 ssize_t sz; 222 223 if (cf->cf_isdir) 224 return (EINVAL); 225 sz = read(cf->cf_u.fd, buf, size); 226 if (sz < 0) 227 return (EINVAL); 228 *resid = size - sz; 229 return (0); 230} 231 232static int 233cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, 234 size_t *namelen_return, char *name) 235{ 236 struct cb_file *cf = h; 237 struct dirent *dp; 238 239 if (!cf->cf_isdir) 240 return (EINVAL); 241 242 dp = readdir(cf->cf_u.dir); 243 if (!dp) 244 return (ENOENT); 245 246 /* 247 * Note: d_namlen is in the range 0..255 and therefore less 248 * than PATH_MAX so we don't need to test before copying. 249 */ 250 *fileno_return = dp->d_fileno; 251 *type_return = dp->d_type; 252 *namelen_return = dp->d_namlen; 253 memcpy(name, dp->d_name, dp->d_namlen); 254 name[dp->d_namlen] = 0; 255 256 return (0); 257} 258 259static int 260cb_seek(void *arg, void *h, uint64_t offset, int whence) 261{ 262 struct cb_file *cf = h; 263 264 if (cf->cf_isdir) 265 return (EINVAL); 266 if (lseek(cf->cf_u.fd, offset, whence) < 0) 267 return (errno); 268 return (0); 269} 270 271static int 272cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size) 273{ 274 struct cb_file *cf = h; 275 276 *mode = cf->cf_stat.st_mode; 277 *uid = cf->cf_stat.st_uid; 278 *gid = cf->cf_stat.st_gid; 279 *size = cf->cf_stat.st_size; 280 return (0); 281} 282 283/* 284 * Disk image i/o callbacks 285 */ 286 287static int 288cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, 289 size_t *resid) 290{ 291 ssize_t n; 292 293 if (unit < 0 || unit >= ndisks ) 294 return (EIO); 295 n = pread(disk_fd[unit], to, size, from); 296 if (n < 0) 297 return (errno); 298 *resid = size - n; 299 return (0); 300} 301 302static int 303cb_diskioctl(void *arg, int unit, u_long cmd, void *data) 304{ 305 struct stat sb; 306 307 if (unit < 0 || unit >= ndisks) 308 return (EBADF); 309 310 switch (cmd) { 311 case DIOCGSECTORSIZE: 312 *(u_int *)data = 512; 313 break; 314 case DIOCGMEDIASIZE: 315 if (fstat(disk_fd[unit], &sb) != 0) 316 return (ENOTTY); 317 if (S_ISCHR(sb.st_mode) && 318 ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0) 319 return (ENOTTY); 320 *(off_t *)data = sb.st_size; 321 break; 322 default: 323 return (ENOTTY); 324 } 325 326 return (0); 327} 328 329/* 330 * Guest virtual machine i/o callbacks 331 */ 332static int 333cb_copyin(void *arg, const void *from, uint64_t to, size_t size) 334{ 335 char *ptr; 336 337 to &= 0x7fffffff; 338 339 ptr = vm_map_gpa(ctx, to, size); 340 if (ptr == NULL) 341 return (EFAULT); 342 343 memcpy(ptr, from, size); 344 return (0); 345} 346 347static int 348cb_copyout(void *arg, uint64_t from, void *to, size_t size) 349{ 350 char *ptr; 351 352 from &= 0x7fffffff; 353 354 ptr = vm_map_gpa(ctx, from, size); 355 if (ptr == NULL) 356 return (EFAULT); 357 358 memcpy(to, ptr, size); 359 return (0); 360} 361 362static void 363cb_setreg(void *arg, int r, uint64_t v) 364{ 365 int error; 366 enum vm_reg_name vmreg; 367 368 vmreg = VM_REG_LAST; 369 370 switch (r) { 371 case 4: 372 vmreg = VM_REG_GUEST_RSP; 373 rsp = v; 374 break; 375 default: 376 break; 377 } 378 379 if (vmreg == VM_REG_LAST) { 380 printf("test_setreg(%d): not implemented\n", r); 381 cb_exit(NULL, USERBOOT_EXIT_QUIT); 382 } 383 384 error = vm_set_register(ctx, BSP, vmreg, v); 385 if (error) { 386 perror("vm_set_register"); 387 cb_exit(NULL, USERBOOT_EXIT_QUIT); 388 } 389} 390 391static void 392cb_setmsr(void *arg, int r, uint64_t v) 393{ 394 int error; 395 enum vm_reg_name vmreg; 396 397 vmreg = VM_REG_LAST; 398 399 switch (r) { 400 case MSR_EFER: 401 vmreg = VM_REG_GUEST_EFER; 402 break; 403 default: 404 break; 405 } 406 407 if (vmreg == VM_REG_LAST) { 408 printf("test_setmsr(%d): not implemented\n", r); 409 cb_exit(NULL, USERBOOT_EXIT_QUIT); 410 } 411 412 error = vm_set_register(ctx, BSP, vmreg, v); 413 if (error) { 414 perror("vm_set_msr"); 415 cb_exit(NULL, USERBOOT_EXIT_QUIT); 416 } 417} 418 419static void 420cb_setcr(void *arg, int r, uint64_t v) 421{ 422 int error; 423 enum vm_reg_name vmreg; 424 425 vmreg = VM_REG_LAST; 426 427 switch (r) { 428 case 0: 429 vmreg = VM_REG_GUEST_CR0; 430 break; 431 case 3: 432 vmreg = VM_REG_GUEST_CR3; 433 cr3 = v; 434 break; 435 case 4: 436 vmreg = VM_REG_GUEST_CR4; 437 break; 438 default: 439 break; 440 } 441 442 if (vmreg == VM_REG_LAST) { 443 printf("test_setcr(%d): not implemented\n", r); 444 cb_exit(NULL, USERBOOT_EXIT_QUIT); 445 } 446 447 error = vm_set_register(ctx, BSP, vmreg, v); 448 if (error) { 449 perror("vm_set_cr"); 450 cb_exit(NULL, USERBOOT_EXIT_QUIT); 451 } 452} 453 454static void 455cb_setgdt(void *arg, uint64_t base, size_t size) 456{ 457 int error; 458 459 error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0); 460 if (error != 0) { 461 perror("vm_set_desc(gdt)"); 462 cb_exit(NULL, USERBOOT_EXIT_QUIT); 463 } 464 465 gdtbase = base; 466} 467 468static void 469cb_exec(void *arg, uint64_t rip) 470{ 471 int error; 472 473 if (cr3 == 0) 474 error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase, 475 rsp); 476 else 477 error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, 478 rsp); 479 if (error) { 480 perror("vm_setup_freebsd_registers"); 481 cb_exit(NULL, USERBOOT_EXIT_QUIT); 482 } 483 484 cb_exit(NULL, 0); 485} 486 487/* 488 * Misc 489 */ 490 491static void 492cb_delay(void *arg, int usec) 493{ 494 495 usleep(usec); 496} 497 498static void 499cb_exit(void *arg, int v) 500{ 501 502 tcsetattr(consout_fd, TCSAFLUSH, &oldterm); 503 exit(v); 504} 505 506static void 507cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) 508{ 509 510 *ret_lowmem = vm_get_lowmem_size(ctx); 511 *ret_highmem = vm_get_highmem_size(ctx); 512} 513 514struct env { 515 const char *str; /* name=value */ 516 SLIST_ENTRY(env) next; 517}; 518 519static SLIST_HEAD(envhead, env) envhead; 520 521static void 522addenv(const char *str) 523{ 524 struct env *env; 525 526 env = malloc(sizeof(struct env)); 527 env->str = str; 528 SLIST_INSERT_HEAD(&envhead, env, next); 529} 530 531static const char * 532cb_getenv(void *arg, int num) 533{ 534 int i; 535 struct env *env; 536 537 i = 0; 538 SLIST_FOREACH(env, &envhead, next) { 539 if (i == num) 540 return (env->str); 541 i++; 542 } 543 544 return (NULL); 545} 546 547static struct loader_callbacks cb = { 548 .getc = cb_getc, 549 .putc = cb_putc, 550 .poll = cb_poll, 551 552 .open = cb_open, 553 .close = cb_close, 554 .isdir = cb_isdir, 555 .read = cb_read, 556 .readdir = cb_readdir, 557 .seek = cb_seek, 558 .stat = cb_stat, 559 560 .diskread = cb_diskread, 561 .diskioctl = cb_diskioctl, 562 563 .copyin = cb_copyin, 564 .copyout = cb_copyout, 565 .setreg = cb_setreg, 566 .setmsr = cb_setmsr, 567 .setcr = cb_setcr, 568 .setgdt = cb_setgdt, 569 .exec = cb_exec, 570 571 .delay = cb_delay, 572 .exit = cb_exit, 573 .getmem = cb_getmem, 574 575 .getenv = cb_getenv, 576}; 577 578static int 579altcons_open(char *path) 580{ 581 struct stat sb; 582 int err; 583 int fd; 584 585 /* 586 * Allow stdio to be passed in so that the same string 587 * can be used for the bhyveload console and bhyve com-port 588 * parameters 589 */ 590 if (!strcmp(path, "stdio")) 591 return (0); 592 593 err = stat(path, &sb); 594 if (err == 0) { 595 if (!S_ISCHR(sb.st_mode)) 596 err = ENOTSUP; 597 else { 598 fd = open(path, O_RDWR | O_NONBLOCK); 599 if (fd < 0) 600 err = errno; 601 else 602 consin_fd = consout_fd = fd; 603 } 604 } 605 606 return (err); 607} 608 609static int 610disk_open(char *path) 611{ 612 int err, fd; 613 614 if (ndisks >= NDISKS) 615 return (ERANGE); 616 617 err = 0; 618 fd = open(path, O_RDONLY); 619 620 if (fd > 0) { 621 disk_fd[ndisks] = fd; 622 ndisks++; 623 } else 624 err = errno; 625 626 return (err); 627} 628 629static void 630usage(void) 631{ 632 633 fprintf(stderr, 634 "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n" 635 " %*s [-h <host-path>] [-m mem-size] <vmname>\n", 636 progname, 637 (int)strlen(progname), ""); 638 exit(1); 639} 640 641int 642main(int argc, char** argv) 643{ 644 char *loader; 645 void *h; 646 void (*func)(struct loader_callbacks *, void *, int, int); 647 uint64_t mem_size; 648 int opt, error, need_reinit, memflags; 649 650 progname = basename(argv[0]); 651 652 loader = NULL; 653 654 memflags = 0; 655 mem_size = 256 * MB; 656 657 consin_fd = STDIN_FILENO; 658 consout_fd = STDOUT_FILENO; 659 660 while ((opt = getopt(argc, argv, "Sc:d:e:h:l:m:")) != -1) { 661 switch (opt) { 662 case 'c': 663 error = altcons_open(optarg); 664 if (error != 0) 665 errx(EX_USAGE, "Could not open '%s'", optarg); 666 break; 667 668 case 'd': 669 error = disk_open(optarg); 670 if (error != 0) 671 errx(EX_USAGE, "Could not open '%s'", optarg); 672 break; 673 674 case 'e': 675 addenv(optarg); 676 break; 677 678 case 'h': 679 host_base = optarg; 680 break; 681 682 case 'l': 683 if (loader != NULL) 684 errx(EX_USAGE, "-l can only be given once"); 685 loader = strdup(optarg); 686 if (loader == NULL) 687 err(EX_OSERR, "malloc"); 688 break; 689 690 case 'm': 691 error = vm_parse_memsize(optarg, &mem_size); 692 if (error != 0) 693 errx(EX_USAGE, "Invalid memsize '%s'", optarg); 694 break; 695 case 'S': 696 memflags |= VM_MEM_F_WIRED; 697 break; 698 case '?': 699 usage(); 700 } 701 } 702 703 argc -= optind; 704 argv += optind; 705 706 if (argc != 1) 707 usage(); 708 709 vmname = argv[0]; 710 711 need_reinit = 0; 712 error = vm_create(vmname); 713 if (error) { 714 if (errno != EEXIST) { 715 perror("vm_create"); 716 exit(1); 717 } 718 need_reinit = 1; 719 } 720 721 ctx = vm_open(vmname); 722 if (ctx == NULL) { 723 perror("vm_open"); 724 exit(1); 725 } 726 727 if (need_reinit) { 728 error = vm_reinit(ctx); 729 if (error) { 730 perror("vm_reinit"); 731 exit(1); 732 } 733 } 734 735 vm_set_memflags(ctx, memflags); 736 error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); 737 if (error) { 738 perror("vm_setup_memory"); 739 exit(1); 740 } 741 742 if (loader == NULL) { 743 loader = strdup("/boot/userboot.so"); 744 if (loader == NULL) 745 err(EX_OSERR, "malloc"); 746 } 747 h = dlopen(loader, RTLD_LOCAL); 748 if (!h) { 749 printf("%s\n", dlerror()); 750 free(loader); 751 return (1); 752 } 753 func = dlsym(h, "loader_main"); 754 if (!func) { 755 printf("%s\n", dlerror()); 756 free(loader); 757 return (1); 758 } 759 760 tcgetattr(consout_fd, &term); 761 oldterm = term; 762 cfmakeraw(&term); 763 term.c_cflag |= CLOCAL; 764 765 tcsetattr(consout_fd, TCSAFLUSH, &term); 766 767 addenv("smbios.bios.vendor=BHYVE"); 768 addenv("boot_serial=1"); 769 770 func(&cb, NULL, USERBOOT_VERSION_3, ndisks); 771 772 free(loader); 773 return (0); 774} 775