vmd.c revision 1.59
1/* $OpenBSD: vmd.c,v 1.59 2017/04/25 16:38:23 reyk Exp $ */ 2 3/* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/param.h> /* nitems */ 20#include <sys/queue.h> 21#include <sys/wait.h> 22#include <sys/cdefs.h> 23#include <sys/stat.h> 24#include <sys/tty.h> 25#include <sys/ioctl.h> 26 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30#include <termios.h> 31#include <errno.h> 32#include <event.h> 33#include <fcntl.h> 34#include <pwd.h> 35#include <signal.h> 36#include <syslog.h> 37#include <unistd.h> 38#include <ctype.h> 39#include <pwd.h> 40#include <grp.h> 41 42#include "proc.h" 43#include "vmd.h" 44 45__dead void usage(void); 46 47int main(int, char **); 48int vmd_configure(void); 49void vmd_sighdlr(int sig, short event, void *arg); 50void vmd_shutdown(void); 51int vmd_control_run(void); 52int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 53int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 54 55struct vmd *env; 56 57static struct privsep_proc procs[] = { 58 /* Keep "priv" on top as procs[0] */ 59 { "priv", PROC_PRIV, NULL, priv }, 60 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 61 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 62}; 63 64/* For the privileged process */ 65static struct privsep_proc *proc_priv = &procs[0]; 66static struct passwd proc_privpw; 67 68int 69vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 70{ 71 struct privsep *ps = p->p_ps; 72 int res = 0, ret = 0, cmd = 0, verbose; 73 unsigned int v = 0; 74 struct vmop_create_params vmc; 75 struct vmop_id vid; 76 struct vm_terminate_params vtp; 77 struct vmop_result vmr; 78 struct vmd_vm *vm = NULL; 79 char *str = NULL; 80 uint32_t id = 0; 81 82 switch (imsg->hdr.type) { 83 case IMSG_VMDOP_START_VM_REQUEST: 84 IMSG_SIZE_CHECK(imsg, &vmc); 85 memcpy(&vmc, imsg->data, sizeof(vmc)); 86 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_uid); 87 if (vmc.vmc_flags == 0) { 88 /* start an existing VM with pre-configured options */ 89 if (!(ret == -1 && errno == EALREADY && 90 vm->vm_running == 0)) { 91 res = errno; 92 cmd = IMSG_VMDOP_START_VM_RESPONSE; 93 } 94 } else if (ret != 0) { 95 res = errno; 96 cmd = IMSG_VMDOP_START_VM_RESPONSE; 97 } 98 if (res == 0 && 99 config_setvm(ps, vm, imsg->hdr.peerid, vmc.vmc_uid) == -1) { 100 res = errno; 101 cmd = IMSG_VMDOP_START_VM_RESPONSE; 102 } 103 break; 104 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 105 IMSG_SIZE_CHECK(imsg, &vid); 106 memcpy(&vid, imsg->data, sizeof(vid)); 107 if ((id = vid.vid_id) == 0) { 108 /* Lookup vm (id) by name */ 109 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 110 res = ENOENT; 111 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 112 break; 113 } else if (vm->vm_shutdown) { 114 res = EALREADY; 115 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 116 break; 117 } 118 id = vm->vm_vmid; 119 } else 120 vm = vm_getbyvmid(id); 121 if (vm_checkperm(vm, vid.vid_uid) != 0) { 122 res = EPERM; 123 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 124 break; 125 } 126 memset(&vtp, 0, sizeof(vtp)); 127 vtp.vtp_vm_id = id; 128 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 129 imsg->hdr.peerid, -1, &vtp, sizeof(vtp)) == -1) 130 return (-1); 131 break; 132 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 133 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 134 break; 135 case IMSG_VMDOP_LOAD: 136 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 137 str = get_string((uint8_t *)imsg->data, 138 IMSG_DATA_SIZE(imsg)); 139 case IMSG_VMDOP_RELOAD: 140 vmd_reload(0, str); 141 free(str); 142 break; 143 case IMSG_CTL_RESET: 144 IMSG_SIZE_CHECK(imsg, &v); 145 memcpy(&v, imsg->data, sizeof(v)); 146 vmd_reload(v, str); 147 break; 148 case IMSG_CTL_VERBOSE: 149 IMSG_SIZE_CHECK(imsg, &verbose); 150 memcpy(&verbose, imsg->data, sizeof(verbose)); 151 log_setverbose(verbose); 152 153 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 154 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 155 break; 156 default: 157 return (-1); 158 } 159 160 switch (cmd) { 161 case 0: 162 break; 163 case IMSG_VMDOP_START_VM_RESPONSE: 164 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 165 memset(&vmr, 0, sizeof(vmr)); 166 vmr.vmr_result = res; 167 vmr.vmr_id = id; 168 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 169 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 170 return (-1); 171 break; 172 default: 173 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 174 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 175 return (-1); 176 break; 177 } 178 179 return (0); 180} 181 182int 183vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 184{ 185 struct vmop_result vmr; 186 struct privsep *ps = p->p_ps; 187 int res = 0; 188 struct vmd_vm *vm; 189 struct vm_create_params *vcp; 190 struct vmop_info_result vir; 191 192 switch (imsg->hdr.type) { 193 case IMSG_VMDOP_START_VM_RESPONSE: 194 IMSG_SIZE_CHECK(imsg, &vmr); 195 memcpy(&vmr, imsg->data, sizeof(vmr)); 196 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 197 break; 198 vm->vm_pid = vmr.vmr_pid; 199 vcp = &vm->vm_params.vmc_params; 200 vcp->vcp_id = vmr.vmr_id; 201 202 /* 203 * If the peerid is not -1, forward the response back to the 204 * the control socket. If it is -1, the request originated 205 * from the parent, not the control socket. 206 */ 207 if (vm->vm_peerid != (uint32_t)-1) { 208 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 209 sizeof(vmr.vmr_ttyname)); 210 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 211 imsg->hdr.type, vm->vm_peerid, -1, 212 &vmr, sizeof(vmr)) == -1) { 213 errno = vmr.vmr_result; 214 log_warn("%s: failed to foward vm result", 215 vcp->vcp_name); 216 vm_remove(vm); 217 return (-1); 218 } 219 } 220 221 if (vmr.vmr_result) { 222 errno = vmr.vmr_result; 223 log_warn("%s: failed to start vm", vcp->vcp_name); 224 vm_remove(vm); 225 break; 226 } 227 228 /* Now configure all the interfaces */ 229 if (vm_priv_ifconfig(ps, vm) == -1) { 230 log_warn("%s: failed to configure vm", vcp->vcp_name); 231 vm_remove(vm); 232 break; 233 } 234 235 log_info("%s: started vm %d successfully, tty %s", 236 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 237 break; 238 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 239 IMSG_SIZE_CHECK(imsg, &vmr); 240 memcpy(&vmr, imsg->data, sizeof(vmr)); 241 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 242 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 243 break; 244 if (vmr.vmr_result == 0) { 245 /* Mark VM as shutting down */ 246 vm->vm_shutdown = 1; 247 } 248 break; 249 case IMSG_VMDOP_TERMINATE_VM_EVENT: 250 IMSG_SIZE_CHECK(imsg, &vmr); 251 memcpy(&vmr, imsg->data, sizeof(vmr)); 252 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 253 break; 254 if (vmr.vmr_result == 0) { 255 if (vm->vm_from_config) 256 vm_stop(vm, 0); 257 else 258 vm_remove(vm); 259 } else if (vmr.vmr_result == EAGAIN) { 260 /* Stop VM instance but keep the tty open */ 261 vm_stop(vm, 1); 262 config_setvm(ps, vm, (uint32_t)-1, 0); 263 } 264 break; 265 case IMSG_VMDOP_GET_INFO_VM_DATA: 266 IMSG_SIZE_CHECK(imsg, &vir); 267 memcpy(&vir, imsg->data, sizeof(vir)); 268 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 269 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 270 if (vm->vm_ttyname != NULL) 271 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 272 sizeof(vir.vir_ttyname)); 273 if (vm->vm_shutdown) { 274 /* XXX there might be a nicer way */ 275 (void)strlcat(vir.vir_info.vir_name, 276 " - stopping", 277 sizeof(vir.vir_info.vir_name)); 278 } 279 /* get the user id who started the vm */ 280 vir.vir_uid = vm->vm_uid; 281 vir.vir_gid = vm->vm_params.vmc_gid; 282 } 283 if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, 284 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 285 vm_remove(vm); 286 return (-1); 287 } 288 break; 289 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 290 /* 291 * PROC_VMM has responded with the *running* VMs, now we 292 * append the others. These use the special value 0 for their 293 * kernel id to indicate that they are not running. 294 */ 295 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 296 if (!vm->vm_running) { 297 memset(&vir, 0, sizeof(vir)); 298 vir.vir_info.vir_id = vm->vm_vmid; 299 strlcpy(vir.vir_info.vir_name, 300 vm->vm_params.vmc_params.vcp_name, 301 VMM_MAX_NAME_LEN); 302 vir.vir_info.vir_memory_size = 303 vm->vm_params.vmc_params.vcp_memranges[0].vmr_size; 304 vir.vir_info.vir_ncpus = 305 vm->vm_params.vmc_params.vcp_ncpus; 306 /* get the configured user id for this vm */ 307 vir.vir_uid = vm->vm_params.vmc_uid; 308 vir.vir_gid = vm->vm_params.vmc_gid; 309 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 310 IMSG_VMDOP_GET_INFO_VM_DATA, 311 imsg->hdr.peerid, -1, &vir, 312 sizeof(vir)) == -1) { 313 vm_remove(vm); 314 return (-1); 315 } 316 } 317 } 318 IMSG_SIZE_CHECK(imsg, &res); 319 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 320 break; 321 default: 322 return (-1); 323 } 324 325 return (0); 326} 327 328void 329vmd_sighdlr(int sig, short event, void *arg) 330{ 331 if (privsep_process != PROC_PARENT) 332 return; 333 334 switch (sig) { 335 case SIGHUP: 336 log_info("%s: reload requested with SIGHUP", __func__); 337 338 /* 339 * This is safe because libevent uses async signal handlers 340 * that run in the event loop and not in signal context. 341 */ 342 vmd_reload(0, NULL); 343 break; 344 case SIGPIPE: 345 log_info("%s: ignoring SIGPIPE", __func__); 346 break; 347 case SIGUSR1: 348 log_info("%s: ignoring SIGUSR1", __func__); 349 break; 350 case SIGTERM: 351 case SIGINT: 352 vmd_shutdown(); 353 break; 354 default: 355 fatalx("unexpected signal"); 356 } 357} 358 359__dead void 360usage(void) 361{ 362 extern char *__progname; 363 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 364 __progname); 365 exit(1); 366} 367 368int 369main(int argc, char **argv) 370{ 371 struct privsep *ps; 372 int ch; 373 const char *conffile = VMD_CONF; 374 enum privsep_procid proc_id = PROC_PARENT; 375 int proc_instance = 0; 376 const char *errp, *title = NULL; 377 int argc0 = argc; 378 379 /* log to stderr until daemonized */ 380 log_init(1, LOG_DAEMON); 381 382 if ((env = calloc(1, sizeof(*env))) == NULL) 383 fatal("calloc: env"); 384 385 while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { 386 switch (ch) { 387 case 'D': 388 if (cmdline_symset(optarg) < 0) 389 log_warnx("could not parse macro definition %s", 390 optarg); 391 break; 392 case 'd': 393 env->vmd_debug = 2; 394 break; 395 case 'f': 396 conffile = optarg; 397 break; 398 case 'v': 399 env->vmd_verbose++; 400 break; 401 case 'n': 402 env->vmd_noaction = 1; 403 break; 404 case 'P': 405 title = optarg; 406 proc_id = proc_getid(procs, nitems(procs), title); 407 if (proc_id == PROC_MAX) 408 fatalx("invalid process name"); 409 break; 410 case 'I': 411 proc_instance = strtonum(optarg, 0, 412 PROC_MAX_INSTANCES, &errp); 413 if (errp) 414 fatalx("invalid process instance"); 415 break; 416 default: 417 usage(); 418 } 419 } 420 421 argc -= optind; 422 if (argc > 0) 423 usage(); 424 425 if (env->vmd_noaction && !env->vmd_debug) 426 env->vmd_debug = 1; 427 428 /* check for root privileges */ 429 if (env->vmd_noaction == 0) { 430 if (geteuid()) 431 fatalx("need root privileges"); 432 } 433 434 ps = &env->vmd_ps; 435 ps->ps_env = env; 436 env->vmd_fd = -1; 437 438 if (config_init(env) == -1) 439 fatal("failed to initialize configuration"); 440 441 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 442 fatal("unknown user %s", VMD_USER); 443 444 /* First proc runs as root without pledge but in default chroot */ 445 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 446 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 447 448 /* Open /dev/vmm */ 449 if (env->vmd_noaction == 0) { 450 env->vmd_fd = open(VMM_NODE, O_RDWR); 451 if (env->vmd_fd == -1) 452 fatal("%s", VMM_NODE); 453 } 454 455 /* Configure the control socket */ 456 ps->ps_csock.cs_name = SOCKET_NAME; 457 TAILQ_INIT(&ps->ps_rcsocks); 458 459 /* Configuration will be parsed after forking the children */ 460 env->vmd_conffile = conffile; 461 462 log_init(env->vmd_debug, LOG_DAEMON); 463 log_setverbose(env->vmd_verbose); 464 465 if (env->vmd_noaction) 466 ps->ps_noaction = 1; 467 ps->ps_instance = proc_instance; 468 if (title != NULL) 469 ps->ps_title[proc_id] = title; 470 471 /* only the parent returns */ 472 proc_init(ps, procs, nitems(procs), argc0, argv, proc_id); 473 474 log_procinit("parent"); 475 if (!env->vmd_debug && daemon(0, 0) == -1) 476 fatal("can't daemonize"); 477 478 if (ps->ps_noaction == 0) 479 log_info("startup"); 480 481 event_init(); 482 483 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 484 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 485 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 486 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 487 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 488 489 signal_add(&ps->ps_evsigint, NULL); 490 signal_add(&ps->ps_evsigterm, NULL); 491 signal_add(&ps->ps_evsighup, NULL); 492 signal_add(&ps->ps_evsigpipe, NULL); 493 signal_add(&ps->ps_evsigusr1, NULL); 494 495 if (!env->vmd_noaction) 496 proc_connect(ps); 497 498 if (vmd_configure() == -1) 499 fatalx("configuration failed"); 500 501 event_dispatch(); 502 503 log_debug("parent exiting"); 504 505 return (0); 506} 507 508int 509vmd_configure(void) 510{ 511 struct vmd_vm *vm; 512 struct vmd_switch *vsw; 513 514 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) 515 fatal("open %s", PATH_PTMDEV); 516 517 /* 518 * pledge in the parent process: 519 * stdio - for malloc and basic I/O including events. 520 * rpath - for reload to open and read the configuration files. 521 * wpath - for opening disk images and tap devices. 522 * tty - for openpty. 523 * proc - run kill to terminate its children safely. 524 * sendfd - for disks, interfaces and other fds. 525 * getpw - lookup user or group id by name. 526 * chown, fattr - change tty ownership 527 */ 528 if (pledge("stdio rpath wpath proc tty sendfd getpw" 529 " chown fattr", NULL) == -1) 530 fatal("pledge"); 531 532 if (parse_config(env->vmd_conffile) == -1) { 533 proc_kill(&env->vmd_ps); 534 exit(1); 535 } 536 537 if (env->vmd_noaction) { 538 fprintf(stderr, "configuration OK\n"); 539 proc_kill(&env->vmd_ps); 540 exit(0); 541 } 542 543 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 544 if (vsw->sw_running) 545 continue; 546 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 547 log_warn("%s: failed to create switch %s", 548 __func__, vsw->sw_name); 549 switch_remove(vsw); 550 return (-1); 551 } 552 } 553 554 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 555 if (vm->vm_disabled) { 556 log_debug("%s: not creating vm %s (disabled)", 557 __func__, 558 vm->vm_params.vmc_params.vcp_name); 559 continue; 560 } 561 if (config_setvm(&env->vmd_ps, vm, -1, 0) == -1) 562 return (-1); 563 } 564 565 /* Send shared global configuration to all children */ 566 if (config_setconfig(env) == -1) 567 return (-1); 568 569 return (0); 570} 571 572void 573vmd_reload(unsigned int reset, const char *filename) 574{ 575 struct vmd_vm *vm, *next_vm; 576 struct vmd_switch *vsw; 577 int reload = 0; 578 579 /* Switch back to the default config file */ 580 if (filename == NULL || *filename == '\0') { 581 filename = env->vmd_conffile; 582 reload = 1; 583 } 584 585 log_debug("%s: level %d config file %s", __func__, reset, filename); 586 587 if (reset) { 588 /* Purge the configuration */ 589 config_purge(env, reset); 590 config_setreset(env, reset); 591 } else { 592 /* 593 * Load or reload the configuration. 594 * 595 * Reloading removes all non-running VMs before processing the 596 * config file, whereas loading only adds to the existing list 597 * of VMs. 598 */ 599 600 if (reload) { 601 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, next_vm) { 602 if (vm->vm_running == 0) 603 vm_remove(vm); 604 } 605 606 /* Update shared global configuration in all children */ 607 if (config_setconfig(env) == -1) 608 return; 609 } 610 611 if (parse_config(filename) == -1) { 612 log_debug("%s: failed to load config file %s", 613 __func__, filename); 614 } 615 616 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 617 if (vsw->sw_running) 618 continue; 619 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 620 log_warn("%s: failed to create switch %s", 621 __func__, vsw->sw_name); 622 switch_remove(vsw); 623 return; 624 } 625 } 626 627 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 628 if (vm->vm_running == 0) { 629 if (vm->vm_disabled) { 630 log_debug("%s: not creating vm %s" 631 " (disabled)", __func__, 632 vm->vm_params.vmc_params.vcp_name); 633 continue; 634 } 635 if (config_setvm(&env->vmd_ps, vm, -1, 0) == -1) 636 return; 637 } else { 638 log_debug("%s: not creating vm \"%s\": " 639 "(running)", __func__, 640 vm->vm_params.vmc_params.vcp_name); 641 } 642 } 643 } 644} 645 646void 647vmd_shutdown(void) 648{ 649 struct vmd_vm *vm, *vm_next; 650 651 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 652 vm_remove(vm); 653 } 654 655 proc_kill(&env->vmd_ps); 656 free(env); 657 658 log_warnx("parent terminating"); 659 exit(0); 660} 661 662struct vmd_vm * 663vm_getbyvmid(uint32_t vmid) 664{ 665 struct vmd_vm *vm; 666 667 if (vmid == 0) 668 return (NULL); 669 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 670 if (vm->vm_vmid == vmid) 671 return (vm); 672 } 673 674 return (NULL); 675} 676 677struct vmd_vm * 678vm_getbyid(uint32_t id) 679{ 680 struct vmd_vm *vm; 681 682 if (id == 0) 683 return (NULL); 684 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 685 if (vm->vm_params.vmc_params.vcp_id == id) 686 return (vm); 687 } 688 689 return (NULL); 690} 691 692uint32_t 693vm_id2vmid(uint32_t id, struct vmd_vm *vm) 694{ 695 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 696 return (0); 697 dprintf("%s: vmm id %u is vmid %u", __func__, 698 id, vm->vm_vmid); 699 return (vm->vm_vmid); 700} 701 702uint32_t 703vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 704{ 705 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 706 return (0); 707 dprintf("%s: vmid %u is vmm id %u", __func__, 708 vmid, vm->vm_params.vmc_params.vcp_id); 709 return (vm->vm_params.vmc_params.vcp_id); 710} 711 712struct vmd_vm * 713vm_getbyname(const char *name) 714{ 715 struct vmd_vm *vm; 716 717 if (name == NULL) 718 return (NULL); 719 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 720 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 721 return (vm); 722 } 723 724 return (NULL); 725} 726 727struct vmd_vm * 728vm_getbypid(pid_t pid) 729{ 730 struct vmd_vm *vm; 731 732 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 733 if (vm->vm_pid == pid) 734 return (vm); 735 } 736 737 return (NULL); 738} 739 740void 741vm_stop(struct vmd_vm *vm, int keeptty) 742{ 743 unsigned int i; 744 745 if (vm == NULL) 746 return; 747 748 vm->vm_running = 0; 749 vm->vm_shutdown = 0; 750 751 if (vm->vm_iev.ibuf.fd != -1) { 752 event_del(&vm->vm_iev.ev); 753 close(vm->vm_iev.ibuf.fd); 754 } 755 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { 756 if (vm->vm_disks[i] != -1) { 757 close(vm->vm_disks[i]); 758 vm->vm_disks[i] = -1; 759 } 760 } 761 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 762 if (vm->vm_ifs[i].vif_fd != -1) { 763 close(vm->vm_ifs[i].vif_fd); 764 vm->vm_ifs[i].vif_fd = -1; 765 } 766 free(vm->vm_ifs[i].vif_name); 767 free(vm->vm_ifs[i].vif_switch); 768 free(vm->vm_ifs[i].vif_group); 769 vm->vm_ifs[i].vif_name = NULL; 770 vm->vm_ifs[i].vif_switch = NULL; 771 vm->vm_ifs[i].vif_group = NULL; 772 } 773 if (vm->vm_kernel != -1) { 774 close(vm->vm_kernel); 775 vm->vm_kernel = -1; 776 } 777 vm->vm_uid = 0; 778 if (!keeptty) 779 vm_closetty(vm); 780} 781 782void 783vm_remove(struct vmd_vm *vm) 784{ 785 if (vm == NULL) 786 return; 787 788 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 789 vm_stop(vm, 0); 790 free(vm); 791} 792 793int 794vm_register(struct privsep *ps, struct vmop_create_params *vmc, 795 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 796{ 797 struct vmd_vm *vm = NULL; 798 struct vm_create_params *vcp = &vmc->vmc_params; 799 static const uint8_t zero_mac[ETHER_ADDR_LEN]; 800 uint32_t rng; 801 unsigned int i; 802 struct vmd_switch *sw; 803 804 errno = 0; 805 *ret_vm = NULL; 806 807 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 808 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 809 if (vm_checkperm(vm, uid) != 0 || vmc->vmc_flags != 0) { 810 errno = EPERM; 811 goto fail; 812 } 813 *ret_vm = vm; 814 errno = EALREADY; 815 goto fail; 816 } 817 818 /* 819 * non-root users can only start existing VMs 820 * XXX there could be a mechanism to allow overriding some options 821 */ 822 if (vm_checkperm(NULL, uid) != 0) { 823 errno = EPERM; 824 goto fail; 825 } 826 if (vmc->vmc_flags == 0) { 827 errno = ENOENT; 828 goto fail; 829 } 830 if (vcp->vcp_ncpus == 0) 831 vcp->vcp_ncpus = 1; 832 if (vcp->vcp_memranges[0].vmr_size == 0) 833 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 834 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 835 log_warnx("invalid number of CPUs"); 836 goto fail; 837 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 838 log_warnx("invalid number of disks"); 839 goto fail; 840 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 841 log_warnx("invalid number of interfaces"); 842 goto fail; 843 } else if (strlen(vcp->vcp_kernel) == 0 && vcp->vcp_ndisks == 0) { 844 log_warnx("no kernel or disk specified"); 845 goto fail; 846 } else if (strlen(vcp->vcp_name) == 0) { 847 log_warnx("invalid VM name"); 848 goto fail; 849 } 850 851 if ((vm = calloc(1, sizeof(*vm))) == NULL) 852 goto fail; 853 854 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 855 vmc = &vm->vm_params; 856 vcp = &vmc->vmc_params; 857 vm->vm_pid = -1; 858 vm->vm_tty = -1; 859 860 for (i = 0; i < vcp->vcp_ndisks; i++) 861 vm->vm_disks[i] = -1; 862 for (i = 0; i < vcp->vcp_nnics; i++) { 863 vm->vm_ifs[i].vif_fd = -1; 864 865 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 866 /* inherit per-interface flags from the switch */ 867 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 868 } 869 870 /* 871 * If the MAC address is zero, always randomize it in vmd(8) 872 * because we cannot rely on the guest OS to do the right 873 * thing like OpenBSD does. Based on ether_fakeaddr() 874 * from the kernel, incremented by one to differentiate 875 * the source. 876 */ 877 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) { 878 rng = arc4random(); 879 vcp->vcp_macs[i][0] = 0xfe; 880 vcp->vcp_macs[i][1] = 0xe1; 881 vcp->vcp_macs[i][2] = 0xba + 1; 882 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 883 vcp->vcp_macs[i][4] = rng; 884 vcp->vcp_macs[i][5] = rng >> 8; 885 } 886 } 887 vm->vm_kernel = -1; 888 vm->vm_iev.ibuf.fd = -1; 889 890 if (++env->vmd_nvm == 0) 891 fatalx("too many vms"); 892 893 /* Assign a new internal Id if not specified */ 894 vm->vm_vmid = id == 0 ? env->vmd_nvm : id; 895 896 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 897 898 *ret_vm = vm; 899 return (0); 900 fail: 901 if (errno == 0) 902 errno = EINVAL; 903 return (-1); 904} 905 906int 907vm_checkperm(struct vmd_vm *vm, uid_t uid) 908{ 909 struct group *gr; 910 struct passwd *pw; 911 char **grmem; 912 913 /* root has no restrictions */ 914 if (uid == 0) 915 return (0); 916 917 if (vm == NULL) 918 return (-1); 919 920 /* check supplementary groups */ 921 if (vm->vm_params.vmc_gid != -1 && 922 (pw = getpwuid(uid)) != NULL && 923 (gr = getgrgid(vm->vm_params.vmc_gid)) != NULL) { 924 for (grmem = gr->gr_mem; *grmem; grmem++) 925 if (strcmp(*grmem, pw->pw_name) == 0) 926 return (0); 927 } 928 929 /* check user */ 930 if ((vm->vm_running && vm->vm_uid == uid) || 931 (!vm->vm_running && vm->vm_params.vmc_uid == uid)) 932 return (0); 933 934 return (-1); 935} 936 937int 938vm_opentty(struct vmd_vm *vm) 939{ 940 struct ptmget ptm; 941 struct stat st; 942 struct group *gr; 943 uid_t uid; 944 gid_t gid; 945 mode_t mode; 946 947 /* 948 * Open tty with pre-opened PTM fd 949 */ 950 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 951 return (-1); 952 953 vm->vm_tty = ptm.cfd; 954 close(ptm.sfd); 955 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL) 956 goto fail; 957 958 uid = vm->vm_uid; 959 gid = vm->vm_params.vmc_gid; 960 961 if (vm->vm_params.vmc_gid != -1) { 962 mode = 0660; 963 } else if ((gr = getgrnam("tty")) != NULL) { 964 gid = gr->gr_gid; 965 mode = 0620; 966 } else { 967 mode = 0600; 968 gid = 0; 969 } 970 971 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 972 __func__, vm->vm_params.vmc_params.vcp_name, 973 vm->vm_ttyname, uid, gid, mode); 974 975 /* 976 * Change ownership and mode of the tty as required. 977 * Loosely based on the implementation of sshpty.c 978 */ 979 if (stat(vm->vm_ttyname, &st) == -1) 980 goto fail; 981 982 if (st.st_uid != uid || st.st_gid != gid) { 983 if (chown(vm->vm_ttyname, uid, gid) == -1) { 984 log_warn("chown %s %d %d failed, uid %d", 985 vm->vm_ttyname, uid, gid, getuid()); 986 987 /* Ignore failure on read-only filesystems */ 988 if (!((errno == EROFS) && 989 (st.st_uid == uid || st.st_uid == 0))) 990 goto fail; 991 } 992 } 993 994 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 995 if (chmod(vm->vm_ttyname, mode) == -1) { 996 log_warn("chmod %s %o failed, uid %d", 997 vm->vm_ttyname, mode, getuid()); 998 999 /* Ignore failure on read-only filesystems */ 1000 if (!((errno == EROFS) && 1001 (st.st_uid == uid || st.st_uid == 0))) 1002 goto fail; 1003 } 1004 } 1005 1006 return (0); 1007 fail: 1008 vm_closetty(vm); 1009 return (-1); 1010} 1011 1012void 1013vm_closetty(struct vmd_vm *vm) 1014{ 1015 if (vm->vm_tty != -1) { 1016 /* Release and close the tty */ 1017 if (fchown(vm->vm_tty, 0, 0) == -1) 1018 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1019 if (fchmod(vm->vm_tty, 0666) == -1) 1020 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1021 close(vm->vm_tty); 1022 vm->vm_tty = -1; 1023 } 1024 free(vm->vm_ttyname); 1025 vm->vm_ttyname = NULL; 1026} 1027 1028void 1029switch_remove(struct vmd_switch *vsw) 1030{ 1031 struct vmd_if *vif; 1032 1033 if (vsw == NULL) 1034 return; 1035 1036 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1037 1038 while ((vif = TAILQ_FIRST(&vsw->sw_ifs)) != NULL) { 1039 free(vif->vif_name); 1040 free(vif->vif_switch); 1041 TAILQ_REMOVE(&vsw->sw_ifs, vif, vif_entry); 1042 free(vif); 1043 } 1044 1045 free(vsw->sw_group); 1046 free(vsw->sw_name); 1047 free(vsw); 1048} 1049 1050struct vmd_switch * 1051switch_getbyname(const char *name) 1052{ 1053 struct vmd_switch *vsw; 1054 1055 if (name == NULL) 1056 return (NULL); 1057 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1058 if (strcmp(vsw->sw_name, name) == 0) 1059 return (vsw); 1060 } 1061 1062 return (NULL); 1063} 1064 1065char * 1066get_string(uint8_t *ptr, size_t len) 1067{ 1068 size_t i; 1069 1070 for (i = 0; i < len; i++) 1071 if (!isprint(ptr[i])) 1072 break; 1073 1074 return strndup(ptr, i); 1075} 1076 1077uint32_t 1078prefixlen2mask(uint8_t prefixlen) 1079{ 1080 if (prefixlen == 0) 1081 return (0); 1082 1083 if (prefixlen > 32) 1084 prefixlen = 32; 1085 1086 return (htonl(0xffffffff << (32 - prefixlen))); 1087} 1088