vmd.c revision 1.156
1/* $OpenBSD: vmd.c,v 1.156 2024/04/08 12:48:26 tobhe Exp $ */ 2 3/* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/types.h> 20#include <sys/queue.h> 21#include <sys/wait.h> 22#include <sys/stat.h> 23#include <sys/sysctl.h> 24#include <sys/tty.h> 25#include <sys/ttycom.h> 26#include <sys/ioctl.h> 27 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31#include <termios.h> 32#include <errno.h> 33#include <event.h> 34#include <fcntl.h> 35#include <pwd.h> 36#include <signal.h> 37#include <syslog.h> 38#include <unistd.h> 39#include <util.h> 40#include <ctype.h> 41#include <grp.h> 42 43#include <machine/specialreg.h> 44#include <machine/vmmvar.h> 45 46#include "proc.h" 47#include "atomicio.h" 48#include "vmd.h" 49 50__dead void usage(void); 51 52int main(int, char **); 53int vmd_configure(void); 54void vmd_sighdlr(int sig, short event, void *arg); 55void vmd_shutdown(void); 56int vmd_control_run(void); 57int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 58int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 59int vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *); 60int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); 61int 
vmd_check_vmh(struct vm_dump_header *); 62 63int vm_instance(struct privsep *, struct vmd_vm **, 64 struct vmop_create_params *, uid_t); 65int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 66int vm_claimid(const char *, int, uint32_t *); 67void start_vm_batch(int, short, void*); 68 69static inline void vm_terminate(struct vmd_vm *, const char *); 70 71struct vmd *env; 72 73static struct privsep_proc procs[] = { 74 /* Keep "priv" on top as procs[0] */ 75 { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, 76 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 77 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, 78 vmm_shutdown, "/" }, 79 { "agentx", PROC_AGENTX, vmd_dispatch_agentx, vm_agentx, 80 vm_agentx_shutdown, "/" } 81}; 82 83enum privsep_procid privsep_process; 84 85struct event staggered_start_timer; 86 87/* For the privileged process */ 88static struct privsep_proc *proc_priv = &procs[0]; 89static struct passwd proc_privpw; 90static const uint8_t zero_mac[ETHER_ADDR_LEN]; 91 92const char default_conffile[] = VMD_CONF; 93const char *conffile = default_conffile; 94 95int 96vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 97{ 98 struct privsep *ps = p->p_ps; 99 int res = 0, ret = 0, cmd = 0, verbose; 100 int ifd; 101 unsigned int v = 0, flags; 102 struct vmop_create_params vmc; 103 struct vmop_id vid; 104 struct vmop_result vmr; 105 struct vm_dump_header vmh; 106 struct vmd_vm *vm = NULL; 107 char *str = NULL; 108 uint32_t id = 0; 109 struct control_sock *rcs; 110 111 switch (imsg->hdr.type) { 112 case IMSG_VMDOP_START_VM_REQUEST: 113 IMSG_SIZE_CHECK(imsg, &vmc); 114 memcpy(&vmc, imsg->data, sizeof(vmc)); 115 vmc.vmc_kernel = imsg_get_fd(imsg); 116 117 /* Try registering our VM in our list of known VMs. */ 118 if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) { 119 res = errno; 120 121 /* Did we have a failure during lookup of a parent? 
*/ 122 if (vm == NULL) { 123 cmd = IMSG_VMDOP_START_VM_RESPONSE; 124 break; 125 } 126 127 /* Does the VM already exist? */ 128 if (res == EALREADY) { 129 /* Is it already running? */ 130 if (vm->vm_state & VM_STATE_RUNNING) { 131 cmd = IMSG_VMDOP_START_VM_RESPONSE; 132 break; 133 } 134 135 /* If not running, are our flags ok? */ 136 if (vmc.vmc_flags && 137 vmc.vmc_flags != VMOP_CREATE_KERNEL) { 138 cmd = IMSG_VMDOP_START_VM_RESPONSE; 139 break; 140 } 141 } 142 res = 0; 143 } 144 145 /* Try to start the launch of the VM. */ 146 res = config_setvm(ps, vm, imsg->hdr.peerid, 147 vm->vm_params.vmc_owner.uid); 148 if (res) 149 cmd = IMSG_VMDOP_START_VM_RESPONSE; 150 break; 151 case IMSG_VMDOP_WAIT_VM_REQUEST: 152 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 153 IMSG_SIZE_CHECK(imsg, &vid); 154 memcpy(&vid, imsg->data, sizeof(vid)); 155 flags = vid.vid_flags; 156 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 157 158 if ((id = vid.vid_id) == 0) { 159 /* Lookup vm (id) by name */ 160 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 161 res = ENOENT; 162 break; 163 } 164 id = vm->vm_vmid; 165 } else if ((vm = vm_getbyvmid(id)) == NULL) { 166 res = ENOENT; 167 break; 168 } 169 170 /* Validate curent state of vm */ 171 if ((vm->vm_state & VM_STATE_SHUTDOWN) && 172 (flags & VMOP_FORCE) == 0) { 173 res = EALREADY; 174 break; 175 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 176 res = EINVAL; 177 break; 178 } else if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { 179 res = EPERM; 180 break; 181 } 182 183 /* Only relay TERMINATION requests, not WAIT requests */ 184 if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { 185 memset(&vid, 0, sizeof(vid)); 186 vid.vid_id = id; 187 vid.vid_flags = flags; 188 189 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 190 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 191 return (-1); 192 } 193 break; 194 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 195 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 196 break; 197 case IMSG_VMDOP_LOAD: 198 
IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 199 str = get_string((uint8_t *)imsg->data, 200 IMSG_DATA_SIZE(imsg)); 201 case IMSG_VMDOP_RELOAD: 202 if (vmd_reload(0, str) == -1) 203 cmd = IMSG_CTL_FAIL; 204 else 205 cmd = IMSG_CTL_OK; 206 free(str); 207 break; 208 case IMSG_CTL_RESET: 209 IMSG_SIZE_CHECK(imsg, &v); 210 memcpy(&v, imsg->data, sizeof(v)); 211 if (vmd_reload(v, NULL) == -1) 212 cmd = IMSG_CTL_FAIL; 213 else 214 cmd = IMSG_CTL_OK; 215 break; 216 case IMSG_CTL_VERBOSE: 217 IMSG_SIZE_CHECK(imsg, &verbose); 218 memcpy(&verbose, imsg->data, sizeof(verbose)); 219 log_setverbose(verbose); 220 221 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 222 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 223 cmd = IMSG_CTL_OK; 224 break; 225 case IMSG_VMDOP_PAUSE_VM: 226 case IMSG_VMDOP_UNPAUSE_VM: 227 IMSG_SIZE_CHECK(imsg, &vid); 228 memcpy(&vid, imsg->data, sizeof(vid)); 229 if (vid.vid_id == 0) { 230 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 231 res = ENOENT; 232 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 233 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 234 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 235 break; 236 } else { 237 vid.vid_id = vm->vm_vmid; 238 } 239 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 240 res = ENOENT; 241 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 242 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 243 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 244 break; 245 } 246 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 247 vid.vid_uid) != 0) { 248 res = EPERM; 249 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 250 ? 
IMSG_VMDOP_PAUSE_VM_RESPONSE 251 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 252 break; 253 } 254 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 255 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 256 break; 257 case IMSG_VMDOP_SEND_VM_REQUEST: 258 IMSG_SIZE_CHECK(imsg, &vid); 259 memcpy(&vid, imsg->data, sizeof(vid)); 260 id = vid.vid_id; 261 ifd = imsg_get_fd(imsg); 262 if (vid.vid_id == 0) { 263 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 264 res = ENOENT; 265 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 266 close(ifd); 267 break; 268 } else { 269 vid.vid_id = vm->vm_vmid; 270 } 271 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 272 res = ENOENT; 273 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 274 close(ifd); 275 break; 276 } 277 vmr.vmr_id = vid.vid_id; 278 log_debug("%s: sending fd to vmm", __func__); 279 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 280 imsg->hdr.peerid, ifd, &vid, sizeof(vid)); 281 break; 282 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 283 IMSG_SIZE_CHECK(imsg, &vid); 284 memcpy(&vid, imsg->data, sizeof(vid)); 285 ifd = imsg_get_fd(imsg); 286 if (ifd == -1) { 287 log_warnx("%s: invalid fd", __func__); 288 return (-1); 289 } 290 if (atomicio(read, ifd, &vmh, sizeof(vmh)) != sizeof(vmh)) { 291 log_warnx("%s: error reading vmh from received vm", 292 __func__); 293 res = EIO; 294 close(ifd); 295 cmd = IMSG_VMDOP_START_VM_RESPONSE; 296 break; 297 } 298 299 if (vmd_check_vmh(&vmh)) { 300 res = ENOENT; 301 close(ifd); 302 cmd = IMSG_VMDOP_START_VM_RESPONSE; 303 break; 304 } 305 if (atomicio(read, ifd, &vmc, sizeof(vmc)) != sizeof(vmc)) { 306 log_warnx("%s: error reading vmc from received vm", 307 __func__); 308 res = EIO; 309 close(ifd); 310 cmd = IMSG_VMDOP_START_VM_RESPONSE; 311 break; 312 } 313 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 314 sizeof(vmc.vmc_params.vcp_name)); 315 vmc.vmc_params.vcp_id = 0; 316 317 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 318 if (ret != 0) { 319 res = errno; 320 cmd = IMSG_VMDOP_START_VM_RESPONSE; 321 close(ifd); 
322 } else { 323 vm->vm_state |= VM_STATE_RECEIVED; 324 config_setvm(ps, vm, imsg->hdr.peerid, 325 vmc.vmc_owner.uid); 326 log_debug("%s: sending fd to vmm", __func__); 327 proc_compose_imsg(ps, PROC_VMM, -1, 328 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, ifd, 329 NULL, 0); 330 } 331 break; 332 case IMSG_VMDOP_DONE: 333 control_reset(&ps->ps_csock); 334 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 335 control_reset(rcs); 336 cmd = 0; 337 break; 338 default: 339 return (-1); 340 } 341 342 switch (cmd) { 343 case 0: 344 break; 345 case IMSG_VMDOP_START_VM_RESPONSE: 346 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 347 memset(&vmr, 0, sizeof(vmr)); 348 vmr.vmr_result = res; 349 vmr.vmr_id = id; 350 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 351 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 352 return (-1); 353 break; 354 default: 355 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 356 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 357 return (-1); 358 break; 359 } 360 361 return (0); 362} 363 364int 365vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 366{ 367 struct vmop_result vmr; 368 struct privsep *ps = p->p_ps; 369 int res = 0; 370 struct vmd_vm *vm; 371 struct vm_create_params *vcp; 372 struct vmop_info_result vir; 373 374 switch (imsg->hdr.type) { 375 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 376 IMSG_SIZE_CHECK(imsg, &vmr); 377 memcpy(&vmr, imsg->data, sizeof(vmr)); 378 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 379 break; 380 proc_compose_imsg(ps, PROC_CONTROL, -1, 381 imsg->hdr.type, imsg->hdr.peerid, -1, 382 imsg->data, sizeof(imsg->data)); 383 log_info("%s: paused vm %d successfully", 384 vm->vm_params.vmc_params.vcp_name, 385 vm->vm_vmid); 386 vm->vm_state |= VM_STATE_PAUSED; 387 break; 388 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 389 IMSG_SIZE_CHECK(imsg, &vmr); 390 memcpy(&vmr, imsg->data, sizeof(vmr)); 391 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 392 break; 393 proc_compose_imsg(ps, PROC_CONTROL, -1, 394 imsg->hdr.type, imsg->hdr.peerid, 
-1, 395 imsg->data, sizeof(imsg->data)); 396 log_info("%s: unpaused vm %d successfully.", 397 vm->vm_params.vmc_params.vcp_name, 398 vm->vm_vmid); 399 vm->vm_state &= ~VM_STATE_PAUSED; 400 break; 401 case IMSG_VMDOP_START_VM_RESPONSE: 402 IMSG_SIZE_CHECK(imsg, &vmr); 403 memcpy(&vmr, imsg->data, sizeof(vmr)); 404 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 405 break; 406 vm->vm_pid = vmr.vmr_pid; 407 vcp = &vm->vm_params.vmc_params; 408 vcp->vcp_id = vmr.vmr_id; 409 410 /* 411 * If the peerid is not -1, forward the response back to the 412 * the control socket. If it is -1, the request originated 413 * from the parent, not the control socket. 414 */ 415 if (vm->vm_peerid != (uint32_t)-1) { 416 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 417 sizeof(vmr.vmr_ttyname)); 418 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 419 imsg->hdr.type, vm->vm_peerid, -1, 420 &vmr, sizeof(vmr)) == -1) { 421 errno = vmr.vmr_result; 422 log_warn("%s: failed to forward vm result", 423 vcp->vcp_name); 424 vm_terminate(vm, __func__); 425 return (-1); 426 } 427 } 428 429 if (vmr.vmr_result) { 430 log_warnx("%s: failed to start vm", vcp->vcp_name); 431 vm_terminate(vm, __func__); 432 errno = vmr.vmr_result; 433 break; 434 } 435 436 /* Now configure all the interfaces */ 437 if (vm_priv_ifconfig(ps, vm) == -1) { 438 log_warn("%s: failed to configure vm", vcp->vcp_name); 439 vm_terminate(vm, __func__); 440 break; 441 } 442 443 log_info("started %s (vm %d) successfully, tty %s", 444 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 445 break; 446 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 447 IMSG_SIZE_CHECK(imsg, &vmr); 448 memcpy(&vmr, imsg->data, sizeof(vmr)); 449 450 if (vmr.vmr_result) { 451 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 452 __func__, vmr.vmr_id); 453 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 454 } else { 455 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 456 break; 457 /* Mark VM as shutting down */ 458 vm->vm_state |= VM_STATE_SHUTDOWN; 459 } 460 break; 461 case 
IMSG_VMDOP_SEND_VM_RESPONSE: 462 IMSG_SIZE_CHECK(imsg, &vmr); 463 memcpy(&vmr, imsg->data, sizeof(vmr)); 464 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 465 break; 466 if (!vmr.vmr_result) { 467 log_info("%s: sent vm %d successfully.", 468 vm->vm_params.vmc_params.vcp_name, 469 vm->vm_vmid); 470 vm_terminate(vm, __func__); 471 } 472 473 /* Send a response if a control client is waiting for it */ 474 if (imsg->hdr.peerid != (uint32_t)-1) { 475 /* the error is meaningless for deferred responses */ 476 vmr.vmr_result = 0; 477 478 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 479 IMSG_VMDOP_SEND_VM_RESPONSE, 480 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 481 return (-1); 482 } 483 break; 484 case IMSG_VMDOP_TERMINATE_VM_EVENT: 485 IMSG_SIZE_CHECK(imsg, &vmr); 486 memcpy(&vmr, imsg->data, sizeof(vmr)); 487 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 488 __func__, vmr.vmr_id, vmr.vmr_result); 489 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 490 log_debug("%s: vm %d is no longer available", 491 __func__, vmr.vmr_id); 492 break; 493 } 494 if (vmr.vmr_result != EAGAIN || 495 vm->vm_params.vmc_bootdevice) { 496 vm_terminate(vm, __func__); 497 } else { 498 /* Stop VM instance but keep the tty open */ 499 vm_stop(vm, 1, __func__); 500 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 501 } 502 503 /* The error is meaningless for deferred responses */ 504 vmr.vmr_result = 0; 505 506 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 507 IMSG_VMDOP_TERMINATE_VM_EVENT, 508 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 509 return (-1); 510 break; 511 case IMSG_VMDOP_GET_INFO_VM_DATA: 512 IMSG_SIZE_CHECK(imsg, &vir); 513 memcpy(&vir, imsg->data, sizeof(vir)); 514 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 515 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 516 if (vm->vm_ttyname[0] != '\0') 517 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 518 sizeof(vir.vir_ttyname)); 519 log_debug("%s: running vm: %d, vm_state: 0x%x", 520 __func__, vm->vm_vmid, 
vm->vm_state); 521 vir.vir_state = vm->vm_state; 522 /* get the user id who started the vm */ 523 vir.vir_uid = vm->vm_uid; 524 vir.vir_gid = vm->vm_params.vmc_owner.gid; 525 } 526 if (proc_compose_imsg(ps, 527 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 528 PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type, 529 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 530 if (vm) 531 vm_terminate(vm, __func__); 532 return (-1); 533 } 534 break; 535 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 536 /* 537 * PROC_VMM has responded with the *running* VMs, now we 538 * append the others. These use the special value 0 for their 539 * kernel id to indicate that they are not running. 540 */ 541 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 542 if (!(vm->vm_state & VM_STATE_RUNNING)) { 543 memset(&vir, 0, sizeof(vir)); 544 vir.vir_info.vir_id = vm->vm_vmid; 545 strlcpy(vir.vir_info.vir_name, 546 vm->vm_params.vmc_params.vcp_name, 547 VMM_MAX_NAME_LEN); 548 vir.vir_info.vir_memory_size = 549 vm->vm_params.vmc_params. 550 vcp_memranges[0].vmr_size; 551 vir.vir_info.vir_ncpus = 552 vm->vm_params.vmc_params.vcp_ncpus; 553 /* get the configured user id for this vm */ 554 vir.vir_uid = vm->vm_params.vmc_owner.uid; 555 vir.vir_gid = vm->vm_params.vmc_owner.gid; 556 log_debug("%s: vm: %d, vm_state: 0x%x", 557 __func__, vm->vm_vmid, vm->vm_state); 558 vir.vir_state = vm->vm_state; 559 if (proc_compose_imsg(ps, 560 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 561 PROC_AGENTX : PROC_CONTROL, -1, 562 IMSG_VMDOP_GET_INFO_VM_DATA, 563 imsg->hdr.peerid, -1, &vir, 564 sizeof(vir)) == -1) { 565 log_debug("%s: GET_INFO_VM_END failed", 566 __func__); 567 vm_terminate(vm, __func__); 568 return (-1); 569 } 570 } 571 } 572 IMSG_SIZE_CHECK(imsg, &res); 573 proc_forward_imsg(ps, imsg, 574 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 
575 PROC_AGENTX : PROC_CONTROL, -1); 576 break; 577 default: 578 return (-1); 579 } 580 581 return (0); 582} 583 584int 585vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg) 586{ 587 struct privsep *ps = p->p_ps; 588 589 switch (imsg->hdr.type) { 590 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 591 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 592 return (0); 593 default: 594 break; 595 } 596 return (-1); 597} 598 599int 600vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) 601{ 602 struct vmop_addr_result var; 603 604 switch (imsg->hdr.type) { 605 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 606 IMSG_SIZE_CHECK(imsg, &var); 607 memcpy(&var, imsg->data, sizeof(var)); 608 proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); 609 break; 610 default: 611 return (-1); 612 } 613 614 return (0); 615} 616 617int 618vmd_check_vmh(struct vm_dump_header *vmh) 619{ 620 int i; 621 unsigned int code, leaf; 622 unsigned int a, b, c, d; 623 624 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 625 log_warnx("%s: incompatible dump signature", __func__); 626 return (-1); 627 } 628 629 if (vmh->vmh_version != VM_DUMP_VERSION) { 630 log_warnx("%s: incompatible dump version", __func__); 631 return (-1); 632 } 633 634 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 635 code = vmh->vmh_cpuids[i].code; 636 leaf = vmh->vmh_cpuids[i].leaf; 637 if (leaf != 0x00) { 638 log_debug("%s: invalid leaf 0x%x for code 0x%x", 639 __func__, leaf, code); 640 return (-1); 641 } 642 643 switch (code) { 644 case 0x00: 645 CPUID_LEAF(code, leaf, a, b, c, d); 646 if (vmh->vmh_cpuids[i].a > a) { 647 log_debug("%s: incompatible cpuid level", 648 __func__); 649 return (-1); 650 } 651 if (!(vmh->vmh_cpuids[i].b == b && 652 vmh->vmh_cpuids[i].c == c && 653 vmh->vmh_cpuids[i].d == d)) { 654 log_debug("%s: incompatible cpu brand", 655 __func__); 656 return (-1); 657 } 658 break; 659 660 case 0x01: 661 CPUID_LEAF(code, leaf, a, b, c, d); 662 if 
((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 663 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 664 log_debug("%s: incompatible cpu features " 665 "code: 0x%x leaf: 0x%x reg: c", __func__, 666 code, leaf); 667 return (-1); 668 } 669 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 670 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 671 log_debug("%s: incompatible cpu features " 672 "code: 0x%x leaf: 0x%x reg: d", __func__, 673 code, leaf); 674 return (-1); 675 } 676 break; 677 678 case 0x07: 679 CPUID_LEAF(code, leaf, a, b, c, d); 680 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 681 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 682 log_debug("%s: incompatible cpu features " 683 "code: 0x%x leaf: 0x%x reg: c", __func__, 684 code, leaf); 685 return (-1); 686 } 687 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 688 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 689 log_debug("%s: incompatible cpu features " 690 "code: 0x%x leaf: 0x%x reg: d", __func__, 691 code, leaf); 692 return (-1); 693 } 694 break; 695 696 case 0x0d: 697 CPUID_LEAF(code, leaf, a, b, c, d); 698 if (vmh->vmh_cpuids[i].b > b) { 699 log_debug("%s: incompatible cpu: insufficient " 700 "max save area for enabled XCR0 features", 701 __func__); 702 return (-1); 703 } 704 if (vmh->vmh_cpuids[i].c > c) { 705 log_debug("%s: incompatible cpu: insufficient " 706 "max save area for supported XCR0 features", 707 __func__); 708 return (-1); 709 } 710 break; 711 712 case 0x80000001: 713 CPUID_LEAF(code, leaf, a, b, c, d); 714 if ((vmh->vmh_cpuids[i].a & a) != 715 vmh->vmh_cpuids[i].a) { 716 log_debug("%s: incompatible cpu features " 717 "code: 0x%x leaf: 0x%x reg: a", __func__, 718 code, leaf); 719 return (-1); 720 } 721 if ((vmh->vmh_cpuids[i].c & c) != 722 vmh->vmh_cpuids[i].c) { 723 log_debug("%s: incompatible cpu features " 724 "code: 0x%x leaf: 0x%x reg: c", __func__, 725 code, leaf); 726 return (-1); 727 } 728 if ((vmh->vmh_cpuids[i].d & d) != 729 vmh->vmh_cpuids[i].d) { 730 
log_debug("%s: incompatible cpu features " 731 "code: 0x%x leaf: 0x%x reg: d", __func__, 732 code, leaf); 733 return (-1); 734 } 735 break; 736 737 default: 738 log_debug("%s: unknown code 0x%x", __func__, code); 739 return (-1); 740 } 741 } 742 743 return (0); 744} 745 746void 747vmd_sighdlr(int sig, short event, void *arg) 748{ 749 if (privsep_process != PROC_PARENT) 750 return; 751 log_debug("%s: handling signal", __func__); 752 753 switch (sig) { 754 case SIGHUP: 755 log_info("%s: reload requested with SIGHUP", __func__); 756 757 /* 758 * This is safe because libevent uses async signal handlers 759 * that run in the event loop and not in signal context. 760 */ 761 (void)vmd_reload(0, NULL); 762 break; 763 case SIGPIPE: 764 log_info("%s: ignoring SIGPIPE", __func__); 765 break; 766 case SIGUSR1: 767 log_info("%s: ignoring SIGUSR1", __func__); 768 break; 769 case SIGTERM: 770 case SIGINT: 771 vmd_shutdown(); 772 break; 773 default: 774 fatalx("unexpected signal"); 775 } 776} 777 778__dead void 779usage(void) 780{ 781 extern char *__progname; 782 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 783 __progname); 784 exit(1); 785} 786 787int 788main(int argc, char **argv) 789{ 790 struct privsep *ps; 791 int ch; 792 enum privsep_procid proc_id = PROC_PARENT; 793 int proc_instance = 0, vm_launch = 0; 794 int vmm_fd = -1, vm_fd = -1; 795 const char *errp, *title = NULL; 796 int argc0 = argc; 797 char dev_type = '\0'; 798 799 log_init(0, LOG_DAEMON); 800 801 if ((env = calloc(1, sizeof(*env))) == NULL) 802 fatal("calloc: env"); 803 env->vmd_fd = -1; 804 env->vmd_fd6 = -1; 805 806 while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) { 807 switch (ch) { 808 case 'D': 809 if (cmdline_symset(optarg) < 0) 810 log_warnx("could not parse macro definition %s", 811 optarg); 812 break; 813 case 'd': 814 env->vmd_debug = 2; 815 break; 816 case 'f': 817 conffile = optarg; 818 break; 819 case 'v': 820 env->vmd_verbose++; 821 break; 822 /* vmd fork/exec 
*/ 823 case 'n': 824 env->vmd_noaction = 1; 825 break; 826 case 'P': 827 title = optarg; 828 proc_id = proc_getid(procs, nitems(procs), title); 829 if (proc_id == PROC_MAX) 830 fatalx("invalid process name"); 831 break; 832 case 'I': 833 proc_instance = strtonum(optarg, 0, 834 PROC_MAX_INSTANCES, &errp); 835 if (errp) 836 fatalx("invalid process instance"); 837 break; 838 /* child vm and device fork/exec */ 839 case 'p': 840 title = optarg; 841 break; 842 case 'V': 843 vm_launch = VMD_LAUNCH_VM; 844 vm_fd = strtonum(optarg, 0, 128, &errp); 845 if (errp) 846 fatalx("invalid vm fd"); 847 break; 848 case 'X': 849 vm_launch = VMD_LAUNCH_DEV; 850 vm_fd = strtonum(optarg, 0, 128, &errp); 851 if (errp) 852 fatalx("invalid device fd"); 853 break; 854 case 't': 855 dev_type = *optarg; 856 switch (dev_type) { 857 case VMD_DEVTYPE_NET: 858 case VMD_DEVTYPE_DISK: 859 break; 860 default: fatalx("invalid device type"); 861 } 862 break; 863 case 'i': 864 vmm_fd = strtonum(optarg, 0, 128, &errp); 865 if (errp) 866 fatalx("invalid vmm fd"); 867 break; 868 default: 869 usage(); 870 } 871 } 872 873 argc -= optind; 874 if (argc > 0) 875 usage(); 876 877 if (env->vmd_noaction && !env->vmd_debug) 878 env->vmd_debug = 1; 879 880 log_init(env->vmd_debug, LOG_DAEMON); 881 log_setverbose(env->vmd_verbose); 882 883 /* Re-exec from the vmm child process requires an absolute path. 
*/ 884 if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction) 885 fatalx("re-exec requires execution with an absolute path"); 886 env->argv0 = argv[0]; 887 888 /* check for root privileges */ 889 if (env->vmd_noaction == 0 && !vm_launch) { 890 if (geteuid()) 891 fatalx("need root privileges"); 892 } 893 894 ps = &env->vmd_ps; 895 ps->ps_env = env; 896 897 if (config_init(env) == -1) 898 fatal("failed to initialize configuration"); 899 900 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 901 fatal("unknown user %s", VMD_USER); 902 903 /* First proc runs as root without pledge but in default chroot */ 904 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 905 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 906 907 /* 908 * If we're launching a new vm or its device, we short out here. 909 */ 910 if (vm_launch == VMD_LAUNCH_VM) { 911 vm_main(vm_fd, vmm_fd); 912 /* NOTREACHED */ 913 } else if (vm_launch == VMD_LAUNCH_DEV) { 914 if (dev_type == VMD_DEVTYPE_NET) { 915 log_procinit("vm/%s/vionet", title); 916 vionet_main(vm_fd, vmm_fd); 917 /* NOTREACHED */ 918 } else if (dev_type == VMD_DEVTYPE_DISK) { 919 log_procinit("vm/%s/vioblk", title); 920 vioblk_main(vm_fd, vmm_fd); 921 /* NOTREACHED */ 922 } 923 fatalx("unsupported device type '%c'", dev_type); 924 } 925 926 /* Open /dev/vmm early. 
*/ 927 if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { 928 env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC); 929 if (env->vmd_fd == -1) 930 fatal("%s", VMM_NODE); 931 } 932 933 /* Configure the control socket */ 934 ps->ps_csock.cs_name = SOCKET_NAME; 935 TAILQ_INIT(&ps->ps_rcsocks); 936 937 /* Configuration will be parsed after forking the children */ 938 env->vmd_conffile = conffile; 939 940 if (env->vmd_noaction) 941 ps->ps_noaction = 1; 942 ps->ps_instance = proc_instance; 943 if (title != NULL) 944 ps->ps_title[proc_id] = title; 945 946 /* only the parent returns */ 947 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 948 proc_id); 949 950 if (ps->ps_noaction == 0) 951 log_info("startup"); 952 953 event_init(); 954 955 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 956 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 957 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 958 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 959 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 960 961 signal_add(&ps->ps_evsigint, NULL); 962 signal_add(&ps->ps_evsigterm, NULL); 963 signal_add(&ps->ps_evsighup, NULL); 964 signal_add(&ps->ps_evsigpipe, NULL); 965 signal_add(&ps->ps_evsigusr1, NULL); 966 967 if (!env->vmd_noaction) 968 proc_connect(ps); 969 970 if (vmd_configure() == -1) 971 fatalx("configuration failed"); 972 973 event_dispatch(); 974 975 log_debug("exiting"); 976 977 return (0); 978} 979 980void 981start_vm_batch(int fd, short type, void *args) 982{ 983 int i = 0; 984 struct vmd_vm *vm; 985 986 log_debug("%s: starting batch of %d vms", __func__, 987 env->vmd_cfg.parallelism); 988 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 989 if (!(vm->vm_state & VM_STATE_WAITING)) { 990 log_debug("%s: not starting vm %s (disabled)", 991 __func__, 992 vm->vm_params.vmc_params.vcp_name); 993 continue; 994 } 995 i++; 996 if (i > env->vmd_cfg.parallelism) { 997 evtimer_add(&staggered_start_timer, 998 &env->vmd_cfg.delay); 
999 break; 1000 } 1001 vm->vm_state &= ~VM_STATE_WAITING; 1002 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 1003 } 1004 log_debug("%s: done starting vms", __func__); 1005} 1006 1007int 1008vmd_configure(void) 1009{ 1010 int ncpus; 1011 struct vmd_switch *vsw; 1012 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 1013 size_t ncpus_sz = sizeof(ncpus); 1014 1015 /* 1016 * pledge in the parent process: 1017 * stdio - for malloc and basic I/O including events. 1018 * rpath - for reload to open and read the configuration files. 1019 * wpath - for opening disk images and tap devices. 1020 * tty - for openpty and TIOCUCNTL. 1021 * proc - run kill to terminate its children safely. 1022 * sendfd - for disks, interfaces and other fds. 1023 * recvfd - for send and receive. 1024 * getpw - lookup user or group id by name. 1025 * chown, fattr - change tty ownership 1026 * flock - locking disk files 1027 */ 1028 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 1029 " chown fattr flock", NULL) == -1) 1030 fatal("pledge"); 1031 1032 if ((env->vmd_ptmfd = getptmfd()) == -1) 1033 fatal("getptmfd %s", PATH_PTMDEV); 1034 1035 if (parse_config(env->vmd_conffile) == -1) { 1036 proc_kill(&env->vmd_ps); 1037 exit(1); 1038 } 1039 1040 if (env->vmd_noaction) { 1041 fprintf(stderr, "configuration OK\n"); 1042 proc_kill(&env->vmd_ps); 1043 exit(0); 1044 } 1045 1046 /* Send VMM device fd to vmm proc. 
*/ 1047 proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, 1048 IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0); 1049 1050 /* Send shared global configuration to all children */ 1051 if (config_setconfig(env) == -1) 1052 return (-1); 1053 1054 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1055 if (vsw->sw_running) 1056 continue; 1057 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1058 log_warn("%s: failed to create switch %s", 1059 __func__, vsw->sw_name); 1060 switch_remove(vsw); 1061 return (-1); 1062 } 1063 } 1064 1065 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 1066 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 1067 if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 1068 ncpus = 1; 1069 env->vmd_cfg.parallelism = ncpus; 1070 log_debug("%s: setting staggered start configuration to " 1071 "parallelism: %d and delay: %lld", 1072 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 1073 } 1074 1075 log_debug("%s: starting vms in staggered fashion", __func__); 1076 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1077 /* start first batch */ 1078 start_vm_batch(0, 0, NULL); 1079 1080 return (0); 1081} 1082 1083int 1084vmd_reload(unsigned int reset, const char *filename) 1085{ 1086 struct vmd_vm *vm, *next_vm; 1087 struct vmd_switch *vsw; 1088 int reload = 0; 1089 1090 /* Switch back to the default config file */ 1091 if (filename == NULL || *filename == '\0') { 1092 filename = env->vmd_conffile; 1093 reload = 1; 1094 } 1095 1096 log_debug("%s: level %d config file %s", __func__, reset, filename); 1097 1098 if (reset) { 1099 /* Purge the configuration */ 1100 config_purge(env, reset); 1101 config_setreset(env, reset); 1102 } else { 1103 /* 1104 * Load or reload the configuration. 1105 * 1106 * Reloading removes all non-running VMs before processing the 1107 * config file, whereas loading only adds to the existing list 1108 * of VMs. 
1109 */ 1110 1111 if (reload) { 1112 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 1113 next_vm) { 1114 if (!(vm->vm_state & VM_STATE_RUNNING)) { 1115 DPRINTF("%s: calling vm_remove", 1116 __func__); 1117 vm_remove(vm, __func__); 1118 } 1119 } 1120 } 1121 1122 if (parse_config(filename) == -1) { 1123 log_debug("%s: failed to load config file %s", 1124 __func__, filename); 1125 return (-1); 1126 } 1127 1128 if (reload) { 1129 /* Update shared global configuration in all children */ 1130 if (config_setconfig(env) == -1) 1131 return (-1); 1132 } 1133 1134 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1135 if (vsw->sw_running) 1136 continue; 1137 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1138 log_warn("%s: failed to create switch %s", 1139 __func__, vsw->sw_name); 1140 switch_remove(vsw); 1141 return (-1); 1142 } 1143 } 1144 1145 log_debug("%s: starting vms in staggered fashion", __func__); 1146 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1147 /* start first batch */ 1148 start_vm_batch(0, 0, NULL); 1149 1150 } 1151 1152 return (0); 1153} 1154 1155void 1156vmd_shutdown(void) 1157{ 1158 struct vmd_vm *vm, *vm_next; 1159 1160 log_debug("%s: performing shutdown", __func__); 1161 1162 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1163 vm_remove(vm, __func__); 1164 } 1165 1166 proc_kill(&env->vmd_ps); 1167 free(env); 1168 1169 log_warnx("terminating"); 1170 exit(0); 1171} 1172 1173struct vmd_vm * 1174vm_getbyvmid(uint32_t vmid) 1175{ 1176 struct vmd_vm *vm; 1177 1178 if (vmid == 0) 1179 return (NULL); 1180 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1181 if (vm->vm_vmid == vmid) 1182 return (vm); 1183 } 1184 1185 return (NULL); 1186} 1187 1188struct vmd_vm * 1189vm_getbyid(uint32_t id) 1190{ 1191 struct vmd_vm *vm; 1192 1193 if (id == 0) 1194 return (NULL); 1195 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1196 if (vm->vm_params.vmc_params.vcp_id == id) 1197 return (vm); 1198 } 1199 1200 return (NULL); 1201} 1202 1203uint32_t 
vm_id2vmid(uint32_t id, struct vmd_vm *vm)
{
	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
		return (0);
	DPRINTF("%s: vmm id %u is vmid %u", __func__,
	    id, vm->vm_vmid);
	return (vm->vm_vmid);
}

/* Map vmd's vmid to the vmm(4) id; returns 0 if the VM cannot be found. */
uint32_t
vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
{
	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
		return (0);
	DPRINTF("%s: vmid %u is vmm id %u", __func__,
	    vmid, vm->vm_params.vmc_params.vcp_id);
	return (vm->vm_params.vmc_params.vcp_id);
}

/* Look up a VM by name; returns NULL if name is NULL or no VM matches. */
struct vmd_vm *
vm_getbyname(const char *name)
{
	struct vmd_vm	*vm;

	if (name == NULL)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
			return (vm);
	}

	return (NULL);
}

/* Look up a VM by its child process id; returns NULL if none matches. */
struct vmd_vm *
vm_getbypid(pid_t pid)
{
	struct vmd_vm	*vm;

	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_pid == pid)
			return (vm);
	}

	return (NULL);
}

/*
 * vm_stop
 *
 * Stop a VM: clear its run state and close all of its file descriptors
 * (ipc event fd, disks, interfaces, kernel, cdrom).  If 'keeptty' is 0,
 * the VM's tty is released as well and the owner uid is reset.
 * Safe to call with vm == NULL.
 */
void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
	struct privsep	*ps = &env->vmd_ps;
	unsigned int	 i, j;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s stopping vm %d%s",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");

	vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING
	    | VM_STATE_SHUTDOWN);

	if (vm->vm_iev.ibuf.fd != -1) {
		event_del(&vm->vm_iev.ev);
		close(vm->vm_iev.ibuf.fd);
	}
	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) {
		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
			if (vm->vm_disks[i][j] != -1) {
				close(vm->vm_disks[i][j]);
				vm->vm_disks[i][j] = -1;
			}
		}
	}
	for (i = 0; i < VM_MAX_NICS_PER_VM; i++) {
		if (vm->vm_ifs[i].vif_fd != -1) {
			close(vm->vm_ifs[i].vif_fd);
			vm->vm_ifs[i].vif_fd = -1;
		}
		free(vm->vm_ifs[i].vif_name);
		free(vm->vm_ifs[i].vif_switch);
		free(vm->vm_ifs[i].vif_group);
		vm->vm_ifs[i].vif_name = NULL;
		vm->vm_ifs[i].vif_switch = NULL;
		vm->vm_ifs[i].vif_group = NULL;
	}
	if (vm->vm_kernel != -1) {
		close(vm->vm_kernel);
		vm->vm_kernel = -1;
	}
	if (vm->vm_cdrom != -1) {
		close(vm->vm_cdrom);
		vm->vm_cdrom = -1;
	}
	if (!keeptty) {
		vm_closetty(vm);
		vm->vm_uid = 0;
	}
}

/*
 * vm_remove
 *
 * Unlink a VM from the running configuration, stop it and free it.
 * Safe to call with vm == NULL.
 */
void
vm_remove(struct vmd_vm *vm, const char *caller)
{
	struct privsep	*ps = &env->vmd_ps;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s removing vm %d from running config",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid);

	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);

	vm_stop(vm, 0, caller);
	/* Config-owned kernel paths are freed with the config, not here. */
	if (vm->vm_kernel_path != NULL && !vm->vm_from_config)
		free(vm->vm_kernel_path);
	free(vm);
}

/*
 * vm_claimid
 *
 * Return a stable vmid for a (name, uid) pair, allocating and
 * remembering a new one on first use.  Returns 0 on success with *id
 * set, -1 on id exhaustion or allocation failure.
 */
int
vm_claimid(const char *name, int uid, uint32_t *id)
{
	struct name2id *n2i = NULL;

	/* Reuse a previously claimed id for the same name and owner. */
	TAILQ_FOREACH(n2i, env->vmd_known, entry)
		if (strcmp(n2i->name, name) == 0 && n2i->uid == uid)
			goto out;

	/* vmd_nvm wrapping to 0 means the 32-bit id space is exhausted. */
	if (++env->vmd_nvm == 0) {
		log_warnx("too many vms");
		return (-1);
	}
	if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) {
		log_warnx("could not alloc vm name");
		return (-1);
	}
	n2i->id = env->vmd_nvm;
	n2i->uid = uid;
	if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) {
		log_warnx("vm name too long");
		free(n2i);
		return (-1);
	}
	TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry);

out:
	*id = n2i->id;
	return (0);
}

/*
 * vm_register
 *
 * Validate a VM creation request and add the VM to the running
 * configuration.  On failure returns -1 with errno set (EPERM,
 * EALREADY, EINVAL, ...); on success returns 0 with *ret_vm set.
 */
int
vm_register(struct privsep *ps, struct vmop_create_params *vmc,
    struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
{
	struct vmd_vm		*vm = NULL, *vm_parent = NULL;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vmop_owner	*vmo = NULL;
	uint32_t		 nid, rng;
	unsigned int		 i, j;
	struct vmd_switch	*sw;
	char			*s;
	int			 ret = 0;

	/* Check if this is an instance of another VM */
	if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) {
		errno = ret; /* XXX might set invalid errno */
		return (-1);
	}

	errno = 0;
	*ret_vm = NULL;

	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    uid) != 0) {
			errno = EPERM;
			goto fail;
		}
		/* Existing VM: refresh its kernel fd for a restart. */
		vm->vm_kernel = vmc->vmc_kernel;
		*ret_vm = vm;
		errno = EALREADY;
		goto fail;
	}

	if (vm_parent != NULL)
		vmo = &vm_parent->vm_params.vmc_insowner;

	/* non-root users can only start existing VMs or instances */
	if (vm_checkperm(NULL, vmo, uid) != 0) {
		log_warnx("permission denied");
		errno = EPERM;
		goto fail;
	}
	if (vmc->vmc_flags == 0) {
		log_warnx("invalid configuration, no devices");
		errno = VMD_DISK_MISSING;
		goto fail;
	}
	if (vcp->vcp_ncpus == 0)
		vcp->vcp_ncpus = 1;
	if (vcp->vcp_memranges[0].vmr_size == 0)
		vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
		log_warnx("invalid number of CPUs");
		goto fail;
	}
else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM) {
		log_warnx("invalid number of disks");
		goto fail;
	} else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) {
		log_warnx("invalid number of interfaces");
		goto fail;
	} else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0
	    && strlen(vmc->vmc_cdrom) == 0) {
		log_warnx("no kernel or disk/cdrom specified");
		goto fail;
	} else if (strlen(vcp->vcp_name) == 0) {
		log_warnx("invalid VM name");
		goto fail;
	} else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
	    *vcp->vcp_name == '_') {
		/* Names may not begin with '-', '.' or '_'. */
		log_warnx("invalid VM name");
		goto fail;
	} else {
		/* Remaining characters: alphanumeric, '.', '-' or '_'. */
		for (s = vcp->vcp_name; *s != '\0'; ++s) {
			if (!(isalnum((unsigned char)*s) || *s == '.' || \
			    *s == '-' || *s == '_')) {
				log_warnx("invalid VM name");
				goto fail;
			}
		}
	}

	if ((vm = calloc(1, sizeof(*vm))) == NULL)
		goto fail;

	/* Copy the request and point vmc/vcp at the VM's own copy. */
	memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
	vmc = &vm->vm_params;
	vcp = &vmc->vmc_params;
	vm->vm_pid = -1;
	vm->vm_tty = -1;
	vm->vm_receive_fd = -1;
	vm->vm_kernel = -1;
	vm->vm_state &= ~VM_STATE_PAUSED;

	if (vmc->vmc_kernel > -1)
		vm->vm_kernel = vmc->vmc_kernel;

	/* Mark all fd slots as unused. */
	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++)
		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
			vm->vm_disks[i][j] = -1;
	for (i = 0; i < VM_MAX_NICS_PER_VM; i++)
		vm->vm_ifs[i].vif_fd = -1;
	for (i = 0; i < vmc->vmc_nnics; i++) {
		if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
			/* inherit per-interface flags from the switch */
			vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
		}

		/*
		 * If the MAC address is zero, always randomize it in vmd(8)
		 * because we cannot rely on the guest OS to do the right
		 * thing like OpenBSD does.  Based on ether_fakeaddr()
		 * from the kernel, incremented by one to differentiate
		 * the source.
		 */
		if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN) == 0) {
			rng = arc4random();
			vmc->vmc_macs[i][0] = 0xfe;
			vmc->vmc_macs[i][1] = 0xe1;
			vmc->vmc_macs[i][2] = 0xba + 1;
			vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
			vmc->vmc_macs[i][4] = rng;
			vmc->vmc_macs[i][5] = rng >> 8;
		}
	}
	vm->vm_cdrom = -1;
	vm->vm_iev.ibuf.fd = -1;

	/*
	 * Assign a new internal Id if not specified and we succeed in
	 * claiming a new Id.
	 */
	if (id != 0)
		vm->vm_vmid = id;
	else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1)
		goto fail;
	else
		vm->vm_vmid = nid;

	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);

	*ret_vm = vm;
	return (0);
 fail:
	if (errno == 0)
		errno = EINVAL;
	return (-1);
}

/*
 * vm_instance
 *
 * If the request names a parent VM, inherit the parent's settings into
 * 'vmc' subject to the parent's instance permissions.  Returns 0 when
 * there is nothing to inherit or inheritance succeeded, otherwise an
 * error code for the caller to place into errno.
 */
int
vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
    struct vmop_create_params *vmc, uid_t uid)
{
	char			*name;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vmop_create_params *vmcp;
	struct vm_create_params	*vcpp;
	unsigned int		 i, j;

	/* return without error if the parent is NULL (nothing to inherit) */
	if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 ||
	    vmc->vmc_instance[0] == '\0')
		return (0);

	if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) {
		return (VMD_PARENT_INVALID);
	}

	vmcp = &(*vm_parent)->vm_params;
	vcpp = &vmcp->vmc_params;

	/* Are we allowed to create an instance from this VM?
*/ 1530 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1531 log_warnx("vm \"%s\" no permission to create vm instance", 1532 vcpp->vcp_name); 1533 return (ENAMETOOLONG); 1534 } 1535 1536 name = vcp->vcp_name; 1537 1538 if (vm_getbyname(vcp->vcp_name) != NULL || 1539 vm_getbyvmid(vcp->vcp_id) != NULL) { 1540 return (EPROCLIM); 1541 } 1542 1543 /* CPU */ 1544 if (vcp->vcp_ncpus == 0) 1545 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1546 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1547 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1548 log_warnx("vm \"%s\" no permission to set cpus", name); 1549 return (EPERM); 1550 } 1551 1552 /* memory */ 1553 if (vcp->vcp_memranges[0].vmr_size == 0) 1554 vcp->vcp_memranges[0].vmr_size = 1555 vcpp->vcp_memranges[0].vmr_size; 1556 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1557 vcp->vcp_memranges[0].vmr_size != 1558 vcpp->vcp_memranges[0].vmr_size) { 1559 log_warnx("vm \"%s\" no permission to set memory", name); 1560 return (EPERM); 1561 } 1562 1563 /* disks cannot be inherited */ 1564 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1565 vmc->vmc_ndisks) { 1566 log_warnx("vm \"%s\" no permission to set disks", name); 1567 return (EPERM); 1568 } 1569 for (i = 0; i < vmc->vmc_ndisks; i++) { 1570 /* Check if this disk is already used in the parent */ 1571 for (j = 0; j < vmcp->vmc_ndisks; j++) { 1572 if (strcmp(vmc->vmc_disks[i], 1573 vmcp->vmc_disks[j]) == 0) { 1574 log_warnx("vm \"%s\" disk %s cannot be reused", 1575 name, vmc->vmc_disks[i]); 1576 return (EBUSY); 1577 } 1578 } 1579 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1580 } 1581 1582 /* interfaces */ 1583 if (vmc->vmc_nnics > 0 && 1584 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1585 vmc->vmc_nnics != vmcp->vmc_nnics) { 1586 log_warnx("vm \"%s\" no permission to set interfaces", name); 1587 return (EPERM); 1588 } 1589 for (i = 0; i < vmcp->vmc_nnics; i++) { 1590 /* Interface got overwritten */ 1591 if (i < vmc->vmc_nnics) 1592 continue; 
1593 1594 /* Copy interface from parent */ 1595 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1596 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1597 sizeof(vmc->vmc_ifnames[i])); 1598 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1599 sizeof(vmc->vmc_ifswitch[i])); 1600 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1601 sizeof(vmc->vmc_ifgroup[i])); 1602 memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i], 1603 sizeof(vmc->vmc_macs[i])); 1604 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1605 vmc->vmc_nnics++; 1606 } 1607 for (i = 0; i < vmc->vmc_nnics; i++) { 1608 for (j = 0; j < vmcp->vmc_nnics; j++) { 1609 if (memcmp(zero_mac, vmc->vmc_macs[i], 1610 sizeof(vmc->vmc_macs[i])) != 0 && 1611 memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i], 1612 sizeof(vmc->vmc_macs[i])) != 0) { 1613 log_warnx("vm \"%s\" lladdr cannot be reused", 1614 name); 1615 return (EBUSY); 1616 } 1617 if (strlen(vmc->vmc_ifnames[i]) && 1618 strcmp(vmc->vmc_ifnames[i], 1619 vmcp->vmc_ifnames[j]) == 0) { 1620 log_warnx("vm \"%s\" %s cannot be reused", 1621 vmc->vmc_ifnames[i], name); 1622 return (EBUSY); 1623 } 1624 } 1625 } 1626 1627 /* kernel */ 1628 if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL && 1629 strnlen((*vm_parent)->vm_kernel_path, PATH_MAX) < PATH_MAX)) { 1630 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1631 log_warnx("vm \"%s\" no permission to set boot image", 1632 name); 1633 return (EPERM); 1634 } 1635 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1636 } 1637 1638 /* cdrom */ 1639 if (strlen(vmc->vmc_cdrom) > 0) { 1640 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1641 log_warnx("vm \"%s\" no permission to set cdrom", name); 1642 return (EPERM); 1643 } 1644 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1645 } else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom, 1646 sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) { 1647 log_warnx("vm \"%s\" cdrom name too long", name); 1648 return (EINVAL); 1649 } 1650 1651 /* user */ 
1652 if (vmc->vmc_owner.uid == 0) 1653 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1654 else if (vmc->vmc_owner.uid != uid && 1655 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1656 log_warnx("vm \"%s\" user mismatch", name); 1657 return (EPERM); 1658 } 1659 1660 /* group */ 1661 if (vmc->vmc_owner.gid == 0) 1662 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1663 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1664 log_warnx("vm \"%s\" group mismatch", name); 1665 return (EPERM); 1666 } 1667 1668 /* child instances */ 1669 if (vmc->vmc_insflags) { 1670 log_warnx("vm \"%s\" cannot change instance permissions", name); 1671 return (EPERM); 1672 } 1673 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1674 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1675 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1676 vmc->vmc_insflags = vmcp->vmc_insflags; 1677 } else { 1678 vmc->vmc_insowner.gid = 0; 1679 vmc->vmc_insowner.uid = 0; 1680 vmc->vmc_insflags = 0; 1681 } 1682 1683 /* finished, remove instance flags */ 1684 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1685 1686 return (0); 1687} 1688 1689/* 1690 * vm_checkperm 1691 * 1692 * Checks if the user represented by the 'uid' parameter is allowed to 1693 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1694 * console.) 
1695 * 1696 * Parameters: 1697 * vm: the VM whose permission is to be checked 1698 * vmo: the required uid/gid to be checked 1699 * uid: the user ID of the user making the request 1700 * 1701 * Return values: 1702 * 0: the permission should be granted 1703 * -1: the permission check failed (also returned if vm == null) 1704 */ 1705int 1706vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1707{ 1708 struct group *gr; 1709 struct passwd *pw; 1710 char **grmem; 1711 1712 /* root has no restrictions */ 1713 if (uid == 0) 1714 return (0); 1715 1716 if (vmo == NULL) 1717 return (-1); 1718 1719 /* check user */ 1720 if (vm == NULL) { 1721 if (vmo->uid == uid) 1722 return (0); 1723 } else { 1724 /* 1725 * check user of running vm (the owner of a running vm can 1726 * be different to (or more specific than) the configured owner. 1727 */ 1728 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1729 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1730 return (0); 1731 } 1732 1733 /* check groups */ 1734 if (vmo->gid != -1) { 1735 if ((pw = getpwuid(uid)) == NULL) 1736 return (-1); 1737 if (pw->pw_gid == vmo->gid) 1738 return (0); 1739 if ((gr = getgrgid(vmo->gid)) != NULL) { 1740 for (grmem = gr->gr_mem; *grmem; grmem++) 1741 if (strcmp(*grmem, pw->pw_name) == 0) 1742 return (0); 1743 } 1744 } 1745 1746 return (-1); 1747} 1748 1749/* 1750 * vm_checkinsflag 1751 * 1752 * Checks whether the non-root user is allowed to set an instance option. 
1753 * 1754 * Parameters: 1755 * vmc: the VM create parameters 1756 * flag: the flag to be checked 1757 * uid: the user ID of the user making the request 1758 * 1759 * Return values: 1760 * 0: the permission should be granted 1761 * -1: the permission check failed (also returned if vm == null) 1762 */ 1763int 1764vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1765{ 1766 /* root has no restrictions */ 1767 if (uid == 0) 1768 return (0); 1769 1770 if ((vmc->vmc_insflags & flag) == 0) 1771 return (-1); 1772 1773 return (0); 1774} 1775 1776/* 1777 * vm_checkaccess 1778 * 1779 * Checks if the user represented by the 'uid' parameter is allowed to 1780 * access the file described by the 'path' parameter. 1781 * 1782 * Parameters: 1783 * fd: the file descriptor of the opened file 1784 * uflag: check if the userid has access to the file 1785 * uid: the user ID of the user making the request 1786 * amode: the access flags of R_OK and W_OK 1787 * 1788 * Return values: 1789 * 0: the permission should be granted 1790 * -1: the permission check failed 1791 */ 1792int 1793vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1794{ 1795 struct group *gr; 1796 struct passwd *pw; 1797 char **grmem; 1798 struct stat st; 1799 mode_t mode; 1800 1801 if (fd == -1) 1802 return (-1); 1803 1804 /* 1805 * File has to be accessible and a regular file 1806 */ 1807 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1808 return (-1); 1809 1810 /* root has no restrictions */ 1811 if (uid == 0 || uflag == 0) 1812 return (0); 1813 1814 /* check other */ 1815 mode = amode & W_OK ? S_IWOTH : 0; 1816 mode |= amode & R_OK ? S_IROTH : 0; 1817 if ((st.st_mode & mode) == mode) 1818 return (0); 1819 1820 /* check user */ 1821 mode = amode & W_OK ? S_IWUSR : 0; 1822 mode |= amode & R_OK ? S_IRUSR : 0; 1823 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1824 return (0); 1825 1826 /* check groups */ 1827 mode = amode & W_OK ? 
S_IWGRP : 0;
	mode |= amode & R_OK ? S_IRGRP : 0;
	if ((st.st_mode & mode) != mode)
		return (-1);
	if ((pw = getpwuid(uid)) == NULL)
		return (-1);
	if (pw->pw_gid == st.st_gid)
		return (0);
	/* also accept supplementary membership in the file's group */
	if ((gr = getgrgid(st.st_gid)) != NULL) {
		for (grmem = gr->gr_mem; *grmem; grmem++)
			if (strcmp(*grmem, pw->pw_name) == 0)
				return (0);
	}

	return (-1);
}

/*
 * vm_opentty
 *
 * Allocate a pty for the VM console, enable user ioctl mode on it and
 * set its ownership/mode for the VM owner.  Returns 0 on success, -1 on
 * failure (the tty is released again via vm_closetty).
 */
int
vm_opentty(struct vmd_vm *vm)
{
	struct stat	 st;
	struct group	*gr;
	uid_t		 uid;
	gid_t		 gid;
	mode_t		 mode;
	int		 on = 1, tty_slave;

	/*
	 * Open tty with pre-opened PTM fd
	 */
	if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname,
	    NULL, NULL) == -1) {
		log_warn("fdopenpty");
		return (-1);
	}
	/* Only the master side is kept; the slave is for the VM process. */
	close(tty_slave);

	/*
	 * We use user ioctl(2) mode to pass break commands.
	 */
	if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) {
		log_warn("could not enable user ioctl mode on %s",
		    vm->vm_ttyname);
		goto fail;
	}

	uid = vm->vm_uid;
	gid = vm->vm_params.vmc_owner.gid;

	/* Owner group if configured, else the "tty" group, else root. */
	if (vm->vm_params.vmc_owner.gid != -1) {
		mode = 0660;
	} else if ((gr = getgrnam("tty")) != NULL) {
		gid = gr->gr_gid;
		mode = 0620;
	} else {
		mode = 0600;
		gid = 0;
	}

	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
	    __func__, vm->vm_params.vmc_params.vcp_name,
	    vm->vm_ttyname, uid, gid, mode);

	/*
	 * Change ownership and mode of the tty as required.
	 * Loosely based on the implementation of sshpty.c
	 */
	if (fstat(vm->vm_tty, &st) == -1) {
		log_warn("fstat failed for %s", vm->vm_ttyname);
		goto fail;
	}

	if (st.st_uid != uid || st.st_gid != gid) {
		if (chown(vm->vm_ttyname, uid, gid) == -1) {
			log_warn("chown %s %d %d failed, uid %d",
			    vm->vm_ttyname, uid, gid, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
		if (chmod(vm->vm_ttyname, mode) == -1) {
			log_warn("chmod %s %o failed, uid %d",
			    vm->vm_ttyname, mode, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	return (0);
 fail:
	vm_closetty(vm);
	return (-1);
}

/*
 * vm_closetty
 *
 * Return the VM's tty to root ownership and default mode, close it and
 * clear the recorded tty name.
 */
void
vm_closetty(struct vmd_vm *vm)
{
	if (vm->vm_tty != -1) {
		/* Release and close the tty */
		if (fchown(vm->vm_tty, 0, 0) == -1)
			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
		if (fchmod(vm->vm_tty, 0666) == -1)
			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
		close(vm->vm_tty);
		vm->vm_tty = -1;
	}
	memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname));
}

/* Unlink a switch from the configuration and free it; NULL is a no-op. */
void
switch_remove(struct vmd_switch *vsw)
{
	if (vsw == NULL)
		return;

	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);

	free(vsw->sw_group);
	free(vsw->sw_name);
	free(vsw);
}

/* Look up a switch by name; returns NULL if name is NULL or not found. */
struct vmd_switch *
switch_getbyname(const char *name)
{
	struct vmd_switch	*vsw;

	if (name == NULL)
		return (NULL);
	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (strcmp(vsw->sw_name, name) == 0)
			return (vsw);
	}

	return (NULL);
}

/*
 * Copy the leading printable characters of ptr (at most len bytes) into
 * a newly allocated NUL-terminated string.  Caller frees the result.
 */
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 i;

	for (i = 0; i < len; i++)
		if (!isprint((unsigned char)ptr[i]))
			break;

	return strndup(ptr, i);
}

/* Convert an IPv4 prefix length (clamped to 32) to a network-order mask. */
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	if (prefixlen == 0)
		return (0);

	if (prefixlen > 32)
		prefixlen = 32;

	return (htonl(0xffffffff << (32 - prefixlen)));
}

/* Convert an IPv6 prefix length (clamped to 128) to a netmask in *mask. */
void
prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
{
	struct in6_addr	 s6;
	int		 i;

	if (prefixlen > 128)
		prefixlen = 128;

	memset(&s6, 0, sizeof(s6));
	/* Whole 0xff bytes first, then the partial leading-bits byte. */
	for (i = 0; i < prefixlen / 8; i++)
		s6.s6_addr[i] = 0xff;
	i = prefixlen % 8;
	if (i)
		s6.s6_addr[prefixlen / 8] = 0xff00 >> i;

	memcpy(mask, &s6, sizeof(s6));
}

/* Fetch the monotonic clock as a timeval; fatal on clock_gettime failure. */
void
getmonotime(struct timeval *tv)
{
	struct timespec	 ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts))
		fatal("clock_gettime");

	TIMESPEC_TO_TIMEVAL(tv, &ts);
}

/*
 * Stop a VM; config-defined VMs are kept in the list (stopped only),
 * ad-hoc VMs are removed entirely.
 */
static inline void
vm_terminate(struct vmd_vm *vm, const char *caller)
{
	if (vm->vm_from_config)
		vm_stop(vm, 0, caller);
	else {
		/* vm_remove calls vm_stop */
		vm_remove(vm, caller);
	}
}

/*
 * Utility function for closing vm file descriptors. Assumes an fd of -1 was
 * already closed or never opened.
 *
 * Returns 0 on success, otherwise -1 on failure.
 */
int
close_fd(int fd)
{
	int	 ret;

	if (fd == -1)
		return (0);

#ifdef POSIX_CLOSE_RESTART
	/* Retry on EINTR only where close(2) is restartable. */
	do { ret = close(fd); } while (ret == -1 && errno == EINTR);
#else
	ret = close(fd);
#endif /* POSIX_CLOSE_RESTART */

	if (ret == -1 && errno == EIO)
		log_warn("%s(%d)", __func__, fd);

	return (ret);
}