/* vmd.c — OpenBSD revision 1.157 */
1/* $OpenBSD: vmd.c,v 1.157 2024/05/18 06:45:00 jsg Exp $ */ 2 3/* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/types.h> 20#include <sys/queue.h> 21#include <sys/wait.h> 22#include <sys/stat.h> 23#include <sys/sysctl.h> 24#include <sys/tty.h> 25#include <sys/ttycom.h> 26#include <sys/ioctl.h> 27 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31#include <termios.h> 32#include <errno.h> 33#include <event.h> 34#include <fcntl.h> 35#include <pwd.h> 36#include <signal.h> 37#include <syslog.h> 38#include <unistd.h> 39#include <util.h> 40#include <ctype.h> 41#include <grp.h> 42 43#include <machine/specialreg.h> 44#include <machine/vmmvar.h> 45 46#include "proc.h" 47#include "atomicio.h" 48#include "vmd.h" 49 50__dead void usage(void); 51 52int main(int, char **); 53int vmd_configure(void); 54void vmd_sighdlr(int sig, short event, void *arg); 55void vmd_shutdown(void); 56int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 57int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 58int vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *); 59int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); 60int vmd_check_vmh(struct vm_dump_header *); 61 
62int vm_instance(struct privsep *, struct vmd_vm **, 63 struct vmop_create_params *, uid_t); 64int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 65int vm_claimid(const char *, int, uint32_t *); 66void start_vm_batch(int, short, void*); 67 68static inline void vm_terminate(struct vmd_vm *, const char *); 69 70struct vmd *env; 71 72static struct privsep_proc procs[] = { 73 /* Keep "priv" on top as procs[0] */ 74 { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, 75 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 76 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, 77 vmm_shutdown, "/" }, 78 { "agentx", PROC_AGENTX, vmd_dispatch_agentx, vm_agentx, 79 vm_agentx_shutdown, "/" } 80}; 81 82enum privsep_procid privsep_process; 83 84struct event staggered_start_timer; 85 86/* For the privileged process */ 87static struct privsep_proc *proc_priv = &procs[0]; 88static struct passwd proc_privpw; 89static const uint8_t zero_mac[ETHER_ADDR_LEN]; 90 91const char default_conffile[] = VMD_CONF; 92const char *conffile = default_conffile; 93 94int 95vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 96{ 97 struct privsep *ps = p->p_ps; 98 int res = 0, ret = 0, cmd = 0, verbose; 99 int ifd; 100 unsigned int v = 0, flags; 101 struct vmop_create_params vmc; 102 struct vmop_id vid; 103 struct vmop_result vmr; 104 struct vm_dump_header vmh; 105 struct vmd_vm *vm = NULL; 106 char *str = NULL; 107 uint32_t id = 0; 108 struct control_sock *rcs; 109 110 switch (imsg->hdr.type) { 111 case IMSG_VMDOP_START_VM_REQUEST: 112 IMSG_SIZE_CHECK(imsg, &vmc); 113 memcpy(&vmc, imsg->data, sizeof(vmc)); 114 vmc.vmc_kernel = imsg_get_fd(imsg); 115 116 /* Try registering our VM in our list of known VMs. */ 117 if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) { 118 res = errno; 119 120 /* Did we have a failure during lookup of a parent? */ 121 if (vm == NULL) { 122 cmd = IMSG_VMDOP_START_VM_RESPONSE; 123 break; 124 } 125 126 /* Does the VM already exist? 
*/ 127 if (res == EALREADY) { 128 /* Is it already running? */ 129 if (vm->vm_state & VM_STATE_RUNNING) { 130 cmd = IMSG_VMDOP_START_VM_RESPONSE; 131 break; 132 } 133 134 /* If not running, are our flags ok? */ 135 if (vmc.vmc_flags && 136 vmc.vmc_flags != VMOP_CREATE_KERNEL) { 137 cmd = IMSG_VMDOP_START_VM_RESPONSE; 138 break; 139 } 140 } 141 res = 0; 142 } 143 144 /* Try to start the launch of the VM. */ 145 res = config_setvm(ps, vm, imsg->hdr.peerid, 146 vm->vm_params.vmc_owner.uid); 147 if (res) 148 cmd = IMSG_VMDOP_START_VM_RESPONSE; 149 break; 150 case IMSG_VMDOP_WAIT_VM_REQUEST: 151 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 152 IMSG_SIZE_CHECK(imsg, &vid); 153 memcpy(&vid, imsg->data, sizeof(vid)); 154 flags = vid.vid_flags; 155 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 156 157 if ((id = vid.vid_id) == 0) { 158 /* Lookup vm (id) by name */ 159 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 160 res = ENOENT; 161 break; 162 } 163 id = vm->vm_vmid; 164 } else if ((vm = vm_getbyvmid(id)) == NULL) { 165 res = ENOENT; 166 break; 167 } 168 169 /* Validate curent state of vm */ 170 if ((vm->vm_state & VM_STATE_SHUTDOWN) && 171 (flags & VMOP_FORCE) == 0) { 172 res = EALREADY; 173 break; 174 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 175 res = EINVAL; 176 break; 177 } else if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { 178 res = EPERM; 179 break; 180 } 181 182 /* Only relay TERMINATION requests, not WAIT requests */ 183 if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { 184 memset(&vid, 0, sizeof(vid)); 185 vid.vid_id = id; 186 vid.vid_flags = flags; 187 188 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 189 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 190 return (-1); 191 } 192 break; 193 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 194 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 195 break; 196 case IMSG_VMDOP_LOAD: 197 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 198 str = get_string((uint8_t *)imsg->data, 199 
IMSG_DATA_SIZE(imsg)); 200 case IMSG_VMDOP_RELOAD: 201 if (vmd_reload(0, str) == -1) 202 cmd = IMSG_CTL_FAIL; 203 else 204 cmd = IMSG_CTL_OK; 205 free(str); 206 break; 207 case IMSG_CTL_RESET: 208 IMSG_SIZE_CHECK(imsg, &v); 209 memcpy(&v, imsg->data, sizeof(v)); 210 if (vmd_reload(v, NULL) == -1) 211 cmd = IMSG_CTL_FAIL; 212 else 213 cmd = IMSG_CTL_OK; 214 break; 215 case IMSG_CTL_VERBOSE: 216 IMSG_SIZE_CHECK(imsg, &verbose); 217 memcpy(&verbose, imsg->data, sizeof(verbose)); 218 log_setverbose(verbose); 219 220 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 221 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 222 cmd = IMSG_CTL_OK; 223 break; 224 case IMSG_VMDOP_PAUSE_VM: 225 case IMSG_VMDOP_UNPAUSE_VM: 226 IMSG_SIZE_CHECK(imsg, &vid); 227 memcpy(&vid, imsg->data, sizeof(vid)); 228 if (vid.vid_id == 0) { 229 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 230 res = ENOENT; 231 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 232 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 233 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 234 break; 235 } else { 236 vid.vid_id = vm->vm_vmid; 237 } 238 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 239 res = ENOENT; 240 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 241 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 242 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 243 break; 244 } 245 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 246 vid.vid_uid) != 0) { 247 res = EPERM; 248 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 249 ? 
IMSG_VMDOP_PAUSE_VM_RESPONSE 250 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 251 break; 252 } 253 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 254 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 255 break; 256 case IMSG_VMDOP_SEND_VM_REQUEST: 257 IMSG_SIZE_CHECK(imsg, &vid); 258 memcpy(&vid, imsg->data, sizeof(vid)); 259 id = vid.vid_id; 260 ifd = imsg_get_fd(imsg); 261 if (vid.vid_id == 0) { 262 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 263 res = ENOENT; 264 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 265 close(ifd); 266 break; 267 } else { 268 vid.vid_id = vm->vm_vmid; 269 } 270 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 271 res = ENOENT; 272 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 273 close(ifd); 274 break; 275 } 276 vmr.vmr_id = vid.vid_id; 277 log_debug("%s: sending fd to vmm", __func__); 278 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 279 imsg->hdr.peerid, ifd, &vid, sizeof(vid)); 280 break; 281 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 282 IMSG_SIZE_CHECK(imsg, &vid); 283 memcpy(&vid, imsg->data, sizeof(vid)); 284 ifd = imsg_get_fd(imsg); 285 if (ifd == -1) { 286 log_warnx("%s: invalid fd", __func__); 287 return (-1); 288 } 289 if (atomicio(read, ifd, &vmh, sizeof(vmh)) != sizeof(vmh)) { 290 log_warnx("%s: error reading vmh from received vm", 291 __func__); 292 res = EIO; 293 close(ifd); 294 cmd = IMSG_VMDOP_START_VM_RESPONSE; 295 break; 296 } 297 298 if (vmd_check_vmh(&vmh)) { 299 res = ENOENT; 300 close(ifd); 301 cmd = IMSG_VMDOP_START_VM_RESPONSE; 302 break; 303 } 304 if (atomicio(read, ifd, &vmc, sizeof(vmc)) != sizeof(vmc)) { 305 log_warnx("%s: error reading vmc from received vm", 306 __func__); 307 res = EIO; 308 close(ifd); 309 cmd = IMSG_VMDOP_START_VM_RESPONSE; 310 break; 311 } 312 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 313 sizeof(vmc.vmc_params.vcp_name)); 314 vmc.vmc_params.vcp_id = 0; 315 316 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 317 if (ret != 0) { 318 res = errno; 319 cmd = IMSG_VMDOP_START_VM_RESPONSE; 320 close(ifd); 
321 } else { 322 vm->vm_state |= VM_STATE_RECEIVED; 323 config_setvm(ps, vm, imsg->hdr.peerid, 324 vmc.vmc_owner.uid); 325 log_debug("%s: sending fd to vmm", __func__); 326 proc_compose_imsg(ps, PROC_VMM, -1, 327 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, ifd, 328 NULL, 0); 329 } 330 break; 331 case IMSG_VMDOP_DONE: 332 control_reset(&ps->ps_csock); 333 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 334 control_reset(rcs); 335 cmd = 0; 336 break; 337 default: 338 return (-1); 339 } 340 341 switch (cmd) { 342 case 0: 343 break; 344 case IMSG_VMDOP_START_VM_RESPONSE: 345 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 346 memset(&vmr, 0, sizeof(vmr)); 347 vmr.vmr_result = res; 348 vmr.vmr_id = id; 349 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 350 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 351 return (-1); 352 break; 353 default: 354 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 355 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 356 return (-1); 357 break; 358 } 359 360 return (0); 361} 362 363int 364vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 365{ 366 struct vmop_result vmr; 367 struct privsep *ps = p->p_ps; 368 int res = 0; 369 struct vmd_vm *vm; 370 struct vm_create_params *vcp; 371 struct vmop_info_result vir; 372 373 switch (imsg->hdr.type) { 374 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 375 IMSG_SIZE_CHECK(imsg, &vmr); 376 memcpy(&vmr, imsg->data, sizeof(vmr)); 377 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 378 break; 379 proc_compose_imsg(ps, PROC_CONTROL, -1, 380 imsg->hdr.type, imsg->hdr.peerid, -1, 381 imsg->data, sizeof(imsg->data)); 382 log_info("%s: paused vm %d successfully", 383 vm->vm_params.vmc_params.vcp_name, 384 vm->vm_vmid); 385 vm->vm_state |= VM_STATE_PAUSED; 386 break; 387 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 388 IMSG_SIZE_CHECK(imsg, &vmr); 389 memcpy(&vmr, imsg->data, sizeof(vmr)); 390 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 391 break; 392 proc_compose_imsg(ps, PROC_CONTROL, -1, 393 imsg->hdr.type, imsg->hdr.peerid, 
-1, 394 imsg->data, sizeof(imsg->data)); 395 log_info("%s: unpaused vm %d successfully.", 396 vm->vm_params.vmc_params.vcp_name, 397 vm->vm_vmid); 398 vm->vm_state &= ~VM_STATE_PAUSED; 399 break; 400 case IMSG_VMDOP_START_VM_RESPONSE: 401 IMSG_SIZE_CHECK(imsg, &vmr); 402 memcpy(&vmr, imsg->data, sizeof(vmr)); 403 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 404 break; 405 vm->vm_pid = vmr.vmr_pid; 406 vcp = &vm->vm_params.vmc_params; 407 vcp->vcp_id = vmr.vmr_id; 408 409 /* 410 * If the peerid is not -1, forward the response back to the 411 * the control socket. If it is -1, the request originated 412 * from the parent, not the control socket. 413 */ 414 if (vm->vm_peerid != (uint32_t)-1) { 415 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 416 sizeof(vmr.vmr_ttyname)); 417 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 418 imsg->hdr.type, vm->vm_peerid, -1, 419 &vmr, sizeof(vmr)) == -1) { 420 errno = vmr.vmr_result; 421 log_warn("%s: failed to forward vm result", 422 vcp->vcp_name); 423 vm_terminate(vm, __func__); 424 return (-1); 425 } 426 } 427 428 if (vmr.vmr_result) { 429 log_warnx("%s: failed to start vm", vcp->vcp_name); 430 vm_terminate(vm, __func__); 431 errno = vmr.vmr_result; 432 break; 433 } 434 435 /* Now configure all the interfaces */ 436 if (vm_priv_ifconfig(ps, vm) == -1) { 437 log_warn("%s: failed to configure vm", vcp->vcp_name); 438 vm_terminate(vm, __func__); 439 break; 440 } 441 442 log_info("started %s (vm %d) successfully, tty %s", 443 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 444 break; 445 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 446 IMSG_SIZE_CHECK(imsg, &vmr); 447 memcpy(&vmr, imsg->data, sizeof(vmr)); 448 449 if (vmr.vmr_result) { 450 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 451 __func__, vmr.vmr_id); 452 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 453 } else { 454 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 455 break; 456 /* Mark VM as shutting down */ 457 vm->vm_state |= VM_STATE_SHUTDOWN; 458 } 459 break; 460 case 
IMSG_VMDOP_SEND_VM_RESPONSE: 461 IMSG_SIZE_CHECK(imsg, &vmr); 462 memcpy(&vmr, imsg->data, sizeof(vmr)); 463 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 464 break; 465 if (!vmr.vmr_result) { 466 log_info("%s: sent vm %d successfully.", 467 vm->vm_params.vmc_params.vcp_name, 468 vm->vm_vmid); 469 vm_terminate(vm, __func__); 470 } 471 472 /* Send a response if a control client is waiting for it */ 473 if (imsg->hdr.peerid != (uint32_t)-1) { 474 /* the error is meaningless for deferred responses */ 475 vmr.vmr_result = 0; 476 477 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 478 IMSG_VMDOP_SEND_VM_RESPONSE, 479 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 480 return (-1); 481 } 482 break; 483 case IMSG_VMDOP_TERMINATE_VM_EVENT: 484 IMSG_SIZE_CHECK(imsg, &vmr); 485 memcpy(&vmr, imsg->data, sizeof(vmr)); 486 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 487 __func__, vmr.vmr_id, vmr.vmr_result); 488 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 489 log_debug("%s: vm %d is no longer available", 490 __func__, vmr.vmr_id); 491 break; 492 } 493 if (vmr.vmr_result != EAGAIN || 494 vm->vm_params.vmc_bootdevice) { 495 vm_terminate(vm, __func__); 496 } else { 497 /* Stop VM instance but keep the tty open */ 498 vm_stop(vm, 1, __func__); 499 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 500 } 501 502 /* The error is meaningless for deferred responses */ 503 vmr.vmr_result = 0; 504 505 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 506 IMSG_VMDOP_TERMINATE_VM_EVENT, 507 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 508 return (-1); 509 break; 510 case IMSG_VMDOP_GET_INFO_VM_DATA: 511 IMSG_SIZE_CHECK(imsg, &vir); 512 memcpy(&vir, imsg->data, sizeof(vir)); 513 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 514 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 515 if (vm->vm_ttyname[0] != '\0') 516 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 517 sizeof(vir.vir_ttyname)); 518 log_debug("%s: running vm: %d, vm_state: 0x%x", 519 __func__, vm->vm_vmid, 
vm->vm_state); 520 vir.vir_state = vm->vm_state; 521 /* get the user id who started the vm */ 522 vir.vir_uid = vm->vm_uid; 523 vir.vir_gid = vm->vm_params.vmc_owner.gid; 524 } 525 if (proc_compose_imsg(ps, 526 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 527 PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type, 528 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 529 if (vm) 530 vm_terminate(vm, __func__); 531 return (-1); 532 } 533 break; 534 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 535 /* 536 * PROC_VMM has responded with the *running* VMs, now we 537 * append the others. These use the special value 0 for their 538 * kernel id to indicate that they are not running. 539 */ 540 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 541 if (!(vm->vm_state & VM_STATE_RUNNING)) { 542 memset(&vir, 0, sizeof(vir)); 543 vir.vir_info.vir_id = vm->vm_vmid; 544 strlcpy(vir.vir_info.vir_name, 545 vm->vm_params.vmc_params.vcp_name, 546 VMM_MAX_NAME_LEN); 547 vir.vir_info.vir_memory_size = 548 vm->vm_params.vmc_params. 549 vcp_memranges[0].vmr_size; 550 vir.vir_info.vir_ncpus = 551 vm->vm_params.vmc_params.vcp_ncpus; 552 /* get the configured user id for this vm */ 553 vir.vir_uid = vm->vm_params.vmc_owner.uid; 554 vir.vir_gid = vm->vm_params.vmc_owner.gid; 555 log_debug("%s: vm: %d, vm_state: 0x%x", 556 __func__, vm->vm_vmid, vm->vm_state); 557 vir.vir_state = vm->vm_state; 558 if (proc_compose_imsg(ps, 559 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 560 PROC_AGENTX : PROC_CONTROL, -1, 561 IMSG_VMDOP_GET_INFO_VM_DATA, 562 imsg->hdr.peerid, -1, &vir, 563 sizeof(vir)) == -1) { 564 log_debug("%s: GET_INFO_VM_END failed", 565 __func__); 566 vm_terminate(vm, __func__); 567 return (-1); 568 } 569 } 570 } 571 IMSG_SIZE_CHECK(imsg, &res); 572 proc_forward_imsg(ps, imsg, 573 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 
574 PROC_AGENTX : PROC_CONTROL, -1); 575 break; 576 default: 577 return (-1); 578 } 579 580 return (0); 581} 582 583int 584vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg) 585{ 586 struct privsep *ps = p->p_ps; 587 588 switch (imsg->hdr.type) { 589 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 590 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 591 return (0); 592 default: 593 break; 594 } 595 return (-1); 596} 597 598int 599vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) 600{ 601 struct vmop_addr_result var; 602 603 switch (imsg->hdr.type) { 604 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 605 IMSG_SIZE_CHECK(imsg, &var); 606 memcpy(&var, imsg->data, sizeof(var)); 607 proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); 608 break; 609 default: 610 return (-1); 611 } 612 613 return (0); 614} 615 616int 617vmd_check_vmh(struct vm_dump_header *vmh) 618{ 619 int i; 620 unsigned int code, leaf; 621 unsigned int a, b, c, d; 622 623 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 624 log_warnx("%s: incompatible dump signature", __func__); 625 return (-1); 626 } 627 628 if (vmh->vmh_version != VM_DUMP_VERSION) { 629 log_warnx("%s: incompatible dump version", __func__); 630 return (-1); 631 } 632 633 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 634 code = vmh->vmh_cpuids[i].code; 635 leaf = vmh->vmh_cpuids[i].leaf; 636 if (leaf != 0x00) { 637 log_debug("%s: invalid leaf 0x%x for code 0x%x", 638 __func__, leaf, code); 639 return (-1); 640 } 641 642 switch (code) { 643 case 0x00: 644 CPUID_LEAF(code, leaf, a, b, c, d); 645 if (vmh->vmh_cpuids[i].a > a) { 646 log_debug("%s: incompatible cpuid level", 647 __func__); 648 return (-1); 649 } 650 if (!(vmh->vmh_cpuids[i].b == b && 651 vmh->vmh_cpuids[i].c == c && 652 vmh->vmh_cpuids[i].d == d)) { 653 log_debug("%s: incompatible cpu brand", 654 __func__); 655 return (-1); 656 } 657 break; 658 659 case 0x01: 660 CPUID_LEAF(code, leaf, a, b, c, d); 661 if 
((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 662 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 663 log_debug("%s: incompatible cpu features " 664 "code: 0x%x leaf: 0x%x reg: c", __func__, 665 code, leaf); 666 return (-1); 667 } 668 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 669 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 670 log_debug("%s: incompatible cpu features " 671 "code: 0x%x leaf: 0x%x reg: d", __func__, 672 code, leaf); 673 return (-1); 674 } 675 break; 676 677 case 0x07: 678 CPUID_LEAF(code, leaf, a, b, c, d); 679 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 680 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 681 log_debug("%s: incompatible cpu features " 682 "code: 0x%x leaf: 0x%x reg: c", __func__, 683 code, leaf); 684 return (-1); 685 } 686 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 687 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 688 log_debug("%s: incompatible cpu features " 689 "code: 0x%x leaf: 0x%x reg: d", __func__, 690 code, leaf); 691 return (-1); 692 } 693 break; 694 695 case 0x0d: 696 CPUID_LEAF(code, leaf, a, b, c, d); 697 if (vmh->vmh_cpuids[i].b > b) { 698 log_debug("%s: incompatible cpu: insufficient " 699 "max save area for enabled XCR0 features", 700 __func__); 701 return (-1); 702 } 703 if (vmh->vmh_cpuids[i].c > c) { 704 log_debug("%s: incompatible cpu: insufficient " 705 "max save area for supported XCR0 features", 706 __func__); 707 return (-1); 708 } 709 break; 710 711 case 0x80000001: 712 CPUID_LEAF(code, leaf, a, b, c, d); 713 if ((vmh->vmh_cpuids[i].a & a) != 714 vmh->vmh_cpuids[i].a) { 715 log_debug("%s: incompatible cpu features " 716 "code: 0x%x leaf: 0x%x reg: a", __func__, 717 code, leaf); 718 return (-1); 719 } 720 if ((vmh->vmh_cpuids[i].c & c) != 721 vmh->vmh_cpuids[i].c) { 722 log_debug("%s: incompatible cpu features " 723 "code: 0x%x leaf: 0x%x reg: c", __func__, 724 code, leaf); 725 return (-1); 726 } 727 if ((vmh->vmh_cpuids[i].d & d) != 728 vmh->vmh_cpuids[i].d) { 729 
log_debug("%s: incompatible cpu features " 730 "code: 0x%x leaf: 0x%x reg: d", __func__, 731 code, leaf); 732 return (-1); 733 } 734 break; 735 736 default: 737 log_debug("%s: unknown code 0x%x", __func__, code); 738 return (-1); 739 } 740 } 741 742 return (0); 743} 744 745void 746vmd_sighdlr(int sig, short event, void *arg) 747{ 748 if (privsep_process != PROC_PARENT) 749 return; 750 log_debug("%s: handling signal", __func__); 751 752 switch (sig) { 753 case SIGHUP: 754 log_info("%s: reload requested with SIGHUP", __func__); 755 756 /* 757 * This is safe because libevent uses async signal handlers 758 * that run in the event loop and not in signal context. 759 */ 760 (void)vmd_reload(0, NULL); 761 break; 762 case SIGPIPE: 763 log_info("%s: ignoring SIGPIPE", __func__); 764 break; 765 case SIGUSR1: 766 log_info("%s: ignoring SIGUSR1", __func__); 767 break; 768 case SIGTERM: 769 case SIGINT: 770 vmd_shutdown(); 771 break; 772 default: 773 fatalx("unexpected signal"); 774 } 775} 776 777__dead void 778usage(void) 779{ 780 extern char *__progname; 781 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 782 __progname); 783 exit(1); 784} 785 786int 787main(int argc, char **argv) 788{ 789 struct privsep *ps; 790 int ch; 791 enum privsep_procid proc_id = PROC_PARENT; 792 int proc_instance = 0, vm_launch = 0; 793 int vmm_fd = -1, vm_fd = -1; 794 const char *errp, *title = NULL; 795 int argc0 = argc; 796 char dev_type = '\0'; 797 798 log_init(0, LOG_DAEMON); 799 800 if ((env = calloc(1, sizeof(*env))) == NULL) 801 fatal("calloc: env"); 802 env->vmd_fd = -1; 803 env->vmd_fd6 = -1; 804 805 while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) { 806 switch (ch) { 807 case 'D': 808 if (cmdline_symset(optarg) < 0) 809 log_warnx("could not parse macro definition %s", 810 optarg); 811 break; 812 case 'd': 813 env->vmd_debug = 2; 814 break; 815 case 'f': 816 conffile = optarg; 817 break; 818 case 'v': 819 env->vmd_verbose++; 820 break; 821 /* vmd fork/exec 
*/ 822 case 'n': 823 env->vmd_noaction = 1; 824 break; 825 case 'P': 826 title = optarg; 827 proc_id = proc_getid(procs, nitems(procs), title); 828 if (proc_id == PROC_MAX) 829 fatalx("invalid process name"); 830 break; 831 case 'I': 832 proc_instance = strtonum(optarg, 0, 833 PROC_MAX_INSTANCES, &errp); 834 if (errp) 835 fatalx("invalid process instance"); 836 break; 837 /* child vm and device fork/exec */ 838 case 'p': 839 title = optarg; 840 break; 841 case 'V': 842 vm_launch = VMD_LAUNCH_VM; 843 vm_fd = strtonum(optarg, 0, 128, &errp); 844 if (errp) 845 fatalx("invalid vm fd"); 846 break; 847 case 'X': 848 vm_launch = VMD_LAUNCH_DEV; 849 vm_fd = strtonum(optarg, 0, 128, &errp); 850 if (errp) 851 fatalx("invalid device fd"); 852 break; 853 case 't': 854 dev_type = *optarg; 855 switch (dev_type) { 856 case VMD_DEVTYPE_NET: 857 case VMD_DEVTYPE_DISK: 858 break; 859 default: fatalx("invalid device type"); 860 } 861 break; 862 case 'i': 863 vmm_fd = strtonum(optarg, 0, 128, &errp); 864 if (errp) 865 fatalx("invalid vmm fd"); 866 break; 867 default: 868 usage(); 869 } 870 } 871 872 argc -= optind; 873 if (argc > 0) 874 usage(); 875 876 if (env->vmd_noaction && !env->vmd_debug) 877 env->vmd_debug = 1; 878 879 log_init(env->vmd_debug, LOG_DAEMON); 880 log_setverbose(env->vmd_verbose); 881 882 /* Re-exec from the vmm child process requires an absolute path. 
*/ 883 if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction) 884 fatalx("re-exec requires execution with an absolute path"); 885 env->argv0 = argv[0]; 886 887 /* check for root privileges */ 888 if (env->vmd_noaction == 0 && !vm_launch) { 889 if (geteuid()) 890 fatalx("need root privileges"); 891 } 892 893 ps = &env->vmd_ps; 894 ps->ps_env = env; 895 896 if (config_init(env) == -1) 897 fatal("failed to initialize configuration"); 898 899 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 900 fatal("unknown user %s", VMD_USER); 901 902 /* First proc runs as root without pledge but in default chroot */ 903 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 904 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 905 906 /* 907 * If we're launching a new vm or its device, we short out here. 908 */ 909 if (vm_launch == VMD_LAUNCH_VM) { 910 vm_main(vm_fd, vmm_fd); 911 /* NOTREACHED */ 912 } else if (vm_launch == VMD_LAUNCH_DEV) { 913 if (dev_type == VMD_DEVTYPE_NET) { 914 log_procinit("vm/%s/vionet", title); 915 vionet_main(vm_fd, vmm_fd); 916 /* NOTREACHED */ 917 } else if (dev_type == VMD_DEVTYPE_DISK) { 918 log_procinit("vm/%s/vioblk", title); 919 vioblk_main(vm_fd, vmm_fd); 920 /* NOTREACHED */ 921 } 922 fatalx("unsupported device type '%c'", dev_type); 923 } 924 925 /* Open /dev/vmm early. 
*/ 926 if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { 927 env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC); 928 if (env->vmd_fd == -1) 929 fatal("%s", VMM_NODE); 930 } 931 932 /* Configure the control socket */ 933 ps->ps_csock.cs_name = SOCKET_NAME; 934 TAILQ_INIT(&ps->ps_rcsocks); 935 936 /* Configuration will be parsed after forking the children */ 937 env->vmd_conffile = conffile; 938 939 if (env->vmd_noaction) 940 ps->ps_noaction = 1; 941 ps->ps_instance = proc_instance; 942 if (title != NULL) 943 ps->ps_title[proc_id] = title; 944 945 /* only the parent returns */ 946 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 947 proc_id); 948 949 if (ps->ps_noaction == 0) 950 log_info("startup"); 951 952 event_init(); 953 954 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 955 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 956 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 957 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 958 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 959 960 signal_add(&ps->ps_evsigint, NULL); 961 signal_add(&ps->ps_evsigterm, NULL); 962 signal_add(&ps->ps_evsighup, NULL); 963 signal_add(&ps->ps_evsigpipe, NULL); 964 signal_add(&ps->ps_evsigusr1, NULL); 965 966 if (!env->vmd_noaction) 967 proc_connect(ps); 968 969 if (vmd_configure() == -1) 970 fatalx("configuration failed"); 971 972 event_dispatch(); 973 974 log_debug("exiting"); 975 976 return (0); 977} 978 979void 980start_vm_batch(int fd, short type, void *args) 981{ 982 int i = 0; 983 struct vmd_vm *vm; 984 985 log_debug("%s: starting batch of %d vms", __func__, 986 env->vmd_cfg.parallelism); 987 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 988 if (!(vm->vm_state & VM_STATE_WAITING)) { 989 log_debug("%s: not starting vm %s (disabled)", 990 __func__, 991 vm->vm_params.vmc_params.vcp_name); 992 continue; 993 } 994 i++; 995 if (i > env->vmd_cfg.parallelism) { 996 evtimer_add(&staggered_start_timer, 997 &env->vmd_cfg.delay); 
998 break; 999 } 1000 vm->vm_state &= ~VM_STATE_WAITING; 1001 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 1002 } 1003 log_debug("%s: done starting vms", __func__); 1004} 1005 1006int 1007vmd_configure(void) 1008{ 1009 int ncpus; 1010 struct vmd_switch *vsw; 1011 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 1012 size_t ncpus_sz = sizeof(ncpus); 1013 1014 /* 1015 * pledge in the parent process: 1016 * stdio - for malloc and basic I/O including events. 1017 * rpath - for reload to open and read the configuration files. 1018 * wpath - for opening disk images and tap devices. 1019 * tty - for openpty and TIOCUCNTL. 1020 * proc - run kill to terminate its children safely. 1021 * sendfd - for disks, interfaces and other fds. 1022 * recvfd - for send and receive. 1023 * getpw - lookup user or group id by name. 1024 * chown, fattr - change tty ownership 1025 * flock - locking disk files 1026 */ 1027 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 1028 " chown fattr flock", NULL) == -1) 1029 fatal("pledge"); 1030 1031 if ((env->vmd_ptmfd = getptmfd()) == -1) 1032 fatal("getptmfd %s", PATH_PTMDEV); 1033 1034 if (parse_config(env->vmd_conffile) == -1) { 1035 proc_kill(&env->vmd_ps); 1036 exit(1); 1037 } 1038 1039 if (env->vmd_noaction) { 1040 fprintf(stderr, "configuration OK\n"); 1041 proc_kill(&env->vmd_ps); 1042 exit(0); 1043 } 1044 1045 /* Send VMM device fd to vmm proc. 
*/ 1046 proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, 1047 IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0); 1048 1049 /* Send shared global configuration to all children */ 1050 if (config_setconfig(env) == -1) 1051 return (-1); 1052 1053 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1054 if (vsw->sw_running) 1055 continue; 1056 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1057 log_warn("%s: failed to create switch %s", 1058 __func__, vsw->sw_name); 1059 switch_remove(vsw); 1060 return (-1); 1061 } 1062 } 1063 1064 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 1065 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 1066 if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 1067 ncpus = 1; 1068 env->vmd_cfg.parallelism = ncpus; 1069 log_debug("%s: setting staggered start configuration to " 1070 "parallelism: %d and delay: %lld", 1071 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 1072 } 1073 1074 log_debug("%s: starting vms in staggered fashion", __func__); 1075 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1076 /* start first batch */ 1077 start_vm_batch(0, 0, NULL); 1078 1079 return (0); 1080} 1081 1082int 1083vmd_reload(unsigned int reset, const char *filename) 1084{ 1085 struct vmd_vm *vm, *next_vm; 1086 struct vmd_switch *vsw; 1087 int reload = 0; 1088 1089 /* Switch back to the default config file */ 1090 if (filename == NULL || *filename == '\0') { 1091 filename = env->vmd_conffile; 1092 reload = 1; 1093 } 1094 1095 log_debug("%s: level %d config file %s", __func__, reset, filename); 1096 1097 if (reset) { 1098 /* Purge the configuration */ 1099 config_purge(env, reset); 1100 config_setreset(env, reset); 1101 } else { 1102 /* 1103 * Load or reload the configuration. 1104 * 1105 * Reloading removes all non-running VMs before processing the 1106 * config file, whereas loading only adds to the existing list 1107 * of VMs. 
1108 */ 1109 1110 if (reload) { 1111 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 1112 next_vm) { 1113 if (!(vm->vm_state & VM_STATE_RUNNING)) { 1114 DPRINTF("%s: calling vm_remove", 1115 __func__); 1116 vm_remove(vm, __func__); 1117 } 1118 } 1119 } 1120 1121 if (parse_config(filename) == -1) { 1122 log_debug("%s: failed to load config file %s", 1123 __func__, filename); 1124 return (-1); 1125 } 1126 1127 if (reload) { 1128 /* Update shared global configuration in all children */ 1129 if (config_setconfig(env) == -1) 1130 return (-1); 1131 } 1132 1133 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1134 if (vsw->sw_running) 1135 continue; 1136 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1137 log_warn("%s: failed to create switch %s", 1138 __func__, vsw->sw_name); 1139 switch_remove(vsw); 1140 return (-1); 1141 } 1142 } 1143 1144 log_debug("%s: starting vms in staggered fashion", __func__); 1145 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1146 /* start first batch */ 1147 start_vm_batch(0, 0, NULL); 1148 1149 } 1150 1151 return (0); 1152} 1153 1154void 1155vmd_shutdown(void) 1156{ 1157 struct vmd_vm *vm, *vm_next; 1158 1159 log_debug("%s: performing shutdown", __func__); 1160 1161 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1162 vm_remove(vm, __func__); 1163 } 1164 1165 proc_kill(&env->vmd_ps); 1166 free(env); 1167 1168 log_warnx("terminating"); 1169 exit(0); 1170} 1171 1172struct vmd_vm * 1173vm_getbyvmid(uint32_t vmid) 1174{ 1175 struct vmd_vm *vm; 1176 1177 if (vmid == 0) 1178 return (NULL); 1179 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1180 if (vm->vm_vmid == vmid) 1181 return (vm); 1182 } 1183 1184 return (NULL); 1185} 1186 1187struct vmd_vm * 1188vm_getbyid(uint32_t id) 1189{ 1190 struct vmd_vm *vm; 1191 1192 if (id == 0) 1193 return (NULL); 1194 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1195 if (vm->vm_params.vmc_params.vcp_id == id) 1196 return (vm); 1197 } 1198 1199 return (NULL); 1200} 1201 1202uint32_t 
1203vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1204{ 1205 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1206 return (0); 1207 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1208 id, vm->vm_vmid); 1209 return (vm->vm_vmid); 1210} 1211 1212uint32_t 1213vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1214{ 1215 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1216 return (0); 1217 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1218 vmid, vm->vm_params.vmc_params.vcp_id); 1219 return (vm->vm_params.vmc_params.vcp_id); 1220} 1221 1222struct vmd_vm * 1223vm_getbyname(const char *name) 1224{ 1225 struct vmd_vm *vm; 1226 1227 if (name == NULL) 1228 return (NULL); 1229 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1230 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1231 return (vm); 1232 } 1233 1234 return (NULL); 1235} 1236 1237struct vmd_vm * 1238vm_getbypid(pid_t pid) 1239{ 1240 struct vmd_vm *vm; 1241 1242 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1243 if (vm->vm_pid == pid) 1244 return (vm); 1245 } 1246 1247 return (NULL); 1248} 1249 1250void 1251vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1252{ 1253 struct privsep *ps = &env->vmd_ps; 1254 unsigned int i, j; 1255 1256 if (vm == NULL) 1257 return; 1258 1259 log_debug("%s: %s %s stopping vm %d%s", 1260 __func__, ps->ps_title[privsep_process], caller, 1261 vm->vm_vmid, keeptty ? 
", keeping tty open" : ""); 1262 1263 vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING 1264 | VM_STATE_SHUTDOWN); 1265 1266 if (vm->vm_iev.ibuf.fd != -1) { 1267 event_del(&vm->vm_iev.ev); 1268 close(vm->vm_iev.ibuf.fd); 1269 } 1270 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) { 1271 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1272 if (vm->vm_disks[i][j] != -1) { 1273 close(vm->vm_disks[i][j]); 1274 vm->vm_disks[i][j] = -1; 1275 } 1276 } 1277 } 1278 for (i = 0; i < VM_MAX_NICS_PER_VM; i++) { 1279 if (vm->vm_ifs[i].vif_fd != -1) { 1280 close(vm->vm_ifs[i].vif_fd); 1281 vm->vm_ifs[i].vif_fd = -1; 1282 } 1283 free(vm->vm_ifs[i].vif_name); 1284 free(vm->vm_ifs[i].vif_switch); 1285 free(vm->vm_ifs[i].vif_group); 1286 vm->vm_ifs[i].vif_name = NULL; 1287 vm->vm_ifs[i].vif_switch = NULL; 1288 vm->vm_ifs[i].vif_group = NULL; 1289 } 1290 if (vm->vm_kernel != -1) { 1291 close(vm->vm_kernel); 1292 vm->vm_kernel = -1; 1293 } 1294 if (vm->vm_cdrom != -1) { 1295 close(vm->vm_cdrom); 1296 vm->vm_cdrom = -1; 1297 } 1298 if (!keeptty) { 1299 vm_closetty(vm); 1300 vm->vm_uid = 0; 1301 } 1302} 1303 1304void 1305vm_remove(struct vmd_vm *vm, const char *caller) 1306{ 1307 struct privsep *ps = &env->vmd_ps; 1308 1309 if (vm == NULL) 1310 return; 1311 1312 log_debug("%s: %s %s removing vm %d from running config", 1313 __func__, ps->ps_title[privsep_process], caller, 1314 vm->vm_vmid); 1315 1316 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1317 1318 vm_stop(vm, 0, caller); 1319 if (vm->vm_kernel_path != NULL && !vm->vm_from_config) 1320 free(vm->vm_kernel_path); 1321 free(vm); 1322} 1323 1324int 1325vm_claimid(const char *name, int uid, uint32_t *id) 1326{ 1327 struct name2id *n2i = NULL; 1328 1329 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1330 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1331 goto out; 1332 1333 if (++env->vmd_nvm == 0) { 1334 log_warnx("too many vms"); 1335 return (-1); 1336 } 1337 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1338 log_warnx("could not 
alloc vm name"); 1339 return (-1); 1340 } 1341 n2i->id = env->vmd_nvm; 1342 n2i->uid = uid; 1343 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1344 log_warnx("vm name too long"); 1345 free(n2i); 1346 return (-1); 1347 } 1348 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1349 1350out: 1351 *id = n2i->id; 1352 return (0); 1353} 1354 1355int 1356vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1357 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1358{ 1359 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1360 struct vm_create_params *vcp = &vmc->vmc_params; 1361 struct vmop_owner *vmo = NULL; 1362 uint32_t nid, rng; 1363 unsigned int i, j; 1364 struct vmd_switch *sw; 1365 char *s; 1366 int ret = 0; 1367 1368 /* Check if this is an instance of another VM */ 1369 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { 1370 errno = ret; /* XXX might set invalid errno */ 1371 return (-1); 1372 } 1373 1374 errno = 0; 1375 *ret_vm = NULL; 1376 1377 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1378 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1379 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1380 uid) != 0) { 1381 errno = EPERM; 1382 goto fail; 1383 } 1384 vm->vm_kernel = vmc->vmc_kernel; 1385 *ret_vm = vm; 1386 errno = EALREADY; 1387 goto fail; 1388 } 1389 1390 if (vm_parent != NULL) 1391 vmo = &vm_parent->vm_params.vmc_insowner; 1392 1393 /* non-root users can only start existing VMs or instances */ 1394 if (vm_checkperm(NULL, vmo, uid) != 0) { 1395 log_warnx("permission denied"); 1396 errno = EPERM; 1397 goto fail; 1398 } 1399 if (vmc->vmc_flags == 0) { 1400 log_warnx("invalid configuration, no devices"); 1401 errno = VMD_DISK_MISSING; 1402 goto fail; 1403 } 1404 if (vcp->vcp_ncpus == 0) 1405 vcp->vcp_ncpus = 1; 1406 if (vcp->vcp_memranges[0].vmr_size == 0) 1407 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1408 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1409 log_warnx("invalid number of CPUs"); 1410 goto fail; 1411 } 
else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM) { 1412 log_warnx("invalid number of disks"); 1413 goto fail; 1414 } else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) { 1415 log_warnx("invalid number of interfaces"); 1416 goto fail; 1417 } else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0 1418 && strlen(vmc->vmc_cdrom) == 0) { 1419 log_warnx("no kernel or disk/cdrom specified"); 1420 goto fail; 1421 } else if (strlen(vcp->vcp_name) == 0) { 1422 log_warnx("invalid VM name"); 1423 goto fail; 1424 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1425 *vcp->vcp_name == '_') { 1426 log_warnx("invalid VM name"); 1427 goto fail; 1428 } else { 1429 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1430 if (!(isalnum((unsigned char)*s) || *s == '.' || \ 1431 *s == '-' || *s == '_')) { 1432 log_warnx("invalid VM name"); 1433 goto fail; 1434 } 1435 } 1436 } 1437 1438 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1439 goto fail; 1440 1441 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1442 vmc = &vm->vm_params; 1443 vcp = &vmc->vmc_params; 1444 vm->vm_pid = -1; 1445 vm->vm_tty = -1; 1446 vm->vm_receive_fd = -1; 1447 vm->vm_kernel = -1; 1448 vm->vm_state &= ~VM_STATE_PAUSED; 1449 1450 if (vmc->vmc_kernel > -1) 1451 vm->vm_kernel = vmc->vmc_kernel; 1452 1453 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) 1454 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1455 vm->vm_disks[i][j] = -1; 1456 for (i = 0; i < VM_MAX_NICS_PER_VM; i++) 1457 vm->vm_ifs[i].vif_fd = -1; 1458 for (i = 0; i < vmc->vmc_nnics; i++) { 1459 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1460 /* inherit per-interface flags from the switch */ 1461 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1462 } 1463 1464 /* 1465 * If the MAC address is zero, always randomize it in vmd(8) 1466 * because we cannot rely on the guest OS to do the right 1467 * thing like OpenBSD does. Based on ether_fakeaddr() 1468 * from the kernel, incremented by one to differentiate 1469 * the source. 
1470 */ 1471 if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN) == 0) { 1472 rng = arc4random(); 1473 vmc->vmc_macs[i][0] = 0xfe; 1474 vmc->vmc_macs[i][1] = 0xe1; 1475 vmc->vmc_macs[i][2] = 0xba + 1; 1476 vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1477 vmc->vmc_macs[i][4] = rng; 1478 vmc->vmc_macs[i][5] = rng >> 8; 1479 } 1480 } 1481 vm->vm_cdrom = -1; 1482 vm->vm_iev.ibuf.fd = -1; 1483 1484 /* 1485 * Assign a new internal Id if not specified and we succeed in 1486 * claiming a new Id. 1487 */ 1488 if (id != 0) 1489 vm->vm_vmid = id; 1490 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1491 goto fail; 1492 else 1493 vm->vm_vmid = nid; 1494 1495 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1496 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1497 1498 *ret_vm = vm; 1499 return (0); 1500 fail: 1501 if (errno == 0) 1502 errno = EINVAL; 1503 return (-1); 1504} 1505 1506int 1507vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1508 struct vmop_create_params *vmc, uid_t uid) 1509{ 1510 char *name; 1511 struct vm_create_params *vcp = &vmc->vmc_params; 1512 struct vmop_create_params *vmcp; 1513 struct vm_create_params *vcpp; 1514 unsigned int i, j; 1515 1516 /* return without error if the parent is NULL (nothing to inherit) */ 1517 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1518 vmc->vmc_instance[0] == '\0') 1519 return (0); 1520 1521 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1522 return (VMD_PARENT_INVALID); 1523 } 1524 1525 vmcp = &(*vm_parent)->vm_params; 1526 vcpp = &vmcp->vmc_params; 1527 1528 /* Are we allowed to create an instance from this VM? 
*/ 1529 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1530 log_warnx("vm \"%s\" no permission to create vm instance", 1531 vcpp->vcp_name); 1532 return (ENAMETOOLONG); 1533 } 1534 1535 name = vcp->vcp_name; 1536 1537 if (vm_getbyname(vcp->vcp_name) != NULL || 1538 vm_getbyvmid(vcp->vcp_id) != NULL) { 1539 return (EPROCLIM); 1540 } 1541 1542 /* CPU */ 1543 if (vcp->vcp_ncpus == 0) 1544 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1545 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1546 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1547 log_warnx("vm \"%s\" no permission to set cpus", name); 1548 return (EPERM); 1549 } 1550 1551 /* memory */ 1552 if (vcp->vcp_memranges[0].vmr_size == 0) 1553 vcp->vcp_memranges[0].vmr_size = 1554 vcpp->vcp_memranges[0].vmr_size; 1555 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1556 vcp->vcp_memranges[0].vmr_size != 1557 vcpp->vcp_memranges[0].vmr_size) { 1558 log_warnx("vm \"%s\" no permission to set memory", name); 1559 return (EPERM); 1560 } 1561 1562 /* disks cannot be inherited */ 1563 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1564 vmc->vmc_ndisks) { 1565 log_warnx("vm \"%s\" no permission to set disks", name); 1566 return (EPERM); 1567 } 1568 for (i = 0; i < vmc->vmc_ndisks; i++) { 1569 /* Check if this disk is already used in the parent */ 1570 for (j = 0; j < vmcp->vmc_ndisks; j++) { 1571 if (strcmp(vmc->vmc_disks[i], 1572 vmcp->vmc_disks[j]) == 0) { 1573 log_warnx("vm \"%s\" disk %s cannot be reused", 1574 name, vmc->vmc_disks[i]); 1575 return (EBUSY); 1576 } 1577 } 1578 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1579 } 1580 1581 /* interfaces */ 1582 if (vmc->vmc_nnics > 0 && 1583 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1584 vmc->vmc_nnics != vmcp->vmc_nnics) { 1585 log_warnx("vm \"%s\" no permission to set interfaces", name); 1586 return (EPERM); 1587 } 1588 for (i = 0; i < vmcp->vmc_nnics; i++) { 1589 /* Interface got overwritten */ 1590 if (i < vmc->vmc_nnics) 1591 continue; 
1592 1593 /* Copy interface from parent */ 1594 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1595 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1596 sizeof(vmc->vmc_ifnames[i])); 1597 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1598 sizeof(vmc->vmc_ifswitch[i])); 1599 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1600 sizeof(vmc->vmc_ifgroup[i])); 1601 memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i], 1602 sizeof(vmc->vmc_macs[i])); 1603 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1604 vmc->vmc_nnics++; 1605 } 1606 for (i = 0; i < vmc->vmc_nnics; i++) { 1607 for (j = 0; j < vmcp->vmc_nnics; j++) { 1608 if (memcmp(zero_mac, vmc->vmc_macs[i], 1609 sizeof(vmc->vmc_macs[i])) != 0 && 1610 memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i], 1611 sizeof(vmc->vmc_macs[i])) != 0) { 1612 log_warnx("vm \"%s\" lladdr cannot be reused", 1613 name); 1614 return (EBUSY); 1615 } 1616 if (strlen(vmc->vmc_ifnames[i]) && 1617 strcmp(vmc->vmc_ifnames[i], 1618 vmcp->vmc_ifnames[j]) == 0) { 1619 log_warnx("vm \"%s\" %s cannot be reused", 1620 vmc->vmc_ifnames[i], name); 1621 return (EBUSY); 1622 } 1623 } 1624 } 1625 1626 /* kernel */ 1627 if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL && 1628 strnlen((*vm_parent)->vm_kernel_path, PATH_MAX) < PATH_MAX)) { 1629 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1630 log_warnx("vm \"%s\" no permission to set boot image", 1631 name); 1632 return (EPERM); 1633 } 1634 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1635 } 1636 1637 /* cdrom */ 1638 if (strlen(vmc->vmc_cdrom) > 0) { 1639 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1640 log_warnx("vm \"%s\" no permission to set cdrom", name); 1641 return (EPERM); 1642 } 1643 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1644 } else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom, 1645 sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) { 1646 log_warnx("vm \"%s\" cdrom name too long", name); 1647 return (EINVAL); 1648 } 1649 1650 /* user */ 
1651 if (vmc->vmc_owner.uid == 0) 1652 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1653 else if (vmc->vmc_owner.uid != uid && 1654 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1655 log_warnx("vm \"%s\" user mismatch", name); 1656 return (EPERM); 1657 } 1658 1659 /* group */ 1660 if (vmc->vmc_owner.gid == 0) 1661 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1662 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1663 log_warnx("vm \"%s\" group mismatch", name); 1664 return (EPERM); 1665 } 1666 1667 /* child instances */ 1668 if (vmc->vmc_insflags) { 1669 log_warnx("vm \"%s\" cannot change instance permissions", name); 1670 return (EPERM); 1671 } 1672 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1673 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1674 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1675 vmc->vmc_insflags = vmcp->vmc_insflags; 1676 } else { 1677 vmc->vmc_insowner.gid = 0; 1678 vmc->vmc_insowner.uid = 0; 1679 vmc->vmc_insflags = 0; 1680 } 1681 1682 /* finished, remove instance flags */ 1683 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1684 1685 return (0); 1686} 1687 1688/* 1689 * vm_checkperm 1690 * 1691 * Checks if the user represented by the 'uid' parameter is allowed to 1692 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1693 * console.) 
1694 * 1695 * Parameters: 1696 * vm: the VM whose permission is to be checked 1697 * vmo: the required uid/gid to be checked 1698 * uid: the user ID of the user making the request 1699 * 1700 * Return values: 1701 * 0: the permission should be granted 1702 * -1: the permission check failed (also returned if vm == null) 1703 */ 1704int 1705vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1706{ 1707 struct group *gr; 1708 struct passwd *pw; 1709 char **grmem; 1710 1711 /* root has no restrictions */ 1712 if (uid == 0) 1713 return (0); 1714 1715 if (vmo == NULL) 1716 return (-1); 1717 1718 /* check user */ 1719 if (vm == NULL) { 1720 if (vmo->uid == uid) 1721 return (0); 1722 } else { 1723 /* 1724 * check user of running vm (the owner of a running vm can 1725 * be different to (or more specific than) the configured owner. 1726 */ 1727 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1728 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1729 return (0); 1730 } 1731 1732 /* check groups */ 1733 if (vmo->gid != -1) { 1734 if ((pw = getpwuid(uid)) == NULL) 1735 return (-1); 1736 if (pw->pw_gid == vmo->gid) 1737 return (0); 1738 if ((gr = getgrgid(vmo->gid)) != NULL) { 1739 for (grmem = gr->gr_mem; *grmem; grmem++) 1740 if (strcmp(*grmem, pw->pw_name) == 0) 1741 return (0); 1742 } 1743 } 1744 1745 return (-1); 1746} 1747 1748/* 1749 * vm_checkinsflag 1750 * 1751 * Checks whether the non-root user is allowed to set an instance option. 
1752 * 1753 * Parameters: 1754 * vmc: the VM create parameters 1755 * flag: the flag to be checked 1756 * uid: the user ID of the user making the request 1757 * 1758 * Return values: 1759 * 0: the permission should be granted 1760 * -1: the permission check failed (also returned if vm == null) 1761 */ 1762int 1763vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1764{ 1765 /* root has no restrictions */ 1766 if (uid == 0) 1767 return (0); 1768 1769 if ((vmc->vmc_insflags & flag) == 0) 1770 return (-1); 1771 1772 return (0); 1773} 1774 1775/* 1776 * vm_checkaccess 1777 * 1778 * Checks if the user represented by the 'uid' parameter is allowed to 1779 * access the file described by the 'path' parameter. 1780 * 1781 * Parameters: 1782 * fd: the file descriptor of the opened file 1783 * uflag: check if the userid has access to the file 1784 * uid: the user ID of the user making the request 1785 * amode: the access flags of R_OK and W_OK 1786 * 1787 * Return values: 1788 * 0: the permission should be granted 1789 * -1: the permission check failed 1790 */ 1791int 1792vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1793{ 1794 struct group *gr; 1795 struct passwd *pw; 1796 char **grmem; 1797 struct stat st; 1798 mode_t mode; 1799 1800 if (fd == -1) 1801 return (-1); 1802 1803 /* 1804 * File has to be accessible and a regular file 1805 */ 1806 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1807 return (-1); 1808 1809 /* root has no restrictions */ 1810 if (uid == 0 || uflag == 0) 1811 return (0); 1812 1813 /* check other */ 1814 mode = amode & W_OK ? S_IWOTH : 0; 1815 mode |= amode & R_OK ? S_IROTH : 0; 1816 if ((st.st_mode & mode) == mode) 1817 return (0); 1818 1819 /* check user */ 1820 mode = amode & W_OK ? S_IWUSR : 0; 1821 mode |= amode & R_OK ? S_IRUSR : 0; 1822 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1823 return (0); 1824 1825 /* check groups */ 1826 mode = amode & W_OK ? 
S_IWGRP : 0; 1827 mode |= amode & R_OK ? S_IRGRP : 0; 1828 if ((st.st_mode & mode) != mode) 1829 return (-1); 1830 if ((pw = getpwuid(uid)) == NULL) 1831 return (-1); 1832 if (pw->pw_gid == st.st_gid) 1833 return (0); 1834 if ((gr = getgrgid(st.st_gid)) != NULL) { 1835 for (grmem = gr->gr_mem; *grmem; grmem++) 1836 if (strcmp(*grmem, pw->pw_name) == 0) 1837 return (0); 1838 } 1839 1840 return (-1); 1841} 1842 1843int 1844vm_opentty(struct vmd_vm *vm) 1845{ 1846 struct stat st; 1847 struct group *gr; 1848 uid_t uid; 1849 gid_t gid; 1850 mode_t mode; 1851 int on = 1, tty_slave; 1852 1853 /* 1854 * Open tty with pre-opened PTM fd 1855 */ 1856 if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname, 1857 NULL, NULL) == -1) { 1858 log_warn("fdopenpty"); 1859 return (-1); 1860 } 1861 close(tty_slave); 1862 1863 /* 1864 * We use user ioctl(2) mode to pass break commands. 1865 */ 1866 if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) { 1867 log_warn("could not enable user ioctl mode on %s", 1868 vm->vm_ttyname); 1869 goto fail; 1870 } 1871 1872 uid = vm->vm_uid; 1873 gid = vm->vm_params.vmc_owner.gid; 1874 1875 if (vm->vm_params.vmc_owner.gid != -1) { 1876 mode = 0660; 1877 } else if ((gr = getgrnam("tty")) != NULL) { 1878 gid = gr->gr_gid; 1879 mode = 0620; 1880 } else { 1881 mode = 0600; 1882 gid = 0; 1883 } 1884 1885 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1886 __func__, vm->vm_params.vmc_params.vcp_name, 1887 vm->vm_ttyname, uid, gid, mode); 1888 1889 /* 1890 * Change ownership and mode of the tty as required. 
1891 * Loosely based on the implementation of sshpty.c 1892 */ 1893 if (fstat(vm->vm_tty, &st) == -1) { 1894 log_warn("fstat failed for %s", vm->vm_ttyname); 1895 goto fail; 1896 } 1897 1898 if (st.st_uid != uid || st.st_gid != gid) { 1899 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1900 log_warn("chown %s %d %d failed, uid %d", 1901 vm->vm_ttyname, uid, gid, getuid()); 1902 1903 /* Ignore failure on read-only filesystems */ 1904 if (!((errno == EROFS) && 1905 (st.st_uid == uid || st.st_uid == 0))) 1906 goto fail; 1907 } 1908 } 1909 1910 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1911 if (chmod(vm->vm_ttyname, mode) == -1) { 1912 log_warn("chmod %s %o failed, uid %d", 1913 vm->vm_ttyname, mode, getuid()); 1914 1915 /* Ignore failure on read-only filesystems */ 1916 if (!((errno == EROFS) && 1917 (st.st_uid == uid || st.st_uid == 0))) 1918 goto fail; 1919 } 1920 } 1921 1922 return (0); 1923 fail: 1924 vm_closetty(vm); 1925 return (-1); 1926} 1927 1928void 1929vm_closetty(struct vmd_vm *vm) 1930{ 1931 if (vm->vm_tty != -1) { 1932 /* Release and close the tty */ 1933 if (fchown(vm->vm_tty, 0, 0) == -1) 1934 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1935 if (fchmod(vm->vm_tty, 0666) == -1) 1936 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1937 close(vm->vm_tty); 1938 vm->vm_tty = -1; 1939 } 1940 memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname)); 1941} 1942 1943void 1944switch_remove(struct vmd_switch *vsw) 1945{ 1946 if (vsw == NULL) 1947 return; 1948 1949 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1950 1951 free(vsw->sw_group); 1952 free(vsw->sw_name); 1953 free(vsw); 1954} 1955 1956struct vmd_switch * 1957switch_getbyname(const char *name) 1958{ 1959 struct vmd_switch *vsw; 1960 1961 if (name == NULL) 1962 return (NULL); 1963 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1964 if (strcmp(vsw->sw_name, name) == 0) 1965 return (vsw); 1966 } 1967 1968 return (NULL); 1969} 1970 1971char * 1972get_string(uint8_t *ptr, size_t len) 1973{ 
1974 size_t i; 1975 1976 for (i = 0; i < len; i++) 1977 if (!isprint((unsigned char)ptr[i])) 1978 break; 1979 1980 return strndup(ptr, i); 1981} 1982 1983uint32_t 1984prefixlen2mask(uint8_t prefixlen) 1985{ 1986 if (prefixlen == 0) 1987 return (0); 1988 1989 if (prefixlen > 32) 1990 prefixlen = 32; 1991 1992 return (htonl(0xffffffff << (32 - prefixlen))); 1993} 1994 1995void 1996prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 1997{ 1998 struct in6_addr s6; 1999 int i; 2000 2001 if (prefixlen > 128) 2002 prefixlen = 128; 2003 2004 memset(&s6, 0, sizeof(s6)); 2005 for (i = 0; i < prefixlen / 8; i++) 2006 s6.s6_addr[i] = 0xff; 2007 i = prefixlen % 8; 2008 if (i) 2009 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2010 2011 memcpy(mask, &s6, sizeof(s6)); 2012} 2013 2014void 2015getmonotime(struct timeval *tv) 2016{ 2017 struct timespec ts; 2018 2019 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2020 fatal("clock_gettime"); 2021 2022 TIMESPEC_TO_TIMEVAL(tv, &ts); 2023} 2024 2025static inline void 2026vm_terminate(struct vmd_vm *vm, const char *caller) 2027{ 2028 if (vm->vm_from_config) 2029 vm_stop(vm, 0, caller); 2030 else { 2031 /* vm_remove calls vm_stop */ 2032 vm_remove(vm, caller); 2033 } 2034} 2035 2036/* 2037 * Utility function for closing vm file descriptors. Assumes an fd of -1 was 2038 * already closed or never opened. 2039 * 2040 * Returns 0 on success, otherwise -1 on failure. 2041 */ 2042int 2043close_fd(int fd) 2044{ 2045 int ret; 2046 2047 if (fd == -1) 2048 return (0); 2049 2050#ifdef POSIX_CLOSE_RESTART 2051 do { ret = close(fd); } while (ret == -1 && errno == EINTR); 2052#else 2053 ret = close(fd); 2054#endif /* POSIX_CLOSE_RESTART */ 2055 2056 if (ret == -1 && errno == EIO) 2057 log_warn("%s(%d)", __func__, fd); 2058 2059 return (ret); 2060} 2061