/* vmd.c revision 1.150 */
1/* $OpenBSD: vmd.c,v 1.150 2023/06/18 11:45:11 op Exp $ */ 2 3/* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/types.h> 20#include <sys/queue.h> 21#include <sys/wait.h> 22#include <sys/stat.h> 23#include <sys/sysctl.h> 24#include <sys/tty.h> 25#include <sys/ttycom.h> 26#include <sys/ioctl.h> 27 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31#include <termios.h> 32#include <errno.h> 33#include <event.h> 34#include <fcntl.h> 35#include <pwd.h> 36#include <signal.h> 37#include <syslog.h> 38#include <unistd.h> 39#include <util.h> 40#include <ctype.h> 41#include <grp.h> 42 43#include <machine/specialreg.h> 44#include <machine/vmmvar.h> 45 46#include "proc.h" 47#include "atomicio.h" 48#include "vmd.h" 49 50__dead void usage(void); 51 52int main(int, char **); 53int vmd_configure(void); 54void vmd_sighdlr(int sig, short event, void *arg); 55void vmd_shutdown(void); 56int vmd_control_run(void); 57int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 58int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 59int vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *); 60int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); 61int 
vmd_check_vmh(struct vm_dump_header *);

int	 vm_instance(struct privsep *, struct vmd_vm **,
	    struct vmop_create_params *, uid_t);
int	 vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
int	 vm_claimid(const char *, int, uint32_t *);
void	 start_vm_batch(int, short, void*);

static inline void vm_terminate(struct vmd_vm *, const char *);

/* Global daemon state, shared by all functions in this file. */
struct vmd	*env;

static struct privsep_proc procs[] = {
	/* Keep "priv" on top as procs[0] */
	{ "priv",	PROC_PRIV,	vmd_dispatch_priv, priv },
	{ "control",	PROC_CONTROL,	vmd_dispatch_control, control },
	{ "vmm",	PROC_VMM,	vmd_dispatch_vmm, vmm,
	    vmm_shutdown, "/" },
	{ "agentx",	PROC_AGENTX,	vmd_dispatch_agentx, vm_agentx,
	    vm_agentx_shutdown, "/" }
};

enum privsep_procid privsep_process;

/* Timer used to launch queued VMs in staggered batches. */
struct event staggered_start_timer;

/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;
static const uint8_t zero_mac[ETHER_ADDR_LEN];

const char	 default_conffile[] = VMD_CONF;
const char	*conffile = default_conffile;

/*
 * Handle imsgs arriving from the control process (vmctl requests):
 * start/wait/terminate/pause/unpause/send/receive a VM, query info,
 * (re)load the configuration, reset, and set verbosity.
 *
 * Returns 0 on success, -1 to have the caller tear down the channel.
 * A non-zero "cmd" selects the response imsg sent back at the end;
 * "res" carries the errno-style result for that response.
 */
int
vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep			*ps = p->p_ps;
	int				 res = 0, ret = 0, cmd = 0, verbose;
	unsigned int			 v = 0, flags;
	struct vmop_create_params	 vmc;
	struct vmop_id			 vid;
	struct vmop_result		 vmr;
	struct vm_dump_header		 vmh;
	struct vmd_vm			*vm = NULL;
	char				*str = NULL;
	uint32_t			 id = 0;
	struct control_sock		*rcs;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vmc);
		memcpy(&vmc, imsg->data, sizeof(vmc));
		vmc.vmc_kernel = imsg->fd;

		/* Try registering our VM in our list of known VMs. */
		if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) {
			res = errno;

			/* Did we have a failure during lookup of a parent? */
			if (vm == NULL) {
				cmd = IMSG_VMDOP_START_VM_RESPONSE;
				break;
			}

			/* Does the VM already exist? */
			if (res == EALREADY) {
				/* Is it already running? */
				if (vm->vm_state & VM_STATE_RUNNING) {
					cmd = IMSG_VMDOP_START_VM_RESPONSE;
					break;
				}

				/* If not running, are our flags ok? */
				if (vmc.vmc_flags &&
				    vmc.vmc_flags != VMOP_CREATE_KERNEL) {
					cmd = IMSG_VMDOP_START_VM_RESPONSE;
					break;
				}
			}
			res = 0;
		}

		/* Try to start the launch of the VM. */
		res = config_setvm(ps, vm, imsg->hdr.peerid,
		    vm->vm_params.vmc_owner.uid);
		if (res)
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		break;
	case IMSG_VMDOP_WAIT_VM_REQUEST:
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		flags = vid.vid_flags;
		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;

		if ((id = vid.vid_id) == 0) {
			/* Lookup vm (id) by name */
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				break;
			} else if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
			    (flags & VMOP_FORCE) == 0) {
				res = EALREADY;
				break;
			} else if (!(vm->vm_state & VM_STATE_RUNNING)) {
				res = EINVAL;
				break;
			}
			id = vm->vm_vmid;
		} else if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			break;
		}
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) {
			res = EPERM;
			break;
		}

		/* Only relay TERMINATION requests, not WAIT requests */
		if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
			memset(&vid, 0, sizeof(vid));
			vid.vid_id = id;
			vid.vid_flags = flags;

			if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
			    imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
				return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		break;
	case IMSG_VMDOP_LOAD:
		IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
		str = get_string((uint8_t *)imsg->data,
		    IMSG_DATA_SIZE(imsg));
		/* FALLTHROUGH: LOAD is RELOAD with an explicit path */
	case IMSG_VMDOP_RELOAD:
		if (vmd_reload(0, str) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		free(str);
		break;
	case IMSG_CTL_RESET:
		IMSG_SIZE_CHECK(imsg, &v);
		memcpy(&v, imsg->data, sizeof(v));
		if (vmd_reload(v, NULL) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		break;
	case IMSG_CTL_VERBOSE:
		IMSG_SIZE_CHECK(imsg, &verbose);
		memcpy(&verbose, imsg->data, sizeof(verbose));
		log_setverbose(verbose);

		/* Propagate the new verbosity to the child processes. */
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
		cmd = IMSG_CTL_OK;
		break;
	case IMSG_VMDOP_PAUSE_VM:
	case IMSG_VMDOP_UNPAUSE_VM:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
				    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
				    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    vid.vid_uid) != 0) {
			res = EPERM;
			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		/* Relay to vmm; the response comes back asynchronously. */
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_SEND_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
				close(imsg->fd);
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
			close(imsg->fd);
			break;
		}
		vmr.vmr_id = vid.vid_id;
		log_debug("%s: sending fd to vmm", __func__);
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (imsg->fd == -1) {
			log_warnx("%s: invalid fd", __func__);
			return (-1);
		}
		/* Read and validate the dump header before registering. */
		if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) !=
		    sizeof(vmh)) {
			log_warnx("%s: error reading vmh from received vm",
			    __func__);
			res = EIO;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}

		if (vmd_check_vmh(&vmh)) {
			res = ENOENT;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) !=
		    sizeof(vmc)) {
			log_warnx("%s: error reading vmc from received vm",
			    __func__);
			res = EIO;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
		    sizeof(vmc.vmc_params.vcp_name));
		vmc.vmc_params.vcp_id = 0;

		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
		if (ret != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			close(imsg->fd);
		} else {
			vm->vm_state |= VM_STATE_RECEIVED;
			config_setvm(ps, vm, imsg->hdr.peerid,
			    vmc.vmc_owner.uid);
			log_debug("%s: sending fd to vmm", __func__);
			proc_compose_imsg(ps, PROC_VMM, -1,
			    IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd,
			    NULL, 0);
		}
		break;
	case IMSG_VMDOP_DONE:
		control_reset(&ps->ps_csock);
		TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
			control_reset(rcs);
		cmd = 0;
		break;
	default:
		return (-1);
	}

	/* Send the selected response (if any) back to the control client. */
	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}

/*
 * Handle imsgs arriving from the vmm process: responses to start,
 * pause/unpause, terminate, send/receive and info requests, plus
 * asynchronous VM termination events.  Returns 0 on success, -1 to
 * have the caller tear down the channel.
 */
int
vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct vmop_result	 vmr;
	struct privsep		*ps = p->p_ps;
	int			 res = 0;
	struct vmd_vm		*vm;
	struct vm_create_params	*vcp;
	struct vmop_info_result	 vir;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type, imsg->hdr.peerid, -1,
		    imsg->data, sizeof(imsg->data));
		log_info("%s: paused vm %d successfully",
		    vm->vm_params.vmc_params.vcp_name,
		    vm->vm_vmid);
		vm->vm_state |= VM_STATE_PAUSED;
		break;
	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type,
imsg->hdr.peerid, -1, 392 imsg->data, sizeof(imsg->data)); 393 log_info("%s: unpaused vm %d successfully.", 394 vm->vm_params.vmc_params.vcp_name, 395 vm->vm_vmid); 396 vm->vm_state &= ~VM_STATE_PAUSED; 397 break; 398 case IMSG_VMDOP_START_VM_RESPONSE: 399 IMSG_SIZE_CHECK(imsg, &vmr); 400 memcpy(&vmr, imsg->data, sizeof(vmr)); 401 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 402 break; 403 vm->vm_pid = vmr.vmr_pid; 404 vcp = &vm->vm_params.vmc_params; 405 vcp->vcp_id = vmr.vmr_id; 406 407 /* 408 * If the peerid is not -1, forward the response back to the 409 * the control socket. If it is -1, the request originated 410 * from the parent, not the control socket. 411 */ 412 if (vm->vm_peerid != (uint32_t)-1) { 413 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 414 sizeof(vmr.vmr_ttyname)); 415 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 416 imsg->hdr.type, vm->vm_peerid, -1, 417 &vmr, sizeof(vmr)) == -1) { 418 errno = vmr.vmr_result; 419 log_warn("%s: failed to forward vm result", 420 vcp->vcp_name); 421 vm_terminate(vm, __func__); 422 return (-1); 423 } 424 } 425 426 if (vmr.vmr_result) { 427 log_warnx("%s: failed to start vm", vcp->vcp_name); 428 vm_terminate(vm, __func__); 429 errno = vmr.vmr_result; 430 break; 431 } 432 433 /* Now configure all the interfaces */ 434 if (vm_priv_ifconfig(ps, vm) == -1) { 435 log_warn("%s: failed to configure vm", vcp->vcp_name); 436 vm_terminate(vm, __func__); 437 break; 438 } 439 440 log_info("%s: started vm %d successfully, tty %s", 441 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 442 break; 443 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 444 IMSG_SIZE_CHECK(imsg, &vmr); 445 memcpy(&vmr, imsg->data, sizeof(vmr)); 446 447 if (vmr.vmr_result) { 448 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 449 __func__, vmr.vmr_id); 450 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 451 } else { 452 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 453 break; 454 /* Mark VM as shutting down */ 455 vm->vm_state |= VM_STATE_SHUTDOWN; 456 } 457 
break; 458 case IMSG_VMDOP_SEND_VM_RESPONSE: 459 IMSG_SIZE_CHECK(imsg, &vmr); 460 memcpy(&vmr, imsg->data, sizeof(vmr)); 461 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 462 break; 463 if (!vmr.vmr_result) { 464 log_info("%s: sent vm %d successfully.", 465 vm->vm_params.vmc_params.vcp_name, 466 vm->vm_vmid); 467 vm_terminate(vm, __func__); 468 } 469 470 /* Send a response if a control client is waiting for it */ 471 if (imsg->hdr.peerid != (uint32_t)-1) { 472 /* the error is meaningless for deferred responses */ 473 vmr.vmr_result = 0; 474 475 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 476 IMSG_VMDOP_SEND_VM_RESPONSE, 477 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 478 return (-1); 479 } 480 break; 481 case IMSG_VMDOP_TERMINATE_VM_EVENT: 482 IMSG_SIZE_CHECK(imsg, &vmr); 483 memcpy(&vmr, imsg->data, sizeof(vmr)); 484 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 485 __func__, vmr.vmr_id, vmr.vmr_result); 486 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 487 log_debug("%s: vm %d is no longer available", 488 __func__, vmr.vmr_id); 489 break; 490 } 491 if (vmr.vmr_result != EAGAIN || 492 vm->vm_params.vmc_bootdevice) { 493 vm_terminate(vm, __func__); 494 } else { 495 /* Stop VM instance but keep the tty open */ 496 vm_stop(vm, 1, __func__); 497 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 498 } 499 500 /* The error is meaningless for deferred responses */ 501 vmr.vmr_result = 0; 502 503 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 504 IMSG_VMDOP_TERMINATE_VM_EVENT, 505 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 506 return (-1); 507 break; 508 case IMSG_VMDOP_GET_INFO_VM_DATA: 509 IMSG_SIZE_CHECK(imsg, &vir); 510 memcpy(&vir, imsg->data, sizeof(vir)); 511 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 512 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 513 if (vm->vm_ttyname[0] != '\0') 514 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 515 sizeof(vir.vir_ttyname)); 516 log_debug("%s: running vm: %d, vm_state: 0x%x", 517 __func__, 
vm->vm_vmid, vm->vm_state); 518 vir.vir_state = vm->vm_state; 519 /* get the user id who started the vm */ 520 vir.vir_uid = vm->vm_uid; 521 vir.vir_gid = vm->vm_params.vmc_owner.gid; 522 } 523 if (proc_compose_imsg(ps, 524 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 525 PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type, 526 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 527 log_debug("%s: GET_INFO_VM failed for vm %d, removing", 528 __func__, vm->vm_vmid); 529 vm_terminate(vm, __func__); 530 return (-1); 531 } 532 break; 533 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 534 /* 535 * PROC_VMM has responded with the *running* VMs, now we 536 * append the others. These use the special value 0 for their 537 * kernel id to indicate that they are not running. 538 */ 539 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 540 if (!(vm->vm_state & VM_STATE_RUNNING)) { 541 memset(&vir, 0, sizeof(vir)); 542 vir.vir_info.vir_id = vm->vm_vmid; 543 strlcpy(vir.vir_info.vir_name, 544 vm->vm_params.vmc_params.vcp_name, 545 VMM_MAX_NAME_LEN); 546 vir.vir_info.vir_memory_size = 547 vm->vm_params.vmc_params. 548 vcp_memranges[0].vmr_size; 549 vir.vir_info.vir_ncpus = 550 vm->vm_params.vmc_params.vcp_ncpus; 551 /* get the configured user id for this vm */ 552 vir.vir_uid = vm->vm_params.vmc_owner.uid; 553 vir.vir_gid = vm->vm_params.vmc_owner.gid; 554 log_debug("%s: vm: %d, vm_state: 0x%x", 555 __func__, vm->vm_vmid, vm->vm_state); 556 vir.vir_state = vm->vm_state; 557 if (proc_compose_imsg(ps, 558 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 559 PROC_AGENTX : PROC_CONTROL, -1, 560 IMSG_VMDOP_GET_INFO_VM_DATA, 561 imsg->hdr.peerid, -1, &vir, 562 sizeof(vir)) == -1) { 563 log_debug("%s: GET_INFO_VM_END failed", 564 __func__); 565 vm_terminate(vm, __func__); 566 return (-1); 567 } 568 } 569 } 570 IMSG_SIZE_CHECK(imsg, &res); 571 proc_forward_imsg(ps, imsg, 572 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 
		    PROC_AGENTX : PROC_CONTROL, -1);
		break;
	default:
		return (-1);
	}

	return (0);
}

/*
 * Handle imsgs arriving from the agentx (SNMP) process.  Only VM info
 * requests are accepted; they are forwarded to vmm.  Returns 0 when the
 * message was handled, -1 otherwise.
 */
int
vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep	*ps = p->p_ps;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		return (0);
	default:
		break;
	}
	return (-1);
}

/*
 * Handle imsgs arriving from the privileged process.  Address lookup
 * responses are forwarded on to vmm.  Returns 0 on success, -1 on an
 * unknown message type.
 */
int
vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct vmop_addr_result	 var;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &var);
		memcpy(&var, imsg->data, sizeof(var));
		proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1);
		break;
	default:
		return (-1);
	}

	return (0);
}

/*
 * Validate a received VM dump header: check signature and version, then
 * compare the recorded CPUID leaves against the host CPU so a migrated
 * guest is not resumed on an incompatible processor.  Returns 0 if the
 * dump is compatible, -1 otherwise.
 */
int
vmd_check_vmh(struct vm_dump_header *vmh)
{
	int i;
	unsigned int code, leaf;
	unsigned int a, b, c, d;

	if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE,
	    strlen(VM_DUMP_SIGNATURE)) != 0) {
		log_warnx("%s: incompatible dump signature", __func__);
		return (-1);
	}

	if (vmh->vmh_version != VM_DUMP_VERSION) {
		log_warnx("%s: incompatible dump version", __func__);
		return (-1);
	}

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		code = vmh->vmh_cpuids[i].code;
		leaf = vmh->vmh_cpuids[i].leaf;
		/* Only subleaf 0 is recorded in the dump header. */
		if (leaf != 0x00) {
			log_debug("%s: invalid leaf 0x%x for code 0x%x",
			    __func__, leaf, code);
			return (-1);
		}

		switch (code) {
		case 0x00:
			/* Highest basic leaf and vendor string. */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].a > a) {
				log_debug("%s: incompatible cpuid level",
				    __func__);
				return (-1);
			}
			if (!(vmh->vmh_cpuids[i].b == b &&
			    vmh->vmh_cpuids[i].c == c &&
			    vmh->vmh_cpuids[i].d == d)) {
				log_debug("%s: incompatible cpu brand",
				    __func__);
				return (-1);
			}
			break;

		case 0x01:
			/* Feature flags (ecx/edx), masked to what vmm exposes. */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x07:
			/* Structured extended features. */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x0d:
			/* XSAVE state area sizes. */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].b > b) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for enabled XCR0 features",
				    __func__);
				return (-1);
			}
			if (vmh->vmh_cpuids[i].c > c) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for supported XCR0 features",
				    __func__);
				return (-1);
			}
			break;

		case 0x80000001:
			/* Extended feature flags must be a subset of host's. */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].a & a) !=
			    vmh->vmh_cpuids[i].a) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: a", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c) !=
			    vmh->vmh_cpuids[i].c) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d) !=
			    vmh->vmh_cpuids[i].d) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		default:
			log_debug("%s: unknown code 0x%x", __func__, code);
			return (-1);
		}
	}

	return (0);
}

/*
 * libevent signal handler for the parent process: SIGHUP reloads the
 * configuration, SIGTERM/SIGINT shut the daemon down, SIGPIPE and
 * SIGUSR1 are ignored.
 */
void
vmd_sighdlr(int sig, short event, void *arg)
{
	if (privsep_process != PROC_PARENT)
		return;
	log_debug("%s: handling signal", __func__);

	switch (sig) {
	case SIGHUP:
		log_info("%s: reload requested with SIGHUP", __func__);

		/*
		 * This is safe because libevent uses async signal handlers
		 * that run in the event loop and not in signal context.
		 */
		(void)vmd_reload(0, NULL);
		break;
	case SIGPIPE:
		log_info("%s: ignoring SIGPIPE", __func__);
		break;
	case SIGUSR1:
		log_info("%s: ignoring SIGUSR1", __func__);
		break;
	case SIGTERM:
	case SIGINT:
		vmd_shutdown();
		break;
	default:
		fatalx("unexpected signal");
	}
}

/* Print usage to stderr and exit(1). */
__dead void
usage(void)
{
	extern char *__progname;
	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
	    __progname);
	exit(1);
}

/*
 * vmd entry point.  Parses options (including the internal re-exec
 * options used for privsep children and per-VM/device processes),
 * then forks the privsep children and enters the event loop.
 */
int
main(int argc, char **argv)
{
	struct privsep		*ps;
	int			 ch;
	enum privsep_procid	 proc_id = PROC_PARENT;
	int			 proc_instance = 0, vm_launch = 0;
	int			 vmm_fd = -1, vm_fd = -1;
	const char		*errp, *title = NULL;
	int			 argc0 = argc;
	char			 dev_type = '\0';

	log_init(0, LOG_DAEMON);

	if ((env = calloc(1, sizeof(*env))) == NULL)
		fatal("calloc: env");

	while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:v")) != -1) {
		switch (ch) {
		case 'D':
			if (cmdline_symset(optarg) < 0)
				log_warnx("could not parse macro definition %s",
				    optarg);
			break;
		case 'd':
			env->vmd_debug = 2;
			break;
		case 'f':
			conffile = optarg;
			break;
		case 'v':
			env->vmd_verbose++;
			break;
		/* vmd fork/exec */
		case 'n':
			env->vmd_noaction = 1;
			break;
		case 'P':
			title = optarg;
			proc_id = proc_getid(procs, nitems(procs), title);
			if (proc_id == PROC_MAX)
				fatalx("invalid process name");
			break;
		case 'I':
			proc_instance = strtonum(optarg, 0,
			    PROC_MAX_INSTANCES, &errp);
			if (errp)
				fatalx("invalid process instance");
			break;
		/* child vm and device fork/exec */
		case 'V':
			vm_launch = VMD_LAUNCH_VM;
			vm_fd = strtonum(optarg, 0, 128, &errp);
			if (errp)
				fatalx("invalid vm fd");
			break;
		case 'X':
			vm_launch = VMD_LAUNCH_DEV;
			vm_fd = strtonum(optarg, 0, 128, &errp);
			if (errp)
				fatalx("invalid device fd");
			break;
		case 't':
			dev_type = *optarg;
			switch (dev_type) {
			case VMD_DEVTYPE_NET:
			case VMD_DEVTYPE_DISK:
				break;
			default:
				fatalx("invalid device type");
			}
			break;
		case 'i':
			vmm_fd = strtonum(optarg, 0, 128, &errp);
			if (errp)
				fatalx("invalid vmm fd");
			break;
		default:
			usage();
		}
	}

	argc -= optind;
	if (argc > 0)
		usage();

	/* -n implies at least debug level 1 so errors reach the terminal. */
	if (env->vmd_noaction && !env->vmd_debug)
		env->vmd_debug = 1;

	log_init(env->vmd_debug, LOG_DAEMON);
	log_setverbose(env->vmd_verbose);

	/* Re-exec from the vmm child process requires an absolute path. */
	if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction)
		fatalx("re-exec requires execution with an absolute path");
	env->argv0 = argv[0];

	/* check for root privileges */
	if (env->vmd_noaction == 0 && !vm_launch) {
		if (geteuid())
			fatalx("need root privileges");
	}

	ps = &env->vmd_ps;
	ps->ps_env = env;
	env->vmd_fd = vmm_fd;

	if (config_init(env) == -1)
		fatal("failed to initialize configuration");

	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
		fatal("unknown user %s", VMD_USER);

	/* First proc runs as root without pledge but in default chroot */
	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

	/*
	 * If we're launching a new vm or its device, we short out here.
	 */
	if (vm_launch == VMD_LAUNCH_VM) {
		vm_main(vm_fd, vmm_fd);
		/* NOTREACHED */
	} else if (vm_launch == VMD_LAUNCH_DEV) {
		if (dev_type == VMD_DEVTYPE_NET) {
			vionet_main(vm_fd, vmm_fd);
			/* NOTREACHED */
		} else if (dev_type == VMD_DEVTYPE_DISK) {
			vioblk_main(vm_fd, vmm_fd);
			/* NOTREACHED */
		}
		fatalx("unsupported device type '%c'", dev_type);
	}

	/* Open /dev/vmm early. */
	if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
		env->vmd_fd = open(VMM_NODE, O_RDWR);
		if (env->vmd_fd == -1)
			fatal("%s", VMM_NODE);
	}

	/* Configure the control socket */
	ps->ps_csock.cs_name = SOCKET_NAME;
	TAILQ_INIT(&ps->ps_rcsocks);

	/* Configuration will be parsed after forking the children */
	env->vmd_conffile = conffile;

	if (env->vmd_noaction)
		ps->ps_noaction = 1;
	ps->ps_instance = proc_instance;
	if (title != NULL)
		ps->ps_title[proc_id] = title;

	/* only the parent returns */
	proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv,
	    proc_id);

	log_procinit("parent");
	if (!env->vmd_debug && daemon(0, 0) == -1)
		fatal("can't daemonize");

	if (ps->ps_noaction == 0)
		log_info("startup");

	event_init();

	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);

	signal_add(&ps->ps_evsigint, NULL);
	signal_add(&ps->ps_evsigterm, NULL);
	signal_add(&ps->ps_evsighup, NULL);
	signal_add(&ps->ps_evsigpipe, NULL);
	signal_add(&ps->ps_evsigusr1, NULL);

	if (!env->vmd_noaction)
		proc_connect(ps);

	if (vmd_configure() == -1)
		fatalx("configuration failed");

	event_dispatch();

	log_debug("parent exiting");

	return (0);
}

/*
 * Timer callback: start up to vmd_cfg.parallelism waiting VMs, then
 * re-arm the staggered-start timer if more are still waiting.
 */
void
start_vm_batch(int fd, short type, void *args)
{
	int		 i = 0;
	struct vmd_vm	*vm;

	log_debug("%s: starting batch of %d vms", __func__,
	    env->vmd_cfg.parallelism);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (!(vm->vm_state & VM_STATE_WAITING)) {
			log_debug("%s: not starting vm %s (disabled)",
			    __func__,
			    vm->vm_params.vmc_params.vcp_name);
			continue;
		}
		i++;
		if (i > env->vmd_cfg.parallelism) {
			/* Batch full: defer the rest to the next timer tick. */
			evtimer_add(&staggered_start_timer,
			    &env->vmd_cfg.delay);
			break;
		}
		vm->vm_state &= ~VM_STATE_WAITING;
		config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid);
	}
	log_debug("%s: done starting vms", __func__);
}

/*
 * Post-fork configuration of the parent process: open the pty master,
 * pledge, parse the config file, hand the /dev/vmm fd to vmm, bring up
 * the configured switches and kick off the first staggered VM batch.
 * Returns 0 on success, -1 on failure (may also exit directly).
 */
int
vmd_configure(void)
{
	int			 ncpus;
	struct vmd_switch	*vsw;
	int			 ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
	size_t			 ncpus_sz = sizeof(ncpus);

	if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
		fatal("open %s", PATH_PTMDEV);

	/*
	 * pledge in the parent process:
	 * stdio - for malloc and basic I/O including events.
	 * rpath - for reload to open and read the configuration files.
	 * wpath - for opening disk images and tap devices.
	 * tty - for openpty and TIOCUCNTL.
	 * proc - run kill to terminate its children safely.
	 * sendfd - for disks, interfaces and other fds.
	 * recvfd - for send and receive.
	 * getpw - lookup user or group id by name.
	 * chown, fattr - change tty ownership
	 * flock - locking disk files
	 */
	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
	    " chown fattr flock", NULL) == -1)
		fatal("pledge");

	if (parse_config(env->vmd_conffile) == -1) {
		proc_kill(&env->vmd_ps);
		exit(1);
	}

	if (env->vmd_noaction) {
		fprintf(stderr, "configuration OK\n");
		proc_kill(&env->vmd_ps);
		exit(0);
	}

	/* Send VMM device fd to vmm proc. */
	proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
	    IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0);

	/* Send shared global configuration to all children */
	if (config_setconfig(env) == -1)
		return (-1);

	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (vsw->sw_running)
			continue;
		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
			log_warn("%s: failed to create switch %s",
			    __func__, vsw->sw_name);
			switch_remove(vsw);
			return (-1);
		}
	}

	/* Default staggered start: one batch per online CPU. */
	if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
		env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
		if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz,
		    NULL, 0) == -1)
			ncpus = 1;
		env->vmd_cfg.parallelism = ncpus;
		log_debug("%s: setting staggered start configuration to "
		    "parallelism: %d and delay: %lld",
		    __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec);
	}

	log_debug("%s: starting vms in staggered fashion", __func__);
	evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
	/* start first batch */
	start_vm_batch(0, 0, NULL);

	return (0);
}

/*
 * Reload (or reset, when "reset" is non-zero) the running configuration.
 * A NULL/empty filename means the original config file and implies a
 * full reload, which first removes all non-running VMs.  Returns 0 on
 * success, -1 on failure.
 */
int
vmd_reload(unsigned int reset, const char *filename)
{
	struct vmd_vm		*vm, *next_vm;
	struct vmd_switch	*vsw;
	int			 reload = 0;

	/* Switch back to the default config file */
	if (filename == NULL || *filename == '\0') {
		filename = env->vmd_conffile;
		reload = 1;
	}

	log_debug("%s: level %d config file %s", __func__, reset, filename);

	if (reset) {
		/* Purge the configuration */
		config_purge(env, reset);
		config_setreset(env, reset);
	} else {
		/*
		 * Load or reload the configuration.
		 *
		 * Reloading removes all non-running VMs before processing the
		 * config file, whereas loading only adds to the existing list
		 * of VMs.
		 */

		if (reload) {
			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
			    next_vm) {
				if (!(vm->vm_state & VM_STATE_RUNNING)) {
					DPRINTF("%s: calling vm_remove",
					    __func__);
					vm_remove(vm, __func__);
				}
			}
		}

		if (parse_config(filename) == -1) {
			log_debug("%s: failed to load config file %s",
			    __func__, filename);
			return (-1);
		}

		if (reload) {
			/* Update shared global configuration in all children */
			if (config_setconfig(env) == -1)
				return (-1);
		}

		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
			if (vsw->sw_running)
				continue;
			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
				log_warn("%s: failed to create switch %s",
				    __func__, vsw->sw_name);
				switch_remove(vsw);
				return (-1);
			}
		}

		log_debug("%s: starting vms in staggered fashion", __func__);
		evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
		/* start first batch */
		start_vm_batch(0, 0, NULL);

	}

	return (0);
}

/*
 * Remove all VMs, kill the privsep children and exit(0).
 * Called from the signal handler on SIGTERM/SIGINT.
 */
void
vmd_shutdown(void)
{
	struct vmd_vm	*vm, *vm_next;

	log_debug("%s: performing shutdown", __func__);

	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
		vm_remove(vm, __func__);
	}

	proc_kill(&env->vmd_ps);
	free(env);

	log_warnx("parent terminating");
	exit(0);
}

/*
 * Look up a VM by its vmd-internal id.  Returns NULL if the id is 0
 * (never valid) or no VM matches.
 */
struct vmd_vm *
vm_getbyvmid(uint32_t vmid)
{
	struct vmd_vm	*vm;

	if (vmid == 0)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_vmid == vmid)
			return (vm);
	}

	return (NULL);
}

/*
 * Look up a VM by its vmm(4) kernel id.  Returns NULL if the id is 0
 * (reserved for non-running VMs) or no VM matches.
 */
struct vmd_vm *
vm_getbyid(uint32_t id)
{
	struct vmd_vm	*vm;

	if (id == 0)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_params.vmc_params.vcp_id == id)
			return (vm);
	}

	return (NULL);
}

uint32_t
1200vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1201{ 1202 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1203 return (0); 1204 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1205 id, vm->vm_vmid); 1206 return (vm->vm_vmid); 1207} 1208 1209uint32_t 1210vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1211{ 1212 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1213 return (0); 1214 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1215 vmid, vm->vm_params.vmc_params.vcp_id); 1216 return (vm->vm_params.vmc_params.vcp_id); 1217} 1218 1219struct vmd_vm * 1220vm_getbyname(const char *name) 1221{ 1222 struct vmd_vm *vm; 1223 1224 if (name == NULL) 1225 return (NULL); 1226 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1227 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1228 return (vm); 1229 } 1230 1231 return (NULL); 1232} 1233 1234struct vmd_vm * 1235vm_getbypid(pid_t pid) 1236{ 1237 struct vmd_vm *vm; 1238 1239 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1240 if (vm->vm_pid == pid) 1241 return (vm); 1242 } 1243 1244 return (NULL); 1245} 1246 1247void 1248vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1249{ 1250 struct privsep *ps = &env->vmd_ps; 1251 unsigned int i, j; 1252 1253 if (vm == NULL) 1254 return; 1255 1256 log_debug("%s: %s %s stopping vm %d%s", 1257 __func__, ps->ps_title[privsep_process], caller, 1258 vm->vm_vmid, keeptty ? 
", keeping tty open" : ""); 1259 1260 vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING 1261 | VM_STATE_SHUTDOWN); 1262 1263 if (vm->vm_iev.ibuf.fd != -1) { 1264 event_del(&vm->vm_iev.ev); 1265 close(vm->vm_iev.ibuf.fd); 1266 } 1267 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) { 1268 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1269 if (vm->vm_disks[i][j] != -1) { 1270 close(vm->vm_disks[i][j]); 1271 vm->vm_disks[i][j] = -1; 1272 } 1273 } 1274 } 1275 for (i = 0; i < VM_MAX_NICS_PER_VM; i++) { 1276 if (vm->vm_ifs[i].vif_fd != -1) { 1277 close(vm->vm_ifs[i].vif_fd); 1278 vm->vm_ifs[i].vif_fd = -1; 1279 } 1280 free(vm->vm_ifs[i].vif_name); 1281 free(vm->vm_ifs[i].vif_switch); 1282 free(vm->vm_ifs[i].vif_group); 1283 vm->vm_ifs[i].vif_name = NULL; 1284 vm->vm_ifs[i].vif_switch = NULL; 1285 vm->vm_ifs[i].vif_group = NULL; 1286 } 1287 if (vm->vm_kernel != -1) { 1288 close(vm->vm_kernel); 1289 vm->vm_kernel = -1; 1290 } 1291 if (vm->vm_cdrom != -1) { 1292 close(vm->vm_cdrom); 1293 vm->vm_cdrom = -1; 1294 } 1295 if (!keeptty) { 1296 vm_closetty(vm); 1297 vm->vm_uid = 0; 1298 } 1299} 1300 1301void 1302vm_remove(struct vmd_vm *vm, const char *caller) 1303{ 1304 struct privsep *ps = &env->vmd_ps; 1305 1306 if (vm == NULL) 1307 return; 1308 1309 log_debug("%s: %s %s removing vm %d from running config", 1310 __func__, ps->ps_title[privsep_process], caller, 1311 vm->vm_vmid); 1312 1313 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1314 1315 vm_stop(vm, 0, caller); 1316 if (vm->vm_kernel_path != NULL && !vm->vm_from_config) 1317 free(vm->vm_kernel_path); 1318 free(vm); 1319} 1320 1321int 1322vm_claimid(const char *name, int uid, uint32_t *id) 1323{ 1324 struct name2id *n2i = NULL; 1325 1326 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1327 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1328 goto out; 1329 1330 if (++env->vmd_nvm == 0) { 1331 log_warnx("too many vms"); 1332 return (-1); 1333 } 1334 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1335 log_warnx("could not 
alloc vm name"); 1336 return (-1); 1337 } 1338 n2i->id = env->vmd_nvm; 1339 n2i->uid = uid; 1340 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1341 log_warnx("vm name too long"); 1342 free(n2i); 1343 return (-1); 1344 } 1345 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1346 1347out: 1348 *id = n2i->id; 1349 return (0); 1350} 1351 1352int 1353vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1354 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1355{ 1356 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1357 struct vm_create_params *vcp = &vmc->vmc_params; 1358 struct vmop_owner *vmo = NULL; 1359 uint32_t nid, rng; 1360 unsigned int i, j; 1361 struct vmd_switch *sw; 1362 char *s; 1363 int ret = 0; 1364 1365 /* Check if this is an instance of another VM */ 1366 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { 1367 errno = ret; /* XXX might set invalid errno */ 1368 return (-1); 1369 } 1370 1371 errno = 0; 1372 *ret_vm = NULL; 1373 1374 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1375 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1376 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1377 uid) != 0) { 1378 errno = EPERM; 1379 goto fail; 1380 } 1381 vm->vm_kernel = vmc->vmc_kernel; 1382 *ret_vm = vm; 1383 errno = EALREADY; 1384 goto fail; 1385 } 1386 1387 if (vm_parent != NULL) 1388 vmo = &vm_parent->vm_params.vmc_insowner; 1389 1390 /* non-root users can only start existing VMs or instances */ 1391 if (vm_checkperm(NULL, vmo, uid) != 0) { 1392 log_warnx("permission denied"); 1393 errno = EPERM; 1394 goto fail; 1395 } 1396 if (vmc->vmc_flags == 0) { 1397 log_warnx("invalid configuration, no devices"); 1398 errno = VMD_DISK_MISSING; 1399 goto fail; 1400 } 1401 if (vcp->vcp_ncpus == 0) 1402 vcp->vcp_ncpus = 1; 1403 if (vcp->vcp_memranges[0].vmr_size == 0) 1404 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1405 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1406 log_warnx("invalid number of CPUs"); 1407 goto fail; 1408 } 
else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM) { 1409 log_warnx("invalid number of disks"); 1410 goto fail; 1411 } else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) { 1412 log_warnx("invalid number of interfaces"); 1413 goto fail; 1414 } else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0 1415 && strlen(vmc->vmc_cdrom) == 0) { 1416 log_warnx("no kernel or disk/cdrom specified"); 1417 goto fail; 1418 } else if (strlen(vcp->vcp_name) == 0) { 1419 log_warnx("invalid VM name"); 1420 goto fail; 1421 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1422 *vcp->vcp_name == '_') { 1423 log_warnx("invalid VM name"); 1424 goto fail; 1425 } else { 1426 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1427 if (!(isalnum((unsigned char)*s) || *s == '.' || \ 1428 *s == '-' || *s == '_')) { 1429 log_warnx("invalid VM name"); 1430 goto fail; 1431 } 1432 } 1433 } 1434 1435 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1436 goto fail; 1437 1438 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1439 vmc = &vm->vm_params; 1440 vcp = &vmc->vmc_params; 1441 vm->vm_pid = -1; 1442 vm->vm_tty = -1; 1443 vm->vm_receive_fd = -1; 1444 vm->vm_kernel = -1; 1445 vm->vm_state &= ~VM_STATE_PAUSED; 1446 1447 if (vmc->vmc_kernel > -1) 1448 vm->vm_kernel = vmc->vmc_kernel; 1449 1450 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) 1451 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1452 vm->vm_disks[i][j] = -1; 1453 for (i = 0; i < VM_MAX_NICS_PER_VM; i++) 1454 vm->vm_ifs[i].vif_fd = -1; 1455 for (i = 0; i < vmc->vmc_nnics; i++) { 1456 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1457 /* inherit per-interface flags from the switch */ 1458 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1459 } 1460 1461 /* 1462 * If the MAC address is zero, always randomize it in vmd(8) 1463 * because we cannot rely on the guest OS to do the right 1464 * thing like OpenBSD does. Based on ether_fakeaddr() 1465 * from the kernel, incremented by one to differentiate 1466 * the source. 
1467 */ 1468 if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN) == 0) { 1469 rng = arc4random(); 1470 vmc->vmc_macs[i][0] = 0xfe; 1471 vmc->vmc_macs[i][1] = 0xe1; 1472 vmc->vmc_macs[i][2] = 0xba + 1; 1473 vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1474 vmc->vmc_macs[i][4] = rng; 1475 vmc->vmc_macs[i][5] = rng >> 8; 1476 } 1477 } 1478 vm->vm_cdrom = -1; 1479 vm->vm_iev.ibuf.fd = -1; 1480 1481 /* 1482 * Assign a new internal Id if not specified and we succeed in 1483 * claiming a new Id. 1484 */ 1485 if (id != 0) 1486 vm->vm_vmid = id; 1487 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1488 goto fail; 1489 else 1490 vm->vm_vmid = nid; 1491 1492 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1493 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1494 1495 *ret_vm = vm; 1496 return (0); 1497 fail: 1498 if (errno == 0) 1499 errno = EINVAL; 1500 return (-1); 1501} 1502 1503int 1504vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1505 struct vmop_create_params *vmc, uid_t uid) 1506{ 1507 char *name; 1508 struct vm_create_params *vcp = &vmc->vmc_params; 1509 struct vmop_create_params *vmcp; 1510 struct vm_create_params *vcpp; 1511 unsigned int i, j; 1512 1513 /* return without error if the parent is NULL (nothing to inherit) */ 1514 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1515 vmc->vmc_instance[0] == '\0') 1516 return (0); 1517 1518 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1519 return (VMD_PARENT_INVALID); 1520 } 1521 1522 vmcp = &(*vm_parent)->vm_params; 1523 vcpp = &vmcp->vmc_params; 1524 1525 /* Are we allowed to create an instance from this VM? 
*/ 1526 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1527 log_warnx("vm \"%s\" no permission to create vm instance", 1528 vcpp->vcp_name); 1529 return (ENAMETOOLONG); 1530 } 1531 1532 name = vcp->vcp_name; 1533 1534 if (vm_getbyname(vcp->vcp_name) != NULL || 1535 vm_getbyvmid(vcp->vcp_id) != NULL) { 1536 return (EPROCLIM); 1537 } 1538 1539 /* CPU */ 1540 if (vcp->vcp_ncpus == 0) 1541 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1542 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1543 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1544 log_warnx("vm \"%s\" no permission to set cpus", name); 1545 return (EPERM); 1546 } 1547 1548 /* memory */ 1549 if (vcp->vcp_memranges[0].vmr_size == 0) 1550 vcp->vcp_memranges[0].vmr_size = 1551 vcpp->vcp_memranges[0].vmr_size; 1552 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1553 vcp->vcp_memranges[0].vmr_size != 1554 vcpp->vcp_memranges[0].vmr_size) { 1555 log_warnx("vm \"%s\" no permission to set memory", name); 1556 return (EPERM); 1557 } 1558 1559 /* disks cannot be inherited */ 1560 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1561 vmc->vmc_ndisks) { 1562 log_warnx("vm \"%s\" no permission to set disks", name); 1563 return (EPERM); 1564 } 1565 for (i = 0; i < vmc->vmc_ndisks; i++) { 1566 /* Check if this disk is already used in the parent */ 1567 for (j = 0; j < vmcp->vmc_ndisks; j++) { 1568 if (strcmp(vmc->vmc_disks[i], 1569 vmcp->vmc_disks[j]) == 0) { 1570 log_warnx("vm \"%s\" disk %s cannot be reused", 1571 name, vmc->vmc_disks[i]); 1572 return (EBUSY); 1573 } 1574 } 1575 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1576 } 1577 1578 /* interfaces */ 1579 if (vmc->vmc_nnics > 0 && 1580 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1581 vmc->vmc_nnics != vmcp->vmc_nnics) { 1582 log_warnx("vm \"%s\" no permission to set interfaces", name); 1583 return (EPERM); 1584 } 1585 for (i = 0; i < vmcp->vmc_nnics; i++) { 1586 /* Interface got overwritten */ 1587 if (i < vmc->vmc_nnics) 1588 continue; 
1589 1590 /* Copy interface from parent */ 1591 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1592 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1593 sizeof(vmc->vmc_ifnames[i])); 1594 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1595 sizeof(vmc->vmc_ifswitch[i])); 1596 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1597 sizeof(vmc->vmc_ifgroup[i])); 1598 memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i], 1599 sizeof(vmc->vmc_macs[i])); 1600 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1601 vmc->vmc_nnics++; 1602 } 1603 for (i = 0; i < vmc->vmc_nnics; i++) { 1604 for (j = 0; j < vmcp->vmc_nnics; j++) { 1605 if (memcmp(zero_mac, vmc->vmc_macs[i], 1606 sizeof(vmc->vmc_macs[i])) != 0 && 1607 memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i], 1608 sizeof(vmc->vmc_macs[i])) != 0) { 1609 log_warnx("vm \"%s\" lladdr cannot be reused", 1610 name); 1611 return (EBUSY); 1612 } 1613 if (strlen(vmc->vmc_ifnames[i]) && 1614 strcmp(vmc->vmc_ifnames[i], 1615 vmcp->vmc_ifnames[j]) == 0) { 1616 log_warnx("vm \"%s\" %s cannot be reused", 1617 vmc->vmc_ifnames[i], name); 1618 return (EBUSY); 1619 } 1620 } 1621 } 1622 1623 /* kernel */ 1624 if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL && 1625 strnlen((*vm_parent)->vm_kernel_path, PATH_MAX) < PATH_MAX)) { 1626 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1627 log_warnx("vm \"%s\" no permission to set boot image", 1628 name); 1629 return (EPERM); 1630 } 1631 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1632 } 1633 1634 /* cdrom */ 1635 if (strlen(vmc->vmc_cdrom) > 0) { 1636 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1637 log_warnx("vm \"%s\" no permission to set cdrom", name); 1638 return (EPERM); 1639 } 1640 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1641 } else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom, 1642 sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) { 1643 log_warnx("vm \"%s\" cdrom name too long", name); 1644 return (EINVAL); 1645 } 1646 1647 /* user */ 
1648 if (vmc->vmc_owner.uid == 0) 1649 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1650 else if (vmc->vmc_owner.uid != uid && 1651 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1652 log_warnx("vm \"%s\" user mismatch", name); 1653 return (EPERM); 1654 } 1655 1656 /* group */ 1657 if (vmc->vmc_owner.gid == 0) 1658 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1659 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1660 log_warnx("vm \"%s\" group mismatch", name); 1661 return (EPERM); 1662 } 1663 1664 /* child instances */ 1665 if (vmc->vmc_insflags) { 1666 log_warnx("vm \"%s\" cannot change instance permissions", name); 1667 return (EPERM); 1668 } 1669 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1670 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1671 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1672 vmc->vmc_insflags = vmcp->vmc_insflags; 1673 } else { 1674 vmc->vmc_insowner.gid = 0; 1675 vmc->vmc_insowner.uid = 0; 1676 vmc->vmc_insflags = 0; 1677 } 1678 1679 /* finished, remove instance flags */ 1680 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1681 1682 return (0); 1683} 1684 1685/* 1686 * vm_checkperm 1687 * 1688 * Checks if the user represented by the 'uid' parameter is allowed to 1689 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1690 * console.) 
1691 * 1692 * Parameters: 1693 * vm: the VM whose permission is to be checked 1694 * vmo: the required uid/gid to be checked 1695 * uid: the user ID of the user making the request 1696 * 1697 * Return values: 1698 * 0: the permission should be granted 1699 * -1: the permission check failed (also returned if vm == null) 1700 */ 1701int 1702vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1703{ 1704 struct group *gr; 1705 struct passwd *pw; 1706 char **grmem; 1707 1708 /* root has no restrictions */ 1709 if (uid == 0) 1710 return (0); 1711 1712 if (vmo == NULL) 1713 return (-1); 1714 1715 /* check user */ 1716 if (vm == NULL) { 1717 if (vmo->uid == uid) 1718 return (0); 1719 } else { 1720 /* 1721 * check user of running vm (the owner of a running vm can 1722 * be different to (or more specific than) the configured owner. 1723 */ 1724 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1725 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1726 return (0); 1727 } 1728 1729 /* check groups */ 1730 if (vmo->gid != -1) { 1731 if ((pw = getpwuid(uid)) == NULL) 1732 return (-1); 1733 if (pw->pw_gid == vmo->gid) 1734 return (0); 1735 if ((gr = getgrgid(vmo->gid)) != NULL) { 1736 for (grmem = gr->gr_mem; *grmem; grmem++) 1737 if (strcmp(*grmem, pw->pw_name) == 0) 1738 return (0); 1739 } 1740 } 1741 1742 return (-1); 1743} 1744 1745/* 1746 * vm_checkinsflag 1747 * 1748 * Checks whether the non-root user is allowed to set an instance option. 
1749 * 1750 * Parameters: 1751 * vmc: the VM create parameters 1752 * flag: the flag to be checked 1753 * uid: the user ID of the user making the request 1754 * 1755 * Return values: 1756 * 0: the permission should be granted 1757 * -1: the permission check failed (also returned if vm == null) 1758 */ 1759int 1760vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1761{ 1762 /* root has no restrictions */ 1763 if (uid == 0) 1764 return (0); 1765 1766 if ((vmc->vmc_insflags & flag) == 0) 1767 return (-1); 1768 1769 return (0); 1770} 1771 1772/* 1773 * vm_checkaccess 1774 * 1775 * Checks if the user represented by the 'uid' parameter is allowed to 1776 * access the file described by the 'path' parameter. 1777 * 1778 * Parameters: 1779 * fd: the file descriptor of the opened file 1780 * uflag: check if the userid has access to the file 1781 * uid: the user ID of the user making the request 1782 * amode: the access flags of R_OK and W_OK 1783 * 1784 * Return values: 1785 * 0: the permission should be granted 1786 * -1: the permission check failed 1787 */ 1788int 1789vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1790{ 1791 struct group *gr; 1792 struct passwd *pw; 1793 char **grmem; 1794 struct stat st; 1795 mode_t mode; 1796 1797 if (fd == -1) 1798 return (-1); 1799 1800 /* 1801 * File has to be accessible and a regular file 1802 */ 1803 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1804 return (-1); 1805 1806 /* root has no restrictions */ 1807 if (uid == 0 || uflag == 0) 1808 return (0); 1809 1810 /* check other */ 1811 mode = amode & W_OK ? S_IWOTH : 0; 1812 mode |= amode & R_OK ? S_IROTH : 0; 1813 if ((st.st_mode & mode) == mode) 1814 return (0); 1815 1816 /* check user */ 1817 mode = amode & W_OK ? S_IWUSR : 0; 1818 mode |= amode & R_OK ? S_IRUSR : 0; 1819 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1820 return (0); 1821 1822 /* check groups */ 1823 mode = amode & W_OK ? 
S_IWGRP : 0; 1824 mode |= amode & R_OK ? S_IRGRP : 0; 1825 if ((st.st_mode & mode) != mode) 1826 return (-1); 1827 if ((pw = getpwuid(uid)) == NULL) 1828 return (-1); 1829 if (pw->pw_gid == st.st_gid) 1830 return (0); 1831 if ((gr = getgrgid(st.st_gid)) != NULL) { 1832 for (grmem = gr->gr_mem; *grmem; grmem++) 1833 if (strcmp(*grmem, pw->pw_name) == 0) 1834 return (0); 1835 } 1836 1837 return (-1); 1838} 1839 1840int 1841vm_opentty(struct vmd_vm *vm) 1842{ 1843 struct ptmget ptm; 1844 struct stat st; 1845 struct group *gr; 1846 uid_t uid; 1847 gid_t gid; 1848 mode_t mode; 1849 int on; 1850 1851 /* 1852 * Open tty with pre-opened PTM fd 1853 */ 1854 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 1855 return (-1); 1856 1857 /* 1858 * We use user ioctl(2) mode to pass break commands. 1859 */ 1860 on = 1; 1861 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) 1862 fatal("could not enable user ioctl mode"); 1863 1864 vm->vm_tty = ptm.cfd; 1865 close(ptm.sfd); 1866 if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname)) 1867 >= sizeof(vm->vm_ttyname)) { 1868 log_warnx("%s: truncated ttyname", __func__); 1869 goto fail; 1870 } 1871 1872 uid = vm->vm_uid; 1873 gid = vm->vm_params.vmc_owner.gid; 1874 1875 if (vm->vm_params.vmc_owner.gid != -1) { 1876 mode = 0660; 1877 } else if ((gr = getgrnam("tty")) != NULL) { 1878 gid = gr->gr_gid; 1879 mode = 0620; 1880 } else { 1881 mode = 0600; 1882 gid = 0; 1883 } 1884 1885 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1886 __func__, vm->vm_params.vmc_params.vcp_name, 1887 vm->vm_ttyname, uid, gid, mode); 1888 1889 /* 1890 * Change ownership and mode of the tty as required. 
1891 * Loosely based on the implementation of sshpty.c 1892 */ 1893 if (stat(vm->vm_ttyname, &st) == -1) 1894 goto fail; 1895 1896 if (st.st_uid != uid || st.st_gid != gid) { 1897 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1898 log_warn("chown %s %d %d failed, uid %d", 1899 vm->vm_ttyname, uid, gid, getuid()); 1900 1901 /* Ignore failure on read-only filesystems */ 1902 if (!((errno == EROFS) && 1903 (st.st_uid == uid || st.st_uid == 0))) 1904 goto fail; 1905 } 1906 } 1907 1908 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1909 if (chmod(vm->vm_ttyname, mode) == -1) { 1910 log_warn("chmod %s %o failed, uid %d", 1911 vm->vm_ttyname, mode, getuid()); 1912 1913 /* Ignore failure on read-only filesystems */ 1914 if (!((errno == EROFS) && 1915 (st.st_uid == uid || st.st_uid == 0))) 1916 goto fail; 1917 } 1918 } 1919 1920 return (0); 1921 fail: 1922 vm_closetty(vm); 1923 return (-1); 1924} 1925 1926void 1927vm_closetty(struct vmd_vm *vm) 1928{ 1929 if (vm->vm_tty != -1) { 1930 /* Release and close the tty */ 1931 if (fchown(vm->vm_tty, 0, 0) == -1) 1932 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1933 if (fchmod(vm->vm_tty, 0666) == -1) 1934 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1935 close(vm->vm_tty); 1936 vm->vm_tty = -1; 1937 } 1938 memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname)); 1939} 1940 1941void 1942switch_remove(struct vmd_switch *vsw) 1943{ 1944 if (vsw == NULL) 1945 return; 1946 1947 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1948 1949 free(vsw->sw_group); 1950 free(vsw->sw_name); 1951 free(vsw); 1952} 1953 1954struct vmd_switch * 1955switch_getbyname(const char *name) 1956{ 1957 struct vmd_switch *vsw; 1958 1959 if (name == NULL) 1960 return (NULL); 1961 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1962 if (strcmp(vsw->sw_name, name) == 0) 1963 return (vsw); 1964 } 1965 1966 return (NULL); 1967} 1968 1969char * 1970get_string(uint8_t *ptr, size_t len) 1971{ 1972 size_t i; 1973 1974 for (i = 0; i < len; i++) 1975 if 
(!isprint((unsigned char)ptr[i])) 1976 break; 1977 1978 return strndup(ptr, i); 1979} 1980 1981uint32_t 1982prefixlen2mask(uint8_t prefixlen) 1983{ 1984 if (prefixlen == 0) 1985 return (0); 1986 1987 if (prefixlen > 32) 1988 prefixlen = 32; 1989 1990 return (htonl(0xffffffff << (32 - prefixlen))); 1991} 1992 1993void 1994prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 1995{ 1996 struct in6_addr s6; 1997 int i; 1998 1999 if (prefixlen > 128) 2000 prefixlen = 128; 2001 2002 memset(&s6, 0, sizeof(s6)); 2003 for (i = 0; i < prefixlen / 8; i++) 2004 s6.s6_addr[i] = 0xff; 2005 i = prefixlen % 8; 2006 if (i) 2007 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2008 2009 memcpy(mask, &s6, sizeof(s6)); 2010} 2011 2012void 2013getmonotime(struct timeval *tv) 2014{ 2015 struct timespec ts; 2016 2017 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2018 fatal("clock_gettime"); 2019 2020 TIMESPEC_TO_TIMEVAL(tv, &ts); 2021} 2022 2023static inline void 2024vm_terminate(struct vmd_vm *vm, const char *caller) 2025{ 2026 if (vm->vm_from_config) 2027 vm_stop(vm, 0, caller); 2028 else { 2029 /* vm_remove calls vm_stop */ 2030 vm_remove(vm, caller); 2031 } 2032} 2033 2034/* 2035 * Utility function for closing vm file descriptors. Assumes an fd of -1 was 2036 * already closed or never opened. 2037 * 2038 * Returns 0 on success, otherwise -1 on failure. 2039 */ 2040int 2041close_fd(int fd) 2042{ 2043 int ret; 2044 2045 if (fd == -1) 2046 return (0); 2047 2048#ifdef POSIX_CLOSE_RESTART 2049 do { ret = close(fd); } while (ret == -1 && errno == EINTR); 2050#else 2051 ret = close(fd); 2052#endif /* POSIX_CLOSE_RESTART */ 2053 2054 if (ret == -1 && errno == EIO) 2055 log_warn("%s(%d)", __func__, fd); 2056 2057 return (ret); 2058} 2059