vmd.c — OpenBSD virtual machine daemon, revision 1.106 (source listing follows)
1/* $OpenBSD: vmd.c,v 1.106 2018/11/26 05:44:46 ori Exp $ */ 2 3/* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/param.h> /* nitems */ 20#include <sys/queue.h> 21#include <sys/wait.h> 22#include <sys/cdefs.h> 23#include <sys/stat.h> 24#include <sys/tty.h> 25#include <sys/ttycom.h> 26#include <sys/ioctl.h> 27 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31#include <termios.h> 32#include <errno.h> 33#include <event.h> 34#include <fcntl.h> 35#include <pwd.h> 36#include <signal.h> 37#include <syslog.h> 38#include <unistd.h> 39#include <util.h> 40#include <ctype.h> 41#include <pwd.h> 42#include <grp.h> 43 44#include <machine/specialreg.h> 45#include <machine/vmmvar.h> 46 47#include "proc.h" 48#include "atomicio.h" 49#include "vmd.h" 50 51__dead void usage(void); 52 53int main(int, char **); 54int vmd_configure(void); 55void vmd_sighdlr(int sig, short event, void *arg); 56void vmd_shutdown(void); 57int vmd_control_run(void); 58int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 59int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 60int vmd_check_vmh(struct vm_dump_header *); 61 62int vm_instance(struct privsep *, struct vmd_vm **, 63 struct 
vmop_create_params *, uid_t); 64int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 65uint32_t vm_claimid(const char *, int); 66 67struct vmd *env; 68 69static struct privsep_proc procs[] = { 70 /* Keep "priv" on top as procs[0] */ 71 { "priv", PROC_PRIV, NULL, priv }, 72 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 73 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 74}; 75 76/* For the privileged process */ 77static struct privsep_proc *proc_priv = &procs[0]; 78static struct passwd proc_privpw; 79static const uint8_t zero_mac[ETHER_ADDR_LEN]; 80 81int 82vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 83{ 84 struct privsep *ps = p->p_ps; 85 int res = 0, ret = 0, cmd = 0, verbose; 86 unsigned int v = 0, flags; 87 struct vmop_create_params vmc; 88 struct vmop_id vid; 89 struct vmop_result vmr; 90 struct vm_dump_header vmh; 91 struct vmd_vm *vm = NULL; 92 char *str = NULL; 93 uint32_t id = 0; 94 struct control_sock *rcs; 95 96 switch (imsg->hdr.type) { 97 case IMSG_VMDOP_START_VM_REQUEST: 98 IMSG_SIZE_CHECK(imsg, &vmc); 99 memcpy(&vmc, imsg->data, sizeof(vmc)); 100 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 101 if (vmc.vmc_flags == 0) { 102 /* start an existing VM with pre-configured options */ 103 if (!(ret == -1 && errno == EALREADY && 104 vm->vm_running == 0)) { 105 res = errno; 106 cmd = IMSG_VMDOP_START_VM_RESPONSE; 107 } 108 } else if (ret != 0) { 109 res = errno; 110 cmd = IMSG_VMDOP_START_VM_RESPONSE; 111 } 112 if (res == 0 && 113 config_setvm(ps, vm, 114 imsg->hdr.peerid, vm->vm_params.vmc_owner.uid) == -1) { 115 res = errno; 116 cmd = IMSG_VMDOP_START_VM_RESPONSE; 117 } 118 break; 119 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 120 IMSG_SIZE_CHECK(imsg, &vid); 121 memcpy(&vid, imsg->data, sizeof(vid)); 122 flags = vid.vid_flags; 123 124 if ((id = vid.vid_id) == 0) { 125 /* Lookup vm (id) by name */ 126 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 127 res = ENOENT; 128 cmd = 
IMSG_VMDOP_TERMINATE_VM_RESPONSE; 129 break; 130 } else if (vm->vm_shutdown && 131 (flags & VMOP_FORCE) == 0) { 132 res = EALREADY; 133 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 134 break; 135 } else if (vm->vm_running == 0) { 136 res = EINVAL; 137 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 138 break; 139 } 140 id = vm->vm_vmid; 141 } else if ((vm = vm_getbyvmid(id)) == NULL) { 142 res = ENOENT; 143 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 144 break; 145 } 146 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 147 vid.vid_uid) != 0) { 148 res = EPERM; 149 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 150 break; 151 } 152 153 memset(&vid, 0, sizeof(vid)); 154 vid.vid_id = id; 155 vid.vid_flags = flags; 156 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 157 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 158 return (-1); 159 break; 160 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 161 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 162 break; 163 case IMSG_VMDOP_LOAD: 164 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 165 str = get_string((uint8_t *)imsg->data, 166 IMSG_DATA_SIZE(imsg)); 167 case IMSG_VMDOP_RELOAD: 168 if (vmd_reload(0, str) == -1) 169 cmd = IMSG_CTL_FAIL; 170 else 171 cmd = IMSG_CTL_OK; 172 free(str); 173 break; 174 case IMSG_CTL_RESET: 175 IMSG_SIZE_CHECK(imsg, &v); 176 memcpy(&v, imsg->data, sizeof(v)); 177 if (vmd_reload(v, NULL) == -1) 178 cmd = IMSG_CTL_FAIL; 179 else 180 cmd = IMSG_CTL_OK; 181 break; 182 case IMSG_CTL_VERBOSE: 183 IMSG_SIZE_CHECK(imsg, &verbose); 184 memcpy(&verbose, imsg->data, sizeof(verbose)); 185 log_setverbose(verbose); 186 187 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 188 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 189 cmd = IMSG_CTL_OK; 190 break; 191 case IMSG_VMDOP_PAUSE_VM: 192 case IMSG_VMDOP_UNPAUSE_VM: 193 IMSG_SIZE_CHECK(imsg, &vid); 194 memcpy(&vid, imsg->data, sizeof(vid)); 195 if (vid.vid_id == 0) { 196 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 197 res = ENOENT; 198 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 199 
break; 200 } else { 201 vid.vid_id = vm->vm_vmid; 202 } 203 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 204 res = ENOENT; 205 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 206 break; 207 } 208 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 209 vid.vid_uid) != 0) { 210 res = EPERM; 211 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 212 break; 213 } 214 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 215 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 216 break; 217 case IMSG_VMDOP_SEND_VM_REQUEST: 218 IMSG_SIZE_CHECK(imsg, &vid); 219 memcpy(&vid, imsg->data, sizeof(vid)); 220 id = vid.vid_id; 221 if (vid.vid_id == 0) { 222 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 223 res = ENOENT; 224 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 225 close(imsg->fd); 226 break; 227 } else { 228 vid.vid_id = vm->vm_vmid; 229 } 230 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 231 res = ENOENT; 232 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 233 close(imsg->fd); 234 break; 235 } else { 236 } 237 vmr.vmr_id = vid.vid_id; 238 log_debug("%s: sending fd to vmm", __func__); 239 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 240 imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); 241 break; 242 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 243 IMSG_SIZE_CHECK(imsg, &vid); 244 memcpy(&vid, imsg->data, sizeof(vid)); 245 if (imsg->fd == -1) { 246 log_warnx("%s: invalid fd", __func__); 247 return (-1); 248 } 249 if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != 250 sizeof(vmh)) { 251 log_warnx("%s: error reading vmh from received vm", 252 __func__); 253 res = EIO; 254 close(imsg->fd); 255 cmd = IMSG_VMDOP_START_VM_RESPONSE; 256 break; 257 } 258 259 if (vmd_check_vmh(&vmh)) { 260 res = ENOENT; 261 close(imsg->fd); 262 cmd = IMSG_VMDOP_START_VM_RESPONSE; 263 break; 264 } 265 if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != 266 sizeof(vmc)) { 267 log_warnx("%s: error reading vmc from received vm", 268 __func__); 269 res = EIO; 270 close(imsg->fd); 271 cmd = IMSG_VMDOP_START_VM_RESPONSE; 272 break; 273 } 274 
strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 275 sizeof(vmc.vmc_params.vcp_name)); 276 vmc.vmc_params.vcp_id = 0; 277 278 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 279 if (ret != 0) { 280 res = errno; 281 cmd = IMSG_VMDOP_START_VM_RESPONSE; 282 close(imsg->fd); 283 } else { 284 vm->vm_received = 1; 285 config_setvm(ps, vm, imsg->hdr.peerid, 286 vmc.vmc_owner.uid); 287 log_debug("%s: sending fd to vmm", __func__); 288 proc_compose_imsg(ps, PROC_VMM, -1, 289 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, 290 NULL, 0); 291 } 292 break; 293 case IMSG_VMDOP_DONE: 294 control_reset(&ps->ps_csock); 295 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 296 control_reset(rcs); 297 cmd = 0; 298 break; 299 default: 300 return (-1); 301 } 302 303 switch (cmd) { 304 case 0: 305 break; 306 case IMSG_VMDOP_START_VM_RESPONSE: 307 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 308 memset(&vmr, 0, sizeof(vmr)); 309 vmr.vmr_result = res; 310 vmr.vmr_id = id; 311 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 312 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 313 return (-1); 314 break; 315 default: 316 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 317 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 318 return (-1); 319 break; 320 } 321 322 return (0); 323} 324 325int 326vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 327{ 328 struct vmop_result vmr; 329 struct privsep *ps = p->p_ps; 330 int res = 0; 331 struct vmd_vm *vm; 332 struct vm_create_params *vcp; 333 struct vmop_info_result vir; 334 335 switch (imsg->hdr.type) { 336 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 337 IMSG_SIZE_CHECK(imsg, &vmr); 338 memcpy(&vmr, imsg->data, sizeof(vmr)); 339 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 340 break; 341 proc_compose_imsg(ps, PROC_CONTROL, -1, 342 imsg->hdr.type, imsg->hdr.peerid, -1, 343 imsg->data, sizeof(imsg->data)); 344 log_info("%s: paused vm %d successfully", 345 vm->vm_params.vmc_params.vcp_name, 346 vm->vm_vmid); 347 break; 348 case 
IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		/*
		 * NOTE(review): sizeof(imsg->data) is the size of a void
		 * pointer, not of the payload; the pause case above does the
		 * same.  Presumably this should be sizeof(vmr) — confirm
		 * against the control-side consumer.
		 */
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type, imsg->hdr.peerid, -1,
		    imsg->data, sizeof(imsg->data));
		log_info("%s: unpaused vm %d successfully.",
		    vm->vm_params.vmc_params.vcp_name,
		    vm->vm_vmid);
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		/* the vmm process echoes our vmid back in hdr.peerid */
		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
			break;
		vm->vm_pid = vmr.vmr_pid;
		vcp = &vm->vm_params.vmc_params;
		vcp->vcp_id = vmr.vmr_id;

		/*
		 * If the peerid is not -1, forward the response back to the
		 * control socket.  If it is -1, the request originated
		 * from the parent, not the control socket.
		 */
		if (vm->vm_peerid != (uint32_t)-1) {
			(void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
			    sizeof(vmr.vmr_ttyname));
			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
			    imsg->hdr.type, vm->vm_peerid, -1,
			    &vmr, sizeof(vmr)) == -1) {
				/* NOTE(review): "foward" typo in log string */
				errno = vmr.vmr_result;
				log_warn("%s: failed to foward vm result",
				    vcp->vcp_name);
				vm_remove(vm, __func__);
				return (-1);
			}
		}

		if (vmr.vmr_result) {
			/* vmm reported a start failure; drop the VM */
			errno = vmr.vmr_result;
			log_warn("%s: failed to start vm", vcp->vcp_name);
			vm_remove(vm, __func__);
			break;
		}

		/* Now configure all the interfaces */
		if (vm_priv_ifconfig(ps, vm) == -1) {
			log_warn("%s: failed to configure vm", vcp->vcp_name);
			vm_remove(vm, __func__);
			break;
		}

		log_info("%s: started vm %d successfully, tty %s",
		    vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
		break;
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
		    __func__, vmr.vmr_id);
		proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		if (vmr.vmr_result == 0) {
			/* Mark VM as shutting down */
			vm->vm_shutdown = 1;
		}
		break;
	case IMSG_VMDOP_SEND_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		if (!vmr.vmr_result) {
			log_info("%s: sent vm %d successfully.",
			    vm->vm_params.vmc_params.vcp_name,
			    vm->vm_vmid);
			/* config-defined VMs are kept, ad-hoc ones removed */
			if (vm->vm_from_config)
				vm_stop(vm, 0, __func__);
			else
				vm_remove(vm, __func__);
		}

		/* Send a response if a control client is waiting for it */
		if (imsg->hdr.peerid != (uint32_t)-1) {
			/* the error is meaningless for deferred responses */
			vmr.vmr_result = 0;

			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
			    IMSG_VMDOP_SEND_VM_RESPONSE,
			    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
				return (-1);
		}
		break;
	case IMSG_VMDOP_TERMINATE_VM_EVENT:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
		    __func__, vmr.vmr_id, vmr.vmr_result);
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
			log_debug("%s: vm %d is no longer available",
			    __func__, vmr.vmr_id);
			break;
		}
		if (vmr.vmr_result != EAGAIN) {
			if (vm->vm_from_config)
				vm_stop(vm, 0, __func__);
			else
				vm_remove(vm, __func__);
		} else {
			/* Stop VM instance but keep the tty open */
			vm_stop(vm, 1, __func__);
			config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
		}

		/* Send a response if a control client is waiting for it */
		if (imsg->hdr.peerid != (uint32_t)-1) {
			/* the error is meaningless for deferred responses */
			vmr.vmr_result = 0;

			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
			    IMSG_VMDOP_TERMINATE_VM_RESPONSE,
			    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
				return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_DATA:
		IMSG_SIZE_CHECK(imsg, &vir);
		memcpy(&vir, imsg->data, sizeof(vir));
		if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
			/* Augment the vmm data with parent-side knowledge */
			memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
			if (vm->vm_ttyname != NULL)
				strlcpy(vir.vir_ttyname, vm->vm_ttyname,
				    sizeof(vir.vir_ttyname));
			if (vm->vm_shutdown) {
				/* XXX there might be a nicer way */
				(void)strlcat(vir.vir_info.vir_name,
				    " - stopping",
				    sizeof(vir.vir_info.vir_name));
			}
			/* get the user id who started the vm */
			vir.vir_uid = vm->vm_uid;
			vir.vir_gid = vm->vm_params.vmc_owner.gid;
		}
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
			/*
			 * NOTE(review): if vm_getbyvmid() returned NULL
			 * above, vm is NULL here and vm->vm_vmid /
			 * vm_remove(vm, ...) dereference it — verify.
			 */
			log_debug("%s: GET_INFO_VM failed for vm %d, removing",
			    __func__, vm->vm_vmid);
			vm_remove(vm, __func__);
			return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
		/*
		 * PROC_VMM has responded with the *running* VMs, now we
		 * append the others.  These use the special value 0 for their
		 * kernel id to indicate that they are not running.
		 */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			if (!vm->vm_running) {
				memset(&vir, 0, sizeof(vir));
				vir.vir_info.vir_id = vm->vm_vmid;
				strlcpy(vir.vir_info.vir_name,
				    vm->vm_params.vmc_params.vcp_name,
				    VMM_MAX_NAME_LEN);
				vir.vir_info.vir_memory_size =
				    vm->vm_params.vmc_params.
				    vcp_memranges[0].vmr_size;
				vir.vir_info.vir_ncpus =
				    vm->vm_params.vmc_params.vcp_ncpus;
				/* get the configured user id for this vm */
				vir.vir_uid = vm->vm_params.vmc_owner.uid;
				vir.vir_gid = vm->vm_params.vmc_owner.gid;
				if (proc_compose_imsg(ps, PROC_CONTROL, -1,
				    IMSG_VMDOP_GET_INFO_VM_DATA,
				    imsg->hdr.peerid, -1, &vir,
				    sizeof(vir)) == -1) {
					log_debug("%s: GET_INFO_VM_END failed",
					    __func__);
					vm_remove(vm, __func__);
					return (-1);
				}
			}
		}
		IMSG_SIZE_CHECK(imsg, &res);
		proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
		break;
	default:
		return (-1);
	}

	return (0);
}

/*
 * Validate the CPUID records stored in a received VM's dump header against
 * the CPU of this host.  Returns 0 if the dump is compatible with the local
 * CPU (i.e. the saved features are a subset of what the host offers), -1
 * otherwise.
 */
int
vmd_check_vmh(struct vm_dump_header *vmh)
{
	int i;
	unsigned int code, leaf;
	unsigned int a, b, c, d;

	if (vmh->vmh_version != VM_DUMP_VERSION) {
		log_warnx("%s: incompatible dump version", __func__);
		return (-1);
	}

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		code = vmh->vmh_cpuids[i].code;
		leaf = vmh->vmh_cpuids[i].leaf;
		if (leaf != 0x00) {
			log_debug("%s: invalid leaf 0x%x for code 0x%x",
			    __func__, leaf, code);
			return (-1);
		}

		switch (code) {
		case 0x00:
			/* basic CPUID level and vendor string must match */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].a > a) {
				log_debug("%s: incompatible cpuid level",
				    __func__);
				return (-1);
			}
			if (!(vmh->vmh_cpuids[i].b == b &&
			    vmh->vmh_cpuids[i].c == c &&
			    vmh->vmh_cpuids[i].d == d)) {
				log_debug("%s: incompatible cpu brand",
				    __func__);
				return (-1);
			}
			break;

		case 0x01:
			/* saved ECX/EDX feature bits must be a host subset */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x07:
			/*
			 * Structured extended features (EBX/ECX subsets).
			 * NOTE(review): the log strings below say "reg: c"
			 * and "reg: d" but the checks are on the b and c
			 * registers — looks copy-pasted from case 0x01;
			 * confirm and fix the labels.
			 */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x0d:
			/* XSAVE area sizes must fit on the host */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].b > b) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for enabled XCR0 features",
				    __func__);
				return (-1);
			}
			if (vmh->vmh_cpuids[i].c > c) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for supported XCR0 features",
				    __func__);
				return (-1);
			}
			break;

		case 0x80000001:
			/* extended features: saved bits must be host subset */
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].a & a) !=
			    vmh->vmh_cpuids[i].a) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: a", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c) !=
			    vmh->vmh_cpuids[i].c) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d) !=
			    vmh->vmh_cpuids[i].d) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		default:
			log_debug("%s: unknown code 0x%x", __func__, code);
			return (-1);
		}
	}

	return (0);
}

/*
 * libevent signal callback for the parent process; child processes ignore
 * signals here (only the parent reacts).
 */
void
vmd_sighdlr(int sig, short event, void *arg)
{
	if (privsep_process
!= PROC_PARENT)
		return;
	log_debug("%s: handling signal", __func__);

	switch (sig) {
	case SIGHUP:
		log_info("%s: reload requested with SIGHUP", __func__);

		/*
		 * This is safe because libevent uses async signal handlers
		 * that run in the event loop and not in signal context.
		 */
		(void)vmd_reload(0, NULL);
		break;
	case SIGPIPE:
		log_info("%s: ignoring SIGPIPE", __func__);
		break;
	case SIGUSR1:
		log_info("%s: ignoring SIGUSR1", __func__);
		break;
	case SIGTERM:
	case SIGINT:
		vmd_shutdown();
		break;
	default:
		fatalx("unexpected signal");
	}
}

/* Print command-line usage and exit non-zero. */
__dead void
usage(void)
{
	extern char *__progname;
	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
	    __progname);
	exit(1);
}

/*
 * vmd entry point: parse options, initialize global state, open /dev/vmm,
 * fork the privsep children (priv/control/vmm) and run the parent's event
 * loop.  Also re-executed with -P/-I to run as a specific child process.
 */
int
main(int argc, char **argv)
{
	struct privsep		*ps;
	int			 ch;
	const char		*conffile = VMD_CONF;
	enum privsep_procid	 proc_id = PROC_PARENT;
	int			 proc_instance = 0;
	const char		*errp, *title = NULL;
	int			 argc0 = argc;	/* for re-exec in proc_init */

	log_init(0, LOG_DAEMON);

	if ((env = calloc(1, sizeof(*env))) == NULL)
		fatal("calloc: env");

	while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) {
		switch (ch) {
		case 'D':
			/* define a macro for the config parser */
			if (cmdline_symset(optarg) < 0)
				log_warnx("could not parse macro definition %s",
				    optarg);
			break;
		case 'd':
			env->vmd_debug = 2;
			break;
		case 'f':
			conffile = optarg;
			break;
		case 'v':
			env->vmd_verbose++;
			break;
		case 'n':
			/* configtest mode: parse config and exit */
			env->vmd_noaction = 1;
			break;
		case 'P':
			/* internal: run as the named child process */
			title = optarg;
			proc_id = proc_getid(procs, nitems(procs), title);
			if (proc_id == PROC_MAX)
				fatalx("invalid process name");
			break;
		case 'I':
			/* internal: child process instance number */
			proc_instance = strtonum(optarg, 0,
			    PROC_MAX_INSTANCES, &errp);
			if (errp)
				fatalx("invalid process instance");
			break;
		default:
			usage();
		}
	}

	argc -= optind;
	if (argc > 0)
		usage();

	if (env->vmd_noaction && !env->vmd_debug)
		env->vmd_debug = 1;

	/* check for root privileges */
	if (env->vmd_noaction == 0) {
		if (geteuid())
			fatalx("need root privileges");
	}

	ps = &env->vmd_ps;
	ps->ps_env = env;
	env->vmd_fd = -1;

	if (config_init(env) == -1)
		fatal("failed to initialize configuration");

	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
		fatal("unknown user %s", VMD_USER);

	/* First proc runs as root without pledge but in default chroot */
	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

	/* Open /dev/vmm */
	if (env->vmd_noaction == 0) {
		env->vmd_fd = open(VMM_NODE, O_RDWR);
		if (env->vmd_fd == -1)
			fatal("%s", VMM_NODE);
	}

	/* Configure the control socket */
	ps->ps_csock.cs_name = SOCKET_NAME;
	TAILQ_INIT(&ps->ps_rcsocks);

	/* Configuration will be parsed after forking the children */
	env->vmd_conffile = conffile;

	log_init(env->vmd_debug, LOG_DAEMON);
	log_setverbose(env->vmd_verbose);

	if (env->vmd_noaction)
		ps->ps_noaction = 1;
	ps->ps_instance = proc_instance;
	if (title != NULL)
		ps->ps_title[proc_id] = title;

	/* only the parent returns */
	proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv,
	    proc_id);

	log_procinit("parent");
	if (!env->vmd_debug && daemon(0, 0) == -1)
		fatal("can't daemonize");

	if (ps->ps_noaction == 0)
		log_info("startup");

	event_init();

	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);

	signal_add(&ps->ps_evsigint, NULL);
	signal_add(&ps->ps_evsigterm, NULL);
	signal_add(&ps->ps_evsighup, NULL);
	signal_add(&ps->ps_evsigpipe, NULL);
	signal_add(&ps->ps_evsigusr1, NULL);

	if (!env->vmd_noaction)
		proc_connect(ps);

	if (vmd_configure() == -1)
		fatalx("configuration failed");

	event_dispatch();

	log_debug("parent exiting");

	return (0);
}

/*
 * Parent-side configuration: open the pty master device, pledge, parse the
 * config file, push the global config to the children, then create the
 * configured switches and VMs.  Returns 0 on success, -1 on failure (or
 * exits directly on parse errors / -n configtest).
 */
int
vmd_configure(void)
{
	struct vmd_vm		*vm;
	struct vmd_switch	*vsw;

	if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
		fatal("open %s", PATH_PTMDEV);

	/*
	 * pledge in the parent process:
	 * stdio - for malloc and basic I/O including events.
	 * rpath - for reload to open and read the configuration files.
	 * wpath - for opening disk images and tap devices.
	 * tty - for openpty and TIOCUCNTL.
	 * proc - run kill to terminate its children safely.
	 * sendfd - for disks, interfaces and other fds.
	 * recvfd - for send and receive.
	 * getpw - lookup user or group id by name.
	 * chown, fattr - change tty ownership
	 * flock - locking disk files
	 */
	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
	    " chown fattr flock", NULL) == -1)
		fatal("pledge");

	if (parse_config(env->vmd_conffile) == -1) {
		proc_kill(&env->vmd_ps);
		exit(1);
	}

	if (env->vmd_noaction) {
		fprintf(stderr, "configuration OK\n");
		proc_kill(&env->vmd_ps);
		exit(0);
	}

	/* Send shared global configuration to all children */
	if (config_setconfig(env) == -1)
		return (-1);

	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (vsw->sw_running)
			continue;
		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
			log_warn("%s: failed to create switch %s",
			    __func__, vsw->sw_name);
			switch_remove(vsw);
			return (-1);
		}
	}

	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_disabled) {
			log_debug("%s: not creating vm %s (disabled)",
			    __func__,
			    vm->vm_params.vmc_params.vcp_name);
			continue;
		}
		if (config_setvm(&env->vmd_ps, vm,
		    -1, vm->vm_params.vmc_owner.uid) == -1)
			return (-1);
	}

	return (0);
}

/*
 * Reload (filename == NULL/empty: re-read the default config file, removing
 * non-running VMs first) or load (explicit filename: additive) the
 * configuration; with reset != 0, purge the running configuration instead.
 * Returns 0 on success, -1 on failure.
 */
int
vmd_reload(unsigned int reset, const char *filename)
{
	struct vmd_vm		*vm, *next_vm;
	struct vmd_switch	*vsw;
	int			 reload = 0;

	/* Switch back to the default config file */
	if (filename == NULL || *filename == '\0') {
		filename = env->vmd_conffile;
		reload = 1;
	}

	log_debug("%s: level %d config file %s", __func__, reset, filename);

	if (reset) {
		/* Purge the configuration */
		config_purge(env, reset);
		config_setreset(env, reset);
	} else {
		/*
		 * Load or reload the configuration.
		 *
		 * Reloading removes all non-running VMs before processing the
		 * config file, whereas loading only adds to the existing list
		 * of VMs.
		 */

		if (reload) {
			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
			    next_vm) {
				if (vm->vm_running == 0) {
					DPRINTF("%s: calling vm_remove",
					    __func__);
					vm_remove(vm, __func__);
				}
			}
		}

		if (parse_config(filename) == -1) {
			log_debug("%s: failed to load config file %s",
			    __func__, filename);
			return (-1);
		}

		if (reload) {
			/* Update shared global configuration in all children */
			if (config_setconfig(env) == -1)
				return (-1);
		}

		/* (Re)create any switches that are not up yet */
		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
			if (vsw->sw_running)
				continue;
			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
				log_warn("%s: failed to create switch %s",
				    __func__, vsw->sw_name);
				switch_remove(vsw);
				return (-1);
			}
		}

		/* Start all configured, enabled VMs that are not running */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			if (vm->vm_running == 0) {
				if (vm->vm_disabled) {
					log_debug("%s: not creating vm %s"
					    " (disabled)", __func__,
					    vm->vm_params.vmc_params.vcp_name);
					continue;
				}
				if (config_setvm(&env->vmd_ps, vm,
				    -1, vm->vm_params.vmc_owner.uid) == -1)
					return (-1);
			} else {
				log_debug("%s: not creating vm \"%s\": "
				    "(running)", __func__,
				    vm->vm_params.vmc_params.vcp_name);
			}
		}
	}

	return (0);
}

/*
 * Parent shutdown: remove all VMs, kill the child processes, free global
 * state and exit cleanly.  Never returns.
 */
void
vmd_shutdown(void)
{
	struct vmd_vm *vm, *vm_next;

	log_debug("%s: performing shutdown", __func__);

	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
		vm_remove(vm, __func__);
	}

	proc_kill(&env->vmd_ps);
	free(env);

	log_warnx("parent terminating");
	exit(0);
}

/* Look up a VM by its vmd-assigned vmid (0 is never a valid vmid). */
struct vmd_vm *
vm_getbyvmid(uint32_t vmid)
{
	struct vmd_vm *vm;

	if (vmid == 0)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_vmid == vmid)
			return (vm);
	}

	return (NULL);
}

/* Look up a VM by its kernel (vmm) id; 0 means "not running". */
struct vmd_vm *
vm_getbyid(uint32_t id)
{
	struct vmd_vm *vm;

	if (id == 0)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_params.vmc_params.vcp_id == id)
			return (vm);
	}

	return (NULL);
}

/* Translate a kernel vm id into the vmd vmid; 0 if unknown. */
uint32_t
vm_id2vmid(uint32_t id, struct vmd_vm *vm)
{
	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
		return (0);
	DPRINTF("%s: vmm id %u is vmid %u", __func__,
	    id, vm->vm_vmid);
	return (vm->vm_vmid);
}

/* Translate a vmd vmid into the kernel vm id; 0 if unknown. */
uint32_t
vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
{
	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
		return (0);
	DPRINTF("%s: vmid %u is vmm id %u", __func__,
	    vmid, vm->vm_params.vmc_params.vcp_id);
	return (vm->vm_params.vmc_params.vcp_id);
}

/* Look up a VM by its configured name; NULL-safe. */
struct vmd_vm *
vm_getbyname(const char *name)
{
	struct vmd_vm *vm;

	if (name == NULL)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
			return (vm);
	}

	return (NULL);
}

/* Look up a VM by the pid of its vm process. */
struct vmd_vm *
vm_getbypid(pid_t pid)
{
	struct vmd_vm *vm;

	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_pid == pid)
			return (vm);
	}

	return (NULL);
}

/*
 * Stop a VM without removing it from the running configuration: release
 * its user accounting, close its event fd, disks, interfaces, kernel and
 * cdrom fds, and (unless keeptty) its tty.  Safe to call with vm == NULL.
 */
void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
	struct privsep *ps = &env->vmd_ps;
	unsigned int i, j;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s stopping vm %d%s",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");

	vm->vm_running = 0;
	vm->vm_shutdown = 0;

	/* drop the per-user VM accounting taken at registration */
	user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0);
	user_put(vm->vm_user);

	if (vm->vm_iev.ibuf.fd != -1) {
		event_del(&vm->vm_iev.ev);
		close(vm->vm_iev.ibuf.fd);
	}
	for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
			if (vm->vm_disks[i][j] != -1) {
				close(vm->vm_disks[i][j]);
				vm->vm_disks[i][j] = -1;
			}
		}
	}
	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
		if (vm->vm_ifs[i].vif_fd != -1) {
			close(vm->vm_ifs[i].vif_fd);
			vm->vm_ifs[i].vif_fd = -1;
		}
		free(vm->vm_ifs[i].vif_name);
		free(vm->vm_ifs[i].vif_switch);
		free(vm->vm_ifs[i].vif_group);
		vm->vm_ifs[i].vif_name = NULL;
		vm->vm_ifs[i].vif_switch = NULL;
		vm->vm_ifs[i].vif_group = NULL;
	}
	if (vm->vm_kernel != -1) {
		close(vm->vm_kernel);
		vm->vm_kernel = -1;
	}
	if (vm->vm_cdrom != -1) {
		close(vm->vm_cdrom);
		vm->vm_cdrom = -1;
	}
	if (!keeptty) {
		vm_closetty(vm);
		vm->vm_uid = 0;
	}
}

/*
 * Remove a VM from the running configuration entirely: unlink it from the
 * VM list, stop it and free it.  Safe to call with vm == NULL.
 */
void
vm_remove(struct vmd_vm *vm, const char *caller)
{
	struct privsep *ps = &env->vmd_ps;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s removing vm %d from running config",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid);

	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);

	/*
	 * NOTE(review): user_put() here is followed by vm_stop() below,
	 * which also calls user_put(vm->vm_user) — looks like a double
	 * reference drop; confirm user_get/user_put refcount semantics.
	 */
	user_put(vm->vm_user);
vm_stop(vm, 0, caller); 1170 free(vm); 1171} 1172 1173uint32_t 1174vm_claimid(const char *name, int uid) 1175{ 1176 struct name2id *n2i = NULL; 1177 1178 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1179 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1180 return n2i->id; 1181 1182 if (++env->vmd_nvm == 0) 1183 fatalx("too many vms"); 1184 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) 1185 fatalx("could not alloc vm name"); 1186 n2i->id = env->vmd_nvm; 1187 n2i->uid = uid; 1188 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) 1189 fatalx("overlong vm name"); 1190 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1191 1192 return n2i->id; 1193} 1194 1195int 1196vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1197 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1198{ 1199 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1200 struct vm_create_params *vcp = &vmc->vmc_params; 1201 struct vmop_owner *vmo = NULL; 1202 struct vmd_user *usr = NULL; 1203 uint32_t rng; 1204 unsigned int i, j; 1205 struct vmd_switch *sw; 1206 char *s; 1207 1208 /* Check if this is an instance of another VM */ 1209 if (vm_instance(ps, &vm_parent, vmc, uid) == -1) 1210 return (-1); 1211 1212 errno = 0; 1213 *ret_vm = NULL; 1214 1215 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1216 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1217 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1218 uid) != 0) { 1219 errno = EPERM; 1220 goto fail; 1221 } 1222 *ret_vm = vm; 1223 errno = EALREADY; 1224 goto fail; 1225 } 1226 1227 if (vm_parent != NULL) 1228 vmo = &vm_parent->vm_params.vmc_insowner; 1229 1230 /* non-root users can only start existing VMs or instances */ 1231 if (vm_checkperm(NULL, vmo, uid) != 0) { 1232 log_warnx("permission denied"); 1233 errno = EPERM; 1234 goto fail; 1235 } 1236 if (vmc->vmc_flags == 0) { 1237 log_warnx("invalid configuration, no devices"); 1238 errno = VMD_DISK_MISSING; 1239 goto fail; 1240 } 1241 if (vcp->vcp_ncpus == 0) 1242 
vcp->vcp_ncpus = 1; 1243 if (vcp->vcp_memranges[0].vmr_size == 0) 1244 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1245 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1246 log_warnx("invalid number of CPUs"); 1247 goto fail; 1248 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 1249 log_warnx("invalid number of disks"); 1250 goto fail; 1251 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 1252 log_warnx("invalid number of interfaces"); 1253 goto fail; 1254 } else if (strlen(vcp->vcp_kernel) == 0 && 1255 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { 1256 log_warnx("no kernel or disk/cdrom specified"); 1257 goto fail; 1258 } else if (strlen(vcp->vcp_name) == 0) { 1259 log_warnx("invalid VM name"); 1260 goto fail; 1261 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1262 *vcp->vcp_name == '_') { 1263 log_warnx("invalid VM name"); 1264 goto fail; 1265 } else { 1266 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1267 if (!(isalnum(*s) || *s == '.' || *s == '-' || 1268 *s == '_')) { 1269 log_warnx("invalid VM name"); 1270 goto fail; 1271 } 1272 } 1273 } 1274 1275 /* track active users */ 1276 if (uid != 0 && env->vmd_users != NULL && 1277 (usr = user_get(uid)) == NULL) { 1278 log_warnx("could not add user"); 1279 goto fail; 1280 } 1281 1282 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1283 goto fail; 1284 1285 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1286 vmc = &vm->vm_params; 1287 vcp = &vmc->vmc_params; 1288 vm->vm_pid = -1; 1289 vm->vm_tty = -1; 1290 vm->vm_receive_fd = -1; 1291 vm->vm_paused = 0; 1292 vm->vm_user = usr; 1293 1294 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 1295 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1296 vm->vm_disks[i][j] = -1; 1297 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) 1298 vm->vm_ifs[i].vif_fd = -1; 1299 for (i = 0; i < vcp->vcp_nnics; i++) { 1300 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1301 /* inherit per-interface flags from the switch */ 1302 vmc->vmc_ifflags[i] |= (sw->sw_flags 
		    & VMIFF_OPTMASK);
		}

		/*
		 * If the MAC address is zero, always randomize it in vmd(8)
		 * because we cannot rely on the guest OS to do the right
		 * thing like OpenBSD does.  Based on ether_fakeaddr()
		 * from the kernel, incremented by one to differentiate
		 * the source.
		 */
		if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) {
			rng = arc4random();
			vcp->vcp_macs[i][0] = 0xfe;
			vcp->vcp_macs[i][1] = 0xe1;
			vcp->vcp_macs[i][2] = 0xba + 1;
			vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
			vcp->vcp_macs[i][4] = rng;
			vcp->vcp_macs[i][5] = rng >> 8;
		}
	}
	vm->vm_kernel = -1;
	vm->vm_cdrom = -1;
	vm->vm_iev.ibuf.fd = -1;

	/* Assign a new internal Id if not specified */
	vm->vm_vmid = (id == 0) ? vm_claimid(vcp->vcp_name, uid) : id;

	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);

	*ret_vm = vm;
	return (0);
 fail:
	/* default to EINVAL if no more specific errno was set above */
	if (errno == 0)
		errno = EINVAL;
	return (-1);
}

/*
 * vm_instance
 *
 * If 'vmc' requests an instance of an existing VM, locate the parent and
 * inherit any parameters the child did not override, enforcing the
 * parent's per-option instance permissions.  Returns 0 on success (also
 * when there is no parent to inherit from); -1 with errno set otherwise.
 */
int
vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
    struct vmop_create_params *vmc, uid_t uid)
{
	char			*name;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vmop_create_params *vmcp;
	struct vm_create_params	*vcpp;
	struct vmd_vm		*vm = NULL;
	unsigned int		 i, j;
	uint32_t		 id;

	/* return without error if the parent is NULL (nothing to inherit) */
	if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 ||
	    (*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL)
		return (0);

	errno = 0;
	vmcp = &(*vm_parent)->vm_params;
	vcpp = &vmcp->vmc_params;

	/* Are we allowed to create an instance from this VM?
*/ 1362 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1363 log_warnx("vm \"%s\" no permission to create vm instance", 1364 vcpp->vcp_name); 1365 errno = ENAMETOOLONG; 1366 return (-1); 1367 } 1368 1369 id = vcp->vcp_id; 1370 name = vcp->vcp_name; 1371 1372 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1373 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1374 errno = EPROCLIM; 1375 return (-1); 1376 } 1377 1378 /* CPU */ 1379 if (vcp->vcp_ncpus == 0) 1380 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1381 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1382 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1383 log_warnx("vm \"%s\" no permission to set cpus", name); 1384 errno = EPERM; 1385 return (-1); 1386 } 1387 1388 /* memory */ 1389 if (vcp->vcp_memranges[0].vmr_size == 0) 1390 vcp->vcp_memranges[0].vmr_size = 1391 vcpp->vcp_memranges[0].vmr_size; 1392 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1393 vcp->vcp_memranges[0].vmr_size != 1394 vcpp->vcp_memranges[0].vmr_size) { 1395 log_warnx("vm \"%s\" no permission to set memory", name); 1396 errno = EPERM; 1397 return (-1); 1398 } 1399 1400 /* disks cannot be inherited */ 1401 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1402 vcp->vcp_ndisks) { 1403 log_warnx("vm \"%s\" no permission to set disks", name); 1404 errno = EPERM; 1405 return (-1); 1406 } 1407 for (i = 0; i < vcp->vcp_ndisks; i++) { 1408 /* Check if this disk is already used in the parent */ 1409 for (j = 0; j < vcpp->vcp_ndisks; j++) { 1410 if (strcmp(vcp->vcp_disks[i], 1411 vcpp->vcp_disks[j]) == 0) { 1412 log_warnx("vm \"%s\" disk %s cannot be reused", 1413 name, vcp->vcp_disks[i]); 1414 errno = EBUSY; 1415 return (-1); 1416 } 1417 } 1418 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1419 } 1420 1421 /* interfaces */ 1422 if (vcp->vcp_nnics > 0 && 1423 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1424 vcp->vcp_nnics != vcpp->vcp_nnics) { 1425 log_warnx("vm \"%s\" no permission to set interfaces", name); 1426 errno = 
EPERM;
		return (-1);
	}
	/* append any parent interfaces the child did not define itself */
	for (i = 0; i < vcpp->vcp_nnics; i++) {
		/* Interface got overwritten */
		if (i < vcp->vcp_nnics)
			continue;

		/* Copy interface from parent */
		vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i];
		(void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i],
		    sizeof(vmc->vmc_ifnames[i]));
		(void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i],
		    sizeof(vmc->vmc_ifswitch[i]));
		(void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i],
		    sizeof(vmc->vmc_ifgroup[i]));
		memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i],
		    sizeof(vcp->vcp_macs[i]));
		vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i];
		vcp->vcp_nnics++;
	}
	for (i = 0; i < vcp->vcp_nnics; i++) {
		for (j = 0; j < vcpp->vcp_nnics; j++) {
			/*
			 * NOTE(review): this condition rejects a non-zero
			 * lladdr that DIFFERS from the parent's, and indexes
			 * the parent's macs with 'i' although the inner loop
			 * variable is 'j' — both look inverted/suspect for a
			 * "cannot be reused" check.  Confirm the intended
			 * semantics against upstream before changing.
			 */
			if (memcmp(zero_mac, vcp->vcp_macs[i],
			    sizeof(vcp->vcp_macs[i])) != 0 &&
			    memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i],
			    sizeof(vcp->vcp_macs[i])) != 0) {
				log_warnx("vm \"%s\" lladdr cannot be reused",
				    name);
				errno = EBUSY;
				return (-1);
			}
			/*
			 * NOTE(review): argument order puts the interface
			 * name where the format suggests the vm name —
			 * verify against the disk message above.
			 */
			if (strlen(vmc->vmc_ifnames[i]) &&
			    strcmp(vmc->vmc_ifnames[i],
			    vmcp->vmc_ifnames[j]) == 0) {
				log_warnx("vm \"%s\" %s cannot be reused",
				    vmc->vmc_ifnames[i], name);
				errno = EBUSY;
				return (-1);
			}
		}
	}

	/* kernel: an explicit boot image needs the KERNEL permission */
	if (strlen(vcp->vcp_kernel) > 0) {
		if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) {
			log_warnx("vm \"%s\" no permission to set boot image",
			    name);
			errno = EPERM;
			return (-1);
		}
		vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL;
	} else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel,
	    sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) {
		log_warnx("vm \"%s\" kernel name too long", name);
		errno = EINVAL;
		return (-1);
	}

	/* cdrom: an explicit cdrom needs the CDROM permission */
	if (strlen(vcp->vcp_cdrom) > 0) {
		if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) {
			log_warnx("vm \"%s\" no
permission to set cdrom", name); 1489 errno = EPERM; 1490 return (-1); 1491 } 1492 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1493 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, 1494 sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) { 1495 log_warnx("vm \"%s\" cdrom name too long", name); 1496 errno = EINVAL; 1497 return (-1); 1498 } 1499 1500 /* user */ 1501 if (vmc->vmc_owner.uid == 0) 1502 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1503 else if (vmc->vmc_owner.uid != uid && 1504 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1505 log_warnx("vm \"%s\" user mismatch", name); 1506 errno = EPERM; 1507 return (-1); 1508 } 1509 1510 /* group */ 1511 if (vmc->vmc_owner.gid == 0) 1512 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1513 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1514 log_warnx("vm \"%s\" group mismatch", name); 1515 errno = EPERM; 1516 return (-1); 1517 } 1518 1519 /* child instances */ 1520 if (vmc->vmc_insflags) { 1521 log_warnx("vm \"%s\" cannot change instance permissions", name); 1522 errno = EPERM; 1523 return (-1); 1524 } 1525 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1526 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1527 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1528 vmc->vmc_insflags = vmcp->vmc_insflags; 1529 } else { 1530 vmc->vmc_insowner.gid = 0; 1531 vmc->vmc_insowner.uid = 0; 1532 vmc->vmc_insflags = 0; 1533 } 1534 1535 /* finished, remove instance flags */ 1536 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1537 1538 return (0); 1539} 1540 1541/* 1542 * vm_checkperm 1543 * 1544 * Checks if the user represented by the 'uid' parameter is allowed to 1545 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1546 * console.) 
1547 * 1548 * Parameters: 1549 * vm: the VM whose permission is to be checked 1550 * vmo: the required uid/gid to be checked 1551 * uid: the user ID of the user making the request 1552 * 1553 * Return values: 1554 * 0: the permission should be granted 1555 * -1: the permission check failed (also returned if vm == null) 1556 */ 1557int 1558vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1559{ 1560 struct group *gr; 1561 struct passwd *pw; 1562 char **grmem; 1563 1564 /* root has no restrictions */ 1565 if (uid == 0) 1566 return (0); 1567 1568 if (vmo == NULL) 1569 return (-1); 1570 1571 /* check user */ 1572 if (vm == NULL) { 1573 if (vmo->uid == uid) 1574 return (0); 1575 } else { 1576 /* 1577 * check user of running vm (the owner of a running vm can 1578 * be different to (or more specific than) the configured owner. 1579 */ 1580 if ((vm->vm_running && vm->vm_uid == uid) || 1581 (!vm->vm_running && vmo->uid == uid)) 1582 return (0); 1583 } 1584 1585 /* check groups */ 1586 if (vmo->gid != -1) { 1587 if ((pw = getpwuid(uid)) == NULL) 1588 return (-1); 1589 if (pw->pw_gid == vmo->gid) 1590 return (0); 1591 if ((gr = getgrgid(vmo->gid)) != NULL) { 1592 for (grmem = gr->gr_mem; *grmem; grmem++) 1593 if (strcmp(*grmem, pw->pw_name) == 0) 1594 return (0); 1595 } 1596 } 1597 1598 return (-1); 1599} 1600 1601/* 1602 * vm_checkinsflag 1603 * 1604 * Checks wheter the non-root user is allowed to set an instance option. 
1605 * 1606 * Parameters: 1607 * vmc: the VM create parameters 1608 * flag: the flag to be checked 1609 * uid: the user ID of the user making the request 1610 * 1611 * Return values: 1612 * 0: the permission should be granted 1613 * -1: the permission check failed (also returned if vm == null) 1614 */ 1615int 1616vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1617{ 1618 /* root has no restrictions */ 1619 if (uid == 0) 1620 return (0); 1621 1622 if ((vmc->vmc_insflags & flag) == 0) 1623 return (-1); 1624 1625 return (0); 1626} 1627 1628/* 1629 * vm_checkaccess 1630 * 1631 * Checks if the user represented by the 'uid' parameter is allowed to 1632 * access the file described by the 'path' parameter. 1633 * 1634 * Parameters: 1635 * fd: the file descriptor of the opened file 1636 * uflag: check if the userid has access to the file 1637 * uid: the user ID of the user making the request 1638 * amode: the access flags of R_OK and W_OK 1639 * 1640 * Return values: 1641 * 0: the permission should be granted 1642 * -1: the permission check failed 1643 */ 1644int 1645vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1646{ 1647 struct group *gr; 1648 struct passwd *pw; 1649 char **grmem; 1650 struct stat st; 1651 mode_t mode; 1652 1653 if (fd == -1) 1654 return (-1); 1655 1656 /* 1657 * File has to be accessible and a regular file 1658 */ 1659 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1660 return (-1); 1661 1662 /* root has no restrictions */ 1663 if (uid == 0 || uflag == 0) 1664 return (0); 1665 1666 /* check other */ 1667 mode = amode & W_OK ? S_IWOTH : 0; 1668 mode |= amode & R_OK ? S_IROTH : 0; 1669 if ((st.st_mode & mode) == mode) 1670 return (0); 1671 1672 /* check user */ 1673 mode = amode & W_OK ? S_IWUSR : 0; 1674 mode |= amode & R_OK ? S_IRUSR : 0; 1675 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1676 return (0); 1677 1678 /* check groups */ 1679 mode = amode & W_OK ? 
S_IWGRP : 0;
	mode |= amode & R_OK ? S_IRGRP : 0;
	if ((st.st_mode & mode) != mode)
		return (-1);
	/* group bits allow it; check the user's primary and extra groups */
	if ((pw = getpwuid(uid)) == NULL)
		return (-1);
	if (pw->pw_gid == st.st_gid)
		return (0);
	if ((gr = getgrgid(st.st_gid)) != NULL) {
		for (grmem = gr->gr_mem; *grmem; grmem++)
			if (strcmp(*grmem, pw->pw_name) == 0)
				return (0);
	}

	return (-1);
}

/*
 * vm_opentty
 *
 * Allocate a pty for 'vm' via the pre-opened /dev/ptm fd, record the
 * client side in vm_tty and the name in vm_ttyname, and adjust the
 * slave's owner/group and mode for the VM owner.  Returns 0 on success,
 * -1 on failure (partially opened state is cleaned up via vm_closetty).
 */
int
vm_opentty(struct vmd_vm *vm)
{
	struct ptmget	 ptm;
	struct stat	 st;
	struct group	*gr;
	uid_t		 uid;
	gid_t		 gid;
	mode_t		 mode;
	int		 on;

	/*
	 * Open tty with pre-opened PTM fd
	 */
	if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1))
		return (-1);

	/*
	 * We use user ioctl(2) mode to pass break commands.
	 */
	on = 1;
	if (ioctl(ptm.cfd, TIOCUCNTL, &on))
		fatal("could not enable user ioctl mode");

	/* keep the controller side, hand out the slave name */
	vm->vm_tty = ptm.cfd;
	close(ptm.sfd);
	if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL)
		goto fail;

	uid = vm->vm_uid;
	gid = vm->vm_params.vmc_owner.gid;

	if (vm->vm_params.vmc_owner.gid != -1) {
		/* configured owner group: user+group read/write */
		mode = 0660;
	} else if ((gr = getgrnam("tty")) != NULL) {
		/* fall back to the tty group, write-only for group */
		gid = gr->gr_gid;
		mode = 0620;
	} else {
		/* last resort: owner-only access */
		mode = 0600;
		gid = 0;
	}

	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
	    __func__, vm->vm_params.vmc_params.vcp_name,
	    vm->vm_ttyname, uid, gid, mode);

	/*
	 * Change ownership and mode of the tty as required.
	 * Loosely based on the implementation of sshpty.c
	 */
	if (stat(vm->vm_ttyname, &st) == -1)
		goto fail;

	if (st.st_uid != uid || st.st_gid != gid) {
		if (chown(vm->vm_ttyname, uid, gid) == -1) {
			log_warn("chown %s %d %d failed, uid %d",
			    vm->vm_ttyname, uid, gid, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
		if (chmod(vm->vm_ttyname, mode) == -1) {
			log_warn("chmod %s %o failed, uid %d",
			    vm->vm_ttyname, mode, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	return (0);
 fail:
	vm_closetty(vm);
	return (-1);
}

/*
 * vm_closetty
 *
 * Restore root ownership and permissive mode on the tty, close it and
 * free the recorded tty name.  Safe to call on a vm without a tty.
 */
void
vm_closetty(struct vmd_vm *vm)
{
	if (vm->vm_tty != -1) {
		/* Release and close the tty */
		if (fchown(vm->vm_tty, 0, 0) == -1)
			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
		if (fchmod(vm->vm_tty, 0666) == -1)
			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
		close(vm->vm_tty);
		vm->vm_tty = -1;
	}
	free(vm->vm_ttyname);
	vm->vm_ttyname = NULL;
}

/*
 * switch_remove
 *
 * Unlink 'vsw' from the global switch list and free it.
 */
void
switch_remove(struct vmd_switch *vsw)
{
	if (vsw == NULL)
		return;

	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);

	free(vsw->sw_group);
	free(vsw->sw_name);
	free(vsw);
}

/*
 * switch_getbyname
 *
 * Linear lookup of a configured switch by name; NULL if not found.
 */
struct vmd_switch *
switch_getbyname(const char *name)
{
	struct vmd_switch	*vsw;

	if (name == NULL)
		return (NULL);
	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (strcmp(vsw->sw_name, name) == 0)
			return (vsw);
	}

	return (NULL);
}

/*
 * user_get
 *
 * Return the reference-counted accounting entry for 'uid', creating it
 * on first use.  Root (uid 0) is not tracked.  NULL on allocation
 * failure.
 */
struct vmd_user *
user_get(uid_t uid)
{
	struct vmd_user		*usr;

	if (uid == 0)
		return
(NULL);

	/* first try to find an existing user */
	TAILQ_FOREACH(usr, env->vmd_users, usr_entry) {
		if (usr->usr_id.uid == uid)
			goto done;
	}

	if ((usr = calloc(1, sizeof(*usr))) == NULL) {
		log_warn("could not allocate user");
		return (NULL);
	}

	usr->usr_id.uid = uid;
	usr->usr_id.gid = -1;
	TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry);

 done:
	DPRINTF("%s: uid %d #%d +",
	    __func__, usr->usr_id.uid, usr->usr_refcnt + 1);
	usr->usr_refcnt++;

	return (usr);
}

/*
 * user_put
 *
 * Drop one reference on 'usr'; when the count reaches zero the entry is
 * removed from the user list and freed.  NULL is ignored.
 */
void
user_put(struct vmd_user *usr)
{
	if (usr == NULL)
		return;

	DPRINTF("%s: uid %d #%d -",
	    __func__, usr->usr_id.uid, usr->usr_refcnt - 1);

	if (--usr->usr_refcnt > 0)
		return;

	TAILQ_REMOVE(env->vmd_users, usr, usr_entry);
	free(usr);
}

/*
 * user_inc
 *
 * Add (inc != 0) or subtract (inc == 0) the resources described by 'vcp'
 * to/from the user's usage counters.  No-op for untracked (NULL) users.
 */
void
user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc)
{
	char	mem[FMT_SCALED_STRSIZE];

	if (usr == NULL)
		return;

	/* increment or decrement counters */
	inc = inc ? 1 : -1;

	usr->usr_maxcpu += vcp->vcp_ncpus * inc;
	usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc;
	usr->usr_maxifs += vcp->vcp_nnics * inc;

	if (log_getverbose() > 1) {
		(void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem);
		log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu",
		    __func__, inc == 1 ?
'+' : '-', 1889 usr->usr_id.uid, usr->usr_refcnt, 1890 usr->usr_maxcpu, mem, usr->usr_maxifs); 1891 } 1892} 1893 1894int 1895user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) 1896{ 1897 const char *limit = ""; 1898 1899 /* XXX make the limits configurable */ 1900 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) { 1901 limit = "cpu "; 1902 goto fail; 1903 } 1904 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) { 1905 limit = "memory "; 1906 goto fail; 1907 } 1908 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) { 1909 limit = "interface "; 1910 goto fail; 1911 } 1912 1913 return (0); 1914 1915 fail: 1916 log_warnx("%s: user %d %slimit reached", vcp->vcp_name, 1917 usr->usr_id.uid, limit); 1918 return (-1); 1919} 1920 1921char * 1922get_string(uint8_t *ptr, size_t len) 1923{ 1924 size_t i; 1925 1926 for (i = 0; i < len; i++) 1927 if (!isprint(ptr[i])) 1928 break; 1929 1930 return strndup(ptr, i); 1931} 1932 1933uint32_t 1934prefixlen2mask(uint8_t prefixlen) 1935{ 1936 if (prefixlen == 0) 1937 return (0); 1938 1939 if (prefixlen > 32) 1940 prefixlen = 32; 1941 1942 return (htonl(0xffffffff << (32 - prefixlen))); 1943} 1944 1945void 1946prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 1947{ 1948 struct in6_addr s6; 1949 int i; 1950 1951 if (prefixlen > 128) 1952 prefixlen = 128; 1953 1954 memset(&s6, 0, sizeof(s6)); 1955 for (i = 0; i < prefixlen / 8; i++) 1956 s6.s6_addr[i] = 0xff; 1957 i = prefixlen % 8; 1958 if (i) 1959 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 1960 1961 memcpy(mask, &s6, sizeof(s6)); 1962} 1963 1964void 1965getmonotime(struct timeval *tv) 1966{ 1967 struct timespec ts; 1968 1969 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 1970 fatal("clock_gettime"); 1971 1972 TIMESPEC_TO_TIMEVAL(tv, &ts); 1973} 1974