/* vmd.c revision 1.120 */
1/* $OpenBSD: vmd.c,v 1.120 2021/01/27 07:21:54 deraadt Exp $ */ 2 3/* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/param.h> /* nitems */ 20#include <sys/queue.h> 21#include <sys/wait.h> 22#include <sys/cdefs.h> 23#include <sys/stat.h> 24#include <sys/sysctl.h> 25#include <sys/tty.h> 26#include <sys/ttycom.h> 27#include <sys/ioctl.h> 28 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <termios.h> 33#include <errno.h> 34#include <event.h> 35#include <fcntl.h> 36#include <pwd.h> 37#include <signal.h> 38#include <syslog.h> 39#include <unistd.h> 40#include <util.h> 41#include <ctype.h> 42#include <pwd.h> 43#include <grp.h> 44 45#include <machine/specialreg.h> 46#include <machine/vmmvar.h> 47 48#include "proc.h" 49#include "atomicio.h" 50#include "vmd.h" 51 52__dead void usage(void); 53 54int main(int, char **); 55int vmd_configure(void); 56void vmd_sighdlr(int sig, short event, void *arg); 57void vmd_shutdown(void); 58int vmd_control_run(void); 59int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 60int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 61int vmd_check_vmh(struct vm_dump_header *); 62 63int vm_instance(struct privsep *, struct 
vmd_vm **, 64 struct vmop_create_params *, uid_t); 65int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 66int vm_claimid(const char *, int, uint32_t *); 67void start_vm_batch(int, short, void*); 68 69struct vmd *env; 70 71static struct privsep_proc procs[] = { 72 /* Keep "priv" on top as procs[0] */ 73 { "priv", PROC_PRIV, NULL, priv }, 74 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 75 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 76}; 77 78enum privsep_procid privsep_process; 79 80struct event staggered_start_timer; 81 82/* For the privileged process */ 83static struct privsep_proc *proc_priv = &procs[0]; 84static struct passwd proc_privpw; 85static const uint8_t zero_mac[ETHER_ADDR_LEN]; 86 87int 88vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 89{ 90 struct privsep *ps = p->p_ps; 91 int res = 0, ret = 0, cmd = 0, verbose; 92 unsigned int v = 0, flags; 93 struct vmop_create_params vmc; 94 struct vmop_id vid; 95 struct vmop_result vmr; 96 struct vm_dump_header vmh; 97 struct vmd_vm *vm = NULL; 98 char *str = NULL; 99 uint32_t id = 0; 100 struct control_sock *rcs; 101 102 switch (imsg->hdr.type) { 103 case IMSG_VMDOP_START_VM_REQUEST: 104 IMSG_SIZE_CHECK(imsg, &vmc); 105 memcpy(&vmc, imsg->data, sizeof(vmc)); 106 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 107 if (vmc.vmc_flags == 0) { 108 /* start an existing VM with pre-configured options */ 109 if (!(ret == -1 && errno == EALREADY && 110 !(vm->vm_state & VM_STATE_RUNNING))) { 111 res = errno; 112 cmd = IMSG_VMDOP_START_VM_RESPONSE; 113 } 114 } else if (ret != 0) { 115 res = errno; 116 cmd = IMSG_VMDOP_START_VM_RESPONSE; 117 } 118 if (res == 0 && 119 config_setvm(ps, vm, 120 imsg->hdr.peerid, vm->vm_params.vmc_owner.uid) == -1) { 121 res = errno; 122 cmd = IMSG_VMDOP_START_VM_RESPONSE; 123 } 124 break; 125 case IMSG_VMDOP_WAIT_VM_REQUEST: 126 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 127 IMSG_SIZE_CHECK(imsg, &vid); 128 memcpy(&vid, 
imsg->data, sizeof(vid)); 129 flags = vid.vid_flags; 130 131 if ((id = vid.vid_id) == 0) { 132 /* Lookup vm (id) by name */ 133 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 134 res = ENOENT; 135 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 136 break; 137 } else if ((vm->vm_state & VM_STATE_SHUTDOWN) && 138 (flags & VMOP_FORCE) == 0) { 139 res = EALREADY; 140 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 141 break; 142 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 143 res = EINVAL; 144 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 145 break; 146 } 147 id = vm->vm_vmid; 148 } else if ((vm = vm_getbyvmid(id)) == NULL) { 149 res = ENOENT; 150 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 151 break; 152 } 153 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 154 vid.vid_uid) != 0) { 155 res = EPERM; 156 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 157 break; 158 } 159 160 memset(&vid, 0, sizeof(vid)); 161 vid.vid_id = id; 162 vid.vid_flags = flags; 163 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 164 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 165 return (-1); 166 break; 167 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 168 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 169 break; 170 case IMSG_VMDOP_LOAD: 171 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 172 str = get_string((uint8_t *)imsg->data, 173 IMSG_DATA_SIZE(imsg)); 174 case IMSG_VMDOP_RELOAD: 175 if (vmd_reload(0, str) == -1) 176 cmd = IMSG_CTL_FAIL; 177 else 178 cmd = IMSG_CTL_OK; 179 free(str); 180 break; 181 case IMSG_CTL_RESET: 182 IMSG_SIZE_CHECK(imsg, &v); 183 memcpy(&v, imsg->data, sizeof(v)); 184 if (vmd_reload(v, NULL) == -1) 185 cmd = IMSG_CTL_FAIL; 186 else 187 cmd = IMSG_CTL_OK; 188 break; 189 case IMSG_CTL_VERBOSE: 190 IMSG_SIZE_CHECK(imsg, &verbose); 191 memcpy(&verbose, imsg->data, sizeof(verbose)); 192 log_setverbose(verbose); 193 194 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 195 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 196 cmd = IMSG_CTL_OK; 197 break; 198 case IMSG_VMDOP_PAUSE_VM: 199 case 
IMSG_VMDOP_UNPAUSE_VM: 200 IMSG_SIZE_CHECK(imsg, &vid); 201 memcpy(&vid, imsg->data, sizeof(vid)); 202 if (vid.vid_id == 0) { 203 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 204 res = ENOENT; 205 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 206 break; 207 } else { 208 vid.vid_id = vm->vm_vmid; 209 } 210 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 211 res = ENOENT; 212 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 213 break; 214 } 215 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 216 vid.vid_uid) != 0) { 217 res = EPERM; 218 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 219 break; 220 } 221 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 222 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 223 break; 224 case IMSG_VMDOP_SEND_VM_REQUEST: 225 IMSG_SIZE_CHECK(imsg, &vid); 226 memcpy(&vid, imsg->data, sizeof(vid)); 227 id = vid.vid_id; 228 if (vid.vid_id == 0) { 229 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 230 res = ENOENT; 231 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 232 close(imsg->fd); 233 break; 234 } else { 235 vid.vid_id = vm->vm_vmid; 236 } 237 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 238 res = ENOENT; 239 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 240 close(imsg->fd); 241 break; 242 } 243 vmr.vmr_id = vid.vid_id; 244 log_debug("%s: sending fd to vmm", __func__); 245 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 246 imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); 247 break; 248 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 249 IMSG_SIZE_CHECK(imsg, &vid); 250 memcpy(&vid, imsg->data, sizeof(vid)); 251 if (imsg->fd == -1) { 252 log_warnx("%s: invalid fd", __func__); 253 return (-1); 254 } 255 if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != 256 sizeof(vmh)) { 257 log_warnx("%s: error reading vmh from received vm", 258 __func__); 259 res = EIO; 260 close(imsg->fd); 261 cmd = IMSG_VMDOP_START_VM_RESPONSE; 262 break; 263 } 264 265 if (vmd_check_vmh(&vmh)) { 266 res = ENOENT; 267 close(imsg->fd); 268 cmd = IMSG_VMDOP_START_VM_RESPONSE; 269 break; 270 } 271 if (atomicio(read, 
imsg->fd, &vmc, sizeof(vmc)) != 272 sizeof(vmc)) { 273 log_warnx("%s: error reading vmc from received vm", 274 __func__); 275 res = EIO; 276 close(imsg->fd); 277 cmd = IMSG_VMDOP_START_VM_RESPONSE; 278 break; 279 } 280 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 281 sizeof(vmc.vmc_params.vcp_name)); 282 vmc.vmc_params.vcp_id = 0; 283 284 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 285 if (ret != 0) { 286 res = errno; 287 cmd = IMSG_VMDOP_START_VM_RESPONSE; 288 close(imsg->fd); 289 } else { 290 vm->vm_state |= VM_STATE_RECEIVED; 291 config_setvm(ps, vm, imsg->hdr.peerid, 292 vmc.vmc_owner.uid); 293 log_debug("%s: sending fd to vmm", __func__); 294 proc_compose_imsg(ps, PROC_VMM, -1, 295 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, 296 NULL, 0); 297 } 298 break; 299 case IMSG_VMDOP_DONE: 300 control_reset(&ps->ps_csock); 301 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 302 control_reset(rcs); 303 cmd = 0; 304 break; 305 default: 306 return (-1); 307 } 308 309 switch (cmd) { 310 case 0: 311 break; 312 case IMSG_VMDOP_START_VM_RESPONSE: 313 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 314 memset(&vmr, 0, sizeof(vmr)); 315 vmr.vmr_result = res; 316 vmr.vmr_id = id; 317 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 318 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 319 return (-1); 320 break; 321 default: 322 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 323 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 324 return (-1); 325 break; 326 } 327 328 return (0); 329} 330 331int 332vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 333{ 334 struct vmop_result vmr; 335 struct privsep *ps = p->p_ps; 336 int res = 0; 337 struct vmd_vm *vm; 338 struct vm_create_params *vcp; 339 struct vmop_info_result vir; 340 341 switch (imsg->hdr.type) { 342 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 343 IMSG_SIZE_CHECK(imsg, &vmr); 344 memcpy(&vmr, imsg->data, sizeof(vmr)); 345 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 346 break; 347 proc_compose_imsg(ps, 
PROC_CONTROL, -1, 348 imsg->hdr.type, imsg->hdr.peerid, -1, 349 imsg->data, sizeof(imsg->data)); 350 log_info("%s: paused vm %d successfully", 351 vm->vm_params.vmc_params.vcp_name, 352 vm->vm_vmid); 353 vm->vm_state |= VM_STATE_PAUSED; 354 break; 355 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 356 IMSG_SIZE_CHECK(imsg, &vmr); 357 memcpy(&vmr, imsg->data, sizeof(vmr)); 358 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 359 break; 360 proc_compose_imsg(ps, PROC_CONTROL, -1, 361 imsg->hdr.type, imsg->hdr.peerid, -1, 362 imsg->data, sizeof(imsg->data)); 363 log_info("%s: unpaused vm %d successfully.", 364 vm->vm_params.vmc_params.vcp_name, 365 vm->vm_vmid); 366 vm->vm_state &= ~VM_STATE_PAUSED; 367 break; 368 case IMSG_VMDOP_START_VM_RESPONSE: 369 IMSG_SIZE_CHECK(imsg, &vmr); 370 memcpy(&vmr, imsg->data, sizeof(vmr)); 371 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 372 break; 373 vm->vm_pid = vmr.vmr_pid; 374 vcp = &vm->vm_params.vmc_params; 375 vcp->vcp_id = vmr.vmr_id; 376 377 /* 378 * If the peerid is not -1, forward the response back to the 379 * the control socket. If it is -1, the request originated 380 * from the parent, not the control socket. 
381 */ 382 if (vm->vm_peerid != (uint32_t)-1) { 383 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 384 sizeof(vmr.vmr_ttyname)); 385 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 386 imsg->hdr.type, vm->vm_peerid, -1, 387 &vmr, sizeof(vmr)) == -1) { 388 errno = vmr.vmr_result; 389 log_warn("%s: failed to foward vm result", 390 vcp->vcp_name); 391 vm_remove(vm, __func__); 392 return (-1); 393 } 394 } 395 396 if (vmr.vmr_result) { 397 errno = vmr.vmr_result; 398 log_warn("%s: failed to start vm", vcp->vcp_name); 399 vm_remove(vm, __func__); 400 break; 401 } 402 403 /* Now configure all the interfaces */ 404 if (vm_priv_ifconfig(ps, vm) == -1) { 405 log_warn("%s: failed to configure vm", vcp->vcp_name); 406 vm_remove(vm, __func__); 407 break; 408 } 409 410 log_info("%s: started vm %d successfully, tty %s", 411 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 412 break; 413 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 414 IMSG_SIZE_CHECK(imsg, &vmr); 415 memcpy(&vmr, imsg->data, sizeof(vmr)); 416 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 417 __func__, vmr.vmr_id); 418 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 419 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 420 break; 421 if (vmr.vmr_result == 0) { 422 /* Mark VM as shutting down */ 423 vm->vm_state |= VM_STATE_SHUTDOWN; 424 } 425 break; 426 case IMSG_VMDOP_SEND_VM_RESPONSE: 427 IMSG_SIZE_CHECK(imsg, &vmr); 428 memcpy(&vmr, imsg->data, sizeof(vmr)); 429 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 430 break; 431 if (!vmr.vmr_result) { 432 log_info("%s: sent vm %d successfully.", 433 vm->vm_params.vmc_params.vcp_name, 434 vm->vm_vmid); 435 if (vm->vm_from_config) 436 vm_stop(vm, 0, __func__); 437 else 438 vm_remove(vm, __func__); 439 } 440 441 /* Send a response if a control client is waiting for it */ 442 if (imsg->hdr.peerid != (uint32_t)-1) { 443 /* the error is meaningless for deferred responses */ 444 vmr.vmr_result = 0; 445 446 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 447 IMSG_VMDOP_SEND_VM_RESPONSE, 448 
imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 449 return (-1); 450 } 451 break; 452 case IMSG_VMDOP_TERMINATE_VM_EVENT: 453 IMSG_SIZE_CHECK(imsg, &vmr); 454 memcpy(&vmr, imsg->data, sizeof(vmr)); 455 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 456 __func__, vmr.vmr_id, vmr.vmr_result); 457 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 458 log_debug("%s: vm %d is no longer available", 459 __func__, vmr.vmr_id); 460 break; 461 } 462 if (vmr.vmr_result != EAGAIN || 463 vm->vm_params.vmc_bootdevice) { 464 if (vm->vm_from_config) 465 vm_stop(vm, 0, __func__); 466 else 467 vm_remove(vm, __func__); 468 } else { 469 /* Stop VM instance but keep the tty open */ 470 vm_stop(vm, 1, __func__); 471 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 472 } 473 474 /* Send a response if a control client is waiting for it */ 475 if (imsg->hdr.peerid != (uint32_t)-1) { 476 /* the error is meaningless for deferred responses */ 477 vmr.vmr_result = 0; 478 479 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 480 IMSG_VMDOP_TERMINATE_VM_RESPONSE, 481 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 482 return (-1); 483 } 484 break; 485 case IMSG_VMDOP_GET_INFO_VM_DATA: 486 IMSG_SIZE_CHECK(imsg, &vir); 487 memcpy(&vir, imsg->data, sizeof(vir)); 488 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 489 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 490 if (vm->vm_ttyname != NULL) 491 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 492 sizeof(vir.vir_ttyname)); 493 log_debug("%s: running vm: %d, vm_state: 0x%x", 494 __func__, vm->vm_vmid, vm->vm_state); 495 vir.vir_state = vm->vm_state; 496 /* get the user id who started the vm */ 497 vir.vir_uid = vm->vm_uid; 498 vir.vir_gid = vm->vm_params.vmc_owner.gid; 499 } 500 if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, 501 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 502 log_debug("%s: GET_INFO_VM failed for vm %d, removing", 503 __func__, vm->vm_vmid); 504 vm_remove(vm, __func__); 505 return (-1); 506 } 507 
break; 508 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 509 /* 510 * PROC_VMM has responded with the *running* VMs, now we 511 * append the others. These use the special value 0 for their 512 * kernel id to indicate that they are not running. 513 */ 514 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 515 if (!(vm->vm_state & VM_STATE_RUNNING)) { 516 memset(&vir, 0, sizeof(vir)); 517 vir.vir_info.vir_id = vm->vm_vmid; 518 strlcpy(vir.vir_info.vir_name, 519 vm->vm_params.vmc_params.vcp_name, 520 VMM_MAX_NAME_LEN); 521 vir.vir_info.vir_memory_size = 522 vm->vm_params.vmc_params. 523 vcp_memranges[0].vmr_size; 524 vir.vir_info.vir_ncpus = 525 vm->vm_params.vmc_params.vcp_ncpus; 526 /* get the configured user id for this vm */ 527 vir.vir_uid = vm->vm_params.vmc_owner.uid; 528 vir.vir_gid = vm->vm_params.vmc_owner.gid; 529 log_debug("%s: vm: %d, vm_state: 0x%x", 530 __func__, vm->vm_vmid, vm->vm_state); 531 vir.vir_state = vm->vm_state; 532 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 533 IMSG_VMDOP_GET_INFO_VM_DATA, 534 imsg->hdr.peerid, -1, &vir, 535 sizeof(vir)) == -1) { 536 log_debug("%s: GET_INFO_VM_END failed", 537 __func__); 538 vm_remove(vm, __func__); 539 return (-1); 540 } 541 } 542 } 543 IMSG_SIZE_CHECK(imsg, &res); 544 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 545 break; 546 default: 547 return (-1); 548 } 549 550 return (0); 551} 552 553int 554vmd_check_vmh(struct vm_dump_header *vmh) 555{ 556 int i; 557 unsigned int code, leaf; 558 unsigned int a, b, c, d; 559 560 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 561 log_warnx("%s: incompatible dump signature", __func__); 562 return (-1); 563 } 564 565 if (vmh->vmh_version != VM_DUMP_VERSION) { 566 log_warnx("%s: incompatible dump version", __func__); 567 return (-1); 568 } 569 570 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 571 code = vmh->vmh_cpuids[i].code; 572 leaf = vmh->vmh_cpuids[i].leaf; 573 if (leaf != 0x00) { 574 log_debug("%s: invalid leaf 0x%x for code 0x%x", 
575 __func__, leaf, code); 576 return (-1); 577 } 578 579 switch (code) { 580 case 0x00: 581 CPUID_LEAF(code, leaf, a, b, c, d); 582 if (vmh->vmh_cpuids[i].a > a) { 583 log_debug("%s: incompatible cpuid level", 584 __func__); 585 return (-1); 586 } 587 if (!(vmh->vmh_cpuids[i].b == b && 588 vmh->vmh_cpuids[i].c == c && 589 vmh->vmh_cpuids[i].d == d)) { 590 log_debug("%s: incompatible cpu brand", 591 __func__); 592 return (-1); 593 } 594 break; 595 596 case 0x01: 597 CPUID_LEAF(code, leaf, a, b, c, d); 598 if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 599 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 600 log_debug("%s: incompatible cpu features " 601 "code: 0x%x leaf: 0x%x reg: c", __func__, 602 code, leaf); 603 return (-1); 604 } 605 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 606 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 607 log_debug("%s: incompatible cpu features " 608 "code: 0x%x leaf: 0x%x reg: d", __func__, 609 code, leaf); 610 return (-1); 611 } 612 break; 613 614 case 0x07: 615 CPUID_LEAF(code, leaf, a, b, c, d); 616 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 617 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 618 log_debug("%s: incompatible cpu features " 619 "code: 0x%x leaf: 0x%x reg: c", __func__, 620 code, leaf); 621 return (-1); 622 } 623 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 624 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 625 log_debug("%s: incompatible cpu features " 626 "code: 0x%x leaf: 0x%x reg: d", __func__, 627 code, leaf); 628 return (-1); 629 } 630 break; 631 632 case 0x0d: 633 CPUID_LEAF(code, leaf, a, b, c, d); 634 if (vmh->vmh_cpuids[i].b > b) { 635 log_debug("%s: incompatible cpu: insufficient " 636 "max save area for enabled XCR0 features", 637 __func__); 638 return (-1); 639 } 640 if (vmh->vmh_cpuids[i].c > c) { 641 log_debug("%s: incompatible cpu: insufficient " 642 "max save area for supported XCR0 features", 643 __func__); 644 return (-1); 645 } 646 break; 647 648 case 0x80000001: 649 
CPUID_LEAF(code, leaf, a, b, c, d); 650 if ((vmh->vmh_cpuids[i].a & a) != 651 vmh->vmh_cpuids[i].a) { 652 log_debug("%s: incompatible cpu features " 653 "code: 0x%x leaf: 0x%x reg: a", __func__, 654 code, leaf); 655 return (-1); 656 } 657 if ((vmh->vmh_cpuids[i].c & c) != 658 vmh->vmh_cpuids[i].c) { 659 log_debug("%s: incompatible cpu features " 660 "code: 0x%x leaf: 0x%x reg: c", __func__, 661 code, leaf); 662 return (-1); 663 } 664 if ((vmh->vmh_cpuids[i].d & d) != 665 vmh->vmh_cpuids[i].d) { 666 log_debug("%s: incompatible cpu features " 667 "code: 0x%x leaf: 0x%x reg: d", __func__, 668 code, leaf); 669 return (-1); 670 } 671 break; 672 673 default: 674 log_debug("%s: unknown code 0x%x", __func__, code); 675 return (-1); 676 } 677 } 678 679 return (0); 680} 681 682void 683vmd_sighdlr(int sig, short event, void *arg) 684{ 685 if (privsep_process != PROC_PARENT) 686 return; 687 log_debug("%s: handling signal", __func__); 688 689 switch (sig) { 690 case SIGHUP: 691 log_info("%s: reload requested with SIGHUP", __func__); 692 693 /* 694 * This is safe because libevent uses async signal handlers 695 * that run in the event loop and not in signal context. 
696 */ 697 (void)vmd_reload(0, NULL); 698 break; 699 case SIGPIPE: 700 log_info("%s: ignoring SIGPIPE", __func__); 701 break; 702 case SIGUSR1: 703 log_info("%s: ignoring SIGUSR1", __func__); 704 break; 705 case SIGTERM: 706 case SIGINT: 707 vmd_shutdown(); 708 break; 709 default: 710 fatalx("unexpected signal"); 711 } 712} 713 714__dead void 715usage(void) 716{ 717 extern char *__progname; 718 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 719 __progname); 720 exit(1); 721} 722 723int 724main(int argc, char **argv) 725{ 726 struct privsep *ps; 727 int ch; 728 const char *conffile = VMD_CONF; 729 enum privsep_procid proc_id = PROC_PARENT; 730 int proc_instance = 0; 731 const char *errp, *title = NULL; 732 int argc0 = argc; 733 734 log_init(0, LOG_DAEMON); 735 736 if ((env = calloc(1, sizeof(*env))) == NULL) 737 fatal("calloc: env"); 738 739 while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { 740 switch (ch) { 741 case 'D': 742 if (cmdline_symset(optarg) < 0) 743 log_warnx("could not parse macro definition %s", 744 optarg); 745 break; 746 case 'd': 747 env->vmd_debug = 2; 748 break; 749 case 'f': 750 conffile = optarg; 751 break; 752 case 'v': 753 env->vmd_verbose++; 754 break; 755 case 'n': 756 env->vmd_noaction = 1; 757 break; 758 case 'P': 759 title = optarg; 760 proc_id = proc_getid(procs, nitems(procs), title); 761 if (proc_id == PROC_MAX) 762 fatalx("invalid process name"); 763 break; 764 case 'I': 765 proc_instance = strtonum(optarg, 0, 766 PROC_MAX_INSTANCES, &errp); 767 if (errp) 768 fatalx("invalid process instance"); 769 break; 770 default: 771 usage(); 772 } 773 } 774 775 argc -= optind; 776 if (argc > 0) 777 usage(); 778 779 if (env->vmd_noaction && !env->vmd_debug) 780 env->vmd_debug = 1; 781 782 /* check for root privileges */ 783 if (env->vmd_noaction == 0) { 784 if (geteuid()) 785 fatalx("need root privileges"); 786 } 787 788 ps = &env->vmd_ps; 789 ps->ps_env = env; 790 env->vmd_fd = -1; 791 792 if (config_init(env) == -1) 793 
fatal("failed to initialize configuration"); 794 795 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 796 fatal("unknown user %s", VMD_USER); 797 798 /* First proc runs as root without pledge but in default chroot */ 799 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 800 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 801 802 /* Open /dev/vmm */ 803 if (env->vmd_noaction == 0) { 804 env->vmd_fd = open(VMM_NODE, O_RDWR); 805 if (env->vmd_fd == -1) 806 fatal("%s", VMM_NODE); 807 } 808 809 /* Configure the control socket */ 810 ps->ps_csock.cs_name = SOCKET_NAME; 811 TAILQ_INIT(&ps->ps_rcsocks); 812 813 /* Configuration will be parsed after forking the children */ 814 env->vmd_conffile = conffile; 815 816 log_init(env->vmd_debug, LOG_DAEMON); 817 log_setverbose(env->vmd_verbose); 818 819 if (env->vmd_noaction) 820 ps->ps_noaction = 1; 821 ps->ps_instance = proc_instance; 822 if (title != NULL) 823 ps->ps_title[proc_id] = title; 824 825 /* only the parent returns */ 826 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 827 proc_id); 828 829 log_procinit("parent"); 830 if (!env->vmd_debug && daemon(0, 0) == -1) 831 fatal("can't daemonize"); 832 833 if (ps->ps_noaction == 0) 834 log_info("startup"); 835 836 event_init(); 837 838 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 839 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 840 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 841 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 842 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 843 844 signal_add(&ps->ps_evsigint, NULL); 845 signal_add(&ps->ps_evsigterm, NULL); 846 signal_add(&ps->ps_evsighup, NULL); 847 signal_add(&ps->ps_evsigpipe, NULL); 848 signal_add(&ps->ps_evsigusr1, NULL); 849 850 if (!env->vmd_noaction) 851 proc_connect(ps); 852 853 if (vmd_configure() == -1) 854 fatalx("configuration failed"); 855 856 event_dispatch(); 857 858 log_debug("parent exiting"); 859 860 return (0); 861} 
862 863void 864start_vm_batch(int fd, short type, void *args) 865{ 866 int i = 0; 867 struct vmd_vm *vm; 868 869 log_debug("%s: starting batch of %d vms", __func__, 870 env->vmd_cfg.parallelism); 871 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 872 if (!(vm->vm_state & VM_STATE_WAITING)) { 873 log_debug("%s: not starting vm %s (disabled)", 874 __func__, 875 vm->vm_params.vmc_params.vcp_name); 876 continue; 877 } 878 i++; 879 if (i > env->vmd_cfg.parallelism) { 880 evtimer_add(&staggered_start_timer, 881 &env->vmd_cfg.delay); 882 break; 883 } 884 vm->vm_state &= ~VM_STATE_WAITING; 885 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 886 } 887 log_debug("%s: done starting vms", __func__); 888} 889 890int 891vmd_configure(void) 892{ 893 int ncpus; 894 struct vmd_switch *vsw; 895 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 896 size_t ncpus_sz = sizeof(ncpus); 897 898 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) 899 fatal("open %s", PATH_PTMDEV); 900 901 /* 902 * pledge in the parent process: 903 * stdio - for malloc and basic I/O including events. 904 * rpath - for reload to open and read the configuration files. 905 * wpath - for opening disk images and tap devices. 906 * tty - for openpty and TIOCUCNTL. 907 * proc - run kill to terminate its children safely. 908 * sendfd - for disks, interfaces and other fds. 909 * recvfd - for send and receive. 910 * getpw - lookup user or group id by name. 
911 * chown, fattr - change tty ownership 912 * flock - locking disk files 913 */ 914 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 915 " chown fattr flock", NULL) == -1) 916 fatal("pledge"); 917 918 if (parse_config(env->vmd_conffile) == -1) { 919 proc_kill(&env->vmd_ps); 920 exit(1); 921 } 922 923 if (env->vmd_noaction) { 924 fprintf(stderr, "configuration OK\n"); 925 proc_kill(&env->vmd_ps); 926 exit(0); 927 } 928 929 /* Send shared global configuration to all children */ 930 if (config_setconfig(env) == -1) 931 return (-1); 932 933 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 934 if (vsw->sw_running) 935 continue; 936 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 937 log_warn("%s: failed to create switch %s", 938 __func__, vsw->sw_name); 939 switch_remove(vsw); 940 return (-1); 941 } 942 } 943 944 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 945 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 946 if (sysctl(ncpu_mib, NELEM(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 947 ncpus = 1; 948 env->vmd_cfg.parallelism = ncpus; 949 log_debug("%s: setting staggered start configuration to " 950 "parallelism: %d and delay: %lld", 951 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 952 } 953 954 log_debug("%s: starting vms in staggered fashion", __func__); 955 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 956 /* start first batch */ 957 start_vm_batch(0, 0, NULL); 958 959 return (0); 960} 961 962int 963vmd_reload(unsigned int reset, const char *filename) 964{ 965 struct vmd_vm *vm, *next_vm; 966 struct vmd_switch *vsw; 967 int reload = 0; 968 969 /* Switch back to the default config file */ 970 if (filename == NULL || *filename == '\0') { 971 filename = env->vmd_conffile; 972 reload = 1; 973 } 974 975 log_debug("%s: level %d config file %s", __func__, reset, filename); 976 977 if (reset) { 978 /* Purge the configuration */ 979 config_purge(env, reset); 980 config_setreset(env, reset); 981 } else { 982 
/* 983 * Load or reload the configuration. 984 * 985 * Reloading removes all non-running VMs before processing the 986 * config file, whereas loading only adds to the existing list 987 * of VMs. 988 */ 989 990 if (reload) { 991 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 992 next_vm) { 993 if (!(vm->vm_state & VM_STATE_RUNNING)) { 994 DPRINTF("%s: calling vm_remove", 995 __func__); 996 vm_remove(vm, __func__); 997 } 998 } 999 } 1000 1001 if (parse_config(filename) == -1) { 1002 log_debug("%s: failed to load config file %s", 1003 __func__, filename); 1004 return (-1); 1005 } 1006 1007 if (reload) { 1008 /* Update shared global configuration in all children */ 1009 if (config_setconfig(env) == -1) 1010 return (-1); 1011 } 1012 1013 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1014 if (vsw->sw_running) 1015 continue; 1016 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1017 log_warn("%s: failed to create switch %s", 1018 __func__, vsw->sw_name); 1019 switch_remove(vsw); 1020 return (-1); 1021 } 1022 } 1023 1024 log_debug("%s: starting vms in staggered fashion", __func__); 1025 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1026 /* start first batch */ 1027 start_vm_batch(0, 0, NULL); 1028 1029 } 1030 1031 return (0); 1032} 1033 1034void 1035vmd_shutdown(void) 1036{ 1037 struct vmd_vm *vm, *vm_next; 1038 1039 log_debug("%s: performing shutdown", __func__); 1040 1041 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1042 vm_remove(vm, __func__); 1043 } 1044 1045 proc_kill(&env->vmd_ps); 1046 free(env); 1047 1048 log_warnx("parent terminating"); 1049 exit(0); 1050} 1051 1052struct vmd_vm * 1053vm_getbyvmid(uint32_t vmid) 1054{ 1055 struct vmd_vm *vm; 1056 1057 if (vmid == 0) 1058 return (NULL); 1059 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1060 if (vm->vm_vmid == vmid) 1061 return (vm); 1062 } 1063 1064 return (NULL); 1065} 1066 1067struct vmd_vm * 1068vm_getbyid(uint32_t id) 1069{ 1070 struct vmd_vm *vm; 1071 1072 if (id == 0) 1073 return 
(NULL); 1074 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1075 if (vm->vm_params.vmc_params.vcp_id == id) 1076 return (vm); 1077 } 1078 1079 return (NULL); 1080} 1081 1082uint32_t 1083vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1084{ 1085 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1086 return (0); 1087 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1088 id, vm->vm_vmid); 1089 return (vm->vm_vmid); 1090} 1091 1092uint32_t 1093vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1094{ 1095 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1096 return (0); 1097 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1098 vmid, vm->vm_params.vmc_params.vcp_id); 1099 return (vm->vm_params.vmc_params.vcp_id); 1100} 1101 1102struct vmd_vm * 1103vm_getbyname(const char *name) 1104{ 1105 struct vmd_vm *vm; 1106 1107 if (name == NULL) 1108 return (NULL); 1109 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1110 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1111 return (vm); 1112 } 1113 1114 return (NULL); 1115} 1116 1117struct vmd_vm * 1118vm_getbypid(pid_t pid) 1119{ 1120 struct vmd_vm *vm; 1121 1122 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1123 if (vm->vm_pid == pid) 1124 return (vm); 1125 } 1126 1127 return (NULL); 1128} 1129 1130void 1131vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1132{ 1133 struct privsep *ps = &env->vmd_ps; 1134 unsigned int i, j; 1135 1136 if (vm == NULL) 1137 return; 1138 1139 log_debug("%s: %s %s stopping vm %d%s", 1140 __func__, ps->ps_title[privsep_process], caller, 1141 vm->vm_vmid, keeptty ? 
", keeping tty open" : ""); 1142 1143 vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN); 1144 1145 user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0); 1146 user_put(vm->vm_user); 1147 1148 if (vm->vm_iev.ibuf.fd != -1) { 1149 event_del(&vm->vm_iev.ev); 1150 close(vm->vm_iev.ibuf.fd); 1151 } 1152 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { 1153 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1154 if (vm->vm_disks[i][j] != -1) { 1155 close(vm->vm_disks[i][j]); 1156 vm->vm_disks[i][j] = -1; 1157 } 1158 } 1159 } 1160 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 1161 if (vm->vm_ifs[i].vif_fd != -1) { 1162 close(vm->vm_ifs[i].vif_fd); 1163 vm->vm_ifs[i].vif_fd = -1; 1164 } 1165 free(vm->vm_ifs[i].vif_name); 1166 free(vm->vm_ifs[i].vif_switch); 1167 free(vm->vm_ifs[i].vif_group); 1168 vm->vm_ifs[i].vif_name = NULL; 1169 vm->vm_ifs[i].vif_switch = NULL; 1170 vm->vm_ifs[i].vif_group = NULL; 1171 } 1172 if (vm->vm_kernel != -1) { 1173 close(vm->vm_kernel); 1174 vm->vm_kernel = -1; 1175 } 1176 if (vm->vm_cdrom != -1) { 1177 close(vm->vm_cdrom); 1178 vm->vm_cdrom = -1; 1179 } 1180 if (!keeptty) { 1181 vm_closetty(vm); 1182 vm->vm_uid = 0; 1183 } 1184} 1185 1186void 1187vm_remove(struct vmd_vm *vm, const char *caller) 1188{ 1189 struct privsep *ps = &env->vmd_ps; 1190 1191 if (vm == NULL) 1192 return; 1193 1194 log_debug("%s: %s %s removing vm %d from running config", 1195 __func__, ps->ps_title[privsep_process], caller, 1196 vm->vm_vmid); 1197 1198 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1199 1200 user_put(vm->vm_user); 1201 vm_stop(vm, 0, caller); 1202 free(vm); 1203} 1204 1205int 1206vm_claimid(const char *name, int uid, uint32_t *id) 1207{ 1208 struct name2id *n2i = NULL; 1209 1210 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1211 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1212 goto out; 1213 1214 if (++env->vmd_nvm == 0) { 1215 log_warnx("too many vms"); 1216 return -1; 1217 } 1218 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1219 log_warnx("could 
not alloc vm name"); 1220 return -1; 1221 } 1222 n2i->id = env->vmd_nvm; 1223 n2i->uid = uid; 1224 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1225 log_warnx("vm name too long"); 1226 free(n2i); 1227 return -1; 1228 } 1229 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1230 1231out: 1232 *id = n2i->id; 1233 return 0; 1234} 1235 1236int 1237vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1238 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1239{ 1240 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1241 struct vm_create_params *vcp = &vmc->vmc_params; 1242 struct vmop_owner *vmo = NULL; 1243 struct vmd_user *usr = NULL; 1244 uint32_t nid, rng; 1245 unsigned int i, j; 1246 struct vmd_switch *sw; 1247 char *s; 1248 1249 /* Check if this is an instance of another VM */ 1250 if (vm_instance(ps, &vm_parent, vmc, uid) == -1) 1251 return (-1); 1252 1253 errno = 0; 1254 *ret_vm = NULL; 1255 1256 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1257 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1258 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1259 uid) != 0) { 1260 errno = EPERM; 1261 goto fail; 1262 } 1263 *ret_vm = vm; 1264 errno = EALREADY; 1265 goto fail; 1266 } 1267 1268 if (vm_parent != NULL) 1269 vmo = &vm_parent->vm_params.vmc_insowner; 1270 1271 /* non-root users can only start existing VMs or instances */ 1272 if (vm_checkperm(NULL, vmo, uid) != 0) { 1273 log_warnx("permission denied"); 1274 errno = EPERM; 1275 goto fail; 1276 } 1277 if (vmc->vmc_flags == 0) { 1278 log_warnx("invalid configuration, no devices"); 1279 errno = VMD_DISK_MISSING; 1280 goto fail; 1281 } 1282 if (vcp->vcp_ncpus == 0) 1283 vcp->vcp_ncpus = 1; 1284 if (vcp->vcp_memranges[0].vmr_size == 0) 1285 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1286 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1287 log_warnx("invalid number of CPUs"); 1288 goto fail; 1289 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 1290 log_warnx("invalid number of 
disks"); 1291 goto fail; 1292 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 1293 log_warnx("invalid number of interfaces"); 1294 goto fail; 1295 } else if (strlen(vcp->vcp_kernel) == 0 && 1296 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { 1297 log_warnx("no kernel or disk/cdrom specified"); 1298 goto fail; 1299 } else if (strlen(vcp->vcp_name) == 0) { 1300 log_warnx("invalid VM name"); 1301 goto fail; 1302 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1303 *vcp->vcp_name == '_') { 1304 log_warnx("invalid VM name"); 1305 goto fail; 1306 } else { 1307 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1308 if (!(isalnum(*s) || *s == '.' || *s == '-' || 1309 *s == '_')) { 1310 log_warnx("invalid VM name"); 1311 goto fail; 1312 } 1313 } 1314 } 1315 1316 /* track active users */ 1317 if (uid != 0 && env->vmd_users != NULL && 1318 (usr = user_get(uid)) == NULL) { 1319 log_warnx("could not add user"); 1320 goto fail; 1321 } 1322 1323 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1324 goto fail; 1325 1326 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1327 vmc = &vm->vm_params; 1328 vcp = &vmc->vmc_params; 1329 vm->vm_pid = -1; 1330 vm->vm_tty = -1; 1331 vm->vm_receive_fd = -1; 1332 vm->vm_state &= ~VM_STATE_PAUSED; 1333 vm->vm_user = usr; 1334 1335 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 1336 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1337 vm->vm_disks[i][j] = -1; 1338 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) 1339 vm->vm_ifs[i].vif_fd = -1; 1340 for (i = 0; i < vcp->vcp_nnics; i++) { 1341 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1342 /* inherit per-interface flags from the switch */ 1343 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1344 } 1345 1346 /* 1347 * If the MAC address is zero, always randomize it in vmd(8) 1348 * because we cannot rely on the guest OS to do the right 1349 * thing like OpenBSD does. Based on ether_fakeaddr() 1350 * from the kernel, incremented by one to differentiate 1351 * the source. 
1352 */ 1353 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) { 1354 rng = arc4random(); 1355 vcp->vcp_macs[i][0] = 0xfe; 1356 vcp->vcp_macs[i][1] = 0xe1; 1357 vcp->vcp_macs[i][2] = 0xba + 1; 1358 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1359 vcp->vcp_macs[i][4] = rng; 1360 vcp->vcp_macs[i][5] = rng >> 8; 1361 } 1362 } 1363 vm->vm_kernel = -1; 1364 vm->vm_cdrom = -1; 1365 vm->vm_iev.ibuf.fd = -1; 1366 1367 /* 1368 * Assign a new internal Id if not specified and we succeed in 1369 * claiming a new Id. 1370 */ 1371 if (id != 0) 1372 vm->vm_vmid = id; 1373 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1374 goto fail; 1375 else 1376 vm->vm_vmid = nid; 1377 1378 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1379 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1380 1381 *ret_vm = vm; 1382 return (0); 1383 fail: 1384 if (errno == 0) 1385 errno = EINVAL; 1386 return (-1); 1387} 1388 1389int 1390vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1391 struct vmop_create_params *vmc, uid_t uid) 1392{ 1393 char *name; 1394 struct vm_create_params *vcp = &vmc->vmc_params; 1395 struct vmop_create_params *vmcp; 1396 struct vm_create_params *vcpp; 1397 struct vmd_vm *vm = NULL; 1398 unsigned int i, j; 1399 uint32_t id; 1400 1401 /* return without error if the parent is NULL (nothing to inherit) */ 1402 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1403 vmc->vmc_instance[0] == '\0') 1404 return (0); 1405 1406 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1407 errno = VMD_PARENT_INVALID; 1408 return (-1); 1409 } 1410 1411 errno = 0; 1412 vmcp = &(*vm_parent)->vm_params; 1413 vcpp = &vmcp->vmc_params; 1414 1415 /* Are we allowed to create an instance from this VM? 
*/ 1416 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1417 log_warnx("vm \"%s\" no permission to create vm instance", 1418 vcpp->vcp_name); 1419 errno = ENAMETOOLONG; 1420 return (-1); 1421 } 1422 1423 id = vcp->vcp_id; 1424 name = vcp->vcp_name; 1425 1426 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1427 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1428 errno = EPROCLIM; 1429 return (-1); 1430 } 1431 1432 /* CPU */ 1433 if (vcp->vcp_ncpus == 0) 1434 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1435 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1436 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1437 log_warnx("vm \"%s\" no permission to set cpus", name); 1438 errno = EPERM; 1439 return (-1); 1440 } 1441 1442 /* memory */ 1443 if (vcp->vcp_memranges[0].vmr_size == 0) 1444 vcp->vcp_memranges[0].vmr_size = 1445 vcpp->vcp_memranges[0].vmr_size; 1446 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1447 vcp->vcp_memranges[0].vmr_size != 1448 vcpp->vcp_memranges[0].vmr_size) { 1449 log_warnx("vm \"%s\" no permission to set memory", name); 1450 errno = EPERM; 1451 return (-1); 1452 } 1453 1454 /* disks cannot be inherited */ 1455 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1456 vcp->vcp_ndisks) { 1457 log_warnx("vm \"%s\" no permission to set disks", name); 1458 errno = EPERM; 1459 return (-1); 1460 } 1461 for (i = 0; i < vcp->vcp_ndisks; i++) { 1462 /* Check if this disk is already used in the parent */ 1463 for (j = 0; j < vcpp->vcp_ndisks; j++) { 1464 if (strcmp(vcp->vcp_disks[i], 1465 vcpp->vcp_disks[j]) == 0) { 1466 log_warnx("vm \"%s\" disk %s cannot be reused", 1467 name, vcp->vcp_disks[i]); 1468 errno = EBUSY; 1469 return (-1); 1470 } 1471 } 1472 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1473 } 1474 1475 /* interfaces */ 1476 if (vcp->vcp_nnics > 0 && 1477 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1478 vcp->vcp_nnics != vcpp->vcp_nnics) { 1479 log_warnx("vm \"%s\" no permission to set interfaces", name); 1480 errno = 
EPERM; 1481 return (-1); 1482 } 1483 for (i = 0; i < vcpp->vcp_nnics; i++) { 1484 /* Interface got overwritten */ 1485 if (i < vcp->vcp_nnics) 1486 continue; 1487 1488 /* Copy interface from parent */ 1489 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1490 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1491 sizeof(vmc->vmc_ifnames[i])); 1492 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1493 sizeof(vmc->vmc_ifswitch[i])); 1494 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1495 sizeof(vmc->vmc_ifgroup[i])); 1496 memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i], 1497 sizeof(vcp->vcp_macs[i])); 1498 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1499 vcp->vcp_nnics++; 1500 } 1501 for (i = 0; i < vcp->vcp_nnics; i++) { 1502 for (j = 0; j < vcpp->vcp_nnics; j++) { 1503 if (memcmp(zero_mac, vcp->vcp_macs[i], 1504 sizeof(vcp->vcp_macs[i])) != 0 && 1505 memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i], 1506 sizeof(vcp->vcp_macs[i])) != 0) { 1507 log_warnx("vm \"%s\" lladdr cannot be reused", 1508 name); 1509 errno = EBUSY; 1510 return (-1); 1511 } 1512 if (strlen(vmc->vmc_ifnames[i]) && 1513 strcmp(vmc->vmc_ifnames[i], 1514 vmcp->vmc_ifnames[j]) == 0) { 1515 log_warnx("vm \"%s\" %s cannot be reused", 1516 vmc->vmc_ifnames[i], name); 1517 errno = EBUSY; 1518 return (-1); 1519 } 1520 } 1521 } 1522 1523 /* kernel */ 1524 if (strlen(vcp->vcp_kernel) > 0) { 1525 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1526 log_warnx("vm \"%s\" no permission to set boot image", 1527 name); 1528 errno = EPERM; 1529 return (-1); 1530 } 1531 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1532 } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel, 1533 sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) { 1534 log_warnx("vm \"%s\" kernel name too long", name); 1535 errno = EINVAL; 1536 return (-1); 1537 } 1538 1539 /* cdrom */ 1540 if (strlen(vcp->vcp_cdrom) > 0) { 1541 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1542 log_warnx("vm \"%s\" no 
permission to set cdrom", name); 1543 errno = EPERM; 1544 return (-1); 1545 } 1546 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1547 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, 1548 sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) { 1549 log_warnx("vm \"%s\" cdrom name too long", name); 1550 errno = EINVAL; 1551 return (-1); 1552 } 1553 1554 /* user */ 1555 if (vmc->vmc_owner.uid == 0) 1556 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1557 else if (vmc->vmc_owner.uid != uid && 1558 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1559 log_warnx("vm \"%s\" user mismatch", name); 1560 errno = EPERM; 1561 return (-1); 1562 } 1563 1564 /* group */ 1565 if (vmc->vmc_owner.gid == 0) 1566 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1567 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1568 log_warnx("vm \"%s\" group mismatch", name); 1569 errno = EPERM; 1570 return (-1); 1571 } 1572 1573 /* child instances */ 1574 if (vmc->vmc_insflags) { 1575 log_warnx("vm \"%s\" cannot change instance permissions", name); 1576 errno = EPERM; 1577 return (-1); 1578 } 1579 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1580 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1581 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1582 vmc->vmc_insflags = vmcp->vmc_insflags; 1583 } else { 1584 vmc->vmc_insowner.gid = 0; 1585 vmc->vmc_insowner.uid = 0; 1586 vmc->vmc_insflags = 0; 1587 } 1588 1589 /* finished, remove instance flags */ 1590 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1591 1592 return (0); 1593} 1594 1595/* 1596 * vm_checkperm 1597 * 1598 * Checks if the user represented by the 'uid' parameter is allowed to 1599 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1600 * console.) 
1601 * 1602 * Parameters: 1603 * vm: the VM whose permission is to be checked 1604 * vmo: the required uid/gid to be checked 1605 * uid: the user ID of the user making the request 1606 * 1607 * Return values: 1608 * 0: the permission should be granted 1609 * -1: the permission check failed (also returned if vm == null) 1610 */ 1611int 1612vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1613{ 1614 struct group *gr; 1615 struct passwd *pw; 1616 char **grmem; 1617 1618 /* root has no restrictions */ 1619 if (uid == 0) 1620 return (0); 1621 1622 if (vmo == NULL) 1623 return (-1); 1624 1625 /* check user */ 1626 if (vm == NULL) { 1627 if (vmo->uid == uid) 1628 return (0); 1629 } else { 1630 /* 1631 * check user of running vm (the owner of a running vm can 1632 * be different to (or more specific than) the configured owner. 1633 */ 1634 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1635 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1636 return (0); 1637 } 1638 1639 /* check groups */ 1640 if (vmo->gid != -1) { 1641 if ((pw = getpwuid(uid)) == NULL) 1642 return (-1); 1643 if (pw->pw_gid == vmo->gid) 1644 return (0); 1645 if ((gr = getgrgid(vmo->gid)) != NULL) { 1646 for (grmem = gr->gr_mem; *grmem; grmem++) 1647 if (strcmp(*grmem, pw->pw_name) == 0) 1648 return (0); 1649 } 1650 } 1651 1652 return (-1); 1653} 1654 1655/* 1656 * vm_checkinsflag 1657 * 1658 * Checks wheter the non-root user is allowed to set an instance option. 
1659 * 1660 * Parameters: 1661 * vmc: the VM create parameters 1662 * flag: the flag to be checked 1663 * uid: the user ID of the user making the request 1664 * 1665 * Return values: 1666 * 0: the permission should be granted 1667 * -1: the permission check failed (also returned if vm == null) 1668 */ 1669int 1670vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1671{ 1672 /* root has no restrictions */ 1673 if (uid == 0) 1674 return (0); 1675 1676 if ((vmc->vmc_insflags & flag) == 0) 1677 return (-1); 1678 1679 return (0); 1680} 1681 1682/* 1683 * vm_checkaccess 1684 * 1685 * Checks if the user represented by the 'uid' parameter is allowed to 1686 * access the file described by the 'path' parameter. 1687 * 1688 * Parameters: 1689 * fd: the file descriptor of the opened file 1690 * uflag: check if the userid has access to the file 1691 * uid: the user ID of the user making the request 1692 * amode: the access flags of R_OK and W_OK 1693 * 1694 * Return values: 1695 * 0: the permission should be granted 1696 * -1: the permission check failed 1697 */ 1698int 1699vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1700{ 1701 struct group *gr; 1702 struct passwd *pw; 1703 char **grmem; 1704 struct stat st; 1705 mode_t mode; 1706 1707 if (fd == -1) 1708 return (-1); 1709 1710 /* 1711 * File has to be accessible and a regular file 1712 */ 1713 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1714 return (-1); 1715 1716 /* root has no restrictions */ 1717 if (uid == 0 || uflag == 0) 1718 return (0); 1719 1720 /* check other */ 1721 mode = amode & W_OK ? S_IWOTH : 0; 1722 mode |= amode & R_OK ? S_IROTH : 0; 1723 if ((st.st_mode & mode) == mode) 1724 return (0); 1725 1726 /* check user */ 1727 mode = amode & W_OK ? S_IWUSR : 0; 1728 mode |= amode & R_OK ? S_IRUSR : 0; 1729 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1730 return (0); 1731 1732 /* check groups */ 1733 mode = amode & W_OK ? 
S_IWGRP : 0; 1734 mode |= amode & R_OK ? S_IRGRP : 0; 1735 if ((st.st_mode & mode) != mode) 1736 return (-1); 1737 if ((pw = getpwuid(uid)) == NULL) 1738 return (-1); 1739 if (pw->pw_gid == st.st_gid) 1740 return (0); 1741 if ((gr = getgrgid(st.st_gid)) != NULL) { 1742 for (grmem = gr->gr_mem; *grmem; grmem++) 1743 if (strcmp(*grmem, pw->pw_name) == 0) 1744 return (0); 1745 } 1746 1747 return (-1); 1748} 1749 1750int 1751vm_opentty(struct vmd_vm *vm) 1752{ 1753 struct ptmget ptm; 1754 struct stat st; 1755 struct group *gr; 1756 uid_t uid; 1757 gid_t gid; 1758 mode_t mode; 1759 int on; 1760 1761 /* 1762 * Open tty with pre-opened PTM fd 1763 */ 1764 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 1765 return (-1); 1766 1767 /* 1768 * We use user ioctl(2) mode to pass break commands. 1769 */ 1770 on = 1; 1771 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) 1772 fatal("could not enable user ioctl mode"); 1773 1774 vm->vm_tty = ptm.cfd; 1775 close(ptm.sfd); 1776 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL) 1777 goto fail; 1778 1779 uid = vm->vm_uid; 1780 gid = vm->vm_params.vmc_owner.gid; 1781 1782 if (vm->vm_params.vmc_owner.gid != -1) { 1783 mode = 0660; 1784 } else if ((gr = getgrnam("tty")) != NULL) { 1785 gid = gr->gr_gid; 1786 mode = 0620; 1787 } else { 1788 mode = 0600; 1789 gid = 0; 1790 } 1791 1792 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1793 __func__, vm->vm_params.vmc_params.vcp_name, 1794 vm->vm_ttyname, uid, gid, mode); 1795 1796 /* 1797 * Change ownership and mode of the tty as required. 
1798 * Loosely based on the implementation of sshpty.c 1799 */ 1800 if (stat(vm->vm_ttyname, &st) == -1) 1801 goto fail; 1802 1803 if (st.st_uid != uid || st.st_gid != gid) { 1804 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1805 log_warn("chown %s %d %d failed, uid %d", 1806 vm->vm_ttyname, uid, gid, getuid()); 1807 1808 /* Ignore failure on read-only filesystems */ 1809 if (!((errno == EROFS) && 1810 (st.st_uid == uid || st.st_uid == 0))) 1811 goto fail; 1812 } 1813 } 1814 1815 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1816 if (chmod(vm->vm_ttyname, mode) == -1) { 1817 log_warn("chmod %s %o failed, uid %d", 1818 vm->vm_ttyname, mode, getuid()); 1819 1820 /* Ignore failure on read-only filesystems */ 1821 if (!((errno == EROFS) && 1822 (st.st_uid == uid || st.st_uid == 0))) 1823 goto fail; 1824 } 1825 } 1826 1827 return (0); 1828 fail: 1829 vm_closetty(vm); 1830 return (-1); 1831} 1832 1833void 1834vm_closetty(struct vmd_vm *vm) 1835{ 1836 if (vm->vm_tty != -1) { 1837 /* Release and close the tty */ 1838 if (fchown(vm->vm_tty, 0, 0) == -1) 1839 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1840 if (fchmod(vm->vm_tty, 0666) == -1) 1841 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1842 close(vm->vm_tty); 1843 vm->vm_tty = -1; 1844 } 1845 free(vm->vm_ttyname); 1846 vm->vm_ttyname = NULL; 1847} 1848 1849void 1850switch_remove(struct vmd_switch *vsw) 1851{ 1852 if (vsw == NULL) 1853 return; 1854 1855 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1856 1857 free(vsw->sw_group); 1858 free(vsw->sw_name); 1859 free(vsw); 1860} 1861 1862struct vmd_switch * 1863switch_getbyname(const char *name) 1864{ 1865 struct vmd_switch *vsw; 1866 1867 if (name == NULL) 1868 return (NULL); 1869 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1870 if (strcmp(vsw->sw_name, name) == 0) 1871 return (vsw); 1872 } 1873 1874 return (NULL); 1875} 1876 1877struct vmd_user * 1878user_get(uid_t uid) 1879{ 1880 struct vmd_user *usr; 1881 1882 if (uid == 0) 1883 return 
(NULL); 1884 1885 /* first try to find an existing user */ 1886 TAILQ_FOREACH(usr, env->vmd_users, usr_entry) { 1887 if (usr->usr_id.uid == uid) 1888 goto done; 1889 } 1890 1891 if ((usr = calloc(1, sizeof(*usr))) == NULL) { 1892 log_warn("could not allocate user"); 1893 return (NULL); 1894 } 1895 1896 usr->usr_id.uid = uid; 1897 usr->usr_id.gid = -1; 1898 TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry); 1899 1900 done: 1901 DPRINTF("%s: uid %d #%d +", 1902 __func__, usr->usr_id.uid, usr->usr_refcnt + 1); 1903 usr->usr_refcnt++; 1904 1905 return (usr); 1906} 1907 1908void 1909user_put(struct vmd_user *usr) 1910{ 1911 if (usr == NULL) 1912 return; 1913 1914 DPRINTF("%s: uid %d #%d -", 1915 __func__, usr->usr_id.uid, usr->usr_refcnt - 1); 1916 1917 if (--usr->usr_refcnt > 0) 1918 return; 1919 1920 TAILQ_REMOVE(env->vmd_users, usr, usr_entry); 1921 free(usr); 1922} 1923 1924void 1925user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc) 1926{ 1927 char mem[FMT_SCALED_STRSIZE]; 1928 1929 if (usr == NULL) 1930 return; 1931 1932 /* increment or decrement counters */ 1933 inc = inc ? 1 : -1; 1934 1935 usr->usr_maxcpu += vcp->vcp_ncpus * inc; 1936 usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc; 1937 usr->usr_maxifs += vcp->vcp_nnics * inc; 1938 1939 if (log_getverbose() > 1) { 1940 (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem); 1941 log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu", 1942 __func__, inc == 1 ? 
'+' : '-', 1943 usr->usr_id.uid, usr->usr_refcnt, 1944 usr->usr_maxcpu, mem, usr->usr_maxifs); 1945 } 1946} 1947 1948int 1949user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) 1950{ 1951 const char *limit = ""; 1952 1953 /* XXX make the limits configurable */ 1954 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) { 1955 limit = "cpu "; 1956 goto fail; 1957 } 1958 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) { 1959 limit = "memory "; 1960 goto fail; 1961 } 1962 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) { 1963 limit = "interface "; 1964 goto fail; 1965 } 1966 1967 return (0); 1968 1969 fail: 1970 log_warnx("%s: user %d %slimit reached", vcp->vcp_name, 1971 usr->usr_id.uid, limit); 1972 return (-1); 1973} 1974 1975char * 1976get_string(uint8_t *ptr, size_t len) 1977{ 1978 size_t i; 1979 1980 for (i = 0; i < len; i++) 1981 if (!isprint(ptr[i])) 1982 break; 1983 1984 return strndup(ptr, i); 1985} 1986 1987uint32_t 1988prefixlen2mask(uint8_t prefixlen) 1989{ 1990 if (prefixlen == 0) 1991 return (0); 1992 1993 if (prefixlen > 32) 1994 prefixlen = 32; 1995 1996 return (htonl(0xffffffff << (32 - prefixlen))); 1997} 1998 1999void 2000prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 2001{ 2002 struct in6_addr s6; 2003 int i; 2004 2005 if (prefixlen > 128) 2006 prefixlen = 128; 2007 2008 memset(&s6, 0, sizeof(s6)); 2009 for (i = 0; i < prefixlen / 8; i++) 2010 s6.s6_addr[i] = 0xff; 2011 i = prefixlen % 8; 2012 if (i) 2013 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2014 2015 memcpy(mask, &s6, sizeof(s6)); 2016} 2017 2018void 2019getmonotime(struct timeval *tv) 2020{ 2021 struct timespec ts; 2022 2023 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2024 fatal("clock_gettime"); 2025 2026 TIMESPEC_TO_TIMEVAL(tv, &ts); 2027} 2028