/*	$OpenBSD: vmd.c,v 1.127 2021/11/29 05:17:35 deraadt Exp $	*/

/*
 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/wait.h>
#include <sys/cdefs.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/tty.h>
#include <sys/ttycom.h>
#include <sys/ioctl.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <pwd.h>
#include <signal.h>
#include <syslog.h>
#include <unistd.h>
#include <util.h>
#include <ctype.h>
#include <grp.h>

#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include "proc.h"
#include "atomicio.h"
#include "vmd.h"

__dead void usage(void);

int	 main(int, char **);
int	 vmd_configure(void);
void	 vmd_sighdlr(int sig, short event, void *arg);
void	 vmd_shutdown(void);
int	 vmd_control_run(void);
int	 vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
int	 vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
int	 vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *);
int	 vmd_check_vmh(struct vm_dump_header *);

int	 vm_instance(struct privsep *, struct vmd_vm **,
	    struct vmop_create_params *, uid_t);
int	 vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
int	 vm_claimid(const char *, int, uint32_t *);
void	 start_vm_batch(int, short, void*);

struct vmd	*env;

static struct
privsep_proc procs[] = { 73112158Sdas /* Keep "priv" on top as procs[0] */ 74112158Sdas { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, 75112158Sdas { "control", PROC_CONTROL, vmd_dispatch_control, control }, 76112158Sdas { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 77112158Sdas}; 78219557Sdas 79219557Sdasenum privsep_procid privsep_process; 80112158Sdas 81219557Sdasstruct event staggered_start_timer; 82112158Sdas 83112158Sdas/* For the privileged process */ 84112158Sdasstatic struct privsep_proc *proc_priv = &procs[0]; 85112158Sdasstatic struct passwd proc_privpw; 86112158Sdasstatic const uint8_t zero_mac[ETHER_ADDR_LEN]; 87112158Sdas 88112158Sdasint 89112158Sdasvmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 90112158Sdas{ 91112158Sdas struct privsep *ps = p->p_ps; 92112158Sdas int res = 0, ret = 0, cmd = 0, verbose; 93112158Sdas unsigned int v = 0, flags; 94112158Sdas struct vmop_create_params vmc; 95112158Sdas struct vmop_id vid; 96112158Sdas struct vmop_result vmr; 97112158Sdas struct vm_dump_header vmh; 98112158Sdas struct vmd_vm *vm = NULL; 99112158Sdas char *str = NULL; 100112158Sdas uint32_t id = 0; 101112158Sdas struct control_sock *rcs; 102112158Sdas 103112158Sdas switch (imsg->hdr.type) { 104112158Sdas case IMSG_VMDOP_START_VM_REQUEST: 105112158Sdas IMSG_SIZE_CHECK(imsg, &vmc); 106112158Sdas memcpy(&vmc, imsg->data, sizeof(vmc)); 107112158Sdas ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 108112158Sdas if (vmc.vmc_flags == 0) { 109112158Sdas /* start an existing VM with pre-configured options */ 110112158Sdas if (!(ret == -1 && errno == EALREADY && 111112158Sdas !(vm->vm_state & VM_STATE_RUNNING))) { 112112158Sdas res = errno; 113112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 114112158Sdas } 115112158Sdas } else if (ret != 0) { 116112158Sdas res = errno; 117112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 118112158Sdas } 119112158Sdas if (res == 0) { 120112158Sdas res = config_setvm(ps, vm, imsg->hdr.peerid, 
121112158Sdas vm->vm_params.vmc_owner.uid); 122112158Sdas if (res) 123112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 124112158Sdas } 125112158Sdas break; 126112158Sdas case IMSG_VMDOP_WAIT_VM_REQUEST: 127219557Sdas case IMSG_VMDOP_TERMINATE_VM_REQUEST: 128219557Sdas IMSG_SIZE_CHECK(imsg, &vid); 129112158Sdas memcpy(&vid, imsg->data, sizeof(vid)); 130112158Sdas flags = vid.vid_flags; 131112158Sdas cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 132112158Sdas 133182709Sdas if ((id = vid.vid_id) == 0) { 134182709Sdas /* Lookup vm (id) by name */ 135182709Sdas if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 136182709Sdas res = ENOENT; 137182709Sdas break; 138182709Sdas } else if ((vm->vm_state & VM_STATE_SHUTDOWN) && 139182709Sdas (flags & VMOP_FORCE) == 0) { 140182709Sdas res = EALREADY; 141182709Sdas break; 142182709Sdas } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 143182709Sdas res = EINVAL; 144182709Sdas break; 145182709Sdas } 146112158Sdas id = vm->vm_vmid; 147112158Sdas } else if ((vm = vm_getbyvmid(id)) == NULL) { 148112158Sdas res = ENOENT; 149112158Sdas break; 150112158Sdas } 151112158Sdas if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { 152112158Sdas res = EPERM; 153219557Sdas break; 154219557Sdas } 155112158Sdas 156112158Sdas /* Only relay TERMINATION requests, not WAIT requests */ 157219557Sdas if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { 158112158Sdas memset(&vid, 0, sizeof(vid)); 159112158Sdas vid.vid_id = id; 160112158Sdas vid.vid_flags = flags; 161112158Sdas 162112158Sdas if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 163112158Sdas imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 164219557Sdas return (-1); 165112158Sdas } 166219557Sdas break; 167112158Sdas case IMSG_VMDOP_GET_INFO_VM_REQUEST: 168112158Sdas proc_forward_imsg(ps, imsg, PROC_VMM, -1); 169112158Sdas break; 170112158Sdas case IMSG_VMDOP_LOAD: 171112158Sdas IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 172219557Sdas str = get_string((uint8_t 
*)imsg->data, 173112158Sdas IMSG_DATA_SIZE(imsg)); 174112158Sdas case IMSG_VMDOP_RELOAD: 175112158Sdas if (vmd_reload(0, str) == -1) 176112158Sdas cmd = IMSG_CTL_FAIL; 177112158Sdas else 178112158Sdas cmd = IMSG_CTL_OK; 179219557Sdas free(str); 180112158Sdas break; 181219557Sdas case IMSG_CTL_RESET: 182112158Sdas IMSG_SIZE_CHECK(imsg, &v); 183112158Sdas memcpy(&v, imsg->data, sizeof(v)); 184112158Sdas if (vmd_reload(v, NULL) == -1) 185112158Sdas cmd = IMSG_CTL_FAIL; 186112158Sdas else 187112158Sdas cmd = IMSG_CTL_OK; 188112158Sdas break; 189112158Sdas case IMSG_CTL_VERBOSE: 190112158Sdas IMSG_SIZE_CHECK(imsg, &verbose); 191182709Sdas memcpy(&verbose, imsg->data, sizeof(verbose)); 192112158Sdas log_setverbose(verbose); 193182709Sdas 194112158Sdas proc_forward_imsg(ps, imsg, PROC_VMM, -1); 195182709Sdas proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 196182709Sdas cmd = IMSG_CTL_OK; 197112158Sdas break; 198112158Sdas case IMSG_VMDOP_PAUSE_VM: 199112158Sdas case IMSG_VMDOP_UNPAUSE_VM: 200219557Sdas IMSG_SIZE_CHECK(imsg, &vid); 201112158Sdas memcpy(&vid, imsg->data, sizeof(vid)); 202219557Sdas if (vid.vid_id == 0) { 203112158Sdas if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 204219557Sdas res = ENOENT; 205112158Sdas cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 206219557Sdas ? IMSG_VMDOP_PAUSE_VM_RESPONSE 207219557Sdas : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 208219557Sdas break; 209112158Sdas } else { 210219557Sdas vid.vid_id = vm->vm_vmid; 211219557Sdas } 212112158Sdas } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 213112158Sdas res = ENOENT; 214112158Sdas cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 215112158Sdas ? IMSG_VMDOP_PAUSE_VM_RESPONSE 216112158Sdas : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 217219557Sdas break; 218112158Sdas } 219219557Sdas if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 220112158Sdas vid.vid_uid) != 0) { 221112158Sdas res = EPERM; 222112158Sdas cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 223112158Sdas ? 
IMSG_VMDOP_PAUSE_VM_RESPONSE 224112158Sdas : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 225112158Sdas break; 226112158Sdas } 227112158Sdas proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 228112158Sdas imsg->hdr.peerid, -1, &vid, sizeof(vid)); 229112158Sdas break; 230112158Sdas case IMSG_VMDOP_SEND_VM_REQUEST: 231112158Sdas IMSG_SIZE_CHECK(imsg, &vid); 232112158Sdas memcpy(&vid, imsg->data, sizeof(vid)); 233112158Sdas id = vid.vid_id; 234112158Sdas if (vid.vid_id == 0) { 235112158Sdas if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 236112158Sdas res = ENOENT; 237112158Sdas cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 238112158Sdas close(imsg->fd); 239112158Sdas break; 240112158Sdas } else { 241112158Sdas vid.vid_id = vm->vm_vmid; 242112158Sdas } 243112158Sdas } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 244112158Sdas res = ENOENT; 245112158Sdas cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 246112158Sdas close(imsg->fd); 247112158Sdas break; 248219557Sdas } 249219557Sdas vmr.vmr_id = vid.vid_id; 250219557Sdas log_debug("%s: sending fd to vmm", __func__); 251219557Sdas proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 252112158Sdas imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); 253112158Sdas break; 254112158Sdas case IMSG_VMDOP_RECEIVE_VM_REQUEST: 255112158Sdas IMSG_SIZE_CHECK(imsg, &vid); 256219557Sdas memcpy(&vid, imsg->data, sizeof(vid)); 257112158Sdas if (imsg->fd == -1) { 258112158Sdas log_warnx("%s: invalid fd", __func__); 259112158Sdas return (-1); 260112158Sdas } 261112158Sdas if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != 262219557Sdas sizeof(vmh)) { 263112158Sdas log_warnx("%s: error reading vmh from received vm", 264112158Sdas __func__); 265112158Sdas res = EIO; 266112158Sdas close(imsg->fd); 267112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 268112158Sdas break; 269112158Sdas } 270112158Sdas 271112158Sdas if (vmd_check_vmh(&vmh)) { 272112158Sdas res = ENOENT; 273112158Sdas close(imsg->fd); 274112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 275112158Sdas break; 
276112158Sdas } 277112158Sdas if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != 278112158Sdas sizeof(vmc)) { 279112158Sdas log_warnx("%s: error reading vmc from received vm", 280112158Sdas __func__); 281112158Sdas res = EIO; 282112158Sdas close(imsg->fd); 283112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 284112158Sdas break; 285112158Sdas } 286112158Sdas strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 287112158Sdas sizeof(vmc.vmc_params.vcp_name)); 288112158Sdas vmc.vmc_params.vcp_id = 0; 289112158Sdas 290112158Sdas ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 291112158Sdas if (ret != 0) { 292112158Sdas res = errno; 293112158Sdas cmd = IMSG_VMDOP_START_VM_RESPONSE; 294112158Sdas close(imsg->fd); 295112158Sdas } else { 296112158Sdas vm->vm_state |= VM_STATE_RECEIVED; 297112158Sdas config_setvm(ps, vm, imsg->hdr.peerid, 298112158Sdas vmc.vmc_owner.uid); 299112158Sdas log_debug("%s: sending fd to vmm", __func__); 300112158Sdas proc_compose_imsg(ps, PROC_VMM, -1, 301219557Sdas IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, 302219557Sdas NULL, 0); 303112158Sdas } 304112158Sdas break; 305112158Sdas case IMSG_VMDOP_DONE: 306112158Sdas control_reset(&ps->ps_csock); 307112158Sdas TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 308112158Sdas control_reset(rcs); 309112158Sdas cmd = 0; 310112158Sdas break; 311112158Sdas default: 312112158Sdas return (-1); 313112158Sdas } 314112158Sdas 315112158Sdas switch (cmd) { 316112158Sdas case 0: 317112158Sdas break; 318112158Sdas case IMSG_VMDOP_START_VM_RESPONSE: 319112158Sdas case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 320112158Sdas memset(&vmr, 0, sizeof(vmr)); 321112158Sdas vmr.vmr_result = res; 322112158Sdas vmr.vmr_id = id; 323112158Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 324112158Sdas imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 325112158Sdas return (-1); 326112158Sdas break; 327112158Sdas default: 328112158Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 329112158Sdas imsg->hdr.peerid, -1, &res, 
sizeof(res)) == -1) 330182709Sdas return (-1); 331112158Sdas break; 332112158Sdas } 333112158Sdas 334112158Sdas return (0); 335112158Sdas} 336112158Sdas 337112158Sdasint 338112158Sdasvmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 339219557Sdas{ 340112158Sdas struct vmop_result vmr; 341112158Sdas struct privsep *ps = p->p_ps; 342112158Sdas int res = 0; 343112158Sdas struct vmd_vm *vm; 344112158Sdas struct vm_create_params *vcp; 345112158Sdas struct vmop_info_result vir; 346112158Sdas 347112158Sdas switch (imsg->hdr.type) { 348112158Sdas case IMSG_VMDOP_PAUSE_VM_RESPONSE: 349219557Sdas IMSG_SIZE_CHECK(imsg, &vmr); 350112158Sdas memcpy(&vmr, imsg->data, sizeof(vmr)); 351112158Sdas if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 352112158Sdas break; 353112158Sdas proc_compose_imsg(ps, PROC_CONTROL, -1, 354112158Sdas imsg->hdr.type, imsg->hdr.peerid, -1, 355112158Sdas imsg->data, sizeof(imsg->data)); 356112158Sdas log_info("%s: paused vm %d successfully", 357219557Sdas vm->vm_params.vmc_params.vcp_name, 358112158Sdas vm->vm_vmid); 359112158Sdas vm->vm_state |= VM_STATE_PAUSED; 360219557Sdas break; 361112158Sdas case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 362112158Sdas IMSG_SIZE_CHECK(imsg, &vmr); 363112158Sdas memcpy(&vmr, imsg->data, sizeof(vmr)); 364219557Sdas if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 365112158Sdas break; 366112158Sdas proc_compose_imsg(ps, PROC_CONTROL, -1, 367219557Sdas imsg->hdr.type, imsg->hdr.peerid, -1, 368112158Sdas imsg->data, sizeof(imsg->data)); 369112158Sdas log_info("%s: unpaused vm %d successfully.", 370112158Sdas vm->vm_params.vmc_params.vcp_name, 371112158Sdas vm->vm_vmid); 372219557Sdas vm->vm_state &= ~VM_STATE_PAUSED; 373112158Sdas break; 374112158Sdas case IMSG_VMDOP_START_VM_RESPONSE: 375219557Sdas IMSG_SIZE_CHECK(imsg, &vmr); 376219557Sdas memcpy(&vmr, imsg->data, sizeof(vmr)); 377112158Sdas if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 378112158Sdas break; 379219557Sdas vm->vm_pid = vmr.vmr_pid; 380219557Sdas 
vcp = &vm->vm_params.vmc_params; 381112158Sdas vcp->vcp_id = vmr.vmr_id; 382219557Sdas 383112158Sdas /* 384112158Sdas * If the peerid is not -1, forward the response back to the 385112158Sdas * the control socket. If it is -1, the request originated 386112158Sdas * from the parent, not the control socket. 387112158Sdas */ 388112158Sdas if (vm->vm_peerid != (uint32_t)-1) { 389112158Sdas (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 390112158Sdas sizeof(vmr.vmr_ttyname)); 391219557Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, 392112158Sdas imsg->hdr.type, vm->vm_peerid, -1, 393219557Sdas &vmr, sizeof(vmr)) == -1) { 394219557Sdas errno = vmr.vmr_result; 395112158Sdas log_warn("%s: failed to foward vm result", 396219557Sdas vcp->vcp_name); 397112158Sdas vm_remove(vm, __func__); 398219557Sdas return (-1); 399112158Sdas } 400112158Sdas } 401112158Sdas 402219557Sdas if (vmr.vmr_result) { 403219557Sdas errno = vmr.vmr_result; 404112158Sdas log_warn("%s: failed to start vm", vcp->vcp_name); 405112158Sdas vm_remove(vm, __func__); 406112158Sdas break; 407112158Sdas } 408112158Sdas 409219557Sdas /* Now configure all the interfaces */ 410219557Sdas if (vm_priv_ifconfig(ps, vm) == -1) { 411219557Sdas log_warn("%s: failed to configure vm", vcp->vcp_name); 412219557Sdas vm_remove(vm, __func__); 413112158Sdas break; 414112158Sdas } 415112158Sdas 416219557Sdas log_info("%s: started vm %d successfully, tty %s", 417112158Sdas vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 418219557Sdas break; 419112158Sdas case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 420112158Sdas IMSG_SIZE_CHECK(imsg, &vmr); 421112158Sdas memcpy(&vmr, imsg->data, sizeof(vmr)); 422112158Sdas 423112158Sdas if (vmr.vmr_result) { 424112158Sdas DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 425112158Sdas __func__, vmr.vmr_id); 426112158Sdas proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 427112158Sdas } else { 428112158Sdas if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 429112158Sdas break; 430112158Sdas /* Mark VM as 
shutting down */ 431219557Sdas vm->vm_state |= VM_STATE_SHUTDOWN; 432112158Sdas } 433112158Sdas break; 434112158Sdas case IMSG_VMDOP_SEND_VM_RESPONSE: 435112158Sdas IMSG_SIZE_CHECK(imsg, &vmr); 436112158Sdas memcpy(&vmr, imsg->data, sizeof(vmr)); 437112158Sdas if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 438112158Sdas break; 439112158Sdas if (!vmr.vmr_result) { 440112158Sdas log_info("%s: sent vm %d successfully.", 441112158Sdas vm->vm_params.vmc_params.vcp_name, 442112158Sdas vm->vm_vmid); 443219557Sdas if (vm->vm_from_config) 444112158Sdas vm_stop(vm, 0, __func__); 445112158Sdas else 446112158Sdas vm_remove(vm, __func__); 447219557Sdas } 448219557Sdas 449219557Sdas /* Send a response if a control client is waiting for it */ 450112158Sdas if (imsg->hdr.peerid != (uint32_t)-1) { 451112158Sdas /* the error is meaningless for deferred responses */ 452219557Sdas vmr.vmr_result = 0; 453112158Sdas 454219557Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, 455112158Sdas IMSG_VMDOP_SEND_VM_RESPONSE, 456112158Sdas imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 457112158Sdas return (-1); 458219557Sdas } 459112158Sdas break; 460112158Sdas case IMSG_VMDOP_TERMINATE_VM_EVENT: 461112158Sdas IMSG_SIZE_CHECK(imsg, &vmr); 462112158Sdas memcpy(&vmr, imsg->data, sizeof(vmr)); 463112158Sdas DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 464112158Sdas __func__, vmr.vmr_id, vmr.vmr_result); 465112158Sdas if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 466112158Sdas log_debug("%s: vm %d is no longer available", 467182709Sdas __func__, vmr.vmr_id); 468112158Sdas break; 469112158Sdas } 470112158Sdas if (vmr.vmr_result != EAGAIN || 471112158Sdas vm->vm_params.vmc_bootdevice) { 472219557Sdas if (vm->vm_from_config) 473219557Sdas vm_stop(vm, 0, __func__); 474219557Sdas else 475219557Sdas vm_remove(vm, __func__); 476219557Sdas } else { 477219557Sdas /* Stop VM instance but keep the tty open */ 478219557Sdas vm_stop(vm, 1, __func__); 479112158Sdas config_setvm(ps, vm, (uint32_t)-1, 
vm->vm_uid); 480112158Sdas } 481112158Sdas 482112158Sdas /* The error is meaningless for deferred responses */ 483112158Sdas vmr.vmr_result = 0; 484112158Sdas 485112158Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, 486112158Sdas IMSG_VMDOP_TERMINATE_VM_EVENT, 487112158Sdas imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 488112158Sdas return (-1); 489112158Sdas break; 490112158Sdas case IMSG_VMDOP_GET_INFO_VM_DATA: 491112158Sdas IMSG_SIZE_CHECK(imsg, &vir); 492112158Sdas memcpy(&vir, imsg->data, sizeof(vir)); 493112158Sdas if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 494112158Sdas memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 495112158Sdas if (vm->vm_ttyname != NULL) 496112158Sdas strlcpy(vir.vir_ttyname, vm->vm_ttyname, 497112158Sdas sizeof(vir.vir_ttyname)); 498112158Sdas log_debug("%s: running vm: %d, vm_state: 0x%x", 499112158Sdas __func__, vm->vm_vmid, vm->vm_state); 500112158Sdas vir.vir_state = vm->vm_state; 501112158Sdas /* get the user id who started the vm */ 502112158Sdas vir.vir_uid = vm->vm_uid; 503112158Sdas vir.vir_gid = vm->vm_params.vmc_owner.gid; 504112158Sdas } 505112158Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, 506112158Sdas imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 507112158Sdas log_debug("%s: GET_INFO_VM failed for vm %d, removing", 508112158Sdas __func__, vm->vm_vmid); 509112158Sdas vm_remove(vm, __func__); 510112158Sdas return (-1); 511112158Sdas } 512112158Sdas break; 513112158Sdas case IMSG_VMDOP_GET_INFO_VM_END_DATA: 514112158Sdas /* 515112158Sdas * PROC_VMM has responded with the *running* VMs, now we 516112158Sdas * append the others. These use the special value 0 for their 517112158Sdas * kernel id to indicate that they are not running. 
518112158Sdas */ 519112158Sdas TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 520112158Sdas if (!(vm->vm_state & VM_STATE_RUNNING)) { 521112158Sdas memset(&vir, 0, sizeof(vir)); 522112158Sdas vir.vir_info.vir_id = vm->vm_vmid; 523112158Sdas strlcpy(vir.vir_info.vir_name, 524112158Sdas vm->vm_params.vmc_params.vcp_name, 525112158Sdas VMM_MAX_NAME_LEN); 526112158Sdas vir.vir_info.vir_memory_size = 527112158Sdas vm->vm_params.vmc_params. 528112158Sdas vcp_memranges[0].vmr_size; 529112158Sdas vir.vir_info.vir_ncpus = 530112158Sdas vm->vm_params.vmc_params.vcp_ncpus; 531112158Sdas /* get the configured user id for this vm */ 532112158Sdas vir.vir_uid = vm->vm_params.vmc_owner.uid; 533112158Sdas vir.vir_gid = vm->vm_params.vmc_owner.gid; 534112158Sdas log_debug("%s: vm: %d, vm_state: 0x%x", 535112158Sdas __func__, vm->vm_vmid, vm->vm_state); 536112158Sdas vir.vir_state = vm->vm_state; 537112158Sdas if (proc_compose_imsg(ps, PROC_CONTROL, -1, 538112158Sdas IMSG_VMDOP_GET_INFO_VM_DATA, 539112158Sdas imsg->hdr.peerid, -1, &vir, 540182709Sdas sizeof(vir)) == -1) { 541112158Sdas log_debug("%s: GET_INFO_VM_END failed", 542112158Sdas __func__); 543219557Sdas vm_remove(vm, __func__); 544112158Sdas return (-1); 545219557Sdas } 546112158Sdas } 547112158Sdas } 548112158Sdas IMSG_SIZE_CHECK(imsg, &res); 549112158Sdas proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 550112158Sdas break; 551112158Sdas default: 552112158Sdas return (-1); 553112158Sdas } 554112158Sdas 555112158Sdas return (0); 556112158Sdas} 557112158Sdas 558112158Sdasint 559112158Sdasvmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) 560112158Sdas{ 561112158Sdas struct vmop_addr_result var; 562112158Sdas 563112158Sdas switch (imsg->hdr.type) { 564112158Sdas case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 565112158Sdas IMSG_SIZE_CHECK(imsg, &var); 566112158Sdas memcpy(&var, imsg->data, sizeof(var)); 567112158Sdas proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); 568112158Sdas break; 569112158Sdas default: 
570112158Sdas return (-1); 571112158Sdas } 572112158Sdas 573112158Sdas return (0); 574112158Sdas} 575112158Sdas 576112158Sdasint 577112158Sdasvmd_check_vmh(struct vm_dump_header *vmh) 578112158Sdas{ 579112158Sdas int i; 580112158Sdas unsigned int code, leaf; 581112158Sdas unsigned int a, b, c, d; 582112158Sdas 583112158Sdas if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 584112158Sdas log_warnx("%s: incompatible dump signature", __func__); 585112158Sdas return (-1); 586112158Sdas } 587112158Sdas 588112158Sdas if (vmh->vmh_version != VM_DUMP_VERSION) { 589112158Sdas log_warnx("%s: incompatible dump version", __func__); 590112158Sdas return (-1); 591112158Sdas } 592112158Sdas 593112158Sdas for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 594112158Sdas code = vmh->vmh_cpuids[i].code; 595112158Sdas leaf = vmh->vmh_cpuids[i].leaf; 596112158Sdas if (leaf != 0x00) { 597112158Sdas log_debug("%s: invalid leaf 0x%x for code 0x%x", 598112158Sdas __func__, leaf, code); 599112158Sdas return (-1); 600112158Sdas } 601112158Sdas 602112158Sdas switch (code) { 603112158Sdas case 0x00: 604112158Sdas CPUID_LEAF(code, leaf, a, b, c, d); 605112158Sdas if (vmh->vmh_cpuids[i].a > a) { 606112158Sdas log_debug("%s: incompatible cpuid level", 607112158Sdas __func__); 608112158Sdas return (-1); 609112158Sdas } 610112158Sdas if (!(vmh->vmh_cpuids[i].b == b && 611112158Sdas vmh->vmh_cpuids[i].c == c && 612112158Sdas vmh->vmh_cpuids[i].d == d)) { 613112158Sdas log_debug("%s: incompatible cpu brand", 614112158Sdas __func__); 615112158Sdas return (-1); 616112158Sdas } 617112158Sdas break; 618112158Sdas 619112158Sdas case 0x01: 620112158Sdas CPUID_LEAF(code, leaf, a, b, c, d); 621112158Sdas if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 622112158Sdas (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 623112158Sdas log_debug("%s: incompatible cpu features " 624112158Sdas "code: 0x%x leaf: 0x%x reg: c", __func__, 625112158Sdas code, leaf); 626112158Sdas 
return (-1); 627112158Sdas } 628112158Sdas if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 629112158Sdas (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 630112158Sdas log_debug("%s: incompatible cpu features " 631219557Sdas "code: 0x%x leaf: 0x%x reg: d", __func__, 632112158Sdas code, leaf); 633182709Sdas return (-1); 634112158Sdas } 635112158Sdas break; 636112158Sdas 637112158Sdas case 0x07: 638112158Sdas CPUID_LEAF(code, leaf, a, b, c, d); 639112158Sdas if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 640112158Sdas (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 641112158Sdas log_debug("%s: incompatible cpu features " 642112158Sdas "code: 0x%x leaf: 0x%x reg: c", __func__, 643112158Sdas code, leaf); 644112158Sdas return (-1); 645112158Sdas } 646112158Sdas if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 647112158Sdas (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 648219557Sdas log_debug("%s: incompatible cpu features " 649112158Sdas "code: 0x%x leaf: 0x%x reg: d", __func__, 650219557Sdas code, leaf); 651112158Sdas return (-1); 652219557Sdas } 653112158Sdas break; 654112158Sdas 655112158Sdas case 0x0d: 656112158Sdas CPUID_LEAF(code, leaf, a, b, c, d); 657112158Sdas if (vmh->vmh_cpuids[i].b > b) { 658112158Sdas log_debug("%s: incompatible cpu: insufficient " 659112158Sdas "max save area for enabled XCR0 features", 660112158Sdas __func__); 661182709Sdas return (-1); 662112158Sdas } 663112158Sdas if (vmh->vmh_cpuids[i].c > c) { 664112158Sdas log_debug("%s: incompatible cpu: insufficient " 665112158Sdas "max save area for supported XCR0 features", 666112158Sdas __func__); 667112158Sdas return (-1); 668112158Sdas } 669219557Sdas break; 670219557Sdas 671219557Sdas case 0x80000001: 672219557Sdas CPUID_LEAF(code, leaf, a, b, c, d); 673219557Sdas if ((vmh->vmh_cpuids[i].a & a) != 674112158Sdas vmh->vmh_cpuids[i].a) { 675112158Sdas log_debug("%s: incompatible cpu features " 676112158Sdas "code: 0x%x leaf: 0x%x reg: a", __func__, 677112158Sdas code, leaf); 
678112158Sdas return (-1); 679112158Sdas } 680112158Sdas if ((vmh->vmh_cpuids[i].c & c) != 681112158Sdas vmh->vmh_cpuids[i].c) { 682112158Sdas log_debug("%s: incompatible cpu features " 683182709Sdas "code: 0x%x leaf: 0x%x reg: c", __func__, 684112158Sdas code, leaf); 685112158Sdas return (-1); 686112158Sdas } 687112158Sdas if ((vmh->vmh_cpuids[i].d & d) != 688112158Sdas vmh->vmh_cpuids[i].d) { 689112158Sdas log_debug("%s: incompatible cpu features " 690112158Sdas "code: 0x%x leaf: 0x%x reg: d", __func__, 691112158Sdas code, leaf); 692112158Sdas return (-1); 693112158Sdas } 694112158Sdas break; 695112158Sdas 696112158Sdas default: 697112158Sdas log_debug("%s: unknown code 0x%x", __func__, code); 698112158Sdas return (-1); 699112158Sdas } 700112158Sdas } 701112158Sdas 702112158Sdas return (0); 703112158Sdas} 704112158Sdas 705112158Sdasvoid 706112158Sdasvmd_sighdlr(int sig, short event, void *arg) 707112158Sdas{ 708112158Sdas if (privsep_process != PROC_PARENT) 709112158Sdas return; 710112158Sdas log_debug("%s: handling signal", __func__); 711112158Sdas 712112158Sdas switch (sig) { 713112158Sdas case SIGHUP: 714112158Sdas log_info("%s: reload requested with SIGHUP", __func__); 715112158Sdas 716112158Sdas /* 717112158Sdas * This is safe because libevent uses async signal handlers 718112158Sdas * that run in the event loop and not in signal context. 
719112158Sdas */ 720112158Sdas (void)vmd_reload(0, NULL); 721112158Sdas break; 722112158Sdas case SIGPIPE: 723112158Sdas log_info("%s: ignoring SIGPIPE", __func__); 724112158Sdas break; 725112158Sdas case SIGUSR1: 726182709Sdas log_info("%s: ignoring SIGUSR1", __func__); 727112158Sdas break; 728112158Sdas case SIGTERM: 729112158Sdas case SIGINT: 730112158Sdas vmd_shutdown(); 731112158Sdas break; 732112158Sdas default: 733219557Sdas fatalx("unexpected signal"); 734219557Sdas } 735219557Sdas} 736219557Sdas 737219557Sdas__dead void 738219557Sdasusage(void) 739112158Sdas{ 740112158Sdas extern char *__progname; 741112158Sdas fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 742112158Sdas __progname); 743112158Sdas exit(1); 744112158Sdas} 745112158Sdas 746112158Sdasint 747112158Sdasmain(int argc, char **argv) 748112158Sdas{ 749219557Sdas struct privsep *ps; 750112158Sdas int ch; 751219557Sdas const char *conffile = VMD_CONF; 752112158Sdas enum privsep_procid proc_id = PROC_PARENT; 753112158Sdas int proc_instance = 0; 754112158Sdas const char *errp, *title = NULL; 755112158Sdas int argc0 = argc; 756112158Sdas 757112158Sdas log_init(0, LOG_DAEMON); 758112158Sdas 759112158Sdas if ((env = calloc(1, sizeof(*env))) == NULL) 760112158Sdas fatal("calloc: env"); 761112158Sdas 762112158Sdas while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { 763112158Sdas switch (ch) { 764112158Sdas case 'D': 765112158Sdas if (cmdline_symset(optarg) < 0) 766219557Sdas log_warnx("could not parse macro definition %s", 767219557Sdas optarg); 768219557Sdas break; 769112158Sdas case 'd': 770112158Sdas env->vmd_debug = 2; 771112158Sdas break; 772112158Sdas case 'f': 773112158Sdas conffile = optarg; 774112158Sdas break; 775112158Sdas case 'v': 776112158Sdas env->vmd_verbose++; 777112158Sdas break; 778112158Sdas case 'n': 779112158Sdas env->vmd_noaction = 1; 780112158Sdas break; 781 case 'P': 782 title = optarg; 783 proc_id = proc_getid(procs, nitems(procs), title); 784 if (proc_id == 
PROC_MAX) 785 fatalx("invalid process name"); 786 break; 787 case 'I': 788 proc_instance = strtonum(optarg, 0, 789 PROC_MAX_INSTANCES, &errp); 790 if (errp) 791 fatalx("invalid process instance"); 792 break; 793 default: 794 usage(); 795 } 796 } 797 798 argc -= optind; 799 if (argc > 0) 800 usage(); 801 802 if (env->vmd_noaction && !env->vmd_debug) 803 env->vmd_debug = 1; 804 805 log_init(env->vmd_debug, LOG_DAEMON); 806 log_setverbose(env->vmd_verbose); 807 808 /* check for root privileges */ 809 if (env->vmd_noaction == 0) { 810 if (geteuid()) 811 fatalx("need root privileges"); 812 } 813 814 ps = &env->vmd_ps; 815 ps->ps_env = env; 816 env->vmd_fd = -1; 817 818 if (config_init(env) == -1) 819 fatal("failed to initialize configuration"); 820 821 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 822 fatal("unknown user %s", VMD_USER); 823 824 /* First proc runs as root without pledge but in default chroot */ 825 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 826 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 827 828 /* Open /dev/vmm */ 829 if (env->vmd_noaction == 0) { 830 env->vmd_fd = open(VMM_NODE, O_RDWR); 831 if (env->vmd_fd == -1) 832 fatal("%s", VMM_NODE); 833 } 834 835 /* Configure the control socket */ 836 ps->ps_csock.cs_name = SOCKET_NAME; 837 TAILQ_INIT(&ps->ps_rcsocks); 838 839 /* Configuration will be parsed after forking the children */ 840 env->vmd_conffile = conffile; 841 842 if (env->vmd_noaction) 843 ps->ps_noaction = 1; 844 ps->ps_instance = proc_instance; 845 if (title != NULL) 846 ps->ps_title[proc_id] = title; 847 848 /* only the parent returns */ 849 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 850 proc_id); 851 852 log_procinit("parent"); 853 if (!env->vmd_debug && daemon(0, 0) == -1) 854 fatal("can't daemonize"); 855 856 if (ps->ps_noaction == 0) 857 log_info("startup"); 858 859 event_init(); 860 861 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 862 signal_set(&ps->ps_evsigterm, SIGTERM, 
vmd_sighdlr, ps); 863 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 864 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 865 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 866 867 signal_add(&ps->ps_evsigint, NULL); 868 signal_add(&ps->ps_evsigterm, NULL); 869 signal_add(&ps->ps_evsighup, NULL); 870 signal_add(&ps->ps_evsigpipe, NULL); 871 signal_add(&ps->ps_evsigusr1, NULL); 872 873 if (!env->vmd_noaction) 874 proc_connect(ps); 875 876 if (vmd_configure() == -1) 877 fatalx("configuration failed"); 878 879 event_dispatch(); 880 881 log_debug("parent exiting"); 882 883 return (0); 884} 885 886void 887start_vm_batch(int fd, short type, void *args) 888{ 889 int i = 0; 890 struct vmd_vm *vm; 891 892 log_debug("%s: starting batch of %d vms", __func__, 893 env->vmd_cfg.parallelism); 894 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 895 if (!(vm->vm_state & VM_STATE_WAITING)) { 896 log_debug("%s: not starting vm %s (disabled)", 897 __func__, 898 vm->vm_params.vmc_params.vcp_name); 899 continue; 900 } 901 i++; 902 if (i > env->vmd_cfg.parallelism) { 903 evtimer_add(&staggered_start_timer, 904 &env->vmd_cfg.delay); 905 break; 906 } 907 vm->vm_state &= ~VM_STATE_WAITING; 908 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 909 } 910 log_debug("%s: done starting vms", __func__); 911} 912 913int 914vmd_configure(void) 915{ 916 int ncpus; 917 struct vmd_switch *vsw; 918 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 919 size_t ncpus_sz = sizeof(ncpus); 920 921 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) 922 fatal("open %s", PATH_PTMDEV); 923 924 /* 925 * pledge in the parent process: 926 * stdio - for malloc and basic I/O including events. 927 * rpath - for reload to open and read the configuration files. 928 * wpath - for opening disk images and tap devices. 929 * tty - for openpty and TIOCUCNTL. 930 * proc - run kill to terminate its children safely. 931 * sendfd - for disks, interfaces and other fds. 
932 * recvfd - for send and receive. 933 * getpw - lookup user or group id by name. 934 * chown, fattr - change tty ownership 935 * flock - locking disk files 936 */ 937 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 938 " chown fattr flock", NULL) == -1) 939 fatal("pledge"); 940 941 if (parse_config(env->vmd_conffile) == -1) { 942 proc_kill(&env->vmd_ps); 943 exit(1); 944 } 945 946 if (env->vmd_noaction) { 947 fprintf(stderr, "configuration OK\n"); 948 proc_kill(&env->vmd_ps); 949 exit(0); 950 } 951 952 /* Send shared global configuration to all children */ 953 if (config_setconfig(env) == -1) 954 return (-1); 955 956 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 957 if (vsw->sw_running) 958 continue; 959 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 960 log_warn("%s: failed to create switch %s", 961 __func__, vsw->sw_name); 962 switch_remove(vsw); 963 return (-1); 964 } 965 } 966 967 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 968 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 969 if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 970 ncpus = 1; 971 env->vmd_cfg.parallelism = ncpus; 972 log_debug("%s: setting staggered start configuration to " 973 "parallelism: %d and delay: %lld", 974 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 975 } 976 977 log_debug("%s: starting vms in staggered fashion", __func__); 978 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 979 /* start first batch */ 980 start_vm_batch(0, 0, NULL); 981 982 return (0); 983} 984 985int 986vmd_reload(unsigned int reset, const char *filename) 987{ 988 struct vmd_vm *vm, *next_vm; 989 struct vmd_switch *vsw; 990 int reload = 0; 991 992 /* Switch back to the default config file */ 993 if (filename == NULL || *filename == '\0') { 994 filename = env->vmd_conffile; 995 reload = 1; 996 } 997 998 log_debug("%s: level %d config file %s", __func__, reset, filename); 999 1000 if (reset) { 1001 /* Purge the 
configuration */ 1002 config_purge(env, reset); 1003 config_setreset(env, reset); 1004 } else { 1005 /* 1006 * Load or reload the configuration. 1007 * 1008 * Reloading removes all non-running VMs before processing the 1009 * config file, whereas loading only adds to the existing list 1010 * of VMs. 1011 */ 1012 1013 if (reload) { 1014 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 1015 next_vm) { 1016 if (!(vm->vm_state & VM_STATE_RUNNING)) { 1017 DPRINTF("%s: calling vm_remove", 1018 __func__); 1019 vm_remove(vm, __func__); 1020 } 1021 } 1022 } 1023 1024 if (parse_config(filename) == -1) { 1025 log_debug("%s: failed to load config file %s", 1026 __func__, filename); 1027 return (-1); 1028 } 1029 1030 if (reload) { 1031 /* Update shared global configuration in all children */ 1032 if (config_setconfig(env) == -1) 1033 return (-1); 1034 } 1035 1036 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1037 if (vsw->sw_running) 1038 continue; 1039 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1040 log_warn("%s: failed to create switch %s", 1041 __func__, vsw->sw_name); 1042 switch_remove(vsw); 1043 return (-1); 1044 } 1045 } 1046 1047 log_debug("%s: starting vms in staggered fashion", __func__); 1048 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1049 /* start first batch */ 1050 start_vm_batch(0, 0, NULL); 1051 1052 } 1053 1054 return (0); 1055} 1056 1057void 1058vmd_shutdown(void) 1059{ 1060 struct vmd_vm *vm, *vm_next; 1061 1062 log_debug("%s: performing shutdown", __func__); 1063 1064 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1065 vm_remove(vm, __func__); 1066 } 1067 1068 proc_kill(&env->vmd_ps); 1069 free(env); 1070 1071 log_warnx("parent terminating"); 1072 exit(0); 1073} 1074 1075struct vmd_vm * 1076vm_getbyvmid(uint32_t vmid) 1077{ 1078 struct vmd_vm *vm; 1079 1080 if (vmid == 0) 1081 return (NULL); 1082 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1083 if (vm->vm_vmid == vmid) 1084 return (vm); 1085 } 1086 1087 return (NULL); 1088} 
1089 1090struct vmd_vm * 1091vm_getbyid(uint32_t id) 1092{ 1093 struct vmd_vm *vm; 1094 1095 if (id == 0) 1096 return (NULL); 1097 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1098 if (vm->vm_params.vmc_params.vcp_id == id) 1099 return (vm); 1100 } 1101 1102 return (NULL); 1103} 1104 1105uint32_t 1106vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1107{ 1108 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1109 return (0); 1110 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1111 id, vm->vm_vmid); 1112 return (vm->vm_vmid); 1113} 1114 1115uint32_t 1116vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1117{ 1118 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1119 return (0); 1120 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1121 vmid, vm->vm_params.vmc_params.vcp_id); 1122 return (vm->vm_params.vmc_params.vcp_id); 1123} 1124 1125struct vmd_vm * 1126vm_getbyname(const char *name) 1127{ 1128 struct vmd_vm *vm; 1129 1130 if (name == NULL) 1131 return (NULL); 1132 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1133 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1134 return (vm); 1135 } 1136 1137 return (NULL); 1138} 1139 1140struct vmd_vm * 1141vm_getbypid(pid_t pid) 1142{ 1143 struct vmd_vm *vm; 1144 1145 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1146 if (vm->vm_pid == pid) 1147 return (vm); 1148 } 1149 1150 return (NULL); 1151} 1152 1153void 1154vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1155{ 1156 struct privsep *ps = &env->vmd_ps; 1157 unsigned int i, j; 1158 1159 if (vm == NULL) 1160 return; 1161 1162 log_debug("%s: %s %s stopping vm %d%s", 1163 __func__, ps->ps_title[privsep_process], caller, 1164 vm->vm_vmid, keeptty ? 
", keeping tty open" : ""); 1165 1166 vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN); 1167 1168 user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0); 1169 user_put(vm->vm_user); 1170 1171 if (vm->vm_iev.ibuf.fd != -1) { 1172 event_del(&vm->vm_iev.ev); 1173 close(vm->vm_iev.ibuf.fd); 1174 } 1175 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { 1176 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1177 if (vm->vm_disks[i][j] != -1) { 1178 close(vm->vm_disks[i][j]); 1179 vm->vm_disks[i][j] = -1; 1180 } 1181 } 1182 } 1183 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 1184 if (vm->vm_ifs[i].vif_fd != -1) { 1185 close(vm->vm_ifs[i].vif_fd); 1186 vm->vm_ifs[i].vif_fd = -1; 1187 } 1188 free(vm->vm_ifs[i].vif_name); 1189 free(vm->vm_ifs[i].vif_switch); 1190 free(vm->vm_ifs[i].vif_group); 1191 vm->vm_ifs[i].vif_name = NULL; 1192 vm->vm_ifs[i].vif_switch = NULL; 1193 vm->vm_ifs[i].vif_group = NULL; 1194 } 1195 if (vm->vm_kernel != -1) { 1196 close(vm->vm_kernel); 1197 vm->vm_kernel = -1; 1198 } 1199 if (vm->vm_cdrom != -1) { 1200 close(vm->vm_cdrom); 1201 vm->vm_cdrom = -1; 1202 } 1203 if (!keeptty) { 1204 vm_closetty(vm); 1205 vm->vm_uid = 0; 1206 } 1207} 1208 1209void 1210vm_remove(struct vmd_vm *vm, const char *caller) 1211{ 1212 struct privsep *ps = &env->vmd_ps; 1213 1214 if (vm == NULL) 1215 return; 1216 1217 log_debug("%s: %s %s removing vm %d from running config", 1218 __func__, ps->ps_title[privsep_process], caller, 1219 vm->vm_vmid); 1220 1221 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1222 1223 user_put(vm->vm_user); 1224 vm_stop(vm, 0, caller); 1225 free(vm); 1226} 1227 1228int 1229vm_claimid(const char *name, int uid, uint32_t *id) 1230{ 1231 struct name2id *n2i = NULL; 1232 1233 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1234 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1235 goto out; 1236 1237 if (++env->vmd_nvm == 0) { 1238 log_warnx("too many vms"); 1239 return -1; 1240 } 1241 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1242 log_warnx("could 
not alloc vm name"); 1243 return -1; 1244 } 1245 n2i->id = env->vmd_nvm; 1246 n2i->uid = uid; 1247 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1248 log_warnx("vm name too long"); 1249 free(n2i); 1250 return -1; 1251 } 1252 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1253 1254out: 1255 *id = n2i->id; 1256 return 0; 1257} 1258 1259int 1260vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1261 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1262{ 1263 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1264 struct vm_create_params *vcp = &vmc->vmc_params; 1265 struct vmop_owner *vmo = NULL; 1266 struct vmd_user *usr = NULL; 1267 uint32_t nid, rng; 1268 unsigned int i, j; 1269 struct vmd_switch *sw; 1270 char *s; 1271 int ret = 0; 1272 1273 /* Check if this is an instance of another VM */ 1274 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { 1275 errno = ret; /* XXX might set invalid errno */ 1276 return (-1); 1277 } 1278 1279 errno = 0; 1280 *ret_vm = NULL; 1281 1282 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1283 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1284 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1285 uid) != 0) { 1286 errno = EPERM; 1287 goto fail; 1288 } 1289 *ret_vm = vm; 1290 errno = EALREADY; 1291 goto fail; 1292 } 1293 1294 if (vm_parent != NULL) 1295 vmo = &vm_parent->vm_params.vmc_insowner; 1296 1297 /* non-root users can only start existing VMs or instances */ 1298 if (vm_checkperm(NULL, vmo, uid) != 0) { 1299 log_warnx("permission denied"); 1300 errno = EPERM; 1301 goto fail; 1302 } 1303 if (vmc->vmc_flags == 0) { 1304 log_warnx("invalid configuration, no devices"); 1305 errno = VMD_DISK_MISSING; 1306 goto fail; 1307 } 1308 if (vcp->vcp_ncpus == 0) 1309 vcp->vcp_ncpus = 1; 1310 if (vcp->vcp_memranges[0].vmr_size == 0) 1311 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1312 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1313 log_warnx("invalid number of CPUs"); 1314 goto fail; 1315 } else 
if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 1316 log_warnx("invalid number of disks"); 1317 goto fail; 1318 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 1319 log_warnx("invalid number of interfaces"); 1320 goto fail; 1321 } else if (strlen(vcp->vcp_kernel) == 0 && 1322 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { 1323 log_warnx("no kernel or disk/cdrom specified"); 1324 goto fail; 1325 } else if (strlen(vcp->vcp_name) == 0) { 1326 log_warnx("invalid VM name"); 1327 goto fail; 1328 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1329 *vcp->vcp_name == '_') { 1330 log_warnx("invalid VM name"); 1331 goto fail; 1332 } else { 1333 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1334 if (!(isalnum(*s) || *s == '.' || *s == '-' || 1335 *s == '_')) { 1336 log_warnx("invalid VM name"); 1337 goto fail; 1338 } 1339 } 1340 } 1341 1342 /* track active users */ 1343 if (uid != 0 && env->vmd_users != NULL && 1344 (usr = user_get(uid)) == NULL) { 1345 log_warnx("could not add user"); 1346 goto fail; 1347 } 1348 1349 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1350 goto fail; 1351 1352 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1353 vmc = &vm->vm_params; 1354 vcp = &vmc->vmc_params; 1355 vm->vm_pid = -1; 1356 vm->vm_tty = -1; 1357 vm->vm_receive_fd = -1; 1358 vm->vm_state &= ~VM_STATE_PAUSED; 1359 vm->vm_user = usr; 1360 1361 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 1362 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1363 vm->vm_disks[i][j] = -1; 1364 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) 1365 vm->vm_ifs[i].vif_fd = -1; 1366 for (i = 0; i < vcp->vcp_nnics; i++) { 1367 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1368 /* inherit per-interface flags from the switch */ 1369 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1370 } 1371 1372 /* 1373 * If the MAC address is zero, always randomize it in vmd(8) 1374 * because we cannot rely on the guest OS to do the right 1375 * thing like OpenBSD does. 
Based on ether_fakeaddr() 1376 * from the kernel, incremented by one to differentiate 1377 * the source. 1378 */ 1379 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) { 1380 rng = arc4random(); 1381 vcp->vcp_macs[i][0] = 0xfe; 1382 vcp->vcp_macs[i][1] = 0xe1; 1383 vcp->vcp_macs[i][2] = 0xba + 1; 1384 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1385 vcp->vcp_macs[i][4] = rng; 1386 vcp->vcp_macs[i][5] = rng >> 8; 1387 } 1388 } 1389 vm->vm_kernel = -1; 1390 vm->vm_cdrom = -1; 1391 vm->vm_iev.ibuf.fd = -1; 1392 1393 /* 1394 * Assign a new internal Id if not specified and we succeed in 1395 * claiming a new Id. 1396 */ 1397 if (id != 0) 1398 vm->vm_vmid = id; 1399 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1400 goto fail; 1401 else 1402 vm->vm_vmid = nid; 1403 1404 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1405 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1406 1407 *ret_vm = vm; 1408 return (0); 1409 fail: 1410 if (errno == 0) 1411 errno = EINVAL; 1412 return (-1); 1413} 1414 1415int 1416vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1417 struct vmop_create_params *vmc, uid_t uid) 1418{ 1419 char *name; 1420 struct vm_create_params *vcp = &vmc->vmc_params; 1421 struct vmop_create_params *vmcp; 1422 struct vm_create_params *vcpp; 1423 struct vmd_vm *vm = NULL; 1424 unsigned int i, j; 1425 uint32_t id; 1426 1427 /* return without error if the parent is NULL (nothing to inherit) */ 1428 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1429 vmc->vmc_instance[0] == '\0') 1430 return (0); 1431 1432 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1433 return (VMD_PARENT_INVALID); 1434 } 1435 1436 vmcp = &(*vm_parent)->vm_params; 1437 vcpp = &vmcp->vmc_params; 1438 1439 /* Are we allowed to create an instance from this VM? 
*/ 1440 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1441 log_warnx("vm \"%s\" no permission to create vm instance", 1442 vcpp->vcp_name); 1443 return (ENAMETOOLONG); 1444 } 1445 1446 id = vcp->vcp_id; 1447 name = vcp->vcp_name; 1448 1449 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1450 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1451 return (EPROCLIM); 1452 } 1453 1454 /* CPU */ 1455 if (vcp->vcp_ncpus == 0) 1456 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1457 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1458 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1459 log_warnx("vm \"%s\" no permission to set cpus", name); 1460 return (EPERM); 1461 } 1462 1463 /* memory */ 1464 if (vcp->vcp_memranges[0].vmr_size == 0) 1465 vcp->vcp_memranges[0].vmr_size = 1466 vcpp->vcp_memranges[0].vmr_size; 1467 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1468 vcp->vcp_memranges[0].vmr_size != 1469 vcpp->vcp_memranges[0].vmr_size) { 1470 log_warnx("vm \"%s\" no permission to set memory", name); 1471 return (EPERM); 1472 } 1473 1474 /* disks cannot be inherited */ 1475 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1476 vcp->vcp_ndisks) { 1477 log_warnx("vm \"%s\" no permission to set disks", name); 1478 return (EPERM); 1479 } 1480 for (i = 0; i < vcp->vcp_ndisks; i++) { 1481 /* Check if this disk is already used in the parent */ 1482 for (j = 0; j < vcpp->vcp_ndisks; j++) { 1483 if (strcmp(vcp->vcp_disks[i], 1484 vcpp->vcp_disks[j]) == 0) { 1485 log_warnx("vm \"%s\" disk %s cannot be reused", 1486 name, vcp->vcp_disks[i]); 1487 return (EBUSY); 1488 } 1489 } 1490 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1491 } 1492 1493 /* interfaces */ 1494 if (vcp->vcp_nnics > 0 && 1495 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1496 vcp->vcp_nnics != vcpp->vcp_nnics) { 1497 log_warnx("vm \"%s\" no permission to set interfaces", name); 1498 return (EPERM); 1499 } 1500 for (i = 0; i < vcpp->vcp_nnics; i++) { 1501 /* Interface got overwritten */ 1502 if (i 
< vcp->vcp_nnics) 1503 continue; 1504 1505 /* Copy interface from parent */ 1506 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1507 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1508 sizeof(vmc->vmc_ifnames[i])); 1509 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1510 sizeof(vmc->vmc_ifswitch[i])); 1511 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1512 sizeof(vmc->vmc_ifgroup[i])); 1513 memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i], 1514 sizeof(vcp->vcp_macs[i])); 1515 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1516 vcp->vcp_nnics++; 1517 } 1518 for (i = 0; i < vcp->vcp_nnics; i++) { 1519 for (j = 0; j < vcpp->vcp_nnics; j++) { 1520 if (memcmp(zero_mac, vcp->vcp_macs[i], 1521 sizeof(vcp->vcp_macs[i])) != 0 && 1522 memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i], 1523 sizeof(vcp->vcp_macs[i])) != 0) { 1524 log_warnx("vm \"%s\" lladdr cannot be reused", 1525 name); 1526 return (EBUSY); 1527 } 1528 if (strlen(vmc->vmc_ifnames[i]) && 1529 strcmp(vmc->vmc_ifnames[i], 1530 vmcp->vmc_ifnames[j]) == 0) { 1531 log_warnx("vm \"%s\" %s cannot be reused", 1532 vmc->vmc_ifnames[i], name); 1533 return (EBUSY); 1534 } 1535 } 1536 } 1537 1538 /* kernel */ 1539 if (strlen(vcp->vcp_kernel) > 0) { 1540 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1541 log_warnx("vm \"%s\" no permission to set boot image", 1542 name); 1543 return (EPERM); 1544 } 1545 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1546 } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel, 1547 sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) { 1548 log_warnx("vm \"%s\" kernel name too long", name); 1549 return (EINVAL); 1550 } 1551 1552 /* cdrom */ 1553 if (strlen(vcp->vcp_cdrom) > 0) { 1554 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1555 log_warnx("vm \"%s\" no permission to set cdrom", name); 1556 return (EPERM); 1557 } 1558 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1559 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, 1560 sizeof(vcp->vcp_cdrom)) >= 
sizeof(vcp->vcp_cdrom)) { 1561 log_warnx("vm \"%s\" cdrom name too long", name); 1562 return (EINVAL); 1563 } 1564 1565 /* user */ 1566 if (vmc->vmc_owner.uid == 0) 1567 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1568 else if (vmc->vmc_owner.uid != uid && 1569 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1570 log_warnx("vm \"%s\" user mismatch", name); 1571 return (EPERM); 1572 } 1573 1574 /* group */ 1575 if (vmc->vmc_owner.gid == 0) 1576 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1577 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1578 log_warnx("vm \"%s\" group mismatch", name); 1579 return (EPERM); 1580 } 1581 1582 /* child instances */ 1583 if (vmc->vmc_insflags) { 1584 log_warnx("vm \"%s\" cannot change instance permissions", name); 1585 return (EPERM); 1586 } 1587 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1588 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1589 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1590 vmc->vmc_insflags = vmcp->vmc_insflags; 1591 } else { 1592 vmc->vmc_insowner.gid = 0; 1593 vmc->vmc_insowner.uid = 0; 1594 vmc->vmc_insflags = 0; 1595 } 1596 1597 /* finished, remove instance flags */ 1598 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1599 1600 return (0); 1601} 1602 1603/* 1604 * vm_checkperm 1605 * 1606 * Checks if the user represented by the 'uid' parameter is allowed to 1607 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1608 * console.) 
1609 * 1610 * Parameters: 1611 * vm: the VM whose permission is to be checked 1612 * vmo: the required uid/gid to be checked 1613 * uid: the user ID of the user making the request 1614 * 1615 * Return values: 1616 * 0: the permission should be granted 1617 * -1: the permission check failed (also returned if vm == null) 1618 */ 1619int 1620vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1621{ 1622 struct group *gr; 1623 struct passwd *pw; 1624 char **grmem; 1625 1626 /* root has no restrictions */ 1627 if (uid == 0) 1628 return (0); 1629 1630 if (vmo == NULL) 1631 return (-1); 1632 1633 /* check user */ 1634 if (vm == NULL) { 1635 if (vmo->uid == uid) 1636 return (0); 1637 } else { 1638 /* 1639 * check user of running vm (the owner of a running vm can 1640 * be different to (or more specific than) the configured owner. 1641 */ 1642 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1643 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1644 return (0); 1645 } 1646 1647 /* check groups */ 1648 if (vmo->gid != -1) { 1649 if ((pw = getpwuid(uid)) == NULL) 1650 return (-1); 1651 if (pw->pw_gid == vmo->gid) 1652 return (0); 1653 if ((gr = getgrgid(vmo->gid)) != NULL) { 1654 for (grmem = gr->gr_mem; *grmem; grmem++) 1655 if (strcmp(*grmem, pw->pw_name) == 0) 1656 return (0); 1657 } 1658 } 1659 1660 return (-1); 1661} 1662 1663/* 1664 * vm_checkinsflag 1665 * 1666 * Checks wheter the non-root user is allowed to set an instance option. 
1667 * 1668 * Parameters: 1669 * vmc: the VM create parameters 1670 * flag: the flag to be checked 1671 * uid: the user ID of the user making the request 1672 * 1673 * Return values: 1674 * 0: the permission should be granted 1675 * -1: the permission check failed (also returned if vm == null) 1676 */ 1677int 1678vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1679{ 1680 /* root has no restrictions */ 1681 if (uid == 0) 1682 return (0); 1683 1684 if ((vmc->vmc_insflags & flag) == 0) 1685 return (-1); 1686 1687 return (0); 1688} 1689 1690/* 1691 * vm_checkaccess 1692 * 1693 * Checks if the user represented by the 'uid' parameter is allowed to 1694 * access the file described by the 'path' parameter. 1695 * 1696 * Parameters: 1697 * fd: the file descriptor of the opened file 1698 * uflag: check if the userid has access to the file 1699 * uid: the user ID of the user making the request 1700 * amode: the access flags of R_OK and W_OK 1701 * 1702 * Return values: 1703 * 0: the permission should be granted 1704 * -1: the permission check failed 1705 */ 1706int 1707vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1708{ 1709 struct group *gr; 1710 struct passwd *pw; 1711 char **grmem; 1712 struct stat st; 1713 mode_t mode; 1714 1715 if (fd == -1) 1716 return (-1); 1717 1718 /* 1719 * File has to be accessible and a regular file 1720 */ 1721 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1722 return (-1); 1723 1724 /* root has no restrictions */ 1725 if (uid == 0 || uflag == 0) 1726 return (0); 1727 1728 /* check other */ 1729 mode = amode & W_OK ? S_IWOTH : 0; 1730 mode |= amode & R_OK ? S_IROTH : 0; 1731 if ((st.st_mode & mode) == mode) 1732 return (0); 1733 1734 /* check user */ 1735 mode = amode & W_OK ? S_IWUSR : 0; 1736 mode |= amode & R_OK ? S_IRUSR : 0; 1737 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1738 return (0); 1739 1740 /* check groups */ 1741 mode = amode & W_OK ? 
S_IWGRP : 0; 1742 mode |= amode & R_OK ? S_IRGRP : 0; 1743 if ((st.st_mode & mode) != mode) 1744 return (-1); 1745 if ((pw = getpwuid(uid)) == NULL) 1746 return (-1); 1747 if (pw->pw_gid == st.st_gid) 1748 return (0); 1749 if ((gr = getgrgid(st.st_gid)) != NULL) { 1750 for (grmem = gr->gr_mem; *grmem; grmem++) 1751 if (strcmp(*grmem, pw->pw_name) == 0) 1752 return (0); 1753 } 1754 1755 return (-1); 1756} 1757 1758int 1759vm_opentty(struct vmd_vm *vm) 1760{ 1761 struct ptmget ptm; 1762 struct stat st; 1763 struct group *gr; 1764 uid_t uid; 1765 gid_t gid; 1766 mode_t mode; 1767 int on; 1768 1769 /* 1770 * Open tty with pre-opened PTM fd 1771 */ 1772 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 1773 return (-1); 1774 1775 /* 1776 * We use user ioctl(2) mode to pass break commands. 1777 */ 1778 on = 1; 1779 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) 1780 fatal("could not enable user ioctl mode"); 1781 1782 vm->vm_tty = ptm.cfd; 1783 close(ptm.sfd); 1784 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL) 1785 goto fail; 1786 1787 uid = vm->vm_uid; 1788 gid = vm->vm_params.vmc_owner.gid; 1789 1790 if (vm->vm_params.vmc_owner.gid != -1) { 1791 mode = 0660; 1792 } else if ((gr = getgrnam("tty")) != NULL) { 1793 gid = gr->gr_gid; 1794 mode = 0620; 1795 } else { 1796 mode = 0600; 1797 gid = 0; 1798 } 1799 1800 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1801 __func__, vm->vm_params.vmc_params.vcp_name, 1802 vm->vm_ttyname, uid, gid, mode); 1803 1804 /* 1805 * Change ownership and mode of the tty as required. 
1806 * Loosely based on the implementation of sshpty.c 1807 */ 1808 if (stat(vm->vm_ttyname, &st) == -1) 1809 goto fail; 1810 1811 if (st.st_uid != uid || st.st_gid != gid) { 1812 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1813 log_warn("chown %s %d %d failed, uid %d", 1814 vm->vm_ttyname, uid, gid, getuid()); 1815 1816 /* Ignore failure on read-only filesystems */ 1817 if (!((errno == EROFS) && 1818 (st.st_uid == uid || st.st_uid == 0))) 1819 goto fail; 1820 } 1821 } 1822 1823 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1824 if (chmod(vm->vm_ttyname, mode) == -1) { 1825 log_warn("chmod %s %o failed, uid %d", 1826 vm->vm_ttyname, mode, getuid()); 1827 1828 /* Ignore failure on read-only filesystems */ 1829 if (!((errno == EROFS) && 1830 (st.st_uid == uid || st.st_uid == 0))) 1831 goto fail; 1832 } 1833 } 1834 1835 return (0); 1836 fail: 1837 vm_closetty(vm); 1838 return (-1); 1839} 1840 1841void 1842vm_closetty(struct vmd_vm *vm) 1843{ 1844 if (vm->vm_tty != -1) { 1845 /* Release and close the tty */ 1846 if (fchown(vm->vm_tty, 0, 0) == -1) 1847 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1848 if (fchmod(vm->vm_tty, 0666) == -1) 1849 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1850 close(vm->vm_tty); 1851 vm->vm_tty = -1; 1852 } 1853 free(vm->vm_ttyname); 1854 vm->vm_ttyname = NULL; 1855} 1856 1857void 1858switch_remove(struct vmd_switch *vsw) 1859{ 1860 if (vsw == NULL) 1861 return; 1862 1863 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1864 1865 free(vsw->sw_group); 1866 free(vsw->sw_name); 1867 free(vsw); 1868} 1869 1870struct vmd_switch * 1871switch_getbyname(const char *name) 1872{ 1873 struct vmd_switch *vsw; 1874 1875 if (name == NULL) 1876 return (NULL); 1877 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1878 if (strcmp(vsw->sw_name, name) == 0) 1879 return (vsw); 1880 } 1881 1882 return (NULL); 1883} 1884 1885struct vmd_user * 1886user_get(uid_t uid) 1887{ 1888 struct vmd_user *usr; 1889 1890 if (uid == 0) 1891 return 
(NULL); 1892 1893 /* first try to find an existing user */ 1894 TAILQ_FOREACH(usr, env->vmd_users, usr_entry) { 1895 if (usr->usr_id.uid == uid) 1896 goto done; 1897 } 1898 1899 if ((usr = calloc(1, sizeof(*usr))) == NULL) { 1900 log_warn("could not allocate user"); 1901 return (NULL); 1902 } 1903 1904 usr->usr_id.uid = uid; 1905 usr->usr_id.gid = -1; 1906 TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry); 1907 1908 done: 1909 DPRINTF("%s: uid %d #%d +", 1910 __func__, usr->usr_id.uid, usr->usr_refcnt + 1); 1911 usr->usr_refcnt++; 1912 1913 return (usr); 1914} 1915 1916void 1917user_put(struct vmd_user *usr) 1918{ 1919 if (usr == NULL) 1920 return; 1921 1922 DPRINTF("%s: uid %d #%d -", 1923 __func__, usr->usr_id.uid, usr->usr_refcnt - 1); 1924 1925 if (--usr->usr_refcnt > 0) 1926 return; 1927 1928 TAILQ_REMOVE(env->vmd_users, usr, usr_entry); 1929 free(usr); 1930} 1931 1932void 1933user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc) 1934{ 1935 char mem[FMT_SCALED_STRSIZE]; 1936 1937 if (usr == NULL) 1938 return; 1939 1940 /* increment or decrement counters */ 1941 inc = inc ? 1 : -1; 1942 1943 usr->usr_maxcpu += vcp->vcp_ncpus * inc; 1944 usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc; 1945 usr->usr_maxifs += vcp->vcp_nnics * inc; 1946 1947 if (log_getverbose() > 1) { 1948 (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem); 1949 log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu", 1950 __func__, inc == 1 ? 
'+' : '-', 1951 usr->usr_id.uid, usr->usr_refcnt, 1952 usr->usr_maxcpu, mem, usr->usr_maxifs); 1953 } 1954} 1955 1956int 1957user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) 1958{ 1959 const char *limit = ""; 1960 1961 /* XXX make the limits configurable */ 1962 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) { 1963 limit = "cpu "; 1964 goto fail; 1965 } 1966 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) { 1967 limit = "memory "; 1968 goto fail; 1969 } 1970 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) { 1971 limit = "interface "; 1972 goto fail; 1973 } 1974 1975 return (0); 1976 1977 fail: 1978 log_warnx("%s: user %d %slimit reached", vcp->vcp_name, 1979 usr->usr_id.uid, limit); 1980 return (-1); 1981} 1982 1983char * 1984get_string(uint8_t *ptr, size_t len) 1985{ 1986 size_t i; 1987 1988 for (i = 0; i < len; i++) 1989 if (!isprint(ptr[i])) 1990 break; 1991 1992 return strndup(ptr, i); 1993} 1994 1995uint32_t 1996prefixlen2mask(uint8_t prefixlen) 1997{ 1998 if (prefixlen == 0) 1999 return (0); 2000 2001 if (prefixlen > 32) 2002 prefixlen = 32; 2003 2004 return (htonl(0xffffffff << (32 - prefixlen))); 2005} 2006 2007void 2008prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 2009{ 2010 struct in6_addr s6; 2011 int i; 2012 2013 if (prefixlen > 128) 2014 prefixlen = 128; 2015 2016 memset(&s6, 0, sizeof(s6)); 2017 for (i = 0; i < prefixlen / 8; i++) 2018 s6.s6_addr[i] = 0xff; 2019 i = prefixlen % 8; 2020 if (i) 2021 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2022 2023 memcpy(mask, &s6, sizeof(s6)); 2024} 2025 2026void 2027getmonotime(struct timeval *tv) 2028{ 2029 struct timespec ts; 2030 2031 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2032 fatal("clock_gettime"); 2033 2034 TIMESPEC_TO_TIMEVAL(tv, &ts); 2035} 2036