/* vmd.c revision 1.139 */
1/*	$OpenBSD: vmd.c,v 1.139 2023/04/02 02:04:10 dv Exp $	*/
2
3/*
4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/queue.h>
21#include <sys/wait.h>
22#include <sys/stat.h>
23#include <sys/sysctl.h>
24#include <sys/tty.h>
25#include <sys/ttycom.h>
26#include <sys/ioctl.h>
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <termios.h>
32#include <errno.h>
33#include <event.h>
34#include <fcntl.h>
35#include <pwd.h>
36#include <signal.h>
37#include <syslog.h>
38#include <unistd.h>
39#include <util.h>
40#include <ctype.h>
41#include <pwd.h>
42#include <grp.h>
43
44#include <machine/specialreg.h>
45#include <machine/vmmvar.h>
46
47#include "proc.h"
48#include "atomicio.h"
49#include "vmd.h"
50
51__dead void usage(void);
52
53int	 main(int, char **);
54int	 vmd_configure(void);
55void	 vmd_sighdlr(int sig, short event, void *arg);
56void	 vmd_shutdown(void);
57int	 vmd_control_run(void);
58int	 vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
59int	 vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
60int	 vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *);
61int	 vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *);
62int	 vmd_check_vmh(struct vm_dump_header *);
63
64int	 vm_instance(struct privsep *, struct vmd_vm **,
65	    struct vmop_create_params *, uid_t);
66int	 vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
67int	 vm_claimid(const char *, int, uint32_t *);
68void	 start_vm_batch(int, short, void*);
69
70static inline void vm_terminate(struct vmd_vm *, const char *);
71
/* Global daemon state shared by the parent process and its helpers. */
struct vmd	*env;

/*
 * Privsep child process table: name, process id, imsg dispatch
 * callback, run function, optional shutdown callback and chroot path.
 */
static struct privsep_proc procs[] = {
	/* Keep "priv" on top as procs[0] */
	{ "priv",	PROC_PRIV,	vmd_dispatch_priv, priv },
	{ "control",	PROC_CONTROL,	vmd_dispatch_control, control },
	{ "vmm",	PROC_VMM,	vmd_dispatch_vmm, vmm, vmm_shutdown },
	{ "agentx", 	PROC_AGENTX,	vmd_dispatch_agentx, vm_agentx, vm_agentx_shutdown, "/" }
};

/* Identity of the currently running privsep process. */
enum privsep_procid privsep_process;

/* Timer used to start waiting VMs in staggered batches. */
struct event staggered_start_timer;

/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;	/* all-zero passwd for "priv" */
static const uint8_t zero_mac[ETHER_ADDR_LEN];
90
/*
 * vmd_dispatch_control
 *
 * imsg handler for requests arriving from the control process (i.e.
 * commands issued via vmctl(8)).  Requests are handled locally,
 * relayed to the vmm process, or both; a response imsg is composed
 * back to control when `cmd` is set by a case below.
 *
 * Returns 0 on success, -1 on a fatal relay error (tears down the
 * imsg channel).
 */
int
vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep			*ps = p->p_ps;
	int				 res = 0, ret = 0, cmd = 0, verbose;
	unsigned int			 v = 0, flags;
	struct vmop_create_params	 vmc;
	struct vmop_id			 vid;
	struct vmop_result		 vmr;
	struct vm_dump_header		 vmh;
	struct vmd_vm			*vm = NULL;
	char				*str = NULL;
	uint32_t			 id = 0;
	struct control_sock		*rcs;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vmc);
		memcpy(&vmc, imsg->data, sizeof(vmc));
		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
		if (vmc.vmc_flags == 0) {
			/* start an existing VM with pre-configured options */
			if (!(ret == -1 && errno == EALREADY &&
			    !(vm->vm_state & VM_STATE_RUNNING))) {
				res = errno;
				cmd = IMSG_VMDOP_START_VM_RESPONSE;
			}
		} else if (ret != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		if (res == 0) {
			/* Registration ok: hand the VM over to vmm. */
			res = config_setvm(ps, vm, imsg->hdr.peerid,
			    vm->vm_params.vmc_owner.uid);
			if (res)
				cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_WAIT_VM_REQUEST:
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		flags = vid.vid_flags;
		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;

		if ((id = vid.vid_id) == 0) {
			/* Lookup vm (id) by name */
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				break;
			} else if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
			    (flags & VMOP_FORCE) == 0) {
				/* already shutting down; only -f overrides */
				res = EALREADY;
				break;
			} else if (!(vm->vm_state & VM_STATE_RUNNING)) {
				res = EINVAL;
				break;
			}
			id = vm->vm_vmid;
		} else if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			break;
		}
		/* Requesting user must own the VM (or be privileged). */
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) {
			res = EPERM;
			break;
		}

		/* Only relay TERMINATION requests, not WAIT requests */
		if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
			memset(&vid, 0, sizeof(vid));
			vid.vid_id = id;
			vid.vid_flags = flags;

			if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
				imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
				return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		/* vmm answers with the list of running VMs. */
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		break;
	case IMSG_VMDOP_LOAD:
		IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
		str = get_string((uint8_t *)imsg->data,
		    IMSG_DATA_SIZE(imsg));
		/*
		 * FALLTHROUGH: LOAD shares the reload logic below.
		 * NOTE(review): str may be NULL on allocation failure, in
		 * which case vmd_reload() falls back to the default config
		 * file — confirm this is intended.
		 */
	case IMSG_VMDOP_RELOAD:
		if (vmd_reload(0, str) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		free(str);
		break;
	case IMSG_CTL_RESET:
		IMSG_SIZE_CHECK(imsg, &v);
		memcpy(&v, imsg->data, sizeof(v));
		if (vmd_reload(v, NULL) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		break;
	case IMSG_CTL_VERBOSE:
		IMSG_SIZE_CHECK(imsg, &verbose);
		memcpy(&verbose, imsg->data, sizeof(verbose));
		log_setverbose(verbose);

		/* Propagate the new verbosity to the children. */
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
		cmd = IMSG_CTL_OK;
		break;
	case IMSG_VMDOP_PAUSE_VM:
	case IMSG_VMDOP_UNPAUSE_VM:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (vid.vid_id == 0) {
			/* Lookup by name when no id was supplied. */
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
				    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
				    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    vid.vid_uid) != 0) {
			res = EPERM;
			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		/* Relay the (un)pause to vmm; it answers asynchronously. */
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_SEND_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
				/* received fd is not relayed; close it */
				close(imsg->fd);
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
			close(imsg->fd);
			break;
		}
		vmr.vmr_id = vid.vid_id;
		log_debug("%s: sending fd to vmm", __func__);
		/* fd ownership passes to vmm with the relayed imsg. */
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (imsg->fd == -1) {
			log_warnx("%s: invalid fd", __func__);
			return (-1);
		}
		/* Read and validate the dump header from the received fd. */
		if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) !=
		    sizeof(vmh)) {
			log_warnx("%s: error reading vmh from received vm",
			    __func__);
			res = EIO;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}

		if (vmd_check_vmh(&vmh)) {
			res = ENOENT;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		/* Followed by the saved vmop_create_params. */
		if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) !=
		    sizeof(vmc)) {
			log_warnx("%s: error reading vmc from received vm",
			    __func__);
			res = EIO;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		/* Rename to the requested name and register fresh. */
		strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
		    sizeof(vmc.vmc_params.vcp_name));
		vmc.vmc_params.vcp_id = 0;

		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
		if (ret != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			close(imsg->fd);
		} else {
			vm->vm_state |= VM_STATE_RECEIVED;
			config_setvm(ps, vm, imsg->hdr.peerid,
			    vmc.vmc_owner.uid);
			log_debug("%s: sending fd to vmm", __func__);
			proc_compose_imsg(ps, PROC_VMM, -1,
			    IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd,
			    NULL, 0);
		}
		break;
	case IMSG_VMDOP_DONE:
		/* Config is fully loaded; open the control sockets. */
		control_reset(&ps->ps_csock);
		TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
			control_reset(rcs);
		cmd = 0;
		break;
	default:
		return (-1);
	}

	/* Compose the response for control, if one is due. */
	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}
339
340int
341vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
342{
343	struct vmop_result	 vmr;
344	struct privsep		*ps = p->p_ps;
345	int			 res = 0;
346	struct vmd_vm		*vm;
347	struct vm_create_params	*vcp;
348	struct vmop_info_result	 vir;
349
350	switch (imsg->hdr.type) {
351	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
352		IMSG_SIZE_CHECK(imsg, &vmr);
353		memcpy(&vmr, imsg->data, sizeof(vmr));
354		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
355			break;
356		proc_compose_imsg(ps, PROC_CONTROL, -1,
357		    imsg->hdr.type, imsg->hdr.peerid, -1,
358		    imsg->data, sizeof(imsg->data));
359		log_info("%s: paused vm %d successfully",
360		    vm->vm_params.vmc_params.vcp_name,
361		    vm->vm_vmid);
362		vm->vm_state |= VM_STATE_PAUSED;
363		break;
364	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
365		IMSG_SIZE_CHECK(imsg, &vmr);
366		memcpy(&vmr, imsg->data, sizeof(vmr));
367		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
368			break;
369		proc_compose_imsg(ps, PROC_CONTROL, -1,
370		    imsg->hdr.type, imsg->hdr.peerid, -1,
371		    imsg->data, sizeof(imsg->data));
372		log_info("%s: unpaused vm %d successfully.",
373		    vm->vm_params.vmc_params.vcp_name,
374		    vm->vm_vmid);
375		vm->vm_state &= ~VM_STATE_PAUSED;
376		break;
377	case IMSG_VMDOP_START_VM_RESPONSE:
378		IMSG_SIZE_CHECK(imsg, &vmr);
379		memcpy(&vmr, imsg->data, sizeof(vmr));
380		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
381			break;
382		vm->vm_pid = vmr.vmr_pid;
383		vcp = &vm->vm_params.vmc_params;
384		vcp->vcp_id = vmr.vmr_id;
385
386		/*
387		 * If the peerid is not -1, forward the response back to the
388		 * the control socket.  If it is -1, the request originated
389		 * from the parent, not the control socket.
390		 */
391		if (vm->vm_peerid != (uint32_t)-1) {
392			(void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
393			    sizeof(vmr.vmr_ttyname));
394			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
395			    imsg->hdr.type, vm->vm_peerid, -1,
396			    &vmr, sizeof(vmr)) == -1) {
397				errno = vmr.vmr_result;
398				log_warn("%s: failed to forward vm result",
399				    vcp->vcp_name);
400				vm_terminate(vm, __func__);
401				return (-1);
402			}
403		}
404
405		if (vmr.vmr_result) {
406			log_warnx("%s: failed to start vm", vcp->vcp_name);
407			vm_terminate(vm, __func__);
408			errno = vmr.vmr_result;
409			break;
410		}
411
412		/* Now configure all the interfaces */
413		if (vm_priv_ifconfig(ps, vm) == -1) {
414			log_warn("%s: failed to configure vm", vcp->vcp_name);
415			vm_terminate(vm, __func__);
416			break;
417		}
418
419		log_info("%s: started vm %d successfully, tty %s",
420		    vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
421		break;
422	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
423		IMSG_SIZE_CHECK(imsg, &vmr);
424		memcpy(&vmr, imsg->data, sizeof(vmr));
425
426		if (vmr.vmr_result) {
427			DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
428			    __func__, vmr.vmr_id);
429			proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
430		} else {
431			if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
432				break;
433			/* Mark VM as shutting down */
434			vm->vm_state |= VM_STATE_SHUTDOWN;
435		}
436		break;
437	case IMSG_VMDOP_SEND_VM_RESPONSE:
438		IMSG_SIZE_CHECK(imsg, &vmr);
439		memcpy(&vmr, imsg->data, sizeof(vmr));
440		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
441			break;
442		if (!vmr.vmr_result) {
443			log_info("%s: sent vm %d successfully.",
444			    vm->vm_params.vmc_params.vcp_name,
445			    vm->vm_vmid);
446			vm_terminate(vm, __func__);
447		}
448
449		/* Send a response if a control client is waiting for it */
450		if (imsg->hdr.peerid != (uint32_t)-1) {
451			/* the error is meaningless for deferred responses */
452			vmr.vmr_result = 0;
453
454			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
455			    IMSG_VMDOP_SEND_VM_RESPONSE,
456			    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
457				return (-1);
458		}
459		break;
460	case IMSG_VMDOP_TERMINATE_VM_EVENT:
461		IMSG_SIZE_CHECK(imsg, &vmr);
462		memcpy(&vmr, imsg->data, sizeof(vmr));
463		DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
464		    __func__, vmr.vmr_id, vmr.vmr_result);
465		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
466			log_debug("%s: vm %d is no longer available",
467			    __func__, vmr.vmr_id);
468			break;
469		}
470		if (vmr.vmr_result != EAGAIN ||
471		    vm->vm_params.vmc_bootdevice) {
472			vm_terminate(vm, __func__);
473		} else {
474			/* Stop VM instance but keep the tty open */
475			vm_stop(vm, 1, __func__);
476			config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
477		}
478
479		/* The error is meaningless for deferred responses */
480		vmr.vmr_result = 0;
481
482		if (proc_compose_imsg(ps, PROC_CONTROL, -1,
483			IMSG_VMDOP_TERMINATE_VM_EVENT,
484			imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
485			return (-1);
486		break;
487	case IMSG_VMDOP_GET_INFO_VM_DATA:
488		IMSG_SIZE_CHECK(imsg, &vir);
489		memcpy(&vir, imsg->data, sizeof(vir));
490		if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
491			memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
492			if (vm->vm_ttyname[0] != '\0')
493				strlcpy(vir.vir_ttyname, vm->vm_ttyname,
494				    sizeof(vir.vir_ttyname));
495			log_debug("%s: running vm: %d, vm_state: 0x%x",
496			    __func__, vm->vm_vmid, vm->vm_state);
497			vir.vir_state = vm->vm_state;
498			/* get the user id who started the vm */
499			vir.vir_uid = vm->vm_uid;
500			vir.vir_gid = vm->vm_params.vmc_owner.gid;
501		}
502		if (proc_compose_imsg(ps,
503		    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
504		    PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type,
505		    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
506			log_debug("%s: GET_INFO_VM failed for vm %d, removing",
507			    __func__, vm->vm_vmid);
508			vm_terminate(vm, __func__);
509			return (-1);
510		}
511		break;
512	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
513		/*
514		 * PROC_VMM has responded with the *running* VMs, now we
515		 * append the others. These use the special value 0 for their
516		 * kernel id to indicate that they are not running.
517		 */
518		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
519			if (!(vm->vm_state & VM_STATE_RUNNING)) {
520				memset(&vir, 0, sizeof(vir));
521				vir.vir_info.vir_id = vm->vm_vmid;
522				strlcpy(vir.vir_info.vir_name,
523				    vm->vm_params.vmc_params.vcp_name,
524				    VMM_MAX_NAME_LEN);
525				vir.vir_info.vir_memory_size =
526				    vm->vm_params.vmc_params.
527				    vcp_memranges[0].vmr_size;
528				vir.vir_info.vir_ncpus =
529				    vm->vm_params.vmc_params.vcp_ncpus;
530				/* get the configured user id for this vm */
531				vir.vir_uid = vm->vm_params.vmc_owner.uid;
532				vir.vir_gid = vm->vm_params.vmc_owner.gid;
533				log_debug("%s: vm: %d, vm_state: 0x%x",
534				    __func__, vm->vm_vmid, vm->vm_state);
535				vir.vir_state = vm->vm_state;
536				if (proc_compose_imsg(ps,
537				    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
538				    PROC_AGENTX : PROC_CONTROL, -1,
539				    IMSG_VMDOP_GET_INFO_VM_DATA,
540				    imsg->hdr.peerid, -1, &vir,
541				    sizeof(vir)) == -1) {
542					log_debug("%s: GET_INFO_VM_END failed",
543					    __func__);
544					vm_terminate(vm, __func__);
545					return (-1);
546				}
547			}
548		}
549		IMSG_SIZE_CHECK(imsg, &res);
550		proc_forward_imsg(ps, imsg,
551		    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
552		    PROC_AGENTX : PROC_CONTROL, -1);
553		break;
554	default:
555		return (-1);
556	}
557
558	return (0);
559}
560
561int
562vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg)
563{
564	struct privsep			*ps = p->p_ps;
565
566	switch (imsg->hdr.type) {
567	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
568		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
569		return (0);
570	default:
571		break;
572	}
573	return (-1);
574}
575
576int
577vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg)
578{
579	struct vmop_addr_result	 var;
580
581	switch (imsg->hdr.type) {
582	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
583		IMSG_SIZE_CHECK(imsg, &var);
584		memcpy(&var, imsg->data, sizeof(var));
585		proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1);
586		break;
587	default:
588		return (-1);
589	}
590
591	return (0);
592}
593
594int
595vmd_check_vmh(struct vm_dump_header *vmh)
596{
597	int i;
598	unsigned int code, leaf;
599	unsigned int a, b, c, d;
600
601	if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) {
602		log_warnx("%s: incompatible dump signature", __func__);
603		return (-1);
604	}
605
606	if (vmh->vmh_version != VM_DUMP_VERSION) {
607		log_warnx("%s: incompatible dump version", __func__);
608		return (-1);
609	}
610
611	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
612		code = vmh->vmh_cpuids[i].code;
613		leaf = vmh->vmh_cpuids[i].leaf;
614		if (leaf != 0x00) {
615			log_debug("%s: invalid leaf 0x%x for code 0x%x",
616			    __func__, leaf, code);
617			return (-1);
618		}
619
620		switch (code) {
621		case 0x00:
622			CPUID_LEAF(code, leaf, a, b, c, d);
623			if (vmh->vmh_cpuids[i].a > a) {
624				log_debug("%s: incompatible cpuid level",
625				    __func__);
626				return (-1);
627			}
628			if (!(vmh->vmh_cpuids[i].b == b &&
629			    vmh->vmh_cpuids[i].c == c &&
630			    vmh->vmh_cpuids[i].d == d)) {
631				log_debug("%s: incompatible cpu brand",
632				    __func__);
633				return (-1);
634			}
635			break;
636
637		case 0x01:
638			CPUID_LEAF(code, leaf, a, b, c, d);
639			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
640			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
641				log_debug("%s: incompatible cpu features "
642				    "code: 0x%x leaf: 0x%x  reg: c", __func__,
643				    code, leaf);
644				return (-1);
645			}
646			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
647			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
648				log_debug("%s: incompatible cpu features "
649				    "code: 0x%x leaf: 0x%x  reg: d", __func__,
650				    code, leaf);
651				return (-1);
652			}
653			break;
654
655		case 0x07:
656			CPUID_LEAF(code, leaf, a, b, c, d);
657			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
658			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
659				log_debug("%s: incompatible cpu features "
660				    "code: 0x%x leaf: 0x%x  reg: c", __func__,
661				    code, leaf);
662				return (-1);
663			}
664			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
665			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
666				log_debug("%s: incompatible cpu features "
667				    "code: 0x%x leaf: 0x%x  reg: d", __func__,
668				    code, leaf);
669				return (-1);
670			}
671			break;
672
673		case 0x0d:
674			CPUID_LEAF(code, leaf, a, b, c, d);
675			if (vmh->vmh_cpuids[i].b > b) {
676				log_debug("%s: incompatible cpu: insufficient "
677				    "max save area for enabled XCR0 features",
678				    __func__);
679				return (-1);
680			}
681			if (vmh->vmh_cpuids[i].c > c) {
682				log_debug("%s: incompatible cpu: insufficient "
683				    "max save area for supported XCR0 features",
684				    __func__);
685				return (-1);
686			}
687			break;
688
689		case 0x80000001:
690			CPUID_LEAF(code, leaf, a, b, c, d);
691			if ((vmh->vmh_cpuids[i].a & a) !=
692			    vmh->vmh_cpuids[i].a) {
693				log_debug("%s: incompatible cpu features "
694				    "code: 0x%x leaf: 0x%x  reg: a", __func__,
695				    code, leaf);
696				return (-1);
697			}
698			if ((vmh->vmh_cpuids[i].c & c) !=
699			    vmh->vmh_cpuids[i].c) {
700				log_debug("%s: incompatible cpu features "
701				    "code: 0x%x leaf: 0x%x  reg: c", __func__,
702				    code, leaf);
703				return (-1);
704			}
705			if ((vmh->vmh_cpuids[i].d & d) !=
706			    vmh->vmh_cpuids[i].d) {
707				log_debug("%s: incompatible cpu features "
708				    "code: 0x%x leaf: 0x%x  reg: d", __func__,
709				    code, leaf);
710				return (-1);
711			}
712			break;
713
714		default:
715			log_debug("%s: unknown code 0x%x", __func__, code);
716			return (-1);
717		}
718	}
719
720	return (0);
721}
722
723void
724vmd_sighdlr(int sig, short event, void *arg)
725{
726	if (privsep_process != PROC_PARENT)
727		return;
728	log_debug("%s: handling signal", __func__);
729
730	switch (sig) {
731	case SIGHUP:
732		log_info("%s: reload requested with SIGHUP", __func__);
733
734		/*
735		 * This is safe because libevent uses async signal handlers
736		 * that run in the event loop and not in signal context.
737		 */
738		(void)vmd_reload(0, NULL);
739		break;
740	case SIGPIPE:
741		log_info("%s: ignoring SIGPIPE", __func__);
742		break;
743	case SIGUSR1:
744		log_info("%s: ignoring SIGUSR1", __func__);
745		break;
746	case SIGTERM:
747	case SIGINT:
748		vmd_shutdown();
749		break;
750	default:
751		fatalx("unexpected signal");
752	}
753}
754
755__dead void
756usage(void)
757{
758	extern char *__progname;
759	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
760	    __progname);
761	exit(1);
762}
763
/*
 * main
 *
 * vmd entry point: parses command-line options, performs root and
 * configuration checks, opens /dev/vmm, forks the privsep children
 * via proc_init() (only the parent returns), daemonizes, installs
 * signal handlers and enters the libevent loop.
 */
int
main(int argc, char **argv)
{
	struct privsep		*ps;
	int			 ch;
	const char		*conffile = VMD_CONF;
	enum privsep_procid	 proc_id = PROC_PARENT;
	int			 proc_instance = 0;
	const char		*errp, *title = NULL;
	int			 argc0 = argc;	/* saved for re-exec of children */

	log_init(0, LOG_DAEMON);

	if ((env = calloc(1, sizeof(*env))) == NULL)
		fatal("calloc: env");

	while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) {
		switch (ch) {
		case 'D':
			/* define a config macro on the command line */
			if (cmdline_symset(optarg) < 0)
				log_warnx("could not parse macro definition %s",
				    optarg);
			break;
		case 'd':
			env->vmd_debug = 2;
			break;
		case 'f':
			conffile = optarg;
			break;
		case 'v':
			env->vmd_verbose++;
			break;
		case 'n':
			/* config check only */
			env->vmd_noaction = 1;
			break;
		case 'P':
			/* run as a specific privsep child (internal use) */
			title = optarg;
			proc_id = proc_getid(procs, nitems(procs), title);
			if (proc_id == PROC_MAX)
				fatalx("invalid process name");
			break;
		case 'I':
			proc_instance = strtonum(optarg, 0,
			    PROC_MAX_INSTANCES, &errp);
			if (errp)
				fatalx("invalid process instance");
			break;
		default:
			usage();
		}
	}

	argc -= optind;
	if (argc > 0)
		usage();

	/* -n implies at least debug level 1 (stay in foreground) */
	if (env->vmd_noaction && !env->vmd_debug)
		env->vmd_debug = 1;

	log_init(env->vmd_debug, LOG_DAEMON);
	log_setverbose(env->vmd_verbose);

	/* check for root privileges */
	if (env->vmd_noaction == 0) {
		if (geteuid())
			fatalx("need root privileges");
	}

	ps = &env->vmd_ps;
	ps->ps_env = env;
	env->vmd_fd = -1;

	if (config_init(env) == -1)
		fatal("failed to initialize configuration");

	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
		fatal("unknown user %s", VMD_USER);

	/* First proc runs as root without pledge but in default chroot */
	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

	/* Open /dev/vmm early. */
	if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
		env->vmd_fd = open(VMM_NODE, O_RDWR);
		if (env->vmd_fd == -1)
			fatal("%s", VMM_NODE);
	}

	/* Configure the control socket */
	ps->ps_csock.cs_name = SOCKET_NAME;
	TAILQ_INIT(&ps->ps_rcsocks);

	/* Configuration will be parsed after forking the children */
	env->vmd_conffile = conffile;

	if (env->vmd_noaction)
		ps->ps_noaction = 1;
	ps->ps_instance = proc_instance;
	if (title != NULL)
		ps->ps_title[proc_id] = title;

	/* only the parent returns */
	proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv,
	    proc_id);

	log_procinit("parent");
	if (!env->vmd_debug && daemon(0, 0) == -1)
		fatal("can't daemonize");

	if (ps->ps_noaction == 0)
		log_info("startup");

	event_init();

	/* Install async (event-loop) signal handlers. */
	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);

	signal_add(&ps->ps_evsigint, NULL);
	signal_add(&ps->ps_evsigterm, NULL);
	signal_add(&ps->ps_evsighup, NULL);
	signal_add(&ps->ps_evsigpipe, NULL);
	signal_add(&ps->ps_evsigusr1, NULL);

	if (!env->vmd_noaction)
		proc_connect(ps);

	if (vmd_configure() == -1)
		fatalx("configuration failed");

	event_dispatch();

	log_debug("parent exiting");

	return (0);
}
903
904void
905start_vm_batch(int fd, short type, void *args)
906{
907	int		i = 0;
908	struct vmd_vm	*vm;
909
910	log_debug("%s: starting batch of %d vms", __func__,
911	    env->vmd_cfg.parallelism);
912	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
913		if (!(vm->vm_state & VM_STATE_WAITING)) {
914			log_debug("%s: not starting vm %s (disabled)",
915			    __func__,
916			    vm->vm_params.vmc_params.vcp_name);
917			continue;
918		}
919		i++;
920		if (i > env->vmd_cfg.parallelism) {
921			evtimer_add(&staggered_start_timer,
922			    &env->vmd_cfg.delay);
923			break;
924		}
925		vm->vm_state &= ~VM_STATE_WAITING;
926		config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid);
927	}
928	log_debug("%s: done starting vms", __func__);
929}
930
/*
 * vmd_configure
 *
 * Post-fork configuration of the parent process: opens the pty master
 * device, drops privileges with pledge(2), parses the configuration
 * file, passes the /dev/vmm fd to the vmm child, brings up configured
 * switches and kicks off the staggered VM start.
 *
 * Returns 0 on success, -1 on failure (and may exit directly on
 * parse errors or in -n mode).
 */
int
vmd_configure(void)
{
	int			ncpus;
	struct vmd_switch	*vsw;
	int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
	size_t ncpus_sz = sizeof(ncpus);

	/* pty master, needed later for VM consoles */
	if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
		fatal("open %s", PATH_PTMDEV);

	/*
	 * pledge in the parent process:
	 * stdio - for malloc and basic I/O including events.
	 * rpath - for reload to open and read the configuration files.
	 * wpath - for opening disk images and tap devices.
	 * tty - for openpty and TIOCUCNTL.
	 * proc - run kill to terminate its children safely.
	 * sendfd - for disks, interfaces and other fds.
	 * recvfd - for send and receive.
	 * getpw - lookup user or group id by name.
	 * chown, fattr - change tty ownership
	 * flock - locking disk files
	 */
	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
	    " chown fattr flock", NULL) == -1)
		fatal("pledge");

	if (parse_config(env->vmd_conffile) == -1) {
		proc_kill(&env->vmd_ps);
		exit(1);
	}

	/* -n mode: report the result and exit without starting anything */
	if (env->vmd_noaction) {
		fprintf(stderr, "configuration OK\n");
		proc_kill(&env->vmd_ps);
		exit(0);
	}

	/* Send VMM device fd to vmm proc. */
	proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
	    IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0);

	/* Send shared global configuration to all children */
	if (config_setconfig(env) == -1)
		return (-1);

	/* Create all configured switches that are not up yet. */
	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (vsw->sw_running)
			continue;
		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
			log_warn("%s: failed to create switch %s",
			    __func__, vsw->sw_name);
			switch_remove(vsw);
			return (-1);
		}
	}

	/*
	 * Default staggered start: one batch per online CPU with the
	 * default delay, unless configured explicitly.
	 */
	if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
		env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
		if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1)
			ncpus = 1;
		env->vmd_cfg.parallelism = ncpus;
		log_debug("%s: setting staggered start configuration to "
		    "parallelism: %d and delay: %lld",
		    __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec);
	}

	log_debug("%s: starting vms in staggered fashion", __func__);
	evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
	/* start first batch */
	start_vm_batch(0, 0, NULL);

	return (0);
}
1006
/*
 * vmd_reload
 *
 * Reload (or reset) the daemon configuration.
 *
 * reset    - when non-zero, purge the configuration at the given
 *            reset level instead of re-parsing the config file.
 * filename - alternative config file; NULL or "" selects the default
 *            file and enables full reload semantics (non-running VMs
 *            are removed before re-parsing).
 *
 * Returns 0 on success, -1 on failure.
 */
int
vmd_reload(unsigned int reset, const char *filename)
{
	struct vmd_vm		*vm, *next_vm;
	struct vmd_switch	*vsw;
	int			 reload = 0;

	/* Switch back to the default config file */
	if (filename == NULL || *filename == '\0') {
		filename = env->vmd_conffile;
		reload = 1;
	}

	log_debug("%s: level %d config file %s", __func__, reset, filename);

	if (reset) {
		/* Purge the configuration */
		config_purge(env, reset);
		config_setreset(env, reset);
	} else {
		/*
		 * Load or reload the configuration.
		 *
		 * Reloading removes all non-running VMs before processing the
		 * config file, whereas loading only adds to the existing list
		 * of VMs.
		 */

		if (reload) {
			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
			    next_vm) {
				if (!(vm->vm_state & VM_STATE_RUNNING)) {
					DPRINTF("%s: calling vm_remove",
					    __func__);
					vm_remove(vm, __func__);
				}
			}
		}

		if (parse_config(filename) == -1) {
			log_debug("%s: failed to load config file %s",
			    __func__, filename);
			return (-1);
		}

		if (reload) {
			/* Update shared global configuration in all children */
			if (config_setconfig(env) == -1)
				return (-1);
		}

		/* Bring up any newly configured switches. */
		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
			if (vsw->sw_running)
				continue;
			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
				log_warn("%s: failed to create switch %s",
				    __func__, vsw->sw_name);
				switch_remove(vsw);
				return (-1);
			}
		}

		log_debug("%s: starting vms in staggered fashion", __func__);
		evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
		/* start first batch */
		start_vm_batch(0, 0, NULL);

		} /* NOTE(review): oddly indented brace; closes the else branch */

	return (0);
}
1078
1079void
1080vmd_shutdown(void)
1081{
1082	struct vmd_vm *vm, *vm_next;
1083
1084	log_debug("%s: performing shutdown", __func__);
1085
1086	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
1087		vm_remove(vm, __func__);
1088	}
1089
1090	proc_kill(&env->vmd_ps);
1091	free(env);
1092
1093	log_warnx("parent terminating");
1094	exit(0);
1095}
1096
1097struct vmd_vm *
1098vm_getbyvmid(uint32_t vmid)
1099{
1100	struct vmd_vm	*vm;
1101
1102	if (vmid == 0)
1103		return (NULL);
1104	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1105		if (vm->vm_vmid == vmid)
1106			return (vm);
1107	}
1108
1109	return (NULL);
1110}
1111
1112struct vmd_vm *
1113vm_getbyid(uint32_t id)
1114{
1115	struct vmd_vm	*vm;
1116
1117	if (id == 0)
1118		return (NULL);
1119	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1120		if (vm->vm_params.vmc_params.vcp_id == id)
1121			return (vm);
1122	}
1123
1124	return (NULL);
1125}
1126
1127uint32_t
1128vm_id2vmid(uint32_t id, struct vmd_vm *vm)
1129{
1130	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
1131		return (0);
1132	DPRINTF("%s: vmm id %u is vmid %u", __func__,
1133	    id, vm->vm_vmid);
1134	return (vm->vm_vmid);
1135}
1136
1137uint32_t
1138vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
1139{
1140	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
1141		return (0);
1142	DPRINTF("%s: vmid %u is vmm id %u", __func__,
1143	    vmid, vm->vm_params.vmc_params.vcp_id);
1144	return (vm->vm_params.vmc_params.vcp_id);
1145}
1146
1147struct vmd_vm *
1148vm_getbyname(const char *name)
1149{
1150	struct vmd_vm	*vm;
1151
1152	if (name == NULL)
1153		return (NULL);
1154	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1155		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
1156			return (vm);
1157	}
1158
1159	return (NULL);
1160}
1161
1162struct vmd_vm *
1163vm_getbypid(pid_t pid)
1164{
1165	struct vmd_vm	*vm;
1166
1167	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1168		if (vm->vm_pid == pid)
1169			return (vm);
1170	}
1171
1172	return (NULL);
1173}
1174
1175void
1176vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
1177{
1178	struct privsep	*ps = &env->vmd_ps;
1179	unsigned int	 i, j;
1180
1181	if (vm == NULL)
1182		return;
1183
1184	log_debug("%s: %s %s stopping vm %d%s",
1185	    __func__, ps->ps_title[privsep_process], caller,
1186	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");
1187
1188	vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING
1189	    | VM_STATE_SHUTDOWN);
1190
1191	if (vm->vm_iev.ibuf.fd != -1) {
1192		event_del(&vm->vm_iev.ev);
1193		close(vm->vm_iev.ibuf.fd);
1194	}
1195	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) {
1196		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
1197			if (vm->vm_disks[i][j] != -1) {
1198				close(vm->vm_disks[i][j]);
1199				vm->vm_disks[i][j] = -1;
1200			}
1201		}
1202	}
1203	for (i = 0; i < VM_MAX_NICS_PER_VM; i++) {
1204		if (vm->vm_ifs[i].vif_fd != -1) {
1205			close(vm->vm_ifs[i].vif_fd);
1206			vm->vm_ifs[i].vif_fd = -1;
1207		}
1208		free(vm->vm_ifs[i].vif_name);
1209		free(vm->vm_ifs[i].vif_switch);
1210		free(vm->vm_ifs[i].vif_group);
1211		vm->vm_ifs[i].vif_name = NULL;
1212		vm->vm_ifs[i].vif_switch = NULL;
1213		vm->vm_ifs[i].vif_group = NULL;
1214	}
1215	if (vm->vm_kernel != -1) {
1216		close(vm->vm_kernel);
1217		vm->vm_kernel = -1;
1218	}
1219	if (vm->vm_cdrom != -1) {
1220		close(vm->vm_cdrom);
1221		vm->vm_cdrom = -1;
1222	}
1223	if (!keeptty) {
1224		vm_closetty(vm);
1225		vm->vm_uid = 0;
1226	}
1227}
1228
1229void
1230vm_remove(struct vmd_vm *vm, const char *caller)
1231{
1232	struct privsep	*ps = &env->vmd_ps;
1233
1234	if (vm == NULL)
1235		return;
1236
1237	log_debug("%s: %s %s removing vm %d from running config",
1238	    __func__, ps->ps_title[privsep_process], caller,
1239	    vm->vm_vmid);
1240
1241	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
1242
1243	vm_stop(vm, 0, caller);
1244	free(vm);
1245}
1246
1247int
1248vm_claimid(const char *name, int uid, uint32_t *id)
1249{
1250	struct name2id *n2i = NULL;
1251
1252	TAILQ_FOREACH(n2i, env->vmd_known, entry)
1253		if (strcmp(n2i->name, name) == 0 && n2i->uid == uid)
1254			goto out;
1255
1256	if (++env->vmd_nvm == 0) {
1257		log_warnx("too many vms");
1258		return (-1);
1259	}
1260	if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) {
1261		log_warnx("could not alloc vm name");
1262		return (-1);
1263	}
1264	n2i->id = env->vmd_nvm;
1265	n2i->uid = uid;
1266	if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) {
1267		log_warnx("vm name too long");
1268		free(n2i);
1269		return (-1);
1270	}
1271	TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry);
1272
1273out:
1274	*id = n2i->id;
1275	return (0);
1276}
1277
/*
 * vm_register
 *
 * Validates the given create parameters and registers a new VM in the
 * global VM list.
 *
 * Parameters:
 *  ps: the privsep context
 *  vmc: the requested VM create parameters (copied into the new VM)
 *  ret_vm: returns the registered VM; on EALREADY it points at the
 *          existing VM instead
 *  id: internal vm id to use, or 0 to claim a new one
 *  uid: the user ID of the user making the request
 *
 * Return values:
 *   0: success, *ret_vm is the newly registered VM
 *  -1: failure, errno describes the error
 */
int
vm_register(struct privsep *ps, struct vmop_create_params *vmc,
    struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
{
	struct vmd_vm		*vm = NULL, *vm_parent = NULL;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vmop_owner	*vmo = NULL;
	uint32_t		 nid, rng;
	unsigned int		 i, j;
	struct vmd_switch	*sw;
	char			*s;
	int			 ret = 0;

	/* Check if this is an instance of another VM */
	if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) {
		errno = ret; /* XXX might set invalid errno */
		return (-1);
	}

	errno = 0;
	*ret_vm = NULL;

	/* A VM with the same name or id may already be registered. */
	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    uid) != 0) {
			errno = EPERM;
			goto fail;
		}
		*ret_vm = vm;
		errno = EALREADY;
		goto fail;
	}

	if (vm_parent != NULL)
		vmo = &vm_parent->vm_params.vmc_insowner;

	/* non-root users can only start existing VMs or instances */
	if (vm_checkperm(NULL, vmo, uid) != 0) {
		log_warnx("permission denied");
		errno = EPERM;
		goto fail;
	}
	if (vmc->vmc_flags == 0) {
		log_warnx("invalid configuration, no devices");
		errno = VMD_DISK_MISSING;
		goto fail;
	}
	/* Apply defaults before validating the limits below. */
	if (vcp->vcp_ncpus == 0)
		vcp->vcp_ncpus = 1;
	if (vcp->vcp_memranges[0].vmr_size == 0)
		vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
		log_warnx("invalid number of CPUs");
		goto fail;
	} else if (vcp->vcp_ndisks > VM_MAX_DISKS_PER_VM) {
		log_warnx("invalid number of disks");
		goto fail;
	} else if (vcp->vcp_nnics > VM_MAX_NICS_PER_VM) {
		log_warnx("invalid number of interfaces");
		goto fail;
	} else if (strlen(vcp->vcp_kernel) == 0 &&
	    vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) {
		log_warnx("no kernel or disk/cdrom specified");
		goto fail;
	} else if (strlen(vcp->vcp_name) == 0) {
		log_warnx("invalid VM name");
		goto fail;
	} else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
	    *vcp->vcp_name == '_') {
		/* Names must not start with '-', '.' or '_'. */
		log_warnx("invalid VM name");
		goto fail;
	} else {
		/* The remaining characters must be alphanumeric or .-_ */
		for (s = vcp->vcp_name; *s != '\0'; ++s) {
			if (!(isalnum((unsigned char)*s) || *s == '.' || \
			    *s == '-' || *s == '_')) {
				log_warnx("invalid VM name");
				goto fail;
			}
		}
	}

	if ((vm = calloc(1, sizeof(*vm))) == NULL)
		goto fail;

	/* The VM keeps a private copy of the create parameters. */
	memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
	vmc = &vm->vm_params;
	vcp = &vmc->vmc_params;
	vm->vm_pid = -1;
	vm->vm_tty = -1;
	vm->vm_receive_fd = -1;
	vm->vm_state &= ~VM_STATE_PAUSED;

	/* Mark all fds as unused until the VM is actually started. */
	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++)
		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
			vm->vm_disks[i][j] = -1;
	for (i = 0; i < VM_MAX_NICS_PER_VM; i++)
		vm->vm_ifs[i].vif_fd = -1;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
			/* inherit per-interface flags from the switch */
			vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
		}

		/*
		 * If the MAC address is zero, always randomize it in vmd(8)
		 * because we cannot rely on the guest OS to do the right
		 * thing like OpenBSD does.  Based on ether_fakeaddr()
		 * from the kernel, incremented by one to differentiate
		 * the source.
		 */
		if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) {
			rng = arc4random();
			vcp->vcp_macs[i][0] = 0xfe;
			vcp->vcp_macs[i][1] = 0xe1;
			vcp->vcp_macs[i][2] = 0xba + 1;
			vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
			vcp->vcp_macs[i][4] = rng;
			vcp->vcp_macs[i][5] = rng >> 8;
		}
	}
	vm->vm_kernel = -1;
	vm->vm_cdrom = -1;
	vm->vm_iev.ibuf.fd = -1;

	/*
	 * Assign a new internal Id if not specified and we succeed in
	 * claiming a new Id.
	 */
	if (id != 0)
		vm->vm_vmid = id;
	else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1)
		goto fail;
	else
		vm->vm_vmid = nid;

	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);

	*ret_vm = vm;
	return (0);
 fail:
	if (errno == 0)
		errno = EINVAL;
	return (-1);
}
1424
1425int
1426vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
1427    struct vmop_create_params *vmc, uid_t uid)
1428{
1429	char			*name;
1430	struct vm_create_params	*vcp = &vmc->vmc_params;
1431	struct vmop_create_params *vmcp;
1432	struct vm_create_params	*vcpp;
1433	struct vmd_vm		*vm = NULL;
1434	unsigned int		 i, j;
1435
1436	/* return without error if the parent is NULL (nothing to inherit) */
1437	if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 ||
1438	    vmc->vmc_instance[0] == '\0')
1439		return (0);
1440
1441	if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) {
1442		return (VMD_PARENT_INVALID);
1443	}
1444
1445	vmcp = &(*vm_parent)->vm_params;
1446	vcpp = &vmcp->vmc_params;
1447
1448	/* Are we allowed to create an instance from this VM? */
1449	if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) {
1450		log_warnx("vm \"%s\" no permission to create vm instance",
1451		    vcpp->vcp_name);
1452		return (ENAMETOOLONG);
1453	}
1454
1455	name = vcp->vcp_name;
1456
1457	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
1458	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
1459		return (EPROCLIM);
1460	}
1461
1462	/* CPU */
1463	if (vcp->vcp_ncpus == 0)
1464		vcp->vcp_ncpus = vcpp->vcp_ncpus;
1465	if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 &&
1466	    vcp->vcp_ncpus != vcpp->vcp_ncpus) {
1467		log_warnx("vm \"%s\" no permission to set cpus", name);
1468		return (EPERM);
1469	}
1470
1471	/* memory */
1472	if (vcp->vcp_memranges[0].vmr_size == 0)
1473		vcp->vcp_memranges[0].vmr_size =
1474		    vcpp->vcp_memranges[0].vmr_size;
1475	if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 &&
1476	    vcp->vcp_memranges[0].vmr_size !=
1477	    vcpp->vcp_memranges[0].vmr_size) {
1478		log_warnx("vm \"%s\" no permission to set memory", name);
1479		return (EPERM);
1480	}
1481
1482	/* disks cannot be inherited */
1483	if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 &&
1484	    vcp->vcp_ndisks) {
1485		log_warnx("vm \"%s\" no permission to set disks", name);
1486		return (EPERM);
1487	}
1488	for (i = 0; i < vcp->vcp_ndisks; i++) {
1489		/* Check if this disk is already used in the parent */
1490		for (j = 0; j < vcpp->vcp_ndisks; j++) {
1491			if (strcmp(vcp->vcp_disks[i],
1492			    vcpp->vcp_disks[j]) == 0) {
1493				log_warnx("vm \"%s\" disk %s cannot be reused",
1494				    name, vcp->vcp_disks[i]);
1495				return (EBUSY);
1496			}
1497		}
1498		vmc->vmc_checkaccess |= VMOP_CREATE_DISK;
1499	}
1500
1501	/* interfaces */
1502	if (vcp->vcp_nnics > 0 &&
1503	    vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 &&
1504	    vcp->vcp_nnics != vcpp->vcp_nnics) {
1505		log_warnx("vm \"%s\" no permission to set interfaces", name);
1506		return (EPERM);
1507	}
1508	for (i = 0; i < vcpp->vcp_nnics; i++) {
1509		/* Interface got overwritten */
1510		if (i < vcp->vcp_nnics)
1511			continue;
1512
1513		/* Copy interface from parent */
1514		vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i];
1515		(void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i],
1516		    sizeof(vmc->vmc_ifnames[i]));
1517		(void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i],
1518		    sizeof(vmc->vmc_ifswitch[i]));
1519		(void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i],
1520		    sizeof(vmc->vmc_ifgroup[i]));
1521		memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i],
1522		    sizeof(vcp->vcp_macs[i]));
1523		vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i];
1524		vcp->vcp_nnics++;
1525	}
1526	for (i = 0; i < vcp->vcp_nnics; i++) {
1527		for (j = 0; j < vcpp->vcp_nnics; j++) {
1528			if (memcmp(zero_mac, vcp->vcp_macs[i],
1529			    sizeof(vcp->vcp_macs[i])) != 0 &&
1530			    memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i],
1531			    sizeof(vcp->vcp_macs[i])) != 0) {
1532				log_warnx("vm \"%s\" lladdr cannot be reused",
1533				    name);
1534				return (EBUSY);
1535			}
1536			if (strlen(vmc->vmc_ifnames[i]) &&
1537			    strcmp(vmc->vmc_ifnames[i],
1538			    vmcp->vmc_ifnames[j]) == 0) {
1539				log_warnx("vm \"%s\" %s cannot be reused",
1540				    vmc->vmc_ifnames[i], name);
1541				return (EBUSY);
1542			}
1543		}
1544	}
1545
1546	/* kernel */
1547	if (strlen(vcp->vcp_kernel) > 0) {
1548		if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) {
1549			log_warnx("vm \"%s\" no permission to set boot image",
1550			    name);
1551			return (EPERM);
1552		}
1553		vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL;
1554	} else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel,
1555	    sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) {
1556		log_warnx("vm \"%s\" kernel name too long", name);
1557		return (EINVAL);
1558	}
1559
1560	/* cdrom */
1561	if (strlen(vcp->vcp_cdrom) > 0) {
1562		if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) {
1563			log_warnx("vm \"%s\" no permission to set cdrom", name);
1564			return (EPERM);
1565		}
1566		vmc->vmc_checkaccess |= VMOP_CREATE_CDROM;
1567	} else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom,
1568	    sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) {
1569		log_warnx("vm \"%s\" cdrom name too long", name);
1570		return (EINVAL);
1571	}
1572
1573	/* user */
1574	if (vmc->vmc_owner.uid == 0)
1575		vmc->vmc_owner.uid = vmcp->vmc_owner.uid;
1576	else if (vmc->vmc_owner.uid != uid &&
1577	    vmc->vmc_owner.uid != vmcp->vmc_owner.uid) {
1578		log_warnx("vm \"%s\" user mismatch", name);
1579		return (EPERM);
1580	}
1581
1582	/* group */
1583	if (vmc->vmc_owner.gid == 0)
1584		vmc->vmc_owner.gid = vmcp->vmc_owner.gid;
1585	else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) {
1586		log_warnx("vm \"%s\" group mismatch", name);
1587		return (EPERM);
1588	}
1589
1590	/* child instances */
1591	if (vmc->vmc_insflags) {
1592		log_warnx("vm \"%s\" cannot change instance permissions", name);
1593		return (EPERM);
1594	}
1595	if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) {
1596		vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid;
1597		vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid;
1598		vmc->vmc_insflags = vmcp->vmc_insflags;
1599	} else {
1600		vmc->vmc_insowner.gid = 0;
1601		vmc->vmc_insowner.uid = 0;
1602		vmc->vmc_insflags = 0;
1603	}
1604
1605	/* finished, remove instance flags */
1606	vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE;
1607
1608	return (0);
1609}
1610
1611/*
1612 * vm_checkperm
1613 *
1614 * Checks if the user represented by the 'uid' parameter is allowed to
1615 * manipulate the VM described by the 'vm' parameter (or connect to said VM's
1616 * console.)
1617 *
1618 * Parameters:
1619 *  vm: the VM whose permission is to be checked
1620 *  vmo: the required uid/gid to be checked
1621 *  uid: the user ID of the user making the request
1622 *
1623 * Return values:
1624 *   0: the permission should be granted
1625 *  -1: the permission check failed (also returned if vm == null)
1626 */
1627int
1628vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid)
1629{
1630	struct group	*gr;
1631	struct passwd	*pw;
1632	char		**grmem;
1633
1634	/* root has no restrictions */
1635	if (uid == 0)
1636		return (0);
1637
1638	if (vmo == NULL)
1639		return (-1);
1640
1641	/* check user */
1642	if (vm == NULL) {
1643		if  (vmo->uid == uid)
1644			return (0);
1645	} else {
1646		/*
1647		 * check user of running vm (the owner of a running vm can
1648		 * be different to (or more specific than) the configured owner.
1649		 */
1650		if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) ||
1651		    (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid))
1652			return (0);
1653	}
1654
1655	/* check groups */
1656	if (vmo->gid != -1) {
1657		if ((pw = getpwuid(uid)) == NULL)
1658			return (-1);
1659		if (pw->pw_gid == vmo->gid)
1660			return (0);
1661		if ((gr = getgrgid(vmo->gid)) != NULL) {
1662			for (grmem = gr->gr_mem; *grmem; grmem++)
1663				if (strcmp(*grmem, pw->pw_name) == 0)
1664					return (0);
1665		}
1666	}
1667
1668	return (-1);
1669}
1670
1671/*
1672 * vm_checkinsflag
1673 *
1674 * Checks whether the non-root user is allowed to set an instance option.
1675 *
1676 * Parameters:
1677 *  vmc: the VM create parameters
1678 *  flag: the flag to be checked
1679 *  uid: the user ID of the user making the request
1680 *
1681 * Return values:
1682 *   0: the permission should be granted
1683 *  -1: the permission check failed (also returned if vm == null)
1684 */
1685int
1686vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid)
1687{
1688	/* root has no restrictions */
1689	if (uid == 0)
1690		return (0);
1691
1692	if ((vmc->vmc_insflags & flag) == 0)
1693		return (-1);
1694
1695	return (0);
1696}
1697
1698/*
1699 * vm_checkaccess
1700 *
1701 * Checks if the user represented by the 'uid' parameter is allowed to
1702 * access the file described by the 'path' parameter.
1703 *
1704 * Parameters:
1705 *  fd: the file descriptor of the opened file
1706 *  uflag: check if the userid has access to the file
1707 *  uid: the user ID of the user making the request
1708 *  amode: the access flags of R_OK and W_OK
1709 *
1710 * Return values:
1711 *   0: the permission should be granted
1712 *  -1: the permission check failed
1713 */
1714int
1715vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode)
1716{
1717	struct group	*gr;
1718	struct passwd	*pw;
1719	char		**grmem;
1720	struct stat	 st;
1721	mode_t		 mode;
1722
1723	if (fd == -1)
1724		return (-1);
1725
1726	/*
1727	 * File has to be accessible and a regular file
1728	 */
1729	if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode))
1730		return (-1);
1731
1732	/* root has no restrictions */
1733	if (uid == 0 || uflag == 0)
1734		return (0);
1735
1736	/* check other */
1737	mode = amode & W_OK ? S_IWOTH : 0;
1738	mode |= amode & R_OK ? S_IROTH : 0;
1739	if ((st.st_mode & mode) == mode)
1740		return (0);
1741
1742	/* check user */
1743	mode = amode & W_OK ? S_IWUSR : 0;
1744	mode |= amode & R_OK ? S_IRUSR : 0;
1745	if (uid == st.st_uid && (st.st_mode & mode) == mode)
1746		return (0);
1747
1748	/* check groups */
1749	mode = amode & W_OK ? S_IWGRP : 0;
1750	mode |= amode & R_OK ? S_IRGRP : 0;
1751	if ((st.st_mode & mode) != mode)
1752		return (-1);
1753	if ((pw = getpwuid(uid)) == NULL)
1754		return (-1);
1755	if (pw->pw_gid == st.st_gid)
1756		return (0);
1757	if ((gr = getgrgid(st.st_gid)) != NULL) {
1758		for (grmem = gr->gr_mem; *grmem; grmem++)
1759			if (strcmp(*grmem, pw->pw_name) == 0)
1760				return (0);
1761	}
1762
1763	return (-1);
1764}
1765
/*
 * vm_opentty
 *
 * Allocates a pty pair for the VM's console via the pre-opened PTM device
 * and applies ownership and permissions to the tty device node, loosely
 * following sshpty.c.
 *
 * Parameters:
 *  vm: the VM that receives the console tty
 *
 * Return values:
 *   0: success; vm_tty and vm_ttyname are set
 *  -1: failure; any partially opened tty is closed again
 */
int
vm_opentty(struct vmd_vm *vm)
{
	struct ptmget		 ptm;
	struct stat		 st;
	struct group		*gr;
	uid_t			 uid;
	gid_t			 gid;
	mode_t			 mode;
	int			 on;

	/*
	 * Open tty with pre-opened PTM fd
	 */
	if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1))
		return (-1);

	/*
	 * We use user ioctl(2) mode to pass break commands.
	 */
	on = 1;
	if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1)
		fatal("could not enable user ioctl mode");

	/* Keep the controller side; only the slave's name is retained. */
	vm->vm_tty = ptm.cfd;
	close(ptm.sfd);
	if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname))
	    >= sizeof(vm->vm_ttyname)) {
		log_warnx("%s: truncated ttyname", __func__);
		goto fail;
	}

	uid = vm->vm_uid;
	gid = vm->vm_params.vmc_owner.gid;

	/* Pick group/mode: owner group, the "tty" group, or owner-only. */
	if (vm->vm_params.vmc_owner.gid != -1) {
		mode = 0660;
	} else if ((gr = getgrnam("tty")) != NULL) {
		gid = gr->gr_gid;
		mode = 0620;
	} else {
		mode = 0600;
		gid = 0;
	}

	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
	    __func__, vm->vm_params.vmc_params.vcp_name,
	    vm->vm_ttyname, uid, gid, mode);

	/*
	 * Change ownership and mode of the tty as required.
	 * Loosely based on the implementation of sshpty.c
	 */
	if (stat(vm->vm_ttyname, &st) == -1)
		goto fail;

	if (st.st_uid != uid || st.st_gid != gid) {
		if (chown(vm->vm_ttyname, uid, gid) == -1) {
			log_warn("chown %s %d %d failed, uid %d",
			    vm->vm_ttyname, uid, gid, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
		if (chmod(vm->vm_ttyname, mode) == -1) {
			log_warn("chmod %s %o failed, uid %d",
			    vm->vm_ttyname, mode, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	return (0);
 fail:
	vm_closetty(vm);
	return (-1);
}
1851
1852void
1853vm_closetty(struct vmd_vm *vm)
1854{
1855	if (vm->vm_tty != -1) {
1856		/* Release and close the tty */
1857		if (fchown(vm->vm_tty, 0, 0) == -1)
1858			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
1859		if (fchmod(vm->vm_tty, 0666) == -1)
1860			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
1861		close(vm->vm_tty);
1862		vm->vm_tty = -1;
1863	}
1864	memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname));
1865}
1866
1867void
1868switch_remove(struct vmd_switch *vsw)
1869{
1870	if (vsw == NULL)
1871		return;
1872
1873	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);
1874
1875	free(vsw->sw_group);
1876	free(vsw->sw_name);
1877	free(vsw);
1878}
1879
1880struct vmd_switch *
1881switch_getbyname(const char *name)
1882{
1883	struct vmd_switch	*vsw;
1884
1885	if (name == NULL)
1886		return (NULL);
1887	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1888		if (strcmp(vsw->sw_name, name) == 0)
1889			return (vsw);
1890	}
1891
1892	return (NULL);
1893}
1894
/*
 * get_string
 *
 * Copies the leading printable bytes of 'ptr' into a freshly allocated,
 * NUL-terminated string.  The input does not need to be NUL-terminated.
 *
 * Parameters:
 *  ptr: byte buffer to scan
 *  len: maximum number of bytes to inspect
 *
 * Return value:
 *  The allocated string (caller frees), or NULL on allocation failure.
 */
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 i;
	char	*s;

	for (i = 0; i < len; i++)
		if (!isprint((unsigned char)ptr[i]))
			break;

	/*
	 * Equivalent to strndup(3); avoids the implicit uint8_t */ /* ->
	 * char * pointer conversion the previous call relied on.
	 */
	if ((s = malloc(i + 1)) == NULL)
		return (NULL);
	memcpy(s, ptr, i);
	s[i] = '\0';

	return (s);
}
1906
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	uint8_t	 len = prefixlen;

	/* A /0 prefix is an all-zero mask. */
	if (len == 0)
		return (0);

	/* Clamp to the width of an IPv4 address. */
	if (len > 32)
		len = 32;

	/* Set the top 'len' bits, then convert to network byte order. */
	return (htonl(0xffffffff << (32 - len)));
}
1918
void
prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
{
	struct in6_addr	 tmp;
	int		 full, rem;

	/* Clamp to the width of an IPv6 address. */
	if (prefixlen > 128)
		prefixlen = 128;

	full = prefixlen / 8;
	rem = prefixlen % 8;

	/* Whole 0xff bytes first, then the partially-set trailing byte. */
	memset(&tmp, 0, sizeof(tmp));
	memset(tmp.s6_addr, 0xff, full);
	if (rem)
		tmp.s6_addr[full] = 0xff00 >> rem;

	memcpy(mask, &tmp, sizeof(tmp));
}
1937
1938void
1939getmonotime(struct timeval *tv)
1940{
1941	struct timespec	 ts;
1942
1943	if (clock_gettime(CLOCK_MONOTONIC, &ts))
1944		fatal("clock_gettime");
1945
1946	TIMESPEC_TO_TIMEVAL(tv, &ts);
1947}
1948
1949static inline void
1950vm_terminate(struct vmd_vm *vm, const char *caller)
1951{
1952	if (vm->vm_from_config)
1953		vm_stop(vm, 0, caller);
1954	else {
1955		/* vm_remove calls vm_stop */
1956		vm_remove(vm, caller);
1957	}
1958}
1959