/* vmd.c revision 1.88 */
1/*	$OpenBSD: vmd.c,v 1.88 2018/07/10 16:15:51 reyk Exp $	*/
2
3/*
4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h>	/* nitems */
20#include <sys/queue.h>
21#include <sys/wait.h>
22#include <sys/cdefs.h>
23#include <sys/stat.h>
24#include <sys/tty.h>
25#include <sys/ttycom.h>
26#include <sys/ioctl.h>
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <termios.h>
32#include <errno.h>
33#include <event.h>
34#include <fcntl.h>
35#include <pwd.h>
36#include <signal.h>
37#include <syslog.h>
38#include <unistd.h>
39#include <ctype.h>
40#include <pwd.h>
41#include <grp.h>
42
43#include <machine/specialreg.h>
44#include <machine/vmmvar.h>
45
46#include "proc.h"
47#include "atomicio.h"
48#include "vmd.h"
49
50__dead void usage(void);
51
52int	 main(int, char **);
53int	 vmd_configure(void);
54void	 vmd_sighdlr(int sig, short event, void *arg);
55void	 vmd_shutdown(void);
56int	 vmd_control_run(void);
57int	 vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
58int	 vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
59int	 check_vmh(struct vm_dump_header *);
60
61struct vmd	*env;
62
63static struct privsep_proc procs[] = {
64	/* Keep "priv" on top as procs[0] */
65	{ "priv",	PROC_PRIV,	NULL, priv },
66	{ "control",	PROC_CONTROL,	vmd_dispatch_control, control },
67	{ "vmm",	PROC_VMM,	vmd_dispatch_vmm, vmm, vmm_shutdown },
68};
69
70/* For the privileged process */
71static struct privsep_proc *proc_priv = &procs[0];
72static struct passwd proc_privpw;
73
/*
 * Dispatch an imsg received from the control process (vmctl clients).
 *
 * Per-request failures are reported back to the control process by
 * setting "res" (an errno value) and "cmd" (the response imsg type);
 * the trailing switch sends the response.  Returning -1 is reserved
 * for fatal channel errors and tears down the imsg connection.
 */
int
vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep			*ps = p->p_ps;
	int				 res = 0, ret = 0, cmd = 0, verbose;
	unsigned int			 v = 0;
	struct vmop_create_params	 vmc;
	struct vmop_id			 vid;
	struct vm_terminate_params	 vtp;
	struct vmop_result		 vmr;
	struct vm_dump_header		 vmh;
	struct vmd_vm			*vm = NULL;
	char				*str = NULL;
	uint32_t			 id = 0;
	struct control_sock		*rcs;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vmc);
		memcpy(&vmc, imsg->data, sizeof(vmc));
		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_uid);
		if (vmc.vmc_flags == 0) {
			/* start an existing VM with pre-configured options */
			if (!(ret == -1 && errno == EALREADY &&
			    vm->vm_running == 0)) {
				res = errno;
				cmd = IMSG_VMDOP_START_VM_RESPONSE;
			}
		} else if (ret != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		/* hand the registered VM to the vmm process to launch it */
		if (res == 0 &&
		    config_setvm(ps, vm,
		    imsg->hdr.peerid, vm->vm_params.vmc_uid) == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if ((id = vid.vid_id) == 0) {
			/* Lookup vm (id) by name */
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
				break;
			} else if (vm->vm_shutdown) {
				/* a shutdown is already in progress */
				res = EALREADY;
				cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
				break;
			} else if (vm->vm_running == 0) {
				res = EINVAL;
				cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
				break;
			}
			id = vm->vm_vmid;
		} else if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
			break;
		}
		/* only the owner (or root) may terminate a VM */
		if (vm_checkperm(vm, vid.vid_uid) != 0) {
			res = EPERM;
			cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
			break;
		}
		memset(&vtp, 0, sizeof(vtp));
		vtp.vtp_vm_id = id;
		if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vtp, sizeof(vtp)) == -1)
			return (-1);
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		/* vmm answers with the list of running VMs */
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		break;
	case IMSG_VMDOP_LOAD:
		IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
		str = get_string((uint8_t *)imsg->data,
		    IMSG_DATA_SIZE(imsg));
		/* FALLTHROUGH: LOAD is a RELOAD with an explicit file */
	case IMSG_VMDOP_RELOAD:
		if (vmd_reload(0, str) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		free(str);
		break;
	case IMSG_CTL_RESET:
		IMSG_SIZE_CHECK(imsg, &v);
		memcpy(&v, imsg->data, sizeof(v));
		/* v is the reset level passed through to config_purge() */
		if (vmd_reload(v, NULL) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		break;
	case IMSG_CTL_VERBOSE:
		IMSG_SIZE_CHECK(imsg, &verbose);
		memcpy(&verbose, imsg->data, sizeof(verbose));
		log_setverbose(verbose);

		/* propagate the new verbosity to the child processes */
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
		cmd = IMSG_CTL_OK;
		break;
	case IMSG_VMDOP_PAUSE_VM:
	case IMSG_VMDOP_UNPAUSE_VM:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
			/*
			 * NOTE(review): this uses vm_getbyid() (kernel vcp_id)
			 * while the terminate/send paths use vm_getbyvmid()
			 * for the same vid_id field -- confirm which id space
			 * vid_id is in here.
			 */
			res = ENOENT;
			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
			break;
		}
		if (vm_checkperm(vm, vid.vid_uid) != 0) {
			res = EPERM;
			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
			break;
		}
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_SEND_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
				/* drop the fd the client sent for the dump */
				close(imsg->fd);
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
			close(imsg->fd);
			break;
		} else {
			/* found by vmd id; nothing further to resolve */
		}
		vmr.vmr_id = vid.vid_id;
		log_debug("%s: sending fd to vmm", __func__);
		/* vmm writes the VM state to imsg->fd */
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (imsg->fd == -1) {
			log_warnx("%s: invalid fd", __func__);
			return (-1);
		}
		/* the dump starts with a header describing the source host */
		if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) !=
		    sizeof(vmh)) {
			log_warnx("%s: error reading vmh from received vm",
			    __func__);
			res = EIO;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}

		/* refuse dumps from an incompatible CPU or format version */
		if (check_vmh(&vmh)) {
			res = ENOENT;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		/* followed by the create parameters of the sent VM */
		if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) !=
		    sizeof(vmc)) {
			log_warnx("%s: error reading vmc from received vm",
			    __func__);
			res = EIO;
			close(imsg->fd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		/* the receiver may rename the VM; force a fresh kernel id */
		strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
		    sizeof(vmc.vmc_params.vcp_name));
		vmc.vmc_params.vcp_id = 0;

		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_uid);
		if (ret != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			close(imsg->fd);
		} else {
			vm->vm_received = 1;
			config_setvm(ps, vm, imsg->hdr.peerid, vmc.vmc_uid);
			log_debug("%s: sending fd to vmm", __func__);
			/* vmm restores the VM state from imsg->fd */
			proc_compose_imsg(ps, PROC_VMM, -1,
			    IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd,
			    NULL, 0);
		}
		break;
	case IMSG_VMDOP_DONE:
		/* initial config is done; open the control sockets */
		control_reset(&ps->ps_csock);
		TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
			control_reset(rcs);
		cmd = 0;
		break;
	default:
		return (-1);
	}

	/* send the response, if any, back to the control process */
	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}
310
/*
 * Dispatch an imsg received from the vmm process (responses and
 * asynchronous events for running VMs).  Returns 0 on success or -1 on
 * fatal error, which tears down the imsg channel.
 */
int
vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct vmop_result	 vmr;
	struct privsep		*ps = p->p_ps;
	int			 res = 0;
	struct vmd_vm		*vm;
	struct vm_create_params	*vcp;
	struct vmop_info_result	 vir;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		/*
		 * NOTE(review): sizeof(imsg->data) is the size of the data
		 * pointer, not of the payload -- confirm the intended length
		 * here (IMSG_DATA_SIZE(imsg) or sizeof(vmr)?).
		 */
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type, imsg->hdr.peerid, -1,
		    imsg->data, sizeof(imsg->data));
		log_info("%s: paused vm %d successfully",
		    vm->vm_params.vmc_params.vcp_name,
		    vm->vm_vmid);
		break;
	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		/* NOTE(review): same sizeof(imsg->data) concern as above */
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type, imsg->hdr.peerid, -1,
		    imsg->data, sizeof(imsg->data));
		log_info("%s: unpaused vm %d successfully.",
		    vm->vm_params.vmc_params.vcp_name,
		    vm->vm_vmid);
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		/* the peerid carries the vmd id of the started VM */
		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
			break;
		vm->vm_pid = vmr.vmr_pid;
		vcp = &vm->vm_params.vmc_params;
		vcp->vcp_id = vmr.vmr_id;	/* kernel-assigned VM id */

		/*
		 * If the peerid is not -1, forward the response back to the
		 * the control socket.  If it is -1, the request originated
		 * from the parent, not the control socket.
		 */
		if (vm->vm_peerid != (uint32_t)-1) {
			(void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
			    sizeof(vmr.vmr_ttyname));
			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
			    imsg->hdr.type, vm->vm_peerid, -1,
			    &vmr, sizeof(vmr)) == -1) {
				errno = vmr.vmr_result;
				log_warn("%s: failed to foward vm result",
				    vcp->vcp_name);
				vm_remove(vm, __func__);
				return (-1);
			}
		}

		if (vmr.vmr_result) {
			/* vmm could not start the VM; drop it */
			errno = vmr.vmr_result;
			log_warn("%s: failed to start vm", vcp->vcp_name);
			vm_remove(vm, __func__);
			break;
		}

		/* Now configure all the interfaces */
		if (vm_priv_ifconfig(ps, vm) == -1) {
			log_warn("%s: failed to configure vm", vcp->vcp_name);
			vm_remove(vm, __func__);
			break;
		}

		log_info("%s: started vm %d successfully, tty %s",
		    vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
		break;
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
		    __func__, vmr.vmr_id);
		proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		if (vmr.vmr_result == 0) {
			/* Mark VM as shutting down */
			vm->vm_shutdown = 1;
		}
		break;
	case IMSG_VMDOP_SEND_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		if (!vmr.vmr_result)
			log_info("%s: sent vm %d successfully.",
			    vm->vm_params.vmc_params.vcp_name,
			    vm->vm_vmid);
		/*
		 * FALLTHROUGH: a sent VM is terminated locally, so the
		 * TERMINATE_VM_EVENT handling below applies to it too.
		 */
	case IMSG_VMDOP_TERMINATE_VM_EVENT:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
		    __func__, vmr.vmr_id, vmr.vmr_result);
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
			log_debug("%s: vm %d is no longer available",
			    __func__, vmr.vmr_id);
			break;
		}
		if (vmr.vmr_result != EAGAIN) {
			/* keep VMs that came from the config file registered */
			if (vm->vm_from_config) {
				vm_stop(vm, 0, __func__);
			} else {
				vm_remove(vm, __func__);
			}
		} else {
			/* Stop VM instance but keep the tty open */
			vm_stop(vm, 1, __func__);
			config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_DATA:
		IMSG_SIZE_CHECK(imsg, &vir);
		memcpy(&vir, imsg->data, sizeof(vir));
		/* augment vmm's info with parent-only state (tty, uid) */
		if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
			memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
			if (vm->vm_ttyname != NULL)
				strlcpy(vir.vir_ttyname, vm->vm_ttyname,
				    sizeof(vir.vir_ttyname));
			if (vm->vm_shutdown) {
				/* XXX there might be a nicer way */
				(void)strlcat(vir.vir_info.vir_name,
				    " - stopping",
				    sizeof(vir.vir_info.vir_name));
			}
			/* get the user id who started the vm */
			vir.vir_uid = vm->vm_uid;
			vir.vir_gid = vm->vm_params.vmc_gid;
		}
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
			/*
			 * NOTE(review): vm may be NULL here (lookup above can
			 * fail), so the log/vm_remove below would dereference
			 * NULL -- confirm.
			 */
			log_debug("%s: GET_INFO_VM failed for vm %d, removing",
			    __func__, vm->vm_vmid);
			vm_remove(vm, __func__);
			return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
		/*
		 * PROC_VMM has responded with the *running* VMs, now we
		 * append the others. These use the special value 0 for their
		 * kernel id to indicate that they are not running.
		 */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			if (!vm->vm_running) {
				memset(&vir, 0, sizeof(vir));
				vir.vir_info.vir_id = vm->vm_vmid;
				strlcpy(vir.vir_info.vir_name,
				    vm->vm_params.vmc_params.vcp_name,
				    VMM_MAX_NAME_LEN);
				vir.vir_info.vir_memory_size =
				    vm->vm_params.vmc_params.
				    vcp_memranges[0].vmr_size;
				vir.vir_info.vir_ncpus =
				    vm->vm_params.vmc_params.vcp_ncpus;
				/* get the configured user id for this vm */
				vir.vir_uid = vm->vm_params.vmc_uid;
				vir.vir_gid = vm->vm_params.vmc_gid;
				if (proc_compose_imsg(ps, PROC_CONTROL, -1,
				    IMSG_VMDOP_GET_INFO_VM_DATA,
				    imsg->hdr.peerid, -1, &vir,
				    sizeof(vir)) == -1) {
					log_debug("%s: GET_INFO_VM_END failed",
					    __func__);
					vm_remove(vm, __func__);
					return (-1);
				}
			}
		}
		IMSG_SIZE_CHECK(imsg, &res);
		/* forward the end-of-list marker to the control process */
		proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
		break;
	default:
		return (-1);
	}

	return (0);
}
502
503int
504check_vmh(struct vm_dump_header *vmh)
505{
506	int i;
507	unsigned int code, leaf;
508	unsigned int a, b, c, d;
509
510
511	if (vmh->vmh_version != VM_DUMP_VERSION) {
512		log_warnx("%s: incompatible dump version", __func__);
513		return (-1);
514	}
515
516	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
517		code = vmh->vmh_cpuids[i].code;
518		leaf = vmh->vmh_cpuids[i].leaf;
519		if (leaf != 0x00) {
520			log_debug("%s: invalid leaf 0x%x for code 0x%x",
521			    __func__, leaf, code);
522			return (-1);
523		}
524
525		switch (code) {
526		case 0x00:
527		CPUID_LEAF(code, leaf, a, b, c, d);
528		if (vmh->vmh_cpuids[i].a > a) {
529			log_debug("%s: incompatible cpuid level", __func__);
530			return (-1);
531		}
532		if (!(vmh->vmh_cpuids[i].b == b &&
533		    vmh->vmh_cpuids[i].c == c &&
534		    vmh->vmh_cpuids[i].d == d)) {
535			log_debug("%s: incompatible cpu brand", __func__);
536			return (-1);
537		}
538		break;
539
540		case 0x01:
541		CPUID_LEAF(code, leaf, a, b, c, d);
542		if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
543		    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
544			log_debug("%s: incompatible cpu features "
545			    "code: 0x%x leaf: 0x%x  reg: c", __func__,
546			    code, leaf);
547			return (-1);
548		}
549		if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
550		    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
551			log_debug("%s: incompatible cpu features "
552			    "code: 0x%x leaf: 0x%x  reg: d", __func__,
553			    code, leaf);
554			return (-1);
555		}
556		break;
557
558		case 0x07:
559		CPUID_LEAF(code, leaf, a, b, c, d);
560		if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
561		    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
562			log_debug("%s: incompatible cpu features "
563			    "code: 0x%x leaf: 0x%x  reg: c", __func__,
564			    code, leaf);
565			return (-1);
566		}
567		if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
568		    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
569			log_debug("%s: incompatible cpu features "
570			    "code: 0x%x leaf: 0x%x  reg: d", __func__,
571			    code, leaf);
572			return (-1);
573		}
574		break;
575
576		case 0x0d:
577		CPUID_LEAF(code, leaf, a, b, c, d);
578		if (vmh->vmh_cpuids[i].b > b) {
579			log_debug("%s: incompatible cpu: insufficient "
580			    "max save area for enabled XCR0 features",
581			    __func__);
582			return (-1);
583		}
584		if (vmh->vmh_cpuids[i].c > c) {
585			log_debug("%s: incompatible cpu: insufficient "
586			    "max save area for supported XCR0 features",
587			    __func__);
588			return (-1);
589		}
590		break;
591
592		case 0x80000001:
593		CPUID_LEAF(code, leaf, a, b, c, d);
594		if ((vmh->vmh_cpuids[i].a & a) != vmh->vmh_cpuids[i].a) {
595			log_debug("%s: incompatible cpu features "
596			    "code: 0x%x leaf: 0x%x  reg: a", __func__,
597			    code, leaf);
598			return (-1);
599		}
600		if ((vmh->vmh_cpuids[i].c & c) != vmh->vmh_cpuids[i].c) {
601			log_debug("%s: incompatible cpu features "
602			    "code: 0x%x leaf: 0x%x  reg: c", __func__,
603			    code, leaf);
604			return (-1);
605		}
606		if ((vmh->vmh_cpuids[i].d & d) != vmh->vmh_cpuids[i].d) {
607			log_debug("%s: incompatible cpu features "
608			    "code: 0x%x leaf: 0x%x  reg: d", __func__,
609			    code, leaf);
610			return (-1);
611		}
612		break;
613
614		default:
615		log_debug("%s: unknown code 0x%x", __func__, code);
616		return (-1);
617		}
618	}
619
620	return (0);
621}
622
623void
624vmd_sighdlr(int sig, short event, void *arg)
625{
626	if (privsep_process != PROC_PARENT)
627		return;
628	log_debug("%s: handling signal", __func__);
629
630	switch (sig) {
631	case SIGHUP:
632		log_info("%s: reload requested with SIGHUP", __func__);
633
634		/*
635		 * This is safe because libevent uses async signal handlers
636		 * that run in the event loop and not in signal context.
637		 */
638		(void)vmd_reload(0, NULL);
639		break;
640	case SIGPIPE:
641		log_info("%s: ignoring SIGPIPE", __func__);
642		break;
643	case SIGUSR1:
644		log_info("%s: ignoring SIGUSR1", __func__);
645		break;
646	case SIGTERM:
647	case SIGINT:
648		vmd_shutdown();
649		break;
650	default:
651		fatalx("unexpected signal");
652	}
653}
654
655__dead void
656usage(void)
657{
658	extern char *__progname;
659	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
660	    __progname);
661	exit(1);
662}
663
/*
 * vmd entry point: parse options, set up the privsep environment,
 * fork the child processes (priv, control, vmm) and run the parent's
 * event loop.  Only the parent returns from proc_init().
 */
int
main(int argc, char **argv)
{
	struct privsep		*ps;
	int			 ch;
	const char		*conffile = VMD_CONF;
	enum privsep_procid	 proc_id = PROC_PARENT;
	int			 proc_instance = 0;
	const char		*errp, *title = NULL;
	int			 argc0 = argc;	/* saved for proc_init re-exec */

	log_init(0, LOG_DAEMON);

	if ((env = calloc(1, sizeof(*env))) == NULL)
		fatal("calloc: env");

	while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) {
		switch (ch) {
		case 'D':
			/* define a config-file macro on the command line */
			if (cmdline_symset(optarg) < 0)
				log_warnx("could not parse macro definition %s",
				    optarg);
			break;
		case 'd':
			env->vmd_debug = 2;
			break;
		case 'f':
			conffile = optarg;
			break;
		case 'v':
			env->vmd_verbose++;
			break;
		case 'n':
			/* config test mode: parse and exit */
			env->vmd_noaction = 1;
			break;
		case 'P':
			/* run as the named child process (re-exec path) */
			title = optarg;
			proc_id = proc_getid(procs, nitems(procs), title);
			if (proc_id == PROC_MAX)
				fatalx("invalid process name");
			break;
		case 'I':
			proc_instance = strtonum(optarg, 0,
			    PROC_MAX_INSTANCES, &errp);
			if (errp)
				fatalx("invalid process instance");
			break;
		default:
			usage();
		}
	}

	argc -= optind;
	if (argc > 0)
		usage();

	if (env->vmd_noaction && !env->vmd_debug)
		env->vmd_debug = 1;

	/* check for root privileges */
	if (env->vmd_noaction == 0) {
		if (geteuid())
			fatalx("need root privileges");
	}

	ps = &env->vmd_ps;
	ps->ps_env = env;
	env->vmd_fd = -1;

	if (config_init(env) == -1)
		fatal("failed to initialize configuration");

	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
		fatal("unknown user %s", VMD_USER);

	/* First proc runs as root without pledge but in default chroot */
	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

	/* Open /dev/vmm */
	if (env->vmd_noaction == 0) {
		env->vmd_fd = open(VMM_NODE, O_RDWR);
		if (env->vmd_fd == -1)
			fatal("%s", VMM_NODE);
	}

	/* Configure the control socket */
	ps->ps_csock.cs_name = SOCKET_NAME;
	TAILQ_INIT(&ps->ps_rcsocks);

	/* Configuration will be parsed after forking the children */
	env->vmd_conffile = conffile;

	log_init(env->vmd_debug, LOG_DAEMON);
	log_setverbose(env->vmd_verbose);

	if (env->vmd_noaction)
		ps->ps_noaction = 1;
	ps->ps_instance = proc_instance;
	if (title != NULL)
		ps->ps_title[proc_id] = title;

	/* only the parent returns */
	proc_init(ps, procs, nitems(procs), argc0, argv, proc_id);

	log_procinit("parent");
	if (!env->vmd_debug && daemon(0, 0) == -1)
		fatal("can't daemonize");

	if (ps->ps_noaction == 0)
		log_info("startup");

	event_init();

	/* route all signals through the libevent loop */
	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);

	signal_add(&ps->ps_evsigint, NULL);
	signal_add(&ps->ps_evsigterm, NULL);
	signal_add(&ps->ps_evsighup, NULL);
	signal_add(&ps->ps_evsigpipe, NULL);
	signal_add(&ps->ps_evsigusr1, NULL);

	if (!env->vmd_noaction)
		proc_connect(ps);

	if (vmd_configure() == -1)
		fatalx("configuration failed");

	event_dispatch();

	log_debug("parent exiting");

	return (0);
}
802
/*
 * Parent-process startup configuration: pledge, parse the config file,
 * push the global config to the children, create the configured
 * switches and start the configured VMs.
 *
 * Returns 0 on success, -1 on failure (config test mode exits here).
 */
int
vmd_configure(void)
{
	struct vmd_vm		*vm;
	struct vmd_switch	*vsw;

	/* pty master device, used later to allocate VM consoles */
	if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
		fatal("open %s", PATH_PTMDEV);

	/*
	 * pledge in the parent process:
	 * stdio - for malloc and basic I/O including events.
	 * rpath - for reload to open and read the configuration files.
	 * wpath - for opening disk images and tap devices.
	 * tty - for openpty and TIOCUCNTL.
	 * proc - run kill to terminate its children safely.
	 * sendfd - for disks, interfaces and other fds.
	 * recvfd - for send and receive.
	 * getpw - lookup user or group id by name.
	 * chown, fattr - change tty ownership
	 * flock - locking disk files
	 */
	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
	    " chown fattr flock", NULL) == -1)
		fatal("pledge");

	if (parse_config(env->vmd_conffile) == -1) {
		proc_kill(&env->vmd_ps);
		exit(1);
	}

	/* -n: config test mode, report and exit */
	if (env->vmd_noaction) {
		fprintf(stderr, "configuration OK\n");
		proc_kill(&env->vmd_ps);
		exit(0);
	}

	/* Send shared global configuration to all children */
	if (config_setconfig(env) == -1)
		return (-1);

	/* bring up the virtual switches before the VMs that use them */
	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (vsw->sw_running)
			continue;
		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
			log_warn("%s: failed to create switch %s",
			    __func__, vsw->sw_name);
			switch_remove(vsw);
			return (-1);
		}
	}

	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_disabled) {
			log_debug("%s: not creating vm %s (disabled)",
			    __func__,
			    vm->vm_params.vmc_params.vcp_name);
			continue;
		}
		/* peerid -1: request came from the parent, not a client */
		if (config_setvm(&env->vmd_ps, vm,
		    -1, vm->vm_params.vmc_uid) == -1)
			return (-1);
	}

	return (0);
}
869
/*
 * Reload or reset the configuration.
 *
 * reset: non-zero is a reset level passed to config_purge()/setreset();
 *        zero means (re)load the config file.
 * filename: config file to load; NULL or "" selects the default file
 *        and marks this as a "reload" (vs. an additive "load").
 *
 * Returns 0 on success, -1 on failure.
 */
int
vmd_reload(unsigned int reset, const char *filename)
{
	struct vmd_vm		*vm, *next_vm;
	struct vmd_switch	*vsw;
	int			 reload = 0;

	/* Switch back to the default config file */
	if (filename == NULL || *filename == '\0') {
		filename = env->vmd_conffile;
		reload = 1;
	}

	log_debug("%s: level %d config file %s", __func__, reset, filename);

	if (reset) {
		/* Purge the configuration */
		config_purge(env, reset);
		config_setreset(env, reset);
	} else {
		/*
		 * Load or reload the configuration.
		 *
		 * Reloading removes all non-running VMs before processing the
		 * config file, whereas loading only adds to the existing list
		 * of VMs.
		 */

		if (reload) {
			/* _SAFE: vm_remove unlinks the current element */
			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
			    next_vm) {
				if (vm->vm_running == 0) {
					DPRINTF("%s: calling vm_remove",
					    __func__);
					vm_remove(vm, __func__);
				}
			}
		}

		if (parse_config(filename) == -1) {
			log_debug("%s: failed to load config file %s",
			    __func__, filename);
			return (-1);
		}

		if (reload) {
			/* Update shared global configuration in all children */
			if (config_setconfig(env) == -1)
				return (-1);
		}

		/* create any switches added by the new configuration */
		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
			if (vsw->sw_running)
				continue;
			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
				log_warn("%s: failed to create switch %s",
				    __func__, vsw->sw_name);
				switch_remove(vsw);
				return (-1);
			}
		}

		/* start newly configured VMs; leave running ones alone */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			if (vm->vm_running == 0) {
				if (vm->vm_disabled) {
					log_debug("%s: not creating vm %s"
					    " (disabled)", __func__,
					    vm->vm_params.vmc_params.vcp_name);
					continue;
				}
				if (config_setvm(&env->vmd_ps, vm,
				    -1, vm->vm_params.vmc_uid) == -1)
					return (-1);
			} else {
				log_debug("%s: not creating vm \"%s\": "
				    "(running)", __func__,
				    vm->vm_params.vmc_params.vcp_name);
			}
		}
	}

	return (0);
}
953
954void
955vmd_shutdown(void)
956{
957	struct vmd_vm *vm, *vm_next;
958
959	log_debug("%s: performing shutdown", __func__);
960
961	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
962		vm_remove(vm, __func__);
963	}
964
965	proc_kill(&env->vmd_ps);
966	free(env);
967
968	log_warnx("parent terminating");
969	exit(0);
970}
971
972struct vmd_vm *
973vm_getbyvmid(uint32_t vmid)
974{
975	struct vmd_vm	*vm;
976
977	if (vmid == 0)
978		return (NULL);
979	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
980		if (vm->vm_vmid == vmid)
981			return (vm);
982	}
983
984	return (NULL);
985}
986
987struct vmd_vm *
988vm_getbyid(uint32_t id)
989{
990	struct vmd_vm	*vm;
991
992	if (id == 0)
993		return (NULL);
994	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
995		if (vm->vm_params.vmc_params.vcp_id == id)
996			return (vm);
997	}
998
999	return (NULL);
1000}
1001
1002uint32_t
1003vm_id2vmid(uint32_t id, struct vmd_vm *vm)
1004{
1005	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
1006		return (0);
1007	dprintf("%s: vmm id %u is vmid %u", __func__,
1008	    id, vm->vm_vmid);
1009	return (vm->vm_vmid);
1010}
1011
1012uint32_t
1013vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
1014{
1015	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
1016		return (0);
1017	dprintf("%s: vmid %u is vmm id %u", __func__,
1018	    vmid, vm->vm_params.vmc_params.vcp_id);
1019	return (vm->vm_params.vmc_params.vcp_id);
1020}
1021
1022struct vmd_vm *
1023vm_getbyname(const char *name)
1024{
1025	struct vmd_vm	*vm;
1026
1027	if (name == NULL)
1028		return (NULL);
1029	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1030		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
1031			return (vm);
1032	}
1033
1034	return (NULL);
1035}
1036
1037struct vmd_vm *
1038vm_getbypid(pid_t pid)
1039{
1040	struct vmd_vm	*vm;
1041
1042	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1043		if (vm->vm_pid == pid)
1044			return (vm);
1045	}
1046
1047	return (NULL);
1048}
1049
/*
 * Stop a VM instance: close its imsg channel, disk, interface, kernel
 * and cdrom file descriptors and free the per-interface strings.  The
 * VM stays registered in env->vmd_vms.
 *
 * keeptty: when non-zero the console tty is kept open (used when the
 * VM is expected to be restarted, e.g. after a send).
 */
void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
	struct privsep	*ps = &env->vmd_ps;
	unsigned int	 i;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s stopping vm %d%s",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");

	vm->vm_running = 0;
	vm->vm_shutdown = 0;

	/* tear down the imsg channel to the vm process */
	if (vm->vm_iev.ibuf.fd != -1) {
		event_del(&vm->vm_iev.ev);
		close(vm->vm_iev.ibuf.fd);
	}
	for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
		if (vm->vm_disks[i] != -1) {
			close(vm->vm_disks[i]);
			vm->vm_disks[i] = -1;
		}
	}
	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
		if (vm->vm_ifs[i].vif_fd != -1) {
			close(vm->vm_ifs[i].vif_fd);
			vm->vm_ifs[i].vif_fd = -1;
		}
		/* free(NULL) is a no-op; reset so a restart reallocates */
		free(vm->vm_ifs[i].vif_name);
		free(vm->vm_ifs[i].vif_switch);
		free(vm->vm_ifs[i].vif_group);
		vm->vm_ifs[i].vif_name = NULL;
		vm->vm_ifs[i].vif_switch = NULL;
		vm->vm_ifs[i].vif_group = NULL;
	}
	if (vm->vm_kernel != -1) {
		close(vm->vm_kernel);
		vm->vm_kernel = -1;
	}
	if (vm->vm_cdrom != -1) {
		close(vm->vm_cdrom);
		vm->vm_cdrom = -1;
	}
	if (!keeptty) {
		vm_closetty(vm);
		vm->vm_uid = 0;
	}
}
1101
1102void
1103vm_remove(struct vmd_vm *vm, const char *caller)
1104{
1105	struct privsep	*ps = &env->vmd_ps;
1106
1107	if (vm == NULL)
1108		return;
1109
1110	log_debug("%s: %s %s removing vm %d from running config",
1111	    __func__, ps->ps_title[privsep_process], caller,
1112	    vm->vm_vmid);
1113
1114	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
1115
1116	vm_stop(vm, 0, caller);
1117	free(vm);
1118}
1119
1120int
1121vm_register(struct privsep *ps, struct vmop_create_params *vmc,
1122    struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
1123{
1124	struct vmd_vm		*vm = NULL;
1125	struct vm_create_params	*vcp = &vmc->vmc_params;
1126	static const uint8_t	 zero_mac[ETHER_ADDR_LEN];
1127	uint32_t		 rng;
1128	unsigned int		 i;
1129	struct vmd_switch	*sw;
1130	char			*s;
1131
1132	errno = 0;
1133	*ret_vm = NULL;
1134
1135	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
1136	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
1137		if (vm_checkperm(vm, uid) != 0) {
1138			errno = EPERM;
1139			goto fail;
1140		}
1141		*ret_vm = vm;
1142		errno = EALREADY;
1143		goto fail;
1144	}
1145
1146	/*
1147	 * non-root users can only start existing VMs
1148	 * XXX there could be a mechanism to allow overriding some options
1149	 */
1150	if (vm_checkperm(NULL, uid) != 0) {
1151		errno = EPERM;
1152		goto fail;
1153	}
1154	if (vmc->vmc_flags == 0) {
1155		errno = ENOENT;
1156		goto fail;
1157	}
1158	if (vcp->vcp_ncpus == 0)
1159		vcp->vcp_ncpus = 1;
1160	if (vcp->vcp_memranges[0].vmr_size == 0)
1161		vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
1162	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
1163		log_warnx("invalid number of CPUs");
1164		goto fail;
1165	} else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) {
1166		log_warnx("invalid number of disks");
1167		goto fail;
1168	} else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) {
1169		log_warnx("invalid number of interfaces");
1170		goto fail;
1171	} else if (strlen(vcp->vcp_kernel) == 0 &&
1172	    vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) {
1173		log_warnx("no kernel or disk/cdrom specified");
1174		goto fail;
1175	} else if (strlen(vcp->vcp_name) == 0) {
1176		log_warnx("invalid VM name");
1177		goto fail;
1178	} else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
1179	    *vcp->vcp_name == '_') {
1180		log_warnx("invalid VM name");
1181		goto fail;
1182	} else {
1183		for (s = vcp->vcp_name; *s != '\0'; ++s) {
1184			if (!(isalnum(*s) || *s == '.' || *s == '-' ||
1185			    *s == '_')) {
1186				log_warnx("invalid VM name");
1187				goto fail;
1188			}
1189		}
1190	}
1191
1192	if ((vm = calloc(1, sizeof(*vm))) == NULL)
1193		goto fail;
1194
1195	memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
1196	vmc = &vm->vm_params;
1197	vcp = &vmc->vmc_params;
1198	vm->vm_pid = -1;
1199	vm->vm_tty = -1;
1200	vm->vm_receive_fd = -1;
1201	vm->vm_paused = 0;
1202
1203	for (i = 0; i < vcp->vcp_ndisks; i++)
1204		vm->vm_disks[i] = -1;
1205	for (i = 0; i < vcp->vcp_nnics; i++) {
1206		vm->vm_ifs[i].vif_fd = -1;
1207
1208		if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
1209			/* inherit per-interface flags from the switch */
1210			vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
1211		}
1212
1213		/*
1214		 * If the MAC address is zero, always randomize it in vmd(8)
1215		 * because we cannot rely on the guest OS to do the right
1216		 * thing like OpenBSD does.  Based on ether_fakeaddr()
1217		 * from the kernel, incremented by one to differentiate
1218		 * the source.
1219		 */
1220		if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) {
1221			rng = arc4random();
1222			vcp->vcp_macs[i][0] = 0xfe;
1223			vcp->vcp_macs[i][1] = 0xe1;
1224			vcp->vcp_macs[i][2] = 0xba + 1;
1225			vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
1226			vcp->vcp_macs[i][4] = rng;
1227			vcp->vcp_macs[i][5] = rng >> 8;
1228		}
1229	}
1230	vm->vm_kernel = -1;
1231	vm->vm_cdrom = -1;
1232	vm->vm_iev.ibuf.fd = -1;
1233
1234	if (++env->vmd_nvm == 0)
1235		fatalx("too many vms");
1236
1237	/* Assign a new internal Id if not specified */
1238	vm->vm_vmid = id == 0 ? env->vmd_nvm : id;
1239
1240	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
1241	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);
1242
1243	*ret_vm = vm;
1244	return (0);
1245 fail:
1246	if (errno == 0)
1247		errno = EINVAL;
1248	return (-1);
1249}
1250
1251/*
1252 * vm_checkperm
1253 *
1254 * Checks if the user represented by the 'uid' parameter is allowed to
1255 * manipulate the VM described by the 'vm' parameter (or connect to said VM's
1256 * console.)
1257 *
1258 * Parameters:
1259 *  vm: the VM whose permission is to be checked
1260 *  uid: the user ID of the user making the request
1261 *
1262 * Return values:
1263 *   0: the permission should be granted
1264 *  -1: the permission check failed (also returned if vm == null)
1265 */
1266int
1267vm_checkperm(struct vmd_vm *vm, uid_t uid)
1268{
1269	struct group	*gr;
1270	struct passwd	*pw;
1271	char		**grmem;
1272
1273	/* root has no restrictions */
1274	if (uid == 0)
1275		return (0);
1276
1277	if (vm == NULL)
1278		return (-1);
1279
1280	/* check supplementary groups */
1281	if (vm->vm_params.vmc_gid != -1 &&
1282	    (pw = getpwuid(uid)) != NULL &&
1283	    (gr = getgrgid(vm->vm_params.vmc_gid)) != NULL) {
1284		for (grmem = gr->gr_mem; *grmem; grmem++)
1285			if (strcmp(*grmem, pw->pw_name) == 0)
1286				return (0);
1287	}
1288
1289	/* check user */
1290	if ((vm->vm_running && vm->vm_uid == uid) ||
1291	    (!vm->vm_running && vm->vm_params.vmc_uid == uid))
1292		return (0);
1293
1294	return (-1);
1295}
1296
/*
 * vm_opentty
 *
 * Allocates a pty for the VM's serial console via the pre-opened
 * /dev/ptm fd (env->vmd_ptmfd) and adjusts its ownership and mode.
 * On success, vm->vm_tty holds the controller fd and vm->vm_ttyname
 * the slave device path.
 *
 * Parameters:
 *  vm: the VM to open a console tty for
 *
 * Return values:
 *   0: success
 *  -1: failure; any partially set up tty state is released
 */
int
vm_opentty(struct vmd_vm *vm)
{
	struct ptmget		 ptm;
	struct stat		 st;
	struct group		*gr;
	uid_t			 uid;
	gid_t			 gid;
	mode_t			 mode;
	int			 on;

	/*
	 * Open tty with pre-opened PTM fd
	 */
	if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1))
		return (-1);

	/*
	 * We use user ioctl(2) mode to pass break commands.
	 */
	on = 1;
	if (ioctl(ptm.cfd, TIOCUCNTL, &on))
		fatal("could not enable user ioctl mode");

	/* Keep the controller side; the slave fd is not needed here. */
	vm->vm_tty = ptm.cfd;
	close(ptm.sfd);
	if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL)
		goto fail;

	uid = vm->vm_uid;
	gid = vm->vm_params.vmc_gid;

	/*
	 * Pick group/mode for the slave device: a configured owner
	 * group gets group read/write; otherwise fall back to the
	 * "tty" group with group-write only, or to root with no
	 * group/other access at all.
	 */
	if (vm->vm_params.vmc_gid != -1) {
		mode = 0660;
	} else if ((gr = getgrnam("tty")) != NULL) {
		gid = gr->gr_gid;
		mode = 0620;
	} else {
		mode = 0600;
		gid = 0;
	}

	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
	    __func__, vm->vm_params.vmc_params.vcp_name,
	    vm->vm_ttyname, uid, gid, mode);

	/*
	 * Change ownership and mode of the tty as required.
	 * Loosely based on the implementation of sshpty.c
	 */
	if (stat(vm->vm_ttyname, &st) == -1)
		goto fail;

	/* Only chown when owner or group actually differ. */
	if (st.st_uid != uid || st.st_gid != gid) {
		if (chown(vm->vm_ttyname, uid, gid) == -1) {
			log_warn("chown %s %d %d failed, uid %d",
			    vm->vm_ttyname, uid, gid, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	/* Only chmod when the permission bits actually differ. */
	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
		if (chmod(vm->vm_ttyname, mode) == -1) {
			log_warn("chmod %s %o failed, uid %d",
			    vm->vm_ttyname, mode, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	return (0);
 fail:
	/* Tears down vm_tty/vm_ttyname set up above. */
	vm_closetty(vm);
	return (-1);
}
1379
1380void
1381vm_closetty(struct vmd_vm *vm)
1382{
1383	if (vm->vm_tty != -1) {
1384		/* Release and close the tty */
1385		if (fchown(vm->vm_tty, 0, 0) == -1)
1386			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
1387		if (fchmod(vm->vm_tty, 0666) == -1)
1388			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
1389		close(vm->vm_tty);
1390		vm->vm_tty = -1;
1391	}
1392	free(vm->vm_ttyname);
1393	vm->vm_ttyname = NULL;
1394}
1395
1396void
1397switch_remove(struct vmd_switch *vsw)
1398{
1399	if (vsw == NULL)
1400		return;
1401
1402	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);
1403
1404	free(vsw->sw_group);
1405	free(vsw->sw_name);
1406	free(vsw);
1407}
1408
1409struct vmd_switch *
1410switch_getbyname(const char *name)
1411{
1412	struct vmd_switch	*vsw;
1413
1414	if (name == NULL)
1415		return (NULL);
1416	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1417		if (strcmp(vsw->sw_name, name) == 0)
1418			return (vsw);
1419	}
1420
1421	return (NULL);
1422}
1423
/*
 * get_string
 *
 * Copies the leading run of printable characters from an untrusted
 * byte buffer into a freshly allocated NUL-terminated string.  The
 * copy stops at the first non-printable byte or after 'len' bytes.
 *
 * Parameters:
 *  ptr: the input buffer (not necessarily NUL-terminated)
 *  len: the number of valid bytes in the buffer
 *
 * Return values:
 *  A heap-allocated string the caller must free, or NULL on
 *  allocation failure.
 */
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 i;
	char	*s;

	/* uint8_t values are non-negative, so isprint() is safe here. */
	for (i = 0; i < len; i++)
		if (!isprint(ptr[i]))
			break;

	/*
	 * Copy explicitly instead of strndup(ptr, i): passing a
	 * uint8_t * where strndup() expects const char * is an
	 * incompatible pointer type.
	 */
	if ((s = malloc(i + 1)) == NULL)
		return (NULL);
	memcpy(s, ptr, i);
	s[i] = '\0';

	return (s);
}
1435
/*
 * prefixlen2mask
 *
 * Converts an IPv4 CIDR prefix length into a netmask in network
 * byte order.  Lengths greater than 32 are clamped to 32.
 */
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	uint32_t	 hostbits;

	/* A /0 prefix has an all-zero mask. */
	if (prefixlen == 0)
		return (0);

	/* Clamp to the width of an IPv4 address. */
	if (prefixlen > 32)
		prefixlen = 32;

	/* Compute the host-bit mask, then invert for the net mask. */
	hostbits = (1U << (32 - prefixlen)) - 1;
	return (htonl(~hostbits));
}
1447