/* vmd.c revision 1.90 */
1/*	$OpenBSD: vmd.c,v 1.90 2018/07/10 21:12:20 reyk Exp $	*/
2
3/*
4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h>	/* nitems */
20#include <sys/queue.h>
21#include <sys/wait.h>
22#include <sys/cdefs.h>
23#include <sys/stat.h>
24#include <sys/tty.h>
25#include <sys/ttycom.h>
26#include <sys/ioctl.h>
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <termios.h>
32#include <errno.h>
33#include <event.h>
34#include <fcntl.h>
35#include <pwd.h>
36#include <signal.h>
37#include <syslog.h>
38#include <unistd.h>
39#include <ctype.h>
40#include <pwd.h>
41#include <grp.h>
42
43#include <machine/specialreg.h>
44#include <machine/vmmvar.h>
45
46#include "proc.h"
47#include "atomicio.h"
48#include "vmd.h"
49
__dead void usage(void);

/* Forward declarations for the parent-process entry points below. */
int	 main(int, char **);
int	 vmd_configure(void);
void	 vmd_sighdlr(int sig, short event, void *arg);
void	 vmd_shutdown(void);
int	 vmd_control_run(void);
int	 vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
int	 vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
int	 check_vmh(struct vm_dump_header *);

/* Global daemon state, allocated in main() and shared by this file. */
struct vmd	*env;

/* Child process table for the privsep framework (proc.c). */
static struct privsep_proc procs[] = {
	/* Keep "priv" on top as procs[0] */
	{ "priv",	PROC_PRIV,	NULL, priv },
	{ "control",	PROC_CONTROL,	vmd_dispatch_control, control },
	{ "vmm",	PROC_VMM,	vmd_dispatch_vmm, vmm, vmm_shutdown },
};

/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;	/* all-zero pw entry for "priv" */
73
74int
75vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
76{
77	struct privsep			*ps = p->p_ps;
78	int				 res = 0, ret = 0, cmd = 0, verbose;
79	unsigned int			 v = 0;
80	struct vmop_create_params	 vmc;
81	struct vmop_id			 vid;
82	struct vm_terminate_params	 vtp;
83	struct vmop_result		 vmr;
84	struct vm_dump_header		 vmh;
85	struct vmd_vm			*vm = NULL;
86	char				*str = NULL;
87	uint32_t			 id = 0;
88	struct control_sock		*rcs;
89
90	switch (imsg->hdr.type) {
91	case IMSG_VMDOP_START_VM_REQUEST:
92		IMSG_SIZE_CHECK(imsg, &vmc);
93		memcpy(&vmc, imsg->data, sizeof(vmc));
94		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_uid);
95		if (vmc.vmc_flags == 0) {
96			/* start an existing VM with pre-configured options */
97			if (!(ret == -1 && errno == EALREADY &&
98			    vm->vm_running == 0)) {
99				res = errno;
100				cmd = IMSG_VMDOP_START_VM_RESPONSE;
101			}
102		} else if (ret != 0) {
103			res = errno;
104			cmd = IMSG_VMDOP_START_VM_RESPONSE;
105		}
106		if (res == 0 &&
107		    config_setvm(ps, vm,
108		    imsg->hdr.peerid, vm->vm_params.vmc_uid) == -1) {
109			res = errno;
110			cmd = IMSG_VMDOP_START_VM_RESPONSE;
111		}
112		break;
113	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
114		IMSG_SIZE_CHECK(imsg, &vid);
115		memcpy(&vid, imsg->data, sizeof(vid));
116		if ((id = vid.vid_id) == 0) {
117			/* Lookup vm (id) by name */
118			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
119				res = ENOENT;
120				cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
121				break;
122			} else if (vm->vm_shutdown) {
123				res = EALREADY;
124				cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
125				break;
126			} else if (vm->vm_running == 0) {
127				res = EINVAL;
128				cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
129				break;
130			}
131			id = vm->vm_vmid;
132		} else if ((vm = vm_getbyvmid(id)) == NULL) {
133			res = ENOENT;
134			cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
135			break;
136		}
137		if (vm_checkperm(vm, vid.vid_uid) != 0) {
138			res = EPERM;
139			cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
140			break;
141		}
142		memset(&vtp, 0, sizeof(vtp));
143		vtp.vtp_vm_id = id;
144		if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
145		    imsg->hdr.peerid, -1, &vtp, sizeof(vtp)) == -1)
146			return (-1);
147		break;
148	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
149		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
150		break;
151	case IMSG_VMDOP_LOAD:
152		IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
153		str = get_string((uint8_t *)imsg->data,
154		    IMSG_DATA_SIZE(imsg));
155	case IMSG_VMDOP_RELOAD:
156		if (vmd_reload(0, str) == -1)
157			cmd = IMSG_CTL_FAIL;
158		else
159			cmd = IMSG_CTL_OK;
160		free(str);
161		break;
162	case IMSG_CTL_RESET:
163		IMSG_SIZE_CHECK(imsg, &v);
164		memcpy(&v, imsg->data, sizeof(v));
165		if (vmd_reload(v, NULL) == -1)
166			cmd = IMSG_CTL_FAIL;
167		else
168			cmd = IMSG_CTL_OK;
169		break;
170	case IMSG_CTL_VERBOSE:
171		IMSG_SIZE_CHECK(imsg, &verbose);
172		memcpy(&verbose, imsg->data, sizeof(verbose));
173		log_setverbose(verbose);
174
175		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
176		proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
177		cmd = IMSG_CTL_OK;
178		break;
179	case IMSG_VMDOP_PAUSE_VM:
180	case IMSG_VMDOP_UNPAUSE_VM:
181		IMSG_SIZE_CHECK(imsg, &vid);
182		memcpy(&vid, imsg->data, sizeof(vid));
183		if (vid.vid_id == 0) {
184			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
185				res = ENOENT;
186				cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
187				break;
188			} else {
189				vid.vid_id = vm->vm_vmid;
190			}
191		} else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
192			res = ENOENT;
193			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
194			break;
195		}
196		if (vm_checkperm(vm, vid.vid_uid) != 0) {
197			res = EPERM;
198			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
199			break;
200		}
201		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
202		    imsg->hdr.peerid, -1, &vid, sizeof(vid));
203		break;
204	case IMSG_VMDOP_SEND_VM_REQUEST:
205		IMSG_SIZE_CHECK(imsg, &vid);
206		memcpy(&vid, imsg->data, sizeof(vid));
207		id = vid.vid_id;
208		if (vid.vid_id == 0) {
209			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
210				res = ENOENT;
211				cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
212				close(imsg->fd);
213				break;
214			} else {
215				vid.vid_id = vm->vm_vmid;
216			}
217		} else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
218			res = ENOENT;
219			cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
220			close(imsg->fd);
221			break;
222		} else {
223		}
224		vmr.vmr_id = vid.vid_id;
225		log_debug("%s: sending fd to vmm", __func__);
226		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
227		    imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid));
228		break;
229	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
230		IMSG_SIZE_CHECK(imsg, &vid);
231		memcpy(&vid, imsg->data, sizeof(vid));
232		if (imsg->fd == -1) {
233			log_warnx("%s: invalid fd", __func__);
234			return (-1);
235		}
236		if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) !=
237		    sizeof(vmh)) {
238			log_warnx("%s: error reading vmh from received vm",
239			    __func__);
240			res = EIO;
241			close(imsg->fd);
242			cmd = IMSG_VMDOP_START_VM_RESPONSE;
243			break;
244		}
245
246		if (check_vmh(&vmh)) {
247			res = ENOENT;
248			close(imsg->fd);
249			cmd = IMSG_VMDOP_START_VM_RESPONSE;
250			break;
251		}
252		if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) !=
253		    sizeof(vmc)) {
254			log_warnx("%s: error reading vmc from received vm",
255			    __func__);
256			res = EIO;
257			close(imsg->fd);
258			cmd = IMSG_VMDOP_START_VM_RESPONSE;
259			break;
260		}
261		strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
262		    sizeof(vmc.vmc_params.vcp_name));
263		vmc.vmc_params.vcp_id = 0;
264
265		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_uid);
266		if (ret != 0) {
267			res = errno;
268			cmd = IMSG_VMDOP_START_VM_RESPONSE;
269			close(imsg->fd);
270		} else {
271			vm->vm_received = 1;
272			config_setvm(ps, vm, imsg->hdr.peerid, vmc.vmc_uid);
273			log_debug("%s: sending fd to vmm", __func__);
274			proc_compose_imsg(ps, PROC_VMM, -1,
275			    IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd,
276			    NULL, 0);
277		}
278		break;
279	case IMSG_VMDOP_DONE:
280		control_reset(&ps->ps_csock);
281		TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
282			control_reset(rcs);
283		cmd = 0;
284		break;
285	default:
286		return (-1);
287	}
288
289	switch (cmd) {
290	case 0:
291		break;
292	case IMSG_VMDOP_START_VM_RESPONSE:
293	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
294		memset(&vmr, 0, sizeof(vmr));
295		vmr.vmr_result = res;
296		vmr.vmr_id = id;
297		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
298		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
299			return (-1);
300		break;
301	default:
302		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
303		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
304			return (-1);
305		break;
306	}
307
308	return (0);
309}
310
311int
312vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
313{
314	struct vmop_result	 vmr;
315	struct privsep		*ps = p->p_ps;
316	int			 res = 0;
317	struct vmd_vm		*vm;
318	struct vm_create_params	*vcp;
319	struct vmop_info_result	 vir;
320
321	switch (imsg->hdr.type) {
322	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
323		IMSG_SIZE_CHECK(imsg, &vmr);
324		memcpy(&vmr, imsg->data, sizeof(vmr));
325		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
326			break;
327		proc_compose_imsg(ps, PROC_CONTROL, -1,
328		    imsg->hdr.type, imsg->hdr.peerid, -1,
329		    imsg->data, sizeof(imsg->data));
330		log_info("%s: paused vm %d successfully",
331		    vm->vm_params.vmc_params.vcp_name,
332		    vm->vm_vmid);
333		break;
334	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
335		IMSG_SIZE_CHECK(imsg, &vmr);
336		memcpy(&vmr, imsg->data, sizeof(vmr));
337		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
338			break;
339		proc_compose_imsg(ps, PROC_CONTROL, -1,
340		    imsg->hdr.type, imsg->hdr.peerid, -1,
341		    imsg->data, sizeof(imsg->data));
342		log_info("%s: unpaused vm %d successfully.",
343		    vm->vm_params.vmc_params.vcp_name,
344		    vm->vm_vmid);
345		break;
346	case IMSG_VMDOP_START_VM_RESPONSE:
347		IMSG_SIZE_CHECK(imsg, &vmr);
348		memcpy(&vmr, imsg->data, sizeof(vmr));
349		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
350			break;
351		vm->vm_pid = vmr.vmr_pid;
352		vcp = &vm->vm_params.vmc_params;
353		vcp->vcp_id = vmr.vmr_id;
354
355		/*
356		 * If the peerid is not -1, forward the response back to the
357		 * the control socket.  If it is -1, the request originated
358		 * from the parent, not the control socket.
359		 */
360		if (vm->vm_peerid != (uint32_t)-1) {
361			(void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
362			    sizeof(vmr.vmr_ttyname));
363			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
364			    imsg->hdr.type, vm->vm_peerid, -1,
365			    &vmr, sizeof(vmr)) == -1) {
366				errno = vmr.vmr_result;
367				log_warn("%s: failed to foward vm result",
368				    vcp->vcp_name);
369				vm_remove(vm, __func__);
370				return (-1);
371			}
372		}
373
374		if (vmr.vmr_result) {
375			errno = vmr.vmr_result;
376			log_warn("%s: failed to start vm", vcp->vcp_name);
377			vm_remove(vm, __func__);
378			break;
379		}
380
381		/* Now configure all the interfaces */
382		if (vm_priv_ifconfig(ps, vm) == -1) {
383			log_warn("%s: failed to configure vm", vcp->vcp_name);
384			vm_remove(vm, __func__);
385			break;
386		}
387
388		log_info("%s: started vm %d successfully, tty %s",
389		    vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
390		break;
391	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
392		IMSG_SIZE_CHECK(imsg, &vmr);
393		memcpy(&vmr, imsg->data, sizeof(vmr));
394		DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
395		    __func__, vmr.vmr_id);
396		proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
397		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
398			break;
399		if (vmr.vmr_result == 0) {
400			/* Mark VM as shutting down */
401			vm->vm_shutdown = 1;
402		}
403		break;
404	case IMSG_VMDOP_SEND_VM_RESPONSE:
405		IMSG_SIZE_CHECK(imsg, &vmr);
406		memcpy(&vmr, imsg->data, sizeof(vmr));
407		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
408			break;
409		if (!vmr.vmr_result)
410			log_info("%s: sent vm %d successfully.",
411			    vm->vm_params.vmc_params.vcp_name,
412			    vm->vm_vmid);
413	case IMSG_VMDOP_TERMINATE_VM_EVENT:
414		IMSG_SIZE_CHECK(imsg, &vmr);
415		memcpy(&vmr, imsg->data, sizeof(vmr));
416		DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
417		    __func__, vmr.vmr_id, vmr.vmr_result);
418		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
419			log_debug("%s: vm %d is no longer available",
420			    __func__, vmr.vmr_id);
421			break;
422		}
423		if (vmr.vmr_result != EAGAIN) {
424			if (vm->vm_from_config)
425				vm_stop(vm, 0, __func__);
426			else
427				vm_remove(vm, __func__);
428		} else {
429			/* Stop VM instance but keep the tty open */
430			vm_stop(vm, 1, __func__);
431			config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
432		}
433		break;
434	case IMSG_VMDOP_GET_INFO_VM_DATA:
435		IMSG_SIZE_CHECK(imsg, &vir);
436		memcpy(&vir, imsg->data, sizeof(vir));
437		if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
438			memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
439			if (vm->vm_ttyname != NULL)
440				strlcpy(vir.vir_ttyname, vm->vm_ttyname,
441				    sizeof(vir.vir_ttyname));
442			if (vm->vm_shutdown) {
443				/* XXX there might be a nicer way */
444				(void)strlcat(vir.vir_info.vir_name,
445				    " - stopping",
446				    sizeof(vir.vir_info.vir_name));
447			}
448			/* get the user id who started the vm */
449			vir.vir_uid = vm->vm_uid;
450			vir.vir_gid = vm->vm_params.vmc_gid;
451		}
452		if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type,
453		    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
454			log_debug("%s: GET_INFO_VM failed for vm %d, removing",
455			    __func__, vm->vm_vmid);
456			vm_remove(vm, __func__);
457			return (-1);
458		}
459		break;
460	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
461		/*
462		 * PROC_VMM has responded with the *running* VMs, now we
463		 * append the others. These use the special value 0 for their
464		 * kernel id to indicate that they are not running.
465		 */
466		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
467			if (!vm->vm_running) {
468				memset(&vir, 0, sizeof(vir));
469				vir.vir_info.vir_id = vm->vm_vmid;
470				strlcpy(vir.vir_info.vir_name,
471				    vm->vm_params.vmc_params.vcp_name,
472				    VMM_MAX_NAME_LEN);
473				vir.vir_info.vir_memory_size =
474				    vm->vm_params.vmc_params.
475				    vcp_memranges[0].vmr_size;
476				vir.vir_info.vir_ncpus =
477				    vm->vm_params.vmc_params.vcp_ncpus;
478				/* get the configured user id for this vm */
479				vir.vir_uid = vm->vm_params.vmc_uid;
480				vir.vir_gid = vm->vm_params.vmc_gid;
481				if (proc_compose_imsg(ps, PROC_CONTROL, -1,
482				    IMSG_VMDOP_GET_INFO_VM_DATA,
483				    imsg->hdr.peerid, -1, &vir,
484				    sizeof(vir)) == -1) {
485					log_debug("%s: GET_INFO_VM_END failed",
486					    __func__);
487					vm_remove(vm, __func__);
488					return (-1);
489				}
490			}
491		}
492		IMSG_SIZE_CHECK(imsg, &res);
493		proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
494		break;
495	default:
496		return (-1);
497	}
498
499	return (0);
500}
501
502int
503check_vmh(struct vm_dump_header *vmh)
504{
505	int i;
506	unsigned int code, leaf;
507	unsigned int a, b, c, d;
508
509
510	if (vmh->vmh_version != VM_DUMP_VERSION) {
511		log_warnx("%s: incompatible dump version", __func__);
512		return (-1);
513	}
514
515	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
516		code = vmh->vmh_cpuids[i].code;
517		leaf = vmh->vmh_cpuids[i].leaf;
518		if (leaf != 0x00) {
519			log_debug("%s: invalid leaf 0x%x for code 0x%x",
520			    __func__, leaf, code);
521			return (-1);
522		}
523
524		switch (code) {
525		case 0x00:
526		CPUID_LEAF(code, leaf, a, b, c, d);
527		if (vmh->vmh_cpuids[i].a > a) {
528			log_debug("%s: incompatible cpuid level", __func__);
529			return (-1);
530		}
531		if (!(vmh->vmh_cpuids[i].b == b &&
532		    vmh->vmh_cpuids[i].c == c &&
533		    vmh->vmh_cpuids[i].d == d)) {
534			log_debug("%s: incompatible cpu brand", __func__);
535			return (-1);
536		}
537		break;
538
539		case 0x01:
540		CPUID_LEAF(code, leaf, a, b, c, d);
541		if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
542		    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
543			log_debug("%s: incompatible cpu features "
544			    "code: 0x%x leaf: 0x%x  reg: c", __func__,
545			    code, leaf);
546			return (-1);
547		}
548		if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
549		    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
550			log_debug("%s: incompatible cpu features "
551			    "code: 0x%x leaf: 0x%x  reg: d", __func__,
552			    code, leaf);
553			return (-1);
554		}
555		break;
556
557		case 0x07:
558		CPUID_LEAF(code, leaf, a, b, c, d);
559		if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
560		    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
561			log_debug("%s: incompatible cpu features "
562			    "code: 0x%x leaf: 0x%x  reg: c", __func__,
563			    code, leaf);
564			return (-1);
565		}
566		if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
567		    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
568			log_debug("%s: incompatible cpu features "
569			    "code: 0x%x leaf: 0x%x  reg: d", __func__,
570			    code, leaf);
571			return (-1);
572		}
573		break;
574
575		case 0x0d:
576		CPUID_LEAF(code, leaf, a, b, c, d);
577		if (vmh->vmh_cpuids[i].b > b) {
578			log_debug("%s: incompatible cpu: insufficient "
579			    "max save area for enabled XCR0 features",
580			    __func__);
581			return (-1);
582		}
583		if (vmh->vmh_cpuids[i].c > c) {
584			log_debug("%s: incompatible cpu: insufficient "
585			    "max save area for supported XCR0 features",
586			    __func__);
587			return (-1);
588		}
589		break;
590
591		case 0x80000001:
592		CPUID_LEAF(code, leaf, a, b, c, d);
593		if ((vmh->vmh_cpuids[i].a & a) != vmh->vmh_cpuids[i].a) {
594			log_debug("%s: incompatible cpu features "
595			    "code: 0x%x leaf: 0x%x  reg: a", __func__,
596			    code, leaf);
597			return (-1);
598		}
599		if ((vmh->vmh_cpuids[i].c & c) != vmh->vmh_cpuids[i].c) {
600			log_debug("%s: incompatible cpu features "
601			    "code: 0x%x leaf: 0x%x  reg: c", __func__,
602			    code, leaf);
603			return (-1);
604		}
605		if ((vmh->vmh_cpuids[i].d & d) != vmh->vmh_cpuids[i].d) {
606			log_debug("%s: incompatible cpu features "
607			    "code: 0x%x leaf: 0x%x  reg: d", __func__,
608			    code, leaf);
609			return (-1);
610		}
611		break;
612
613		default:
614		log_debug("%s: unknown code 0x%x", __func__, code);
615		return (-1);
616		}
617	}
618
619	return (0);
620}
621
/*
 * Signal handler for the parent process, invoked from the libevent
 * loop.  SIGHUP reloads the configuration, SIGTERM/SIGINT shut the
 * daemon down, SIGPIPE and SIGUSR1 are ignored.
 */
void
vmd_sighdlr(int sig, short event, void *arg)
{
	/* only the parent acts on signals; children have their own handling */
	if (privsep_process != PROC_PARENT)
		return;
	log_debug("%s: handling signal", __func__);

	switch (sig) {
	case SIGHUP:
		log_info("%s: reload requested with SIGHUP", __func__);

		/*
		 * This is safe because libevent uses async signal handlers
		 * that run in the event loop and not in signal context.
		 */
		(void)vmd_reload(0, NULL);
		break;
	case SIGPIPE:
		log_info("%s: ignoring SIGPIPE", __func__);
		break;
	case SIGUSR1:
		log_info("%s: ignoring SIGUSR1", __func__);
		break;
	case SIGTERM:
	case SIGINT:
		vmd_shutdown();
		break;
	default:
		fatalx("unexpected signal");
	}
}
653
654__dead void
655usage(void)
656{
657	extern char *__progname;
658	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
659	    __progname);
660	exit(1);
661}
662
/*
 * vmd entry point: parse command line flags, set up the global state
 * and the privsep framework, fork the child processes (priv, control,
 * vmm) and run the parent's event loop.  Re-exec'd children enter here
 * too, selected by the internal -P/-I flags, and never return from
 * proc_init().
 */
int
main(int argc, char **argv)
{
	struct privsep		*ps;
	int			 ch;
	const char		*conffile = VMD_CONF;
	enum privsep_procid	 proc_id = PROC_PARENT;
	int			 proc_instance = 0;
	const char		*errp, *title = NULL;
	int			 argc0 = argc;	/* saved for proc_init/re-exec */

	log_init(0, LOG_DAEMON);

	if ((env = calloc(1, sizeof(*env))) == NULL)
		fatal("calloc: env");

	while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) {
		switch (ch) {
		case 'D':
			/* define a macro for the configuration parser */
			if (cmdline_symset(optarg) < 0)
				log_warnx("could not parse macro definition %s",
				    optarg);
			break;
		case 'd':
			/* debug mode: stay in foreground */
			env->vmd_debug = 2;
			break;
		case 'f':
			conffile = optarg;
			break;
		case 'v':
			env->vmd_verbose++;
			break;
		case 'n':
			/* configtest only, do not start anything */
			env->vmd_noaction = 1;
			break;
		case 'P':
			/* internal flag: run as the named child process */
			title = optarg;
			proc_id = proc_getid(procs, nitems(procs), title);
			if (proc_id == PROC_MAX)
				fatalx("invalid process name");
			break;
		case 'I':
			/* internal flag: child process instance number */
			proc_instance = strtonum(optarg, 0,
			    PROC_MAX_INSTANCES, &errp);
			if (errp)
				fatalx("invalid process instance");
			break;
		default:
			usage();
		}
	}

	argc -= optind;
	if (argc > 0)
		usage();

	/* -n implies at least minimal debug output */
	if (env->vmd_noaction && !env->vmd_debug)
		env->vmd_debug = 1;

	/* check for root privileges */
	if (env->vmd_noaction == 0) {
		if (geteuid())
			fatalx("need root privileges");
	}

	ps = &env->vmd_ps;
	ps->ps_env = env;
	env->vmd_fd = -1;

	if (config_init(env) == -1)
		fatal("failed to initialize configuration");

	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
		fatal("unknown user %s", VMD_USER);

	/* First proc runs as root without pledge but in default chroot */
	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

	/* Open /dev/vmm */
	if (env->vmd_noaction == 0) {
		env->vmd_fd = open(VMM_NODE, O_RDWR);
		if (env->vmd_fd == -1)
			fatal("%s", VMM_NODE);
	}

	/* Configure the control socket */
	ps->ps_csock.cs_name = SOCKET_NAME;
	TAILQ_INIT(&ps->ps_rcsocks);

	/* Configuration will be parsed after forking the children */
	env->vmd_conffile = conffile;

	log_init(env->vmd_debug, LOG_DAEMON);
	log_setverbose(env->vmd_verbose);

	if (env->vmd_noaction)
		ps->ps_noaction = 1;
	ps->ps_instance = proc_instance;
	if (title != NULL)
		ps->ps_title[proc_id] = title;

	/* only the parent returns */
	proc_init(ps, procs, nitems(procs), argc0, argv, proc_id);

	log_procinit("parent");
	if (!env->vmd_debug && daemon(0, 0) == -1)
		fatal("can't daemonize");

	if (ps->ps_noaction == 0)
		log_info("startup");

	event_init();

	/* install the parent signal handlers on the event loop */
	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);

	signal_add(&ps->ps_evsigint, NULL);
	signal_add(&ps->ps_evsigterm, NULL);
	signal_add(&ps->ps_evsighup, NULL);
	signal_add(&ps->ps_evsigpipe, NULL);
	signal_add(&ps->ps_evsigusr1, NULL);

	/* connect the imsg channels between the children */
	if (!env->vmd_noaction)
		proc_connect(ps);

	if (vmd_configure() == -1)
		fatalx("configuration failed");

	event_dispatch();

	log_debug("parent exiting");

	return (0);
}
801
/*
 * Post-fork configuration of the parent process: open the pty master
 * device, drop privileges with pledge(2), parse the configuration file
 * and bring up the configured switches and VMs.
 *
 * Returns 0 on success, -1 on failure; exits directly on a parse error
 * or in configtest (-n) mode.
 */
int
vmd_configure(void)
{
	struct vmd_vm		*vm;
	struct vmd_switch	*vsw;

	/* pty master, needed later to allocate VM consoles */
	if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
		fatal("open %s", PATH_PTMDEV);

	/*
	 * pledge in the parent process:
	 * stdio - for malloc and basic I/O including events.
	 * rpath - for reload to open and read the configuration files.
	 * wpath - for opening disk images and tap devices.
	 * tty - for openpty and TIOCUCNTL.
	 * proc - run kill to terminate its children safely.
	 * sendfd - for disks, interfaces and other fds.
	 * recvfd - for send and receive.
	 * getpw - lookup user or group id by name.
	 * chown, fattr - change tty ownership
	 * flock - locking disk files
	 */
	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
	    " chown fattr flock", NULL) == -1)
		fatal("pledge");

	if (parse_config(env->vmd_conffile) == -1) {
		proc_kill(&env->vmd_ps);
		exit(1);
	}

	/* configtest mode: report and exit without starting anything */
	if (env->vmd_noaction) {
		fprintf(stderr, "configuration OK\n");
		proc_kill(&env->vmd_ps);
		exit(0);
	}

	/* Send shared global configuration to all children */
	if (config_setconfig(env) == -1)
		return (-1);

	/* create the configured switch (bridge) interfaces */
	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (vsw->sw_running)
			continue;
		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
			log_warn("%s: failed to create switch %s",
			    __func__, vsw->sw_name);
			switch_remove(vsw);
			return (-1);
		}
	}

	/* start every enabled VM from the configuration */
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_disabled) {
			log_debug("%s: not creating vm %s (disabled)",
			    __func__,
			    vm->vm_params.vmc_params.vcp_name);
			continue;
		}
		if (config_setvm(&env->vmd_ps, vm,
		    -1, vm->vm_params.vmc_uid) == -1)
			return (-1);
	}

	return (0);
}
868
/*
 * Reload or reset the running configuration.
 *
 * reset != 0: purge state at the given reset level, no file is parsed.
 * reset == 0: parse "filename"; with a NULL/empty filename the default
 * config file is re-parsed and all non-running VMs are dropped first
 * (a "reload"), otherwise the file only adds to the current state
 * (a "load").
 *
 * Returns 0 on success or -1 on failure.
 */
int
vmd_reload(unsigned int reset, const char *filename)
{
	struct vmd_vm		*vm, *next_vm;
	struct vmd_switch	*vsw;
	int			 reload = 0;

	/* Switch back to the default config file */
	if (filename == NULL || *filename == '\0') {
		filename = env->vmd_conffile;
		reload = 1;
	}

	log_debug("%s: level %d config file %s", __func__, reset, filename);

	if (reset) {
		/* Purge the configuration */
		config_purge(env, reset);
		config_setreset(env, reset);
	} else {
		/*
		 * Load or reload the configuration.
		 *
		 * Reloading removes all non-running VMs before processing the
		 * config file, whereas loading only adds to the existing list
		 * of VMs.
		 */

		if (reload) {
			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
			    next_vm) {
				if (vm->vm_running == 0) {
					DPRINTF("%s: calling vm_remove",
					    __func__);
					vm_remove(vm, __func__);
				}
			}
		}

		if (parse_config(filename) == -1) {
			log_debug("%s: failed to load config file %s",
			    __func__, filename);
			return (-1);
		}

		if (reload) {
			/* Update shared global configuration in all children */
			if (config_setconfig(env) == -1)
				return (-1);
		}

		/* bring up any switches added by the new configuration */
		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
			if (vsw->sw_running)
				continue;
			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
				log_warn("%s: failed to create switch %s",
				    __func__, vsw->sw_name);
				switch_remove(vsw);
				return (-1);
			}
		}

		/* start newly configured VMs; running ones are untouched */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			if (vm->vm_running == 0) {
				if (vm->vm_disabled) {
					log_debug("%s: not creating vm %s"
					    " (disabled)", __func__,
					    vm->vm_params.vmc_params.vcp_name);
					continue;
				}
				if (config_setvm(&env->vmd_ps, vm,
				    -1, vm->vm_params.vmc_uid) == -1)
					return (-1);
			} else {
				log_debug("%s: not creating vm \"%s\": "
				    "(running)", __func__,
				    vm->vm_params.vmc_params.vcp_name);
			}
		}
	}

	return (0);
}
952
953void
954vmd_shutdown(void)
955{
956	struct vmd_vm *vm, *vm_next;
957
958	log_debug("%s: performing shutdown", __func__);
959
960	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
961		vm_remove(vm, __func__);
962	}
963
964	proc_kill(&env->vmd_ps);
965	free(env);
966
967	log_warnx("parent terminating");
968	exit(0);
969}
970
971struct vmd_vm *
972vm_getbyvmid(uint32_t vmid)
973{
974	struct vmd_vm	*vm;
975
976	if (vmid == 0)
977		return (NULL);
978	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
979		if (vm->vm_vmid == vmid)
980			return (vm);
981	}
982
983	return (NULL);
984}
985
986struct vmd_vm *
987vm_getbyid(uint32_t id)
988{
989	struct vmd_vm	*vm;
990
991	if (id == 0)
992		return (NULL);
993	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
994		if (vm->vm_params.vmc_params.vcp_id == id)
995			return (vm);
996	}
997
998	return (NULL);
999}
1000
1001uint32_t
1002vm_id2vmid(uint32_t id, struct vmd_vm *vm)
1003{
1004	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
1005		return (0);
1006	DPRINTF("%s: vmm id %u is vmid %u", __func__,
1007	    id, vm->vm_vmid);
1008	return (vm->vm_vmid);
1009}
1010
1011uint32_t
1012vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
1013{
1014	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
1015		return (0);
1016	DPRINTF("%s: vmid %u is vmm id %u", __func__,
1017	    vmid, vm->vm_params.vmc_params.vcp_id);
1018	return (vm->vm_params.vmc_params.vcp_id);
1019}
1020
1021struct vmd_vm *
1022vm_getbyname(const char *name)
1023{
1024	struct vmd_vm	*vm;
1025
1026	if (name == NULL)
1027		return (NULL);
1028	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1029		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
1030			return (vm);
1031	}
1032
1033	return (NULL);
1034}
1035
1036struct vmd_vm *
1037vm_getbypid(pid_t pid)
1038{
1039	struct vmd_vm	*vm;
1040
1041	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1042		if (vm->vm_pid == pid)
1043			return (vm);
1044	}
1045
1046	return (NULL);
1047}
1048
/*
 * Stop a VM and release its process-local resources: the imsg channel
 * to the VM process and the disk, interface, kernel and cdrom file
 * descriptors.  The tty is released too unless keeptty is set.  The VM
 * stays registered; use vm_remove() to drop it from the configuration.
 */
void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
	struct privsep	*ps = &env->vmd_ps;
	unsigned int	 i;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s stopping vm %d%s",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");

	vm->vm_running = 0;
	vm->vm_shutdown = 0;

	/* tear down the imsg channel to the VM process */
	if (vm->vm_iev.ibuf.fd != -1) {
		event_del(&vm->vm_iev.ev);
		close(vm->vm_iev.ibuf.fd);
	}
	/* close and invalidate all disk image fds */
	for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
		if (vm->vm_disks[i] != -1) {
			close(vm->vm_disks[i]);
			vm->vm_disks[i] = -1;
		}
	}
	/* close the tap fds and free the per-interface strings */
	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
		if (vm->vm_ifs[i].vif_fd != -1) {
			close(vm->vm_ifs[i].vif_fd);
			vm->vm_ifs[i].vif_fd = -1;
		}
		free(vm->vm_ifs[i].vif_name);
		free(vm->vm_ifs[i].vif_switch);
		free(vm->vm_ifs[i].vif_group);
		vm->vm_ifs[i].vif_name = NULL;
		vm->vm_ifs[i].vif_switch = NULL;
		vm->vm_ifs[i].vif_group = NULL;
	}
	if (vm->vm_kernel != -1) {
		close(vm->vm_kernel);
		vm->vm_kernel = -1;
	}
	if (vm->vm_cdrom != -1) {
		close(vm->vm_cdrom);
		vm->vm_cdrom = -1;
	}
	if (!keeptty) {
		/* release the console tty and forget its owner */
		vm_closetty(vm);
		vm->vm_uid = 0;
	}
}
1100
1101void
1102vm_remove(struct vmd_vm *vm, const char *caller)
1103{
1104	struct privsep	*ps = &env->vmd_ps;
1105
1106	if (vm == NULL)
1107		return;
1108
1109	log_debug("%s: %s %s removing vm %d from running config",
1110	    __func__, ps->ps_title[privsep_process], caller,
1111	    vm->vm_vmid);
1112
1113	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
1114
1115	vm_stop(vm, 0, caller);
1116	free(vm);
1117}
1118
1119int
1120vm_register(struct privsep *ps, struct vmop_create_params *vmc,
1121    struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
1122{
1123	struct vmd_vm		*vm = NULL;
1124	struct vm_create_params	*vcp = &vmc->vmc_params;
1125	static const uint8_t	 zero_mac[ETHER_ADDR_LEN];
1126	uint32_t		 rng;
1127	unsigned int		 i;
1128	struct vmd_switch	*sw;
1129	char			*s;
1130
1131	errno = 0;
1132	*ret_vm = NULL;
1133
1134	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
1135	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
1136		if (vm_checkperm(vm, uid) != 0) {
1137			errno = EPERM;
1138			goto fail;
1139		}
1140		*ret_vm = vm;
1141		errno = EALREADY;
1142		goto fail;
1143	}
1144
1145	/*
1146	 * non-root users can only start existing VMs
1147	 * XXX there could be a mechanism to allow overriding some options
1148	 */
1149	if (vm_checkperm(NULL, uid) != 0) {
1150		errno = EPERM;
1151		goto fail;
1152	}
1153	if (vmc->vmc_flags == 0) {
1154		errno = ENOENT;
1155		goto fail;
1156	}
1157	if (vcp->vcp_ncpus == 0)
1158		vcp->vcp_ncpus = 1;
1159	if (vcp->vcp_memranges[0].vmr_size == 0)
1160		vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
1161	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
1162		log_warnx("invalid number of CPUs");
1163		goto fail;
1164	} else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) {
1165		log_warnx("invalid number of disks");
1166		goto fail;
1167	} else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) {
1168		log_warnx("invalid number of interfaces");
1169		goto fail;
1170	} else if (strlen(vcp->vcp_kernel) == 0 &&
1171	    vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) {
1172		log_warnx("no kernel or disk/cdrom specified");
1173		goto fail;
1174	} else if (strlen(vcp->vcp_name) == 0) {
1175		log_warnx("invalid VM name");
1176		goto fail;
1177	} else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
1178	    *vcp->vcp_name == '_') {
1179		log_warnx("invalid VM name");
1180		goto fail;
1181	} else {
1182		for (s = vcp->vcp_name; *s != '\0'; ++s) {
1183			if (!(isalnum(*s) || *s == '.' || *s == '-' ||
1184			    *s == '_')) {
1185				log_warnx("invalid VM name");
1186				goto fail;
1187			}
1188		}
1189	}
1190
1191	if ((vm = calloc(1, sizeof(*vm))) == NULL)
1192		goto fail;
1193
1194	memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
1195	vmc = &vm->vm_params;
1196	vcp = &vmc->vmc_params;
1197	vm->vm_pid = -1;
1198	vm->vm_tty = -1;
1199	vm->vm_receive_fd = -1;
1200	vm->vm_paused = 0;
1201
1202	for (i = 0; i < vcp->vcp_ndisks; i++)
1203		vm->vm_disks[i] = -1;
1204	for (i = 0; i < vcp->vcp_nnics; i++) {
1205		vm->vm_ifs[i].vif_fd = -1;
1206
1207		if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
1208			/* inherit per-interface flags from the switch */
1209			vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
1210		}
1211
1212		/*
1213		 * If the MAC address is zero, always randomize it in vmd(8)
1214		 * because we cannot rely on the guest OS to do the right
1215		 * thing like OpenBSD does.  Based on ether_fakeaddr()
1216		 * from the kernel, incremented by one to differentiate
1217		 * the source.
1218		 */
1219		if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) {
1220			rng = arc4random();
1221			vcp->vcp_macs[i][0] = 0xfe;
1222			vcp->vcp_macs[i][1] = 0xe1;
1223			vcp->vcp_macs[i][2] = 0xba + 1;
1224			vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
1225			vcp->vcp_macs[i][4] = rng;
1226			vcp->vcp_macs[i][5] = rng >> 8;
1227		}
1228	}
1229	vm->vm_kernel = -1;
1230	vm->vm_cdrom = -1;
1231	vm->vm_iev.ibuf.fd = -1;
1232
1233	if (++env->vmd_nvm == 0)
1234		fatalx("too many vms");
1235
1236	/* Assign a new internal Id if not specified */
1237	vm->vm_vmid = id == 0 ? env->vmd_nvm : id;
1238
1239	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
1240	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);
1241
1242	*ret_vm = vm;
1243	return (0);
1244 fail:
1245	if (errno == 0)
1246		errno = EINVAL;
1247	return (-1);
1248}
1249
1250/*
1251 * vm_checkperm
1252 *
1253 * Checks if the user represented by the 'uid' parameter is allowed to
1254 * manipulate the VM described by the 'vm' parameter (or connect to said VM's
1255 * console.)
1256 *
1257 * Parameters:
1258 *  vm: the VM whose permission is to be checked
1259 *  uid: the user ID of the user making the request
1260 *
1261 * Return values:
1262 *   0: the permission should be granted
1263 *  -1: the permission check failed (also returned if vm == null)
1264 */
1265int
1266vm_checkperm(struct vmd_vm *vm, uid_t uid)
1267{
1268	struct group	*gr;
1269	struct passwd	*pw;
1270	char		**grmem;
1271
1272	/* root has no restrictions */
1273	if (uid == 0)
1274		return (0);
1275
1276	if (vm == NULL)
1277		return (-1);
1278
1279	/* check supplementary groups */
1280	if (vm->vm_params.vmc_gid != -1 &&
1281	    (pw = getpwuid(uid)) != NULL &&
1282	    (gr = getgrgid(vm->vm_params.vmc_gid)) != NULL) {
1283		for (grmem = gr->gr_mem; *grmem; grmem++)
1284			if (strcmp(*grmem, pw->pw_name) == 0)
1285				return (0);
1286	}
1287
1288	/* check user */
1289	if ((vm->vm_running && vm->vm_uid == uid) ||
1290	    (!vm->vm_running && vm->vm_params.vmc_uid == uid))
1291		return (0);
1292
1293	return (-1);
1294}
1295
/*
 * vm_opentty
 *
 * Allocates a pty for the VM's console using the pre-opened PTM fd and
 * adjusts ownership and permissions of the tty device node.  On success
 * the controller fd is stored in vm->vm_tty and the device name in
 * vm->vm_ttyname (allocated here, released by vm_closetty()).
 *
 * Return values:
 *   0: success
 *  -1: failure; any partially acquired tty state is released
 */
int
vm_opentty(struct vmd_vm *vm)
{
	struct ptmget		 ptm;
	struct stat		 st;
	struct group		*gr;
	uid_t			 uid;
	gid_t			 gid;
	mode_t			 mode;
	int			 on;

	/*
	 * Open tty with pre-opened PTM fd
	 */
	if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1))
		return (-1);

	/*
	 * We use user ioctl(2) mode to pass break commands.
	 */
	on = 1;
	if (ioctl(ptm.cfd, TIOCUCNTL, &on))
		fatal("could not enable user ioctl mode");

	/* Keep the controller side; the slave side goes to the VM process. */
	vm->vm_tty = ptm.cfd;
	close(ptm.sfd);
	if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL)
		goto fail;

	uid = vm->vm_uid;
	gid = vm->vm_params.vmc_gid;

	if (vm->vm_params.vmc_gid != -1) {
		/* An owner group was configured: group read/write. */
		mode = 0660;
	} else if ((gr = getgrnam("tty")) != NULL) {
		/* Traditional tty group: group write only. */
		gid = gr->gr_gid;
		mode = 0620;
	} else {
		/* Fall back to owner-only access, root group. */
		mode = 0600;
		gid = 0;
	}

	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
	    __func__, vm->vm_params.vmc_params.vcp_name,
	    vm->vm_ttyname, uid, gid, mode);

	/*
	 * Change ownership and mode of the tty as required.
	 * Loosely based on the implementation of sshpty.c
	 */
	if (stat(vm->vm_ttyname, &st) == -1)
		goto fail;

	if (st.st_uid != uid || st.st_gid != gid) {
		if (chown(vm->vm_ttyname, uid, gid) == -1) {
			log_warn("chown %s %d %d failed, uid %d",
			    vm->vm_ttyname, uid, gid, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
		if (chmod(vm->vm_ttyname, mode) == -1) {
			log_warn("chmod %s %o failed, uid %d",
			    vm->vm_ttyname, mode, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	return (0);
 fail:
	vm_closetty(vm);
	return (-1);
}
1378
1379void
1380vm_closetty(struct vmd_vm *vm)
1381{
1382	if (vm->vm_tty != -1) {
1383		/* Release and close the tty */
1384		if (fchown(vm->vm_tty, 0, 0) == -1)
1385			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
1386		if (fchmod(vm->vm_tty, 0666) == -1)
1387			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
1388		close(vm->vm_tty);
1389		vm->vm_tty = -1;
1390	}
1391	free(vm->vm_ttyname);
1392	vm->vm_ttyname = NULL;
1393}
1394
1395void
1396switch_remove(struct vmd_switch *vsw)
1397{
1398	if (vsw == NULL)
1399		return;
1400
1401	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);
1402
1403	free(vsw->sw_group);
1404	free(vsw->sw_name);
1405	free(vsw);
1406}
1407
1408struct vmd_switch *
1409switch_getbyname(const char *name)
1410{
1411	struct vmd_switch	*vsw;
1412
1413	if (name == NULL)
1414		return (NULL);
1415	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1416		if (strcmp(vsw->sw_name, name) == 0)
1417			return (vsw);
1418	}
1419
1420	return (NULL);
1421}
1422
/*
 * get_string
 *
 * Copies at most 'len' bytes from 'ptr' into a newly allocated,
 * NUL-terminated string, stopping early at the first non-printable
 * byte.  The caller is responsible for freeing the result.
 *
 * Return values:
 *  the new string, or NULL if memory allocation failed
 */
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 i;

	for (i = 0; i < len; i++)
		if (!isprint(ptr[i]))
			break;

	/* Explicit cast: strndup() expects a char pointer. */
	return strndup((const char *)ptr, i);
}
1434
/*
 * prefixlen2mask
 *
 * Converts an IPv4 prefix length into a netmask in network byte
 * order.  Prefix lengths larger than 32 are clamped to 32.
 */
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	uint32_t	 mask;

	if (prefixlen == 0)
		return (0);
	if (prefixlen > 32)
		prefixlen = 32;

	/* Set the top 'prefixlen' bits, then convert to wire order. */
	mask = 0xffffffff << (32 - prefixlen);
	return (htonl(mask));
}
1446