priv.c revision 1.11
1/*	$OpenBSD: priv.c,v 1.11 2017/08/31 09:00:46 mlarkin Exp $	*/
2
3/*
4 * Copyright (c) 2016 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h>	/* nitems */
20#include <sys/queue.h>
21#include <sys/stat.h>
22#include <sys/socket.h>
23#include <sys/un.h>
24#include <sys/ioctl.h>
25#include <sys/tree.h>
26
27#include <net/if.h>
28#include <netinet/in.h>
29#include <netinet/if_ether.h>
30#include <net/if_bridge.h>
31
32#include <arpa/inet.h>
33
34#include <errno.h>
35#include <event.h>
36#include <fcntl.h>
37#include <stdlib.h>
38#include <stdio.h>
39#include <string.h>
40#include <unistd.h>
41#include <signal.h>
42#include <ctype.h>
43
44#include "proc.h"
45#include "vmd.h"
46
/* Handler for imsgs received from the parent process. */
int	 priv_dispatch_parent(int, struct privsep_proc *, struct imsg *);
/* Run callback that priv() passes to proc_run(). */
void	 priv_run(struct privsep *, struct privsep_proc *, void *);

/* Peer table that priv() passes to proc_run(): the parent is the only entry. */
static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	priv_dispatch_parent }
};
53
54void
55priv(struct privsep *ps, struct privsep_proc *p)
56{
57	proc_run(ps, p, procs, nitems(procs), priv_run, NULL);
58}
59
60void
61priv_run(struct privsep *ps, struct privsep_proc *p, void *arg)
62{
63	struct vmd		*env = ps->ps_env;
64
65	/*
66	 * no pledge(2) in the "priv" process:
67	 * write ioctls are not permitted by pledge.
68	 */
69
70	/* Open our own socket for generic interface ioctls */
71	if ((env->vmd_fd = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
72		fatal("socket");
73}
74
75int
76priv_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
77{
78	const char		*desct[] = { "tap", "switch", "bridge", NULL };
79	struct privsep		*ps = p->p_ps;
80	struct vmop_ifreq	 vfr;
81	struct vmd		*env = ps->ps_env;
82	struct ifreq		 ifr;
83	struct ifbreq		 ifbr;
84	struct ifgroupreq	 ifgr;
85	struct ifaliasreq	 ifra;
86	char			 type[IF_NAMESIZE];
87
88	switch (imsg->hdr.type) {
89	case IMSG_VMDOP_PRIV_IFDESCR:
90	case IMSG_VMDOP_PRIV_IFCREATE:
91	case IMSG_VMDOP_PRIV_IFRDOMAIN:
92	case IMSG_VMDOP_PRIV_IFADD:
93	case IMSG_VMDOP_PRIV_IFUP:
94	case IMSG_VMDOP_PRIV_IFDOWN:
95	case IMSG_VMDOP_PRIV_IFGROUP:
96	case IMSG_VMDOP_PRIV_IFADDR:
97		IMSG_SIZE_CHECK(imsg, &vfr);
98		memcpy(&vfr, imsg->data, sizeof(vfr));
99
100		/* We should not get malicious requests from the parent */
101		if (priv_getiftype(vfr.vfr_name, type, NULL) == -1 ||
102		    priv_findname(type, desct) == -1)
103			fatalx("%s: rejected priv operation on interface: %s",
104			    __func__, vfr.vfr_name);
105		break;
106	case IMSG_VMDOP_CONFIG:
107	case IMSG_CTL_RESET:
108		break;
109	default:
110		return (-1);
111	}
112
113	switch (imsg->hdr.type) {
114	case IMSG_VMDOP_PRIV_IFDESCR:
115		/* Set the interface description */
116		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
117		ifr.ifr_data = (caddr_t)vfr.vfr_value;
118		if (ioctl(env->vmd_fd, SIOCSIFDESCR, &ifr) < 0)
119			log_warn("SIOCSIFDESCR");
120		break;
121	case IMSG_VMDOP_PRIV_IFCREATE:
122		/* Create the bridge if it doesn't exist */
123		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
124		if (ioctl(env->vmd_fd, SIOCIFCREATE, &ifr) < 0 &&
125		    errno != EEXIST)
126			log_warn("SIOCIFCREATE");
127		break;
128	case IMSG_VMDOP_PRIV_IFRDOMAIN:
129		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
130		ifr.ifr_rdomainid = vfr.vfr_id;
131		if (ioctl(env->vmd_fd, SIOCSIFRDOMAIN, &ifr) < 0)
132			log_warn("SIOCSIFRDOMAIN");
133		break;
134	case IMSG_VMDOP_PRIV_IFADD:
135		if (priv_getiftype(vfr.vfr_value, type, NULL) == -1)
136			fatalx("%s: rejected to add interface: %s",
137			    __func__, vfr.vfr_value);
138
139		/* Attach the device to the bridge */
140		strlcpy(ifbr.ifbr_name, vfr.vfr_name,
141		    sizeof(ifbr.ifbr_name));
142		strlcpy(ifbr.ifbr_ifsname, vfr.vfr_value,
143		    sizeof(ifbr.ifbr_ifsname));
144		if (ioctl(env->vmd_fd, SIOCBRDGADD, &ifbr) < 0 &&
145		    errno != EEXIST)
146			log_warn("SIOCBRDGADD");
147		break;
148	case IMSG_VMDOP_PRIV_IFUP:
149	case IMSG_VMDOP_PRIV_IFDOWN:
150		/* Set the interface status */
151		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
152		if (ioctl(env->vmd_fd, SIOCGIFFLAGS, &ifr) < 0) {
153			log_warn("SIOCGIFFLAGS");
154			break;
155		}
156		if (imsg->hdr.type == IMSG_VMDOP_PRIV_IFUP)
157			ifr.ifr_flags |= IFF_UP;
158		else
159			ifr.ifr_flags &= ~IFF_UP;
160		if (ioctl(env->vmd_fd, SIOCSIFFLAGS, &ifr) < 0)
161			log_warn("SIOCSIFFLAGS");
162		break;
163	case IMSG_VMDOP_PRIV_IFGROUP:
164		if (priv_validgroup(vfr.vfr_value) == -1)
165			fatalx("%s: invalid group name", __func__);
166
167		if (strlcpy(ifgr.ifgr_name, vfr.vfr_name,
168		    sizeof(ifgr.ifgr_name)) >= sizeof(ifgr.ifgr_name) ||
169		    strlcpy(ifgr.ifgr_group, vfr.vfr_value,
170		    sizeof(ifgr.ifgr_group)) >= sizeof(ifgr.ifgr_group))
171			fatalx("%s: group name too long", __func__);
172
173		if (ioctl(env->vmd_fd, SIOCAIFGROUP, &ifgr) < 0 &&
174		    errno != EEXIST)
175			log_warn("SIOCAIFGROUP");
176		break;
177	case IMSG_VMDOP_PRIV_IFADDR:
178		memset(&ifra, 0, sizeof(ifra));
179
180		/* Set the interface address */
181		strlcpy(ifra.ifra_name, vfr.vfr_name, sizeof(ifra.ifra_name));
182
183		memcpy(&ifra.ifra_addr, &vfr.vfr_ifra.ifra_addr,
184		    sizeof(ifra.ifra_addr));
185		ifra.ifra_addr.sa_family = AF_INET;
186		ifra.ifra_addr.sa_len = sizeof(struct sockaddr_in);
187
188		memcpy(&ifra.ifra_mask, &vfr.vfr_ifra.ifra_mask,
189		    sizeof(ifra.ifra_mask));
190		ifra.ifra_mask.sa_family = AF_INET;
191		ifra.ifra_mask.sa_len = sizeof(struct sockaddr_in);
192
193		if (ioctl(env->vmd_fd, SIOCAIFADDR, &ifra) < 0)
194			log_warn("SIOCAIFADDR");
195		break;
196	case IMSG_VMDOP_CONFIG:
197		config_getconfig(env, imsg);
198		break;
199	case IMSG_CTL_RESET:
200		config_getreset(env, imsg);
201		break;
202	default:
203		return (-1);
204	}
205
206	return (0);
207}
208
/*
 * Split an interface name such as "tap0" into its type ("tap") and its
 * unit number (0).
 *
 * The type is copied NUL-terminated into `type'; the length check below
 * implies the buffer is expected to hold IF_NAMESIZE bytes.  The unit
 * is stored in *unitptr unless unitptr is NULL.
 *
 * Returns 0 on success.  Returns -1 if the name starts with a digit,
 * contains no digit at all, has a type part of IF_NAMESIZE - 1 or more
 * characters, or if strtonum(3) rejects the part after the type.
 */
int
priv_getiftype(char *ifname, char *type, unsigned int *unitptr)
{
	const char	*unitstr, *errstr;
	size_t		 typelen;
	unsigned int	 unit;

	/* The type is everything in front of the first digit */
	typelen = strcspn(ifname, "0123456789");
	if (typelen == 0)
		return (-1);
	if (ifname[typelen] == '\0')
		return (-1);
	if (typelen >= (IF_NAMESIZE - 1))
		return (-1);

	memcpy(type, ifname, typelen);
	type[typelen] = '\0';

	/* The remainder has to be accepted by strtonum(3) as the unit */
	unitstr = ifname + typelen;
	unit = strtonum(unitstr, 0, UINT_MAX, &errstr);
	if (errstr != NULL)
		return (-1);

	if (unitptr != NULL)
		*unitptr = unit;

	return (0);
}
232
/*
 * Look up `name' in `names', an array of strings terminated by a NULL
 * entry.  Returns 0 on an exact match, -1 if the name is not listed.
 */
int
priv_findname(const char *name, const char **names)
{
	const char	**entry;

	for (entry = names; *entry != NULL; entry++) {
		if (strcmp(name, *entry) == 0)
			return (0);
	}

	return (-1);
}
245
/*
 * Check whether `name' is acceptable as an interface group name.
 *
 * Returns 0 if the name is shorter than IF_NAMESIZE and does not end
 * with a digit (the empty string passes both checks), -1 otherwise.
 */
int
priv_validgroup(const char *name)
{
	size_t	 len = strlen(name);

	if (len >= IF_NAMESIZE)
		return (-1);
	/*
	 * Group can not end with a digit.  The cast keeps the argument
	 * within the domain of isdigit(3) when char is signed and the
	 * last byte has its high bit set.
	 */
	if (len > 0 && isdigit((unsigned char)name[len - 1]))
		return (-1);
	return (0);
}
256
/*
 * The functions below are called from the peer process,
 * not from within the "priv" process itself.
 */
260
261int
262vm_priv_ifconfig(struct privsep *ps, struct vmd_vm *vm)
263{
264	struct vmd		*env = ps->ps_env;
265	struct vm_create_params	*vcp = &vm->vm_params.vmc_params;
266	struct vmd_if		*vif;
267	struct vmd_switch	*vsw;
268	unsigned int		 i;
269	struct vmop_ifreq	 vfr, vfbr;
270	struct sockaddr_in	*sin4;
271
272	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
273		vif = &vm->vm_ifs[i];
274
275		if (vif->vif_name == NULL)
276			break;
277
278		if (strlcpy(vfr.vfr_name, vif->vif_name,
279		    sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name))
280			return (-1);
281
282		/* Use the configured rdomain or get it from the process */
283		if (vif->vif_flags & VMIFF_RDOMAIN)
284			vfr.vfr_id = vif->vif_rdomain;
285		else
286			vfr.vfr_id = getrtable();
287		if (vfr.vfr_id != 0)
288			log_debug("%s: interface %s rdomain %u", __func__,
289			    vfr.vfr_name, vfr.vfr_id);
290
291		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
292		    &vfr, sizeof(vfr));
293
294		/* Description can be truncated */
295		(void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value),
296		    "vm%u-if%u-%s", vm->vm_vmid, i, vcp->vcp_name);
297
298		log_debug("%s: interface %s description %s", __func__,
299		    vfr.vfr_name, vfr.vfr_value);
300
301		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR,
302		    &vfr, sizeof(vfr));
303
304		/* Add interface to bridge/switch */
305		if ((vsw = switch_getbyname(vif->vif_switch)) != NULL) {
306			memset(&vfbr, 0, sizeof(vfbr));
307
308			if (strlcpy(vfbr.vfr_name, vsw->sw_ifname,
309			    sizeof(vfbr.vfr_name)) >= sizeof(vfbr.vfr_name))
310				return (-1);
311			if (strlcpy(vfbr.vfr_value, vif->vif_name,
312			    sizeof(vfbr.vfr_value)) >= sizeof(vfbr.vfr_value))
313				return (-1);
314			if (vsw->sw_flags & VMIFF_RDOMAIN)
315				vfbr.vfr_id = vsw->sw_rdomain;
316			else
317				vfbr.vfr_id = getrtable();
318
319			log_debug("%s: interface %s add %s", __func__,
320			    vfbr.vfr_name, vfbr.vfr_value);
321
322			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFCREATE,
323			    &vfbr, sizeof(vfbr));
324			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
325			    &vfbr, sizeof(vfbr));
326			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD,
327			    &vfbr, sizeof(vfbr));
328		} else if (vif->vif_switch != NULL)
329			log_warnx("switch %s not found", vif->vif_switch);
330
331		/* First group is defined per-interface */
332		if (vif->vif_group) {
333			if (strlcpy(vfr.vfr_value, vif->vif_group,
334			    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
335				return (-1);
336
337			log_debug("%s: interface %s group %s", __func__,
338			    vfr.vfr_name, vfr.vfr_value);
339
340			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP,
341			    &vfr, sizeof(vfr));
342		}
343
344		/* The second group is defined per-switch */
345		if (vsw != NULL && vsw->sw_group != NULL) {
346			if (strlcpy(vfr.vfr_value, vsw->sw_group,
347			    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
348				return (-1);
349
350			log_debug("%s: interface %s group %s switch %s",
351			    __func__, vfr.vfr_name, vfr.vfr_value,
352			    vsw->sw_name);
353
354			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP,
355			    &vfr, sizeof(vfr));
356		}
357
358		/* Set the new interface status to up or down */
359		proc_compose(ps, PROC_PRIV, (vif->vif_flags & VMIFF_UP) ?
360		    IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
361		    &vfr, sizeof(vfr));
362
363		if (vm->vm_params.vmc_ifflags[i] & VMIFF_LOCAL) {
364			sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_mask;
365			sin4->sin_family = AF_INET;
366			sin4->sin_len = sizeof(*sin4);
367			sin4->sin_addr.s_addr = htonl(0xfffffffe);
368
369			sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_addr;
370			sin4->sin_family = AF_INET;
371			sin4->sin_len = sizeof(*sin4);
372			if ((sin4->sin_addr.s_addr =
373			    vm_priv_addr(&env->vmd_cfg.cfg_localprefix,
374			    vm->vm_vmid, i, 0)) == 0)
375				return (-1);
376
377			log_debug("%s: interface %s address %s/31",
378			    __func__, vfr.vfr_name,
379			    inet_ntoa(sin4->sin_addr));
380
381			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADDR,
382			    &vfr, sizeof(vfr));
383		}
384	}
385
386	return (0);
387}
388
389int
390vm_priv_brconfig(struct privsep *ps, struct vmd_switch *vsw)
391{
392	struct vmd_if		*vif;
393	struct vmop_ifreq	 vfr;
394
395	memset(&vfr, 0, sizeof(vfr));
396
397	if (strlcpy(vfr.vfr_name, vsw->sw_ifname,
398	    sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name))
399		return (-1);
400
401	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFCREATE,
402	    &vfr, sizeof(vfr));
403
404	/* Use the configured rdomain or get it from the process */
405	if (vsw->sw_flags & VMIFF_RDOMAIN)
406		vfr.vfr_id = vsw->sw_rdomain;
407	else
408		vfr.vfr_id = getrtable();
409	if (vfr.vfr_id != 0)
410		log_debug("%s: interface %s rdomain %u", __func__,
411		    vfr.vfr_name, vfr.vfr_id);
412
413	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
414	    &vfr, sizeof(vfr));
415
416	/* Description can be truncated */
417	(void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value),
418	    "switch%u-%s", vsw->sw_id, vsw->sw_name);
419
420	log_debug("%s: interface %s description %s", __func__,
421	    vfr.vfr_name, vfr.vfr_value);
422
423	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR,
424	    &vfr, sizeof(vfr));
425
426	TAILQ_FOREACH(vif, &vsw->sw_ifs, vif_entry) {
427		if (strlcpy(vfr.vfr_value, vif->vif_name,
428		    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
429			return (-1);
430
431		log_debug("%s: interface %s add %s", __func__,
432		    vfr.vfr_name, vfr.vfr_value);
433
434		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD,
435		    &vfr, sizeof(vfr));
436	}
437
438	/* Set the new interface status to up or down */
439	proc_compose(ps, PROC_PRIV, (vsw->sw_flags & VMIFF_UP) ?
440	    IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
441	    &vfr, sizeof(vfr));
442
443	vsw->sw_running = 1;
444	return (0);
445}
446
447uint32_t
448vm_priv_addr(struct address *h, uint32_t vmid, int idx, int isvm)
449{
450	in_addr_t		prefix, mask, addr;
451
452	/*
453	 * 1. Set the address prefix and mask, 100.64.0.0/10 by default.
454	 */
455	if (h->ss.ss_family != AF_INET ||
456	    h->prefixlen < 0 || h->prefixlen > 32)
457		fatal("local prefix");
458	prefix = ss2sin(&h->ss)->sin_addr.s_addr;
459	mask = prefixlen2mask(h->prefixlen);
460
461	/* 2. Encode the VM ID as a per-VM subnet range N, 100.64.N.0/24. */
462	addr = vmid << 8;
463
464	/*
465	 * 3. Assign a /31 subnet M per VM interface, 100.64.N.M/31.
466	 * Each subnet contains exactly two IP addresses; skip the
467	 * first subnet to avoid a gateway address ending with .0.
468	 */
469	addr |= (idx + 1) * 2;
470
471	/* 4. Use the first address for the gateway, the second for the VM. */
472	if (isvm)
473		addr++;
474
475	/* 5. Convert to network byte order and add the prefix. */
476	addr = htonl(addr) | prefix;
477
478	/*
479	 * Validate the results:
480	 * - the address should not exceed the prefix (eg. VM ID to high).
481	 * - up to 126 interfaces can be encoded per VM.
482	 */
483	if (prefix != (addr & mask) || idx >= 0x7f) {
484		log_warnx("%s: dhcp address range exceeded,"
485		    " vm id %u interface %d", __func__, vmid, idx);
486		return (0);
487	}
488
489	return (addr);
490}
491