priv.c revision 1.22
/*	$OpenBSD: priv.c,v 1.22 2023/01/28 14:40:53 dv Exp $	*/

/*
 * Copyright (c) 2016 Reyk Floeter <reyk@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18
19#include <sys/types.h>
20#include <sys/queue.h>
21#include <sys/stat.h>
22#include <sys/socket.h>
23#include <sys/un.h>
24#include <sys/ioctl.h>
25#include <sys/tree.h>
26
27#include <net/if.h>
28#include <netinet/in.h>
29#include <netinet/if_ether.h>
30#include <netinet6/in6_var.h>
31#include <netinet6/nd6.h>
32#include <net/if_bridge.h>
33
34#include <arpa/inet.h>
35
36#include <errno.h>
37#include <event.h>
38#include <fcntl.h>
39#include <stdlib.h>
40#include <stdio.h>
41#include <string.h>
42#include <unistd.h>
43#include <signal.h>
44#include <ctype.h>
45
46#include "proc.h"
47#include "vmd.h"
48
49int	 priv_dispatch_parent(int, struct privsep_proc *, struct imsg *);
50void	 priv_run(struct privsep *, struct privsep_proc *, void *);
51
52static struct privsep_proc procs[] = {
53	{ "parent",	PROC_PARENT,	priv_dispatch_parent }
54};
55
56void
57priv(struct privsep *ps, struct privsep_proc *p)
58{
59	proc_run(ps, p, procs, nitems(procs), priv_run, NULL);
60}
61
62void
63priv_run(struct privsep *ps, struct privsep_proc *p, void *arg)
64{
65	struct vmd		*env = ps->ps_env;
66
67	/*
68	 * no pledge(2) in the "priv" process:
69	 * write ioctls are not permitted by pledge.
70	 */
71
72	/* Open our own socket for generic interface ioctls */
73	if ((env->vmd_fd = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
74		fatal("socket");
75
76	/* But we need a different fd for IPv6 */
77	if ((env->vmd_fd6 = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
78		fatal("socket6");
79}
80
81int
82priv_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
83{
84	const char		*desct[] = { "tap", "bridge", "veb", NULL };
85	struct privsep		*ps = p->p_ps;
86	struct vmop_ifreq	 vfr;
87	struct vmd		*env = ps->ps_env;
88	struct ifreq		 ifr;
89	struct ifbreq		 ifbr;
90	struct ifgroupreq	 ifgr;
91	struct ifaliasreq	 ifra;
92	struct in6_aliasreq	 in6_ifra;
93	struct if_afreq		 ifar;
94	struct vmop_addr_req	 vareq;
95	struct vmop_addr_result	 varesult;
96	char			 type[IF_NAMESIZE];
97
98	switch (imsg->hdr.type) {
99	case IMSG_VMDOP_PRIV_IFDESCR:
100	case IMSG_VMDOP_PRIV_IFRDOMAIN:
101	case IMSG_VMDOP_PRIV_IFEXISTS:
102	case IMSG_VMDOP_PRIV_IFADD:
103	case IMSG_VMDOP_PRIV_IFUP:
104	case IMSG_VMDOP_PRIV_IFDOWN:
105	case IMSG_VMDOP_PRIV_IFGROUP:
106	case IMSG_VMDOP_PRIV_IFADDR:
107	case IMSG_VMDOP_PRIV_IFADDR6:
108		IMSG_SIZE_CHECK(imsg, &vfr);
109		memcpy(&vfr, imsg->data, sizeof(vfr));
110
111		/* We should not get malicious requests from the parent */
112		if (priv_getiftype(vfr.vfr_name, type, NULL) == -1 ||
113		    priv_findname(type, desct) == -1)
114			fatalx("%s: rejected priv operation on interface: %s",
115			    __func__, vfr.vfr_name);
116		break;
117	case IMSG_VMDOP_CONFIG:
118	case IMSG_CTL_RESET:
119	case IMSG_VMDOP_PRIV_GET_ADDR:
120		break;
121	default:
122		return (-1);
123	}
124
125	switch (imsg->hdr.type) {
126	case IMSG_VMDOP_PRIV_IFDESCR:
127		/* Set the interface description */
128		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
129		ifr.ifr_data = (caddr_t)vfr.vfr_value;
130		if (ioctl(env->vmd_fd, SIOCSIFDESCR, &ifr) == -1)
131			log_warn("SIOCSIFDESCR");
132		break;
133	case IMSG_VMDOP_PRIV_IFRDOMAIN:
134		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
135		ifr.ifr_rdomainid = vfr.vfr_id;
136		if (ioctl(env->vmd_fd, SIOCSIFRDOMAIN, &ifr) == -1)
137			log_warn("SIOCSIFRDOMAIN");
138		break;
139	case IMSG_VMDOP_PRIV_IFADD:
140		if (priv_getiftype(vfr.vfr_value, type, NULL) == -1)
141			fatalx("%s: rejected to add interface: %s",
142			    __func__, vfr.vfr_value);
143
144		/* Attach the device to the bridge */
145		strlcpy(ifbr.ifbr_name, vfr.vfr_name,
146		    sizeof(ifbr.ifbr_name));
147		strlcpy(ifbr.ifbr_ifsname, vfr.vfr_value,
148		    sizeof(ifbr.ifbr_ifsname));
149		if (ioctl(env->vmd_fd, SIOCBRDGADD, &ifbr) == -1 &&
150		    errno != EEXIST)
151			log_warn("SIOCBRDGADD");
152		break;
153	case IMSG_VMDOP_PRIV_IFEXISTS:
154		/* Determine if bridge exists */
155		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
156		if (ioctl(env->vmd_fd, SIOCGIFFLAGS, &ifr) == -1)
157			fatalx("%s: bridge \"%s\" does not exist",
158			    __func__, vfr.vfr_name);
159		break;
160	case IMSG_VMDOP_PRIV_IFUP:
161	case IMSG_VMDOP_PRIV_IFDOWN:
162		/* Set the interface status */
163		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
164		if (ioctl(env->vmd_fd, SIOCGIFFLAGS, &ifr) == -1) {
165			log_warn("SIOCGIFFLAGS");
166			break;
167		}
168		if (imsg->hdr.type == IMSG_VMDOP_PRIV_IFUP)
169			ifr.ifr_flags |= IFF_UP;
170		else
171			ifr.ifr_flags &= ~IFF_UP;
172		if (ioctl(env->vmd_fd, SIOCSIFFLAGS, &ifr) == -1)
173			log_warn("SIOCSIFFLAGS");
174		break;
175	case IMSG_VMDOP_PRIV_IFGROUP:
176		if (priv_validgroup(vfr.vfr_value) == -1)
177			fatalx("%s: invalid group name", __func__);
178
179		if (strlcpy(ifgr.ifgr_name, vfr.vfr_name,
180		    sizeof(ifgr.ifgr_name)) >= sizeof(ifgr.ifgr_name) ||
181		    strlcpy(ifgr.ifgr_group, vfr.vfr_value,
182		    sizeof(ifgr.ifgr_group)) >= sizeof(ifgr.ifgr_group))
183			fatalx("%s: group name too long", __func__);
184
185		if (ioctl(env->vmd_fd, SIOCAIFGROUP, &ifgr) == -1 &&
186		    errno != EEXIST)
187			log_warn("SIOCAIFGROUP");
188		break;
189	case IMSG_VMDOP_PRIV_IFADDR:
190		memset(&ifra, 0, sizeof(ifra));
191
192		if (vfr.vfr_addr.ss_family != AF_INET ||
193		    vfr.vfr_addr.ss_family != vfr.vfr_mask.ss_family)
194			fatalx("%s: invalid address family", __func__);
195
196		/* Set the interface address */
197		strlcpy(ifra.ifra_name, vfr.vfr_name, sizeof(ifra.ifra_name));
198
199		ifra.ifra_addr.sa_len =
200		    ifra.ifra_mask.sa_len =
201		    sizeof(struct sockaddr_in);
202
203		memcpy(&ifra.ifra_addr, &vfr.vfr_addr,
204		    ifra.ifra_addr.sa_len);
205		memcpy(&ifra.ifra_mask, &vfr.vfr_mask,
206		    ifra.ifra_mask.sa_len);
207
208		if (ioctl(env->vmd_fd, SIOCAIFADDR, &ifra) == -1)
209			log_warn("SIOCAIFADDR");
210		break;
211	case IMSG_VMDOP_PRIV_IFADDR6:
212		memset(&ifar, 0, sizeof(ifar));
213		memset(&in6_ifra, 0, sizeof(in6_ifra));
214
215		if (vfr.vfr_addr.ss_family != AF_INET6 ||
216		    vfr.vfr_addr.ss_family != vfr.vfr_mask.ss_family)
217			fatalx("%s: invalid address family", __func__);
218
219		/* First enable IPv6 on this interface */
220		strlcpy(ifar.ifar_name, vfr.vfr_name,
221		    sizeof(ifar.ifar_name));
222		ifar.ifar_af = AF_INET6;
223		if (ioctl(env->vmd_fd, SIOCIFAFATTACH, (caddr_t)&ifar) == -1)
224			log_warn("SIOCIFAFATTACH");
225
226		/* Set the interface address */
227		strlcpy(in6_ifra.ifra_name, vfr.vfr_name,
228		    sizeof(in6_ifra.ifra_name));
229
230		in6_ifra.ifra_addr.sin6_len =
231		    in6_ifra.ifra_prefixmask.sin6_len =
232		    sizeof(struct sockaddr_in6);
233
234		memcpy(&in6_ifra.ifra_addr, &vfr.vfr_addr,
235		    in6_ifra.ifra_addr.sin6_len);
236		memcpy(&in6_ifra.ifra_prefixmask, &vfr.vfr_mask,
237		    in6_ifra.ifra_prefixmask.sin6_len);
238		in6_ifra.ifra_prefixmask.sin6_scope_id = 0;
239
240		in6_ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
241		in6_ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
242
243		if (ioctl(env->vmd_fd6, SIOCDIFADDR_IN6, &in6_ifra) == -1 &&
244		    errno != EADDRNOTAVAIL)
245			log_warn("SIOCDIFADDR_IN6");
246
247		if (ioctl(env->vmd_fd6, SIOCAIFADDR_IN6, &in6_ifra) == -1)
248			log_warn("SIOCAIFADDR_IN6");
249		break;
250	case IMSG_VMDOP_PRIV_GET_ADDR:
251		IMSG_SIZE_CHECK(imsg, &vareq);
252		memcpy(&vareq, imsg->data, sizeof(vareq));
253
254		varesult.var_vmid = vareq.var_vmid;
255		varesult.var_nic_idx = vareq.var_nic_idx;
256
257		/* resolve lladdr for the tap(4) and send back to parent */
258		if (ioctl(imsg->fd, SIOCGIFADDR, &varesult.var_addr) != 0)
259			log_warn("SIOCGIFADDR");
260		else
261			proc_compose_imsg(ps, PROC_PARENT, -1,
262			    IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE, imsg->hdr.peerid,
263			    -1, &varesult, sizeof(varesult));
264		close(imsg->fd);
265		break;
266	case IMSG_VMDOP_CONFIG:
267		config_getconfig(env, imsg);
268		break;
269	case IMSG_CTL_RESET:
270		config_getreset(env, imsg);
271		break;
272	default:
273		return (-1);
274	}
275
276	return (0);
277}
278
/*
 * Split an interface name of the form <type><unit>, e.g. "tap0", into
 * its two parts.
 *
 * ifname:  NUL-terminated interface name to split
 * type:    receives the NUL-terminated part in front of the first digit;
 *          at most IF_NAMESIZE - 1 bytes are written, terminator included
 * unitptr: if not NULL, receives the unit number
 *
 * Returns 0 on success.  Returns -1 if the name starts with a digit,
 * contains no digit at all, has a type part of IF_NAMESIZE - 1 or more
 * characters, or if strtonum(3) does not accept the rest of the name as
 * a number between 0 and UINT_MAX.  In the last case type has already
 * been written.
 */
int
priv_getiftype(char *ifname, char *type, unsigned int *unitptr)
{
	const char	*errstr;
	const size_t	 typelen = strcspn(ifname, "0123456789");
	long long	 num;

	/* Need a non-empty type that is followed by at least one digit */
	if (typelen == 0)
		return (-1);
	if (ifname[typelen] == '\0')
		return (-1);
	if (typelen >= (IF_NAMESIZE - 1))
		return (-1);

	/* Extract the name part */
	memcpy(type, ifname, typelen);
	type[typelen] = '\0';

	/* Everything after the type has to be the unit number */
	num = strtonum(ifname + typelen, 0, UINT_MAX, &errstr);
	if (errstr != NULL)
		return (-1);

	if (unitptr != NULL)
		*unitptr = num;

	return (0);
}
302
/*
 * Look up name in the NULL-terminated string array names.
 * Returns 0 if an identical string is found and -1 otherwise.
 */
int
priv_findname(const char *name, const char **names)
{
	const char	**np;

	for (np = names; *np != NULL; np++) {
		if (strcmp(*np, name) == 0)
			return (0);
	}

	return (-1);
}
315
/*
 * Check an interface group name.
 * Returns -1 if the name is not terminated within its first IF_NAMESIZE
 * bytes or if its last character is a digit, and 0 otherwise.  The empty
 * string is accepted.
 */
int
priv_validgroup(const char *name)
{
	const char	*end = memchr(name, '\0', IF_NAMESIZE);

	/* No terminator within IF_NAMESIZE bytes: the name is too long */
	if (end == NULL)
		return (-1);

	/* Nothing more to check on an empty name */
	if (end == name)
		return (0);

	/* Group can not end with a digit */
	return (isdigit((unsigned char)end[-1]) ? -1 : 0);
}
328
329/*
330 * Called from the Parent process to setup vm interface(s)
331 * - ensure the interface has the description set (tracking purposes)
332 * - if interface is to be attached to a switch, attach it
333 * - check if rdomain is set on interface and switch
334 *   - if interface only or both, use interface rdomain
335 *   - if switch only, use switch rdomain
336 * - check if group is set on interface and switch
337 *   - if interface, add it
338 *   - if switch, add it
339 * - ensure the interface is up/down
340 * - if local interface, set address
341 */
342int
343vm_priv_ifconfig(struct privsep *ps, struct vmd_vm *vm)
344{
345	char			 name[64];
346	struct vmd		*env = ps->ps_env;
347	struct vm_create_params	*vcp = &vm->vm_params.vmc_params;
348	struct vmd_if		*vif;
349	struct vmd_switch	*vsw;
350	unsigned int		 i;
351	struct vmop_ifreq	 vfr, vfbr;
352	struct sockaddr_in	*sin4;
353	struct sockaddr_in6	*sin6;
354
355	for (i = 0; i < VM_MAX_NICS_PER_VM; i++) {
356		vif = &vm->vm_ifs[i];
357
358		if (vif->vif_name == NULL)
359			break;
360
361		memset(&vfr, 0, sizeof(vfr));
362		if (strlcpy(vfr.vfr_name, vif->vif_name,
363		    sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name))
364			return (-1);
365
366		/* Description can be truncated */
367		(void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value),
368		    "vm%u-if%u-%s", vm->vm_vmid, i, vcp->vcp_name);
369
370		log_debug("%s: interface %s description %s", __func__,
371		    vfr.vfr_name, vfr.vfr_value);
372
373		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR,
374		    &vfr, sizeof(vfr));
375
376		/* set default rdomain */
377		vfr.vfr_id = getrtable();
378
379		vsw = switch_getbyname(vif->vif_switch);
380
381		/* Check if switch should exist */
382		if (vsw == NULL && vif->vif_switch != NULL)
383			log_warnx("switch \"%s\" not found", vif->vif_switch);
384
385		/* Add interface to switch and set proper rdomain */
386		if (vsw != NULL) {
387			memset(&vfbr, 0, sizeof(vfbr));
388
389			if (strlcpy(vfbr.vfr_name, vsw->sw_ifname,
390			    sizeof(vfbr.vfr_name)) >= sizeof(vfbr.vfr_name))
391				return (-1);
392			if (strlcpy(vfbr.vfr_value, vif->vif_name,
393			    sizeof(vfbr.vfr_value)) >= sizeof(vfbr.vfr_value))
394				return (-1);
395
396			log_debug("%s: switch \"%s\" interface %s add %s",
397			    __func__, vsw->sw_name, vfbr.vfr_name,
398			    vfbr.vfr_value);
399
400			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD,
401			    &vfbr, sizeof(vfbr));
402
403			/* Check rdomain properties */
404			if (vif->vif_flags & VMIFF_RDOMAIN)
405				vfr.vfr_id = vif->vif_rdomain;
406			else if (vsw->sw_flags & VMIFF_RDOMAIN)
407				vfr.vfr_id = vsw->sw_rdomain;
408		} else {
409			/* No switch to attach case */
410			if (vif->vif_flags & VMIFF_RDOMAIN)
411				vfr.vfr_id = vif->vif_rdomain;
412		}
413
414		/* Set rdomain on interface */
415		if (vfr.vfr_id != 0)
416			log_debug("%s: interface %s rdomain %u", __func__,
417			    vfr.vfr_name, vfr.vfr_id);
418
419		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
420		    &vfr, sizeof(vfr));
421
422		/* First group is defined per-interface */
423		if (vif->vif_group) {
424			if (strlcpy(vfr.vfr_value, vif->vif_group,
425			    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
426				return (-1);
427
428			log_debug("%s: interface %s group %s", __func__,
429			    vfr.vfr_name, vfr.vfr_value);
430
431			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP,
432			    &vfr, sizeof(vfr));
433		}
434
435		/* The second group is defined per-switch */
436		if (vsw != NULL && vsw->sw_group != NULL) {
437			if (strlcpy(vfr.vfr_value, vsw->sw_group,
438			    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
439				return (-1);
440
441			log_debug("%s: interface %s group %s switch \"%s\"",
442			    __func__, vfr.vfr_name, vfr.vfr_value,
443			    vsw->sw_name);
444
445			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP,
446			    &vfr, sizeof(vfr));
447		}
448
449		/* Set the new interface status to up or down */
450		proc_compose(ps, PROC_PRIV, (vif->vif_flags & VMIFF_UP) ?
451		    IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
452		    &vfr, sizeof(vfr));
453
454		/* Set interface address if it is a local interface */
455		if (vm->vm_params.vmc_ifflags[i] & VMIFF_LOCAL) {
456			memset(&vfr.vfr_mask, 0, sizeof(vfr.vfr_mask));
457			memset(&vfr.vfr_addr, 0, sizeof(vfr.vfr_addr));
458
459			/* local IPv4 address with a /31 mask */
460			sin4 = (struct sockaddr_in *)&vfr.vfr_mask;
461			sin4->sin_family = AF_INET;
462			sin4->sin_len = sizeof(*sin4);
463			sin4->sin_addr.s_addr = htonl(0xfffffffe);
464
465			sin4 = (struct sockaddr_in *)&vfr.vfr_addr;
466			sin4->sin_family = AF_INET;
467			sin4->sin_len = sizeof(*sin4);
468			if ((sin4->sin_addr.s_addr =
469			    vm_priv_addr(&env->vmd_cfg,
470			    vm->vm_vmid, i, 0)) == 0)
471				return (-1);
472
473			inet_ntop(AF_INET, &sin4->sin_addr,
474			    name, sizeof(name));
475			log_debug("%s: interface %s address %s/31",
476			    __func__, vfr.vfr_name, name);
477
478			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADDR,
479			    &vfr, sizeof(vfr));
480		}
481		if ((vm->vm_params.vmc_ifflags[i] & VMIFF_LOCAL) &&
482		    (env->vmd_cfg.cfg_flags & VMD_CFG_INET6)) {
483			memset(&vfr.vfr_mask, 0, sizeof(vfr.vfr_mask));
484			memset(&vfr.vfr_addr, 0, sizeof(vfr.vfr_addr));
485
486			/* local IPv6 address with a /96 mask */
487			sin6 = ss2sin6(&vfr.vfr_mask);
488			sin6->sin6_family = AF_INET6;
489			sin6->sin6_len = sizeof(*sin6);
490			memset(&sin6->sin6_addr.s6_addr[0], 0xff, 12);
491			memset(&sin6->sin6_addr.s6_addr[12], 0, 4);
492
493			sin6 = ss2sin6(&vfr.vfr_addr);
494			sin6->sin6_family = AF_INET6;
495			sin6->sin6_len = sizeof(*sin6);
496			if (vm_priv_addr6(&env->vmd_cfg,
497			    vm->vm_vmid, i, 0, &sin6->sin6_addr) == -1)
498				return (-1);
499
500			inet_ntop(AF_INET6, &sin6->sin6_addr,
501			    name, sizeof(name));
502			log_debug("%s: interface %s address %s/96",
503			    __func__, vfr.vfr_name, name);
504
505			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADDR6,
506			    &vfr, sizeof(vfr));
507		}
508	}
509
510	return (0);
511}
512
513/*
514 * Called from the Parent process to setup underlying switch interface
515 * - ensure the interface exists
516 * - ensure the interface has the correct rdomain set
517 * - ensure the interface has the description set (tracking purposes)
518 * - ensure the interface is up/down
519 */
520int
521vm_priv_brconfig(struct privsep *ps, struct vmd_switch *vsw)
522{
523	struct vmop_ifreq	 vfr;
524
525	memset(&vfr, 0, sizeof(vfr));
526
527	if (strlcpy(vfr.vfr_name, vsw->sw_ifname,
528	    sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name))
529		return (-1);
530
531	/* ensure bridge exists */
532	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFEXISTS,
533	    &vfr, sizeof(vfr));
534
535	/* Use the configured rdomain or get it from the process */
536	if (vsw->sw_flags & VMIFF_RDOMAIN)
537		vfr.vfr_id = vsw->sw_rdomain;
538	else
539		vfr.vfr_id = getrtable();
540	if (vfr.vfr_id != 0)
541		log_debug("%s: interface %s rdomain %u", __func__,
542		    vfr.vfr_name, vfr.vfr_id);
543
544	/* ensure bridge has the correct rdomain */
545	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
546	    &vfr, sizeof(vfr));
547
548	/* Description can be truncated */
549	(void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value),
550	    "switch%u-%s", vsw->sw_id, vsw->sw_name);
551
552	log_debug("%s: interface %s description %s", __func__,
553	    vfr.vfr_name, vfr.vfr_value);
554
555	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR,
556	    &vfr, sizeof(vfr));
557
558	/* Set the new interface status to up or down */
559	proc_compose(ps, PROC_PRIV, (vsw->sw_flags & VMIFF_UP) ?
560	    IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
561	    &vfr, sizeof(vfr));
562
563	vsw->sw_running = 1;
564	return (0);
565}
566
567uint32_t
568vm_priv_addr(struct vmd_config *cfg, uint32_t vmid, int idx, int isvm)
569{
570	struct address		*h = &cfg->cfg_localprefix;
571	in_addr_t		 prefix, mask, addr;
572
573	/*
574	 * 1. Set the address prefix and mask, 100.64.0.0/10 by default.
575	 */
576	if (h->ss.ss_family != AF_INET ||
577	    h->prefixlen < 0 || h->prefixlen > 32)
578		fatal("local prefix");
579	prefix = ss2sin(&h->ss)->sin_addr.s_addr;
580	mask = prefixlen2mask(h->prefixlen);
581
582	/* 2. Encode the VM ID as a per-VM subnet range N, 100.64.N.0/24. */
583	addr = vmid << 8;
584
585	/*
586	 * 3. Assign a /31 subnet M per VM interface, 100.64.N.M/31.
587	 * Each subnet contains exactly two IP addresses; skip the
588	 * first subnet to avoid a gateway address ending with .0.
589	 */
590	addr |= (idx + 1) * 2;
591
592	/* 4. Use the first address for the gateway, the second for the VM. */
593	if (isvm)
594		addr++;
595
596	/* 5. Convert to network byte order and add the prefix. */
597	addr = htonl(addr) | prefix;
598
599	/*
600	 * Validate the results:
601	 * - the address should not exceed the prefix (eg. VM ID to high).
602	 * - up to 126 interfaces can be encoded per VM.
603	 */
604	if (prefix != (addr & mask) || idx >= 0x7f) {
605		log_warnx("%s: dhcp address range exceeded,"
606		    " vm id %u interface %d", __func__, vmid, idx);
607		return (0);
608	}
609
610	return (addr);
611}
612
613int
614vm_priv_addr6(struct vmd_config *cfg, uint32_t vmid,
615    int idx, int isvm, struct in6_addr *in6_addr)
616{
617	struct address		*h = &cfg->cfg_localprefix6;
618	struct in6_addr		 addr, mask;
619	uint32_t		 addr4;
620
621	/* 1. Set the address prefix and mask, fd00::/8 by default. */
622	if (h->ss.ss_family != AF_INET6 ||
623	    h->prefixlen < 0 || h->prefixlen > 128)
624		fatal("local prefix6");
625	addr = ss2sin6(&h->ss)->sin6_addr;
626	prefixlen2mask6(h->prefixlen, &mask);
627
628	/* 2. Encode the VM IPv4 address as subnet, fd00::NN:NN:0:0/96. */
629	if ((addr4 = vm_priv_addr(cfg, vmid, idx, 1)) == 0)
630		return (0);
631	memcpy(&addr.s6_addr[8], &addr4, sizeof(addr4));
632
633	/*
634	 * 3. Set the last octet to 1 (host) or 2 (VM).
635	 * The latter is currently not used inside vmd as we don't
636	 * answer rtsol requests ourselves.
637	 */
638	if (!isvm)
639		addr.s6_addr[15] = 1;
640	else
641		addr.s6_addr[15] = 2;
642
643	memcpy(in6_addr, &addr, sizeof(*in6_addr));
644
645	return (0);
646}
647