priv.c revision 1.11
1/* $OpenBSD: priv.c,v 1.11 2017/08/31 09:00:46 mlarkin Exp $ */ 2 3/* 4 * Copyright (c) 2016 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/param.h> /* nitems */ 20#include <sys/queue.h> 21#include <sys/stat.h> 22#include <sys/socket.h> 23#include <sys/un.h> 24#include <sys/ioctl.h> 25#include <sys/tree.h> 26 27#include <net/if.h> 28#include <netinet/in.h> 29#include <netinet/if_ether.h> 30#include <net/if_bridge.h> 31 32#include <arpa/inet.h> 33 34#include <errno.h> 35#include <event.h> 36#include <fcntl.h> 37#include <stdlib.h> 38#include <stdio.h> 39#include <string.h> 40#include <unistd.h> 41#include <signal.h> 42#include <ctype.h> 43 44#include "proc.h" 45#include "vmd.h" 46 47int priv_dispatch_parent(int, struct privsep_proc *, struct imsg *); 48void priv_run(struct privsep *, struct privsep_proc *, void *); 49 50static struct privsep_proc procs[] = { 51 { "parent", PROC_PARENT, priv_dispatch_parent } 52}; 53 54void 55priv(struct privsep *ps, struct privsep_proc *p) 56{ 57 proc_run(ps, p, procs, nitems(procs), priv_run, NULL); 58} 59 60void 61priv_run(struct privsep *ps, struct privsep_proc *p, void *arg) 62{ 63 struct vmd *env = ps->ps_env; 64 65 /* 66 * no pledge(2) in the "priv" process: 67 * write ioctls are not permitted by pledge. 68 */ 69 70 /* Open our own socket for generic interface ioctls */ 71 if ((env->vmd_fd = socket(AF_INET, SOCK_DGRAM, 0)) == -1) 72 fatal("socket"); 73} 74 75int 76priv_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) 77{ 78 const char *desct[] = { "tap", "switch", "bridge", NULL }; 79 struct privsep *ps = p->p_ps; 80 struct vmop_ifreq vfr; 81 struct vmd *env = ps->ps_env; 82 struct ifreq ifr; 83 struct ifbreq ifbr; 84 struct ifgroupreq ifgr; 85 struct ifaliasreq ifra; 86 char type[IF_NAMESIZE]; 87 88 switch (imsg->hdr.type) { 89 case IMSG_VMDOP_PRIV_IFDESCR: 90 case IMSG_VMDOP_PRIV_IFCREATE: 91 case IMSG_VMDOP_PRIV_IFRDOMAIN: 92 case IMSG_VMDOP_PRIV_IFADD: 93 case IMSG_VMDOP_PRIV_IFUP: 94 case IMSG_VMDOP_PRIV_IFDOWN: 95 case IMSG_VMDOP_PRIV_IFGROUP: 96 case IMSG_VMDOP_PRIV_IFADDR: 97 IMSG_SIZE_CHECK(imsg, &vfr); 98 memcpy(&vfr, imsg->data, sizeof(vfr)); 99 100 /* We should not get malicious requests from the parent */ 101 if (priv_getiftype(vfr.vfr_name, type, NULL) == -1 || 102 priv_findname(type, desct) == -1) 103 fatalx("%s: rejected priv operation on interface: %s", 104 __func__, vfr.vfr_name); 105 break; 106 case IMSG_VMDOP_CONFIG: 107 case IMSG_CTL_RESET: 108 break; 109 default: 110 return (-1); 111 } 112 113 switch (imsg->hdr.type) { 114 case IMSG_VMDOP_PRIV_IFDESCR: 115 /* Set the interface description */ 116 strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name)); 117 ifr.ifr_data = (caddr_t)vfr.vfr_value; 118 if (ioctl(env->vmd_fd, SIOCSIFDESCR, &ifr) < 0) 119 log_warn("SIOCSIFDESCR"); 120 break; 121 case IMSG_VMDOP_PRIV_IFCREATE: 122 /* Create the bridge if it doesn't exist */ 123 strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name)); 124 if (ioctl(env->vmd_fd, SIOCIFCREATE, &ifr) < 0 && 125 errno != EEXIST) 126 log_warn("SIOCIFCREATE"); 127 break; 128 case IMSG_VMDOP_PRIV_IFRDOMAIN: 129 strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name)); 130 ifr.ifr_rdomainid = vfr.vfr_id; 131 if (ioctl(env->vmd_fd, SIOCSIFRDOMAIN, &ifr) < 0) 132 log_warn("SIOCSIFRDOMAIN"); 133 break; 134 case IMSG_VMDOP_PRIV_IFADD: 135 if (priv_getiftype(vfr.vfr_value, type, NULL) == -1) 136 fatalx("%s: rejected to add interface: %s", 137 __func__, vfr.vfr_value); 138 139 /* Attach the device to the bridge */ 140 strlcpy(ifbr.ifbr_name, vfr.vfr_name, 141 sizeof(ifbr.ifbr_name)); 142 strlcpy(ifbr.ifbr_ifsname, vfr.vfr_value, 143 sizeof(ifbr.ifbr_ifsname)); 144 if (ioctl(env->vmd_fd, SIOCBRDGADD, &ifbr) < 0 && 145 errno != EEXIST) 146 log_warn("SIOCBRDGADD"); 147 break; 148 case IMSG_VMDOP_PRIV_IFUP: 149 case IMSG_VMDOP_PRIV_IFDOWN: 150 /* Set the interface status */ 151 strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name)); 152 if (ioctl(env->vmd_fd, SIOCGIFFLAGS, &ifr) < 0) { 153 log_warn("SIOCGIFFLAGS"); 154 break; 155 } 156 if (imsg->hdr.type == IMSG_VMDOP_PRIV_IFUP) 157 ifr.ifr_flags |= IFF_UP; 158 else 159 ifr.ifr_flags &= ~IFF_UP; 160 if (ioctl(env->vmd_fd, SIOCSIFFLAGS, &ifr) < 0) 161 log_warn("SIOCSIFFLAGS"); 162 break; 163 case IMSG_VMDOP_PRIV_IFGROUP: 164 if (priv_validgroup(vfr.vfr_value) == -1) 165 fatalx("%s: invalid group name", __func__); 166 167 if (strlcpy(ifgr.ifgr_name, vfr.vfr_name, 168 sizeof(ifgr.ifgr_name)) >= sizeof(ifgr.ifgr_name) || 169 strlcpy(ifgr.ifgr_group, vfr.vfr_value, 170 sizeof(ifgr.ifgr_group)) >= sizeof(ifgr.ifgr_group)) 171 fatalx("%s: group name too long", __func__); 172 173 if (ioctl(env->vmd_fd, SIOCAIFGROUP, &ifgr) < 0 && 174 errno != EEXIST) 175 log_warn("SIOCAIFGROUP"); 176 break; 177 case IMSG_VMDOP_PRIV_IFADDR: 178 memset(&ifra, 0, sizeof(ifra)); 179 180 /* Set the interface address */ 181 strlcpy(ifra.ifra_name, vfr.vfr_name, sizeof(ifra.ifra_name)); 182 183 memcpy(&ifra.ifra_addr, &vfr.vfr_ifra.ifra_addr, 184 sizeof(ifra.ifra_addr)); 185 ifra.ifra_addr.sa_family = AF_INET; 186 ifra.ifra_addr.sa_len = sizeof(struct sockaddr_in); 187 188 memcpy(&ifra.ifra_mask, &vfr.vfr_ifra.ifra_mask, 189 sizeof(ifra.ifra_mask)); 190 ifra.ifra_mask.sa_family = AF_INET; 191 ifra.ifra_mask.sa_len = sizeof(struct sockaddr_in); 192 193 if (ioctl(env->vmd_fd, SIOCAIFADDR, &ifra) < 0) 194 log_warn("SIOCAIFADDR"); 195 break; 196 case IMSG_VMDOP_CONFIG: 197 config_getconfig(env, imsg); 198 break; 199 case IMSG_CTL_RESET: 200 config_getreset(env, imsg); 201 break; 202 default: 203 return (-1); 204 } 205 206 return (0); 207} 208 209int 210priv_getiftype(char *ifname, char *type, unsigned int *unitptr) 211{ 212 const char *errstr; 213 size_t span; 214 unsigned int unit; 215 216 /* Extract the name part */ 217 span = strcspn(ifname, "0123456789"); 218 if (span == 0 || span >= strlen(ifname) || span >= (IF_NAMESIZE - 1)) 219 return (-1); 220 memcpy(type, ifname, span); 221 type[span] = 0; 222 223 /* Now parse the unit (we don't strictly validate the format here) */ 224 unit = strtonum(ifname + span, 0, UINT_MAX, &errstr); 225 if (errstr != NULL) 226 return (-1); 227 if (unitptr != NULL) 228 *unitptr = unit; 229 230 return (0); 231} 232 233int 234priv_findname(const char *name, const char **names) 235{ 236 unsigned int i; 237 238 for (i = 0; names[i] != NULL; i++) { 239 if (strcmp(name, names[i]) == 0) 240 return (0); 241 } 242 243 return (-1); 244} 245 246int 247priv_validgroup(const char *name) 248{ 249 if (strlen(name) >= IF_NAMESIZE) 250 return (-1); 251 /* Group can not end with a digit */ 252 if (name[0] && isdigit(name[strlen(name) - 1])) 253 return (-1); 254 return (0); 255} 256 257/* 258 * Called from the process peer 259 */ 260 261int 262vm_priv_ifconfig(struct privsep *ps, struct vmd_vm *vm) 263{ 264 struct vmd *env = ps->ps_env; 265 struct vm_create_params *vcp = &vm->vm_params.vmc_params; 266 struct vmd_if *vif; 267 struct vmd_switch *vsw; 268 unsigned int i; 269 struct vmop_ifreq vfr, vfbr; 270 struct sockaddr_in *sin4; 271 272 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 273 vif = &vm->vm_ifs[i]; 274 275 if (vif->vif_name == NULL) 276 break; 277 278 if (strlcpy(vfr.vfr_name, vif->vif_name, 279 sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name)) 280 return (-1); 281 282 /* Use the configured rdomain or get it from the process */ 283 if (vif->vif_flags & VMIFF_RDOMAIN) 284 vfr.vfr_id = vif->vif_rdomain; 285 else 286 vfr.vfr_id = getrtable(); 287 if (vfr.vfr_id != 0) 288 log_debug("%s: interface %s rdomain %u", __func__, 289 vfr.vfr_name, vfr.vfr_id); 290 291 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN, 292 &vfr, sizeof(vfr)); 293 294 /* Description can be truncated */ 295 (void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value), 296 "vm%u-if%u-%s", vm->vm_vmid, i, vcp->vcp_name); 297 298 log_debug("%s: interface %s description %s", __func__, 299 vfr.vfr_name, vfr.vfr_value); 300 301 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR, 302 &vfr, sizeof(vfr)); 303 304 /* Add interface to bridge/switch */ 305 if ((vsw = switch_getbyname(vif->vif_switch)) != NULL) { 306 memset(&vfbr, 0, sizeof(vfbr)); 307 308 if (strlcpy(vfbr.vfr_name, vsw->sw_ifname, 309 sizeof(vfbr.vfr_name)) >= sizeof(vfbr.vfr_name)) 310 return (-1); 311 if (strlcpy(vfbr.vfr_value, vif->vif_name, 312 sizeof(vfbr.vfr_value)) >= sizeof(vfbr.vfr_value)) 313 return (-1); 314 if (vsw->sw_flags & VMIFF_RDOMAIN) 315 vfbr.vfr_id = vsw->sw_rdomain; 316 else 317 vfbr.vfr_id = getrtable(); 318 319 log_debug("%s: interface %s add %s", __func__, 320 vfbr.vfr_name, vfbr.vfr_value); 321 322 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFCREATE, 323 &vfbr, sizeof(vfbr)); 324 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN, 325 &vfbr, sizeof(vfbr)); 326 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD, 327 &vfbr, sizeof(vfbr)); 328 } else if (vif->vif_switch != NULL) 329 log_warnx("switch %s not found", vif->vif_switch); 330 331 /* First group is defined per-interface */ 332 if (vif->vif_group) { 333 if (strlcpy(vfr.vfr_value, vif->vif_group, 334 sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value)) 335 return (-1); 336 337 log_debug("%s: interface %s group %s", __func__, 338 vfr.vfr_name, vfr.vfr_value); 339 340 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP, 341 &vfr, sizeof(vfr)); 342 } 343 344 /* The second group is defined per-switch */ 345 if (vsw != NULL && vsw->sw_group != NULL) { 346 if (strlcpy(vfr.vfr_value, vsw->sw_group, 347 sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value)) 348 return (-1); 349 350 log_debug("%s: interface %s group %s switch %s", 351 __func__, vfr.vfr_name, vfr.vfr_value, 352 vsw->sw_name); 353 354 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP, 355 &vfr, sizeof(vfr)); 356 } 357 358 /* Set the new interface status to up or down */ 359 proc_compose(ps, PROC_PRIV, (vif->vif_flags & VMIFF_UP) ? 360 IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN, 361 &vfr, sizeof(vfr)); 362 363 if (vm->vm_params.vmc_ifflags[i] & VMIFF_LOCAL) { 364 sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_mask; 365 sin4->sin_family = AF_INET; 366 sin4->sin_len = sizeof(*sin4); 367 sin4->sin_addr.s_addr = htonl(0xfffffffe); 368 369 sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_addr; 370 sin4->sin_family = AF_INET; 371 sin4->sin_len = sizeof(*sin4); 372 if ((sin4->sin_addr.s_addr = 373 vm_priv_addr(&env->vmd_cfg.cfg_localprefix, 374 vm->vm_vmid, i, 0)) == 0) 375 return (-1); 376 377 log_debug("%s: interface %s address %s/31", 378 __func__, vfr.vfr_name, 379 inet_ntoa(sin4->sin_addr)); 380 381 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADDR, 382 &vfr, sizeof(vfr)); 383 } 384 } 385 386 return (0); 387} 388 389int 390vm_priv_brconfig(struct privsep *ps, struct vmd_switch *vsw) 391{ 392 struct vmd_if *vif; 393 struct vmop_ifreq vfr; 394 395 memset(&vfr, 0, sizeof(vfr)); 396 397 if (strlcpy(vfr.vfr_name, vsw->sw_ifname, 398 sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name)) 399 return (-1); 400 401 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFCREATE, 402 &vfr, sizeof(vfr)); 403 404 /* Use the configured rdomain or get it from the process */ 405 if (vsw->sw_flags & VMIFF_RDOMAIN) 406 vfr.vfr_id = vsw->sw_rdomain; 407 else 408 vfr.vfr_id = getrtable(); 409 if (vfr.vfr_id != 0) 410 log_debug("%s: interface %s rdomain %u", __func__, 411 vfr.vfr_name, vfr.vfr_id); 412 413 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN, 414 &vfr, sizeof(vfr)); 415 416 /* Description can be truncated */ 417 (void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value), 418 "switch%u-%s", vsw->sw_id, vsw->sw_name); 419 420 log_debug("%s: interface %s description %s", __func__, 421 vfr.vfr_name, vfr.vfr_value); 422 423 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR, 424 &vfr, sizeof(vfr)); 425 426 TAILQ_FOREACH(vif, &vsw->sw_ifs, vif_entry) { 427 if (strlcpy(vfr.vfr_value, vif->vif_name, 428 sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value)) 429 return (-1); 430 431 log_debug("%s: interface %s add %s", __func__, 432 vfr.vfr_name, vfr.vfr_value); 433 434 proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD, 435 &vfr, sizeof(vfr)); 436 } 437 438 /* Set the new interface status to up or down */ 439 proc_compose(ps, PROC_PRIV, (vsw->sw_flags & VMIFF_UP) ? 440 IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN, 441 &vfr, sizeof(vfr)); 442 443 vsw->sw_running = 1; 444 return (0); 445} 446 447uint32_t 448vm_priv_addr(struct address *h, uint32_t vmid, int idx, int isvm) 449{ 450 in_addr_t prefix, mask, addr; 451 452 /* 453 * 1. Set the address prefix and mask, 100.64.0.0/10 by default. 454 */ 455 if (h->ss.ss_family != AF_INET || 456 h->prefixlen < 0 || h->prefixlen > 32) 457 fatal("local prefix"); 458 prefix = ss2sin(&h->ss)->sin_addr.s_addr; 459 mask = prefixlen2mask(h->prefixlen); 460 461 /* 2. Encode the VM ID as a per-VM subnet range N, 100.64.N.0/24. */ 462 addr = vmid << 8; 463 464 /* 465 * 3. Assign a /31 subnet M per VM interface, 100.64.N.M/31. 466 * Each subnet contains exactly two IP addresses; skip the 467 * first subnet to avoid a gateway address ending with .0. 468 */ 469 addr |= (idx + 1) * 2; 470 471 /* 4. Use the first address for the gateway, the second for the VM. */ 472 if (isvm) 473 addr++; 474 475 /* 5. Convert to network byte order and add the prefix. */ 476 addr = htonl(addr) | prefix; 477 478 /* 479 * Validate the results: 480 * - the address should not exceed the prefix (eg. VM ID to high). 481 * - up to 126 interfaces can be encoded per VM. 482 */ 483 if (prefix != (addr & mask) || idx >= 0x7f) { 484 log_warnx("%s: dhcp address range exceeded," 485 " vm id %u interface %d", __func__, vmid, idx); 486 return (0); 487 } 488 489 return (addr); 490} 491