1/* $OpenBSD: kroute.c,v 1.71 2023/03/08 04:43:13 guenther Exp $ */ 2 3/* 4 * Copyright (c) 2015, 2016 Renato Westphal <renato@openbsd.org> 5 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org> 6 * Copyright (c) 2004 Esben Norby <norby@openbsd.org> 7 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> 8 * 9 * Permission to use, copy, modify, and distribute this software for any 10 * purpose with or without fee is hereby granted, provided that the above 11 * copyright notice and this permission notice appear in all copies. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 20 */ 21 22#include <sys/types.h> 23#include <sys/socket.h> 24#include <sys/ioctl.h> 25#include <sys/sysctl.h> 26#include <arpa/inet.h> 27#include <net/if_dl.h> 28#include <net/if_types.h> 29#include <net/route.h> 30#include <netmpls/mpls.h> 31#include <errno.h> 32#include <stdlib.h> 33#include <string.h> 34#include <unistd.h> 35#include <limits.h> 36 37#include "ldpd.h" 38#include "log.h" 39 40struct { 41 uint32_t rtseq; 42 pid_t pid; 43 int fib_sync; 44 int fd; 45 int ioctl_fd; 46 struct event ev; 47 unsigned int rdomain; 48} kr_state; 49 50struct kroute_node { 51 TAILQ_ENTRY(kroute_node) entry; 52 struct kroute_priority *kprio; /* back pointer */ 53 struct kroute r; 54}; 55 56struct kroute_priority { 57 TAILQ_ENTRY(kroute_priority) entry; 58 struct kroute_prefix *kp; /* back pointer */ 59 uint8_t priority; 60 TAILQ_HEAD(, kroute_node) nexthops; 61}; 62 63struct kroute_prefix { 64 RB_ENTRY(kroute_prefix) entry; 65 int af; 66 union ldpd_addr prefix; 67 uint8_t prefixlen; 68 TAILQ_HEAD(plist, kroute_priority) priorities; 69}; 70RB_HEAD(kroute_tree, kroute_prefix); 71RB_PROTOTYPE(kroute_tree, kroute_prefix, entry, kroute_compare) 72 73struct kif_addr { 74 TAILQ_ENTRY(kif_addr) entry; 75 struct kaddr a; 76}; 77 78struct kif_node { 79 RB_ENTRY(kif_node) entry; 80 TAILQ_HEAD(, kif_addr) addrs; 81 struct kif k; 82 struct kpw *kpw; 83}; 84RB_HEAD(kif_tree, kif_node); 85RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare) 86 87static void kr_dispatch_msg(int, short, void *); 88static void kr_redist_remove(struct kroute *); 89static int kr_redist_eval(struct kroute *); 90static void kr_redistribute(struct kroute_prefix *); 91static __inline int kroute_compare(struct kroute_prefix *, 92 struct kroute_prefix *); 93static struct kroute_prefix *kroute_find_prefix(int, union ldpd_addr *, 94 uint8_t); 95static struct kroute_priority *kroute_find_prio(struct kroute_prefix *, 96 uint8_t); 97static struct kroute_node *kroute_find_gw(struct kroute_priority *, 98 union ldpd_addr *); 99static int kroute_insert(struct kroute *); 100static int kroute_uninstall(struct kroute_node *); 101static int kroute_remove(struct kroute *); 102static void kroute_clear(void); 103static __inline int kif_compare(struct kif_node *, struct kif_node *); 104static struct kif_node *kif_find(unsigned short); 105static struct kif_node *kif_insert(unsigned short); 106static int kif_remove(struct kif_node *); 107static struct kif_node *kif_update(unsigned short, int, struct if_data *, 108 struct sockaddr_dl *, int *); 109static struct kroute_priority *kroute_match(int, union ldpd_addr *); 110static uint8_t prefixlen_classful(in_addr_t); 111static void get_rtaddrs(int, struct sockaddr *, 112 struct sockaddr **); 113static void if_change(unsigned short, int, struct if_data *, 114 struct sockaddr_dl *); 115static void if_newaddr(unsigned short, struct sockaddr *, 116 struct sockaddr *, struct sockaddr *); 117static void if_deladdr(unsigned short, struct sockaddr *, 118 struct sockaddr *, struct sockaddr *); 119static void if_announce(void *); 120static int send_rtmsg(int, int, struct kroute *, int); 121static int send_rtmsg_v4(int fd, int, struct kroute *, int); 122static int send_rtmsg_v6(int fd, int, struct kroute *, int); 123static int fetchtable(void); 124static int fetchifs(void); 125static int dispatch_rtmsg(void); 126static int rtmsg_process(char *, size_t); 127static int rtmsg_process_route(struct rt_msghdr *, 128 struct sockaddr *[RTAX_MAX]); 129static int kmpw_install(const char *, struct kpw *); 130static int kmpw_uninstall(const char *); 131 132RB_GENERATE(kroute_tree, kroute_prefix, entry, kroute_compare) 133RB_GENERATE(kif_tree, kif_node, entry, kif_compare) 134 135static struct kroute_tree krt = RB_INITIALIZER(&krt); 136static struct kif_tree kit = RB_INITIALIZER(&kit); 137 138int 139kif_init(void) 140{ 141 if (fetchifs() == -1) 142 return (-1); 143 144 if ((kr_state.ioctl_fd = socket(AF_INET, 145 SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) { 146 log_warn("%s: ioctl socket", __func__); 147 return (-1); 148 } 149 150 return (0); 151} 152 153int 154kr_init(int fs, unsigned int rdomain) 155{ 156 int opt = 0, rcvbuf, default_rcvbuf; 157 socklen_t optlen; 158 unsigned int rtfilter; 159 160 kr_state.fib_sync = fs; 161 kr_state.rdomain = rdomain; 162 163 if ((kr_state.fd = socket(AF_ROUTE, 164 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) { 165 log_warn("%s: socket", __func__); 166 return (-1); 167 } 168 169 /* not interested in my own messages */ 170 if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK, 171 &opt, sizeof(opt)) == -1) 172 log_warn("%s: setsockopt(SO_USELOOPBACK)", __func__); 173 174 /* filter out unwanted messages */ 175 rtfilter = ROUTE_FILTER(RTM_ADD) | ROUTE_FILTER(RTM_GET) | 176 ROUTE_FILTER(RTM_CHANGE) | ROUTE_FILTER(RTM_DELETE) | 177 ROUTE_FILTER(RTM_IFINFO) | ROUTE_FILTER(RTM_NEWADDR) | 178 ROUTE_FILTER(RTM_DELADDR) | ROUTE_FILTER(RTM_IFANNOUNCE); 179 180 if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_MSGFILTER, 181 &rtfilter, sizeof(rtfilter)) == -1) 182 log_warn("%s: setsockopt(ROUTE_MSGFILTER)", __func__); 183 184 /* grow receive buffer, don't wanna miss messages */ 185 optlen = sizeof(default_rcvbuf); 186 if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, 187 &default_rcvbuf, &optlen) == -1) 188 log_warn("%s: getsockopt SOL_SOCKET SO_RCVBUF", __func__); 189 else 190 for (rcvbuf = MAX_RTSOCK_BUF; 191 rcvbuf > default_rcvbuf && 192 setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, 193 &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS; 194 rcvbuf /= 2) 195 ; /* nothing */ 196 197 kr_state.pid = getpid(); 198 kr_state.rtseq = 1; 199 200 if (fetchtable() == -1) 201 return (-1); 202 203 event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST, 204 kr_dispatch_msg, NULL); 205 event_add(&kr_state.ev, NULL); 206 207 return (0); 208} 209 210void 211kif_redistribute(const char *ifname) 212{ 213 struct kif_node *kif; 214 struct kif_addr *ka; 215 216 RB_FOREACH(kif, kif_tree, &kit) { 217 if (kif->k.rdomain != kr_state.rdomain) 218 continue; 219 220 if (ifname && strcmp(kif->k.ifname, ifname) != 0) 221 continue; 222 223 TAILQ_FOREACH(ka, &kif->addrs, entry) 224 main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, 225 sizeof(ka->a)); 226 } 227} 228 229int 230kr_change(struct kroute *kr) 231{ 232 struct kroute_prefix *kp; 233 struct kroute_priority *kprio; 234 struct kroute_node *kn; 235 int action = RTM_ADD; 236 237 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 238 if (kp == NULL) 239 goto miss; 240 241 kprio = kroute_find_prio(kp, kr->priority); 242 if (kprio == NULL) 243 goto miss; 244 245 kn = kroute_find_gw(kprio, &kr->nexthop); 246 if (kn == NULL) 247 goto miss; 248 249 if (kn->r.flags & F_LDPD_INSERTED) 250 action = RTM_CHANGE; 251 252 kn->r.local_label = kr->local_label; 253 kn->r.remote_label = kr->remote_label; 254 kn->r.flags = kn->r.flags | F_LDPD_INSERTED; 255 256 /* send update */ 257 if (send_rtmsg(kr_state.fd, action, &kn->r, AF_MPLS) == -1) 258 return (-1); 259 260 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 261 kn->r.remote_label != NO_LABEL) { 262 if (send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, kn->r.af) == -1) 263 return (-1); 264 } 265 266 return (0); 267 268 miss: 269 log_warnx("%s: lost FEC %s/%d nexthop %s", __func__, 270 log_addr(kr->af, &kr->prefix), kr->prefixlen, 271 log_addr(kr->af, &kr->nexthop)); 272 return (-1); 273} 274 275int 276kr_delete(struct kroute *kr) 277{ 278 struct kroute_prefix *kp; 279 struct kroute_priority *kprio; 280 struct kroute_node *kn; 281 int update = 0; 282 283 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 284 if (kp == NULL) 285 return (0); 286 kprio = kroute_find_prio(kp, kr->priority); 287 if (kprio == NULL) 288 return (0); 289 kn = kroute_find_gw(kprio, &kr->nexthop); 290 if (kn == NULL) 291 return (0); 292 293 if (!(kn->r.flags & F_LDPD_INSERTED)) 294 return (0); 295 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 296 kn->r.remote_label != NO_LABEL) 297 update = 1; 298 299 /* kill MPLS LSP */ 300 if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1) 301 return (-1); 302 303 kn->r.flags &= ~F_LDPD_INSERTED; 304 kn->r.local_label = NO_LABEL; 305 kn->r.remote_label = NO_LABEL; 306 307 if (update && 308 send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, kn->r.af) == -1) 309 return (-1); 310 311 return (0); 312} 313 314void 315kr_shutdown(void) 316{ 317 kr_fib_decouple(); 318 kroute_clear(); 319 kif_clear(); 320} 321 322void 323kr_fib_couple(void) 324{ 325 struct kroute_prefix *kp; 326 struct kroute_priority *kprio; 327 struct kroute_node *kn; 328 struct kif_node *kif; 329 330 if (kr_state.fib_sync == 1) /* already coupled */ 331 return; 332 333 kr_state.fib_sync = 1; 334 335 RB_FOREACH(kp, kroute_tree, &krt) { 336 kprio = TAILQ_FIRST(&kp->priorities); 337 if (kprio == NULL) 338 continue; 339 340 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 341 if (!(kn->r.flags & F_LDPD_INSERTED)) 342 continue; 343 344 send_rtmsg(kr_state.fd, RTM_ADD, &kn->r, AF_MPLS); 345 346 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 347 kn->r.remote_label != NO_LABEL) { 348 send_rtmsg(kr_state.fd, RTM_CHANGE, 349 &kn->r, kn->r.af); 350 } 351 } 352 } 353 354 RB_FOREACH(kif, kif_tree, &kit) 355 if (kif->kpw) 356 kmpw_install(kif->k.ifname, kif->kpw); 357 358 log_info("kernel routing table coupled"); 359} 360 361void 362kr_fib_decouple(void) 363{ 364 struct kroute_prefix *kp; 365 struct kroute_priority *kprio; 366 struct kroute_node *kn; 367 uint32_t rl; 368 struct kif_node *kif; 369 370 if (kr_state.fib_sync == 0) /* already decoupled */ 371 return; 372 373 RB_FOREACH(kp, kroute_tree, &krt) { 374 kprio = TAILQ_FIRST(&kp->priorities); 375 if (kprio == NULL) 376 continue; 377 378 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 379 if (!(kn->r.flags & F_LDPD_INSERTED)) 380 continue; 381 382 send_rtmsg(kr_state.fd, RTM_DELETE, 383 &kn->r, AF_MPLS); 384 385 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 386 kn->r.remote_label != NO_LABEL) { 387 rl = kn->r.remote_label; 388 kn->r.remote_label = NO_LABEL; 389 send_rtmsg(kr_state.fd, RTM_CHANGE, 390 &kn->r, kn->r.af); 391 kn->r.remote_label = rl; 392 } 393 } 394 } 395 396 RB_FOREACH(kif, kif_tree, &kit) 397 if (kif->kpw) 398 kmpw_uninstall(kif->k.ifname); 399 400 kr_state.fib_sync = 0; 401 log_info("kernel routing table decoupled"); 402} 403 404void 405kr_change_egress_label(int af, int was_implicit) 406{ 407 struct kroute_prefix *kp; 408 struct kroute_priority *kprio; 409 struct kroute_node *kn; 410 411 RB_FOREACH(kp, kroute_tree, &krt) { 412 if (kp->af != af) 413 continue; 414 415 TAILQ_FOREACH(kprio, &kp->priorities, entry) { 416 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 417 if (kn->r.local_label > MPLS_LABEL_RESERVED_MAX) 418 continue; 419 420 if (!was_implicit) { 421 kn->r.local_label = MPLS_LABEL_IMPLNULL; 422 continue; 423 } 424 425 switch (kn->r.af) { 426 case AF_INET: 427 kn->r.local_label = MPLS_LABEL_IPV4NULL; 428 break; 429 case AF_INET6: 430 kn->r.local_label = MPLS_LABEL_IPV6NULL; 431 break; 432 default: 433 break; 434 } 435 } 436 } 437 } 438} 439 440static void 441kr_dispatch_msg(int fd, short event, void *bula) 442{ 443 if (dispatch_rtmsg() == -1) 444 event_loopexit(NULL); 445} 446 447void 448kr_show_route(struct imsg *imsg) 449{ 450 struct kroute_prefix *kp; 451 struct kroute_priority *kprio; 452 struct kroute_node *kn; 453 int flags; 454 struct kroute kr; 455 456 switch (imsg->hdr.type) { 457 case IMSG_CTL_KROUTE: 458 if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) { 459 log_warnx("%s: wrong imsg len", __func__); 460 return; 461 } 462 memcpy(&flags, imsg->data, sizeof(flags)); 463 464 RB_FOREACH(kp, kroute_tree, &krt) 465 TAILQ_FOREACH(kprio, &kp->priorities, entry) 466 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 467 if (flags && !(kn->r.flags & flags)) 468 continue; 469 470 main_imsg_compose_ldpe(IMSG_CTL_KROUTE, 471 imsg->hdr.pid, &kn->r, 472 sizeof(kn->r)); 473 } 474 break; 475 case IMSG_CTL_KROUTE_ADDR: 476 if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(kr)) { 477 log_warnx("%s: wrong imsg len", __func__); 478 return; 479 } 480 memcpy(&kr, imsg->data, sizeof(kr)); 481 482 kprio = kroute_match(kr.af, &kr.prefix); 483 if (kprio == NULL) 484 break; 485 486 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 487 main_imsg_compose_ldpe(IMSG_CTL_KROUTE, imsg->hdr.pid, 488 &kn->r, sizeof(kn->r)); 489 break; 490 default: 491 log_debug("%s: error handling imsg", __func__); 492 break; 493 } 494 main_imsg_compose_ldpe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0); 495} 496 497void 498kr_ifinfo(char *ifname, pid_t pid) 499{ 500 struct kif_node *kif; 501 502 RB_FOREACH(kif, kif_tree, &kit) 503 if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) { 504 main_imsg_compose_ldpe(IMSG_CTL_IFINFO, 505 pid, &kif->k, sizeof(kif->k)); 506 } 507 508 main_imsg_compose_ldpe(IMSG_CTL_END, pid, NULL, 0); 509} 510 511static void 512kr_redist_remove(struct kroute *kr) 513{ 514 /* was the route redistributed? */ 515 if ((kr->flags & F_REDISTRIBUTED) == 0) 516 return; 517 518 /* remove redistributed flag */ 519 kr->flags &= ~F_REDISTRIBUTED; 520 main_imsg_compose_lde(IMSG_NETWORK_DEL, 0, kr, sizeof(*kr)); 521} 522 523static int 524kr_redist_eval(struct kroute *kr) 525{ 526 /* was the route redistributed? */ 527 if (kr->flags & F_REDISTRIBUTED) 528 goto dont_redistribute; 529 530 /* Dynamic routes are not redistributable. */ 531 if (kr->flags & F_DYNAMIC) 532 goto dont_redistribute; 533 534 /* filter-out non-redistributable addresses */ 535 if (bad_addr(kr->af, &kr->prefix) || 536 (kr->af == AF_INET6 && IN6_IS_SCOPE_EMBED(&kr->prefix.v6))) 537 goto dont_redistribute; 538 539 /* do not redistribute the default route */ 540 if (kr->prefixlen == 0) 541 goto dont_redistribute; 542 543 /* 544 * Consider networks with nexthop loopback as not redistributable 545 * unless it is a reject or blackhole route. 546 */ 547 switch (kr->af) { 548 case AF_INET: 549 if (kr->nexthop.v4.s_addr == htonl(INADDR_LOOPBACK) && 550 !(kr->flags & (F_BLACKHOLE|F_REJECT))) 551 goto dont_redistribute; 552 break; 553 case AF_INET6: 554 if (IN6_IS_ADDR_LOOPBACK(&kr->nexthop.v6) && 555 !(kr->flags & (F_BLACKHOLE|F_REJECT))) 556 goto dont_redistribute; 557 break; 558 default: 559 log_debug("%s: unexpected address-family", __func__); 560 break; 561 } 562 563 /* prefix should be redistributed */ 564 kr->flags |= F_REDISTRIBUTED; 565 main_imsg_compose_lde(IMSG_NETWORK_ADD, 0, kr, sizeof(*kr)); 566 return (1); 567 568 dont_redistribute: 569 return (0); 570} 571 572static void 573kr_redistribute(struct kroute_prefix *kp) 574{ 575 struct kroute_priority *kprio; 576 struct kroute_node *kn; 577 578 TAILQ_FOREACH_REVERSE(kprio, &kp->priorities, plist, entry) { 579 if (kprio == TAILQ_FIRST(&kp->priorities)) { 580 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 581 kr_redist_eval(&kn->r); 582 } else { 583 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 584 kr_redist_remove(&kn->r); 585 } 586 } 587} 588 589/* rb-tree compare */ 590static __inline int 591kroute_compare(struct kroute_prefix *a, struct kroute_prefix *b) 592{ 593 int addrcmp; 594 595 if (a->af < b->af) 596 return (-1); 597 if (a->af > b->af) 598 return (1); 599 600 addrcmp = ldp_addrcmp(a->af, &a->prefix, &b->prefix); 601 if (addrcmp != 0) 602 return (addrcmp); 603 604 if (a->prefixlen < b->prefixlen) 605 return (-1); 606 if (a->prefixlen > b->prefixlen) 607 return (1); 608 609 return (0); 610} 611 612/* tree management */ 613static struct kroute_prefix * 614kroute_find_prefix(int af, union ldpd_addr *prefix, uint8_t prefixlen) 615{ 616 struct kroute_prefix s; 617 618 s.af = af; 619 s.prefix = *prefix; 620 s.prefixlen = prefixlen; 621 622 return (RB_FIND(kroute_tree, &krt, &s)); 623} 624 625static struct kroute_priority * 626kroute_find_prio(struct kroute_prefix *kp, uint8_t prio) 627{ 628 struct kroute_priority *kprio; 629 630 /* RTP_ANY here picks the lowest priority node */ 631 if (prio == RTP_ANY) 632 return (TAILQ_FIRST(&kp->priorities)); 633 634 TAILQ_FOREACH(kprio, &kp->priorities, entry) 635 if (kprio->priority == prio) 636 return (kprio); 637 638 return (NULL); 639} 640 641static struct kroute_node * 642kroute_find_gw(struct kroute_priority *kprio, union ldpd_addr *nh) 643{ 644 struct kroute_node *kn; 645 646 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 647 if (ldp_addrcmp(kprio->kp->af, &kn->r.nexthop, nh) == 0) 648 return (kn); 649 650 return (NULL); 651} 652 653static int 654kroute_insert(struct kroute *kr) 655{ 656 struct kroute_prefix *kp; 657 struct kroute_priority *kprio, *tmp; 658 struct kroute_node *kn; 659 660 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 661 if (kp == NULL) { 662 kp = calloc(1, sizeof((*kp))); 663 if (kp == NULL) 664 fatal(__func__); 665 kp->af = kr->af; 666 kp->prefix = kr->prefix; 667 kp->prefixlen = kr->prefixlen; 668 TAILQ_INIT(&kp->priorities); 669 RB_INSERT(kroute_tree, &krt, kp); 670 } 671 672 kprio = kroute_find_prio(kp, kr->priority); 673 if (kprio == NULL) { 674 kprio = calloc(1, sizeof(*kprio)); 675 if (kprio == NULL) 676 fatal(__func__); 677 kprio->kp = kp; 678 kprio->priority = kr->priority; 679 TAILQ_INIT(&kprio->nexthops); 680 681 /* lower priorities first */ 682 TAILQ_FOREACH(tmp, &kp->priorities, entry) 683 if (tmp->priority > kprio->priority) 684 break; 685 if (tmp) 686 TAILQ_INSERT_BEFORE(tmp, kprio, entry); 687 else 688 TAILQ_INSERT_TAIL(&kp->priorities, kprio, entry); 689 } 690 691 kn = kroute_find_gw(kprio, &kr->nexthop); 692 if (kn == NULL) { 693 kn = calloc(1, sizeof(*kn)); 694 if (kn == NULL) 695 fatal(__func__); 696 kn->kprio = kprio; 697 kn->r = *kr; 698 TAILQ_INSERT_TAIL(&kprio->nexthops, kn, entry); 699 } 700 701 kr_redistribute(kp); 702 return (0); 703} 704 705static int 706kroute_uninstall(struct kroute_node *kn) 707{ 708 /* kill MPLS LSP if one was installed */ 709 if (kn->r.flags & F_LDPD_INSERTED) 710 if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1) 711 return (-1); 712 713 return (0); 714} 715 716static int 717kroute_remove(struct kroute *kr) 718{ 719 struct kroute_prefix *kp; 720 struct kroute_priority *kprio; 721 struct kroute_node *kn; 722 723 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 724 if (kp == NULL) 725 goto notfound; 726 kprio = kroute_find_prio(kp, kr->priority); 727 if (kprio == NULL) 728 goto notfound; 729 kn = kroute_find_gw(kprio, &kr->nexthop); 730 if (kn == NULL) 731 goto notfound; 732 733 kr_redist_remove(&kn->r); 734 kroute_uninstall(kn); 735 736 TAILQ_REMOVE(&kprio->nexthops, kn, entry); 737 free(kn); 738 739 if (TAILQ_EMPTY(&kprio->nexthops)) { 740 TAILQ_REMOVE(&kp->priorities, kprio, entry); 741 free(kprio); 742 } 743 744 if (TAILQ_EMPTY(&kp->priorities)) { 745 if (RB_REMOVE(kroute_tree, &krt, kp) == NULL) { 746 log_warnx("%s failed for %s/%u", __func__, 747 log_addr(kr->af, &kr->prefix), kp->prefixlen); 748 return (-1); 749 } 750 free(kp); 751 } else 752 kr_redistribute(kp); 753 754 return (0); 755 756 notfound: 757 log_warnx("%s failed to find %s/%u", __func__, 758 log_addr(kr->af, &kr->prefix), kr->prefixlen); 759 return (-1); 760} 761 762static void 763kroute_clear(void) 764{ 765 struct kroute_prefix *kp; 766 struct kroute_priority *kprio; 767 struct kroute_node *kn; 768 769 while ((kp = RB_MIN(kroute_tree, &krt)) != NULL) { 770 while ((kprio = TAILQ_FIRST(&kp->priorities)) != NULL) { 771 while ((kn = TAILQ_FIRST(&kprio->nexthops)) != NULL) { 772 kr_redist_remove(&kn->r); 773 kroute_uninstall(kn); 774 TAILQ_REMOVE(&kprio->nexthops, kn, entry); 775 free(kn); 776 } 777 TAILQ_REMOVE(&kp->priorities, kprio, entry); 778 free(kprio); 779 } 780 RB_REMOVE(kroute_tree, &krt, kp); 781 free(kp); 782 } 783} 784 785static __inline int 786kif_compare(struct kif_node *a, struct kif_node *b) 787{ 788 return (b->k.ifindex - a->k.ifindex); 789} 790 791/* tree management */ 792static struct kif_node * 793kif_find(unsigned short ifindex) 794{ 795 struct kif_node s; 796 797 memset(&s, 0, sizeof(s)); 798 s.k.ifindex = ifindex; 799 800 return (RB_FIND(kif_tree, &kit, &s)); 801} 802 803struct kif * 804kif_findname(char *ifname) 805{ 806 struct kif_node *kif; 807 808 RB_FOREACH(kif, kif_tree, &kit) 809 if (!strcmp(ifname, kif->k.ifname)) 810 return (&kif->k); 811 812 return (NULL); 813} 814 815static struct kif_node * 816kif_insert(unsigned short ifindex) 817{ 818 struct kif_node *kif; 819 820 if ((kif = calloc(1, sizeof(struct kif_node))) == NULL) 821 return (NULL); 822 823 kif->k.ifindex = ifindex; 824 TAILQ_INIT(&kif->addrs); 825 826 if (RB_INSERT(kif_tree, &kit, kif) != NULL) 827 fatalx("kif_insert: RB_INSERT"); 828 829 return (kif); 830} 831 832static int 833kif_remove(struct kif_node *kif) 834{ 835 struct kif_addr *ka; 836 837 if (RB_REMOVE(kif_tree, &kit, kif) == NULL) { 838 log_warnx("RB_REMOVE(kif_tree, &kit, kif)"); 839 return (-1); 840 } 841 842 while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) { 843 main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); 844 TAILQ_REMOVE(&kif->addrs, ka, entry); 845 free(ka); 846 } 847 free(kif); 848 return (0); 849} 850 851void 852kif_clear(void) 853{ 854 struct kif_node *kif; 855 856 while ((kif = RB_MIN(kif_tree, &kit)) != NULL) 857 kif_remove(kif); 858} 859 860static struct kif_node * 861kif_update(unsigned short ifindex, int flags, struct if_data *ifd, 862 struct sockaddr_dl *sdl, int *link_old) 863{ 864 struct kif_node *kif; 865 866 if ((kif = kif_find(ifindex)) == NULL) { 867 if ((kif = kif_insert(ifindex)) == NULL) 868 return (NULL); 869 } else 870 *link_old = (kif->k.flags & IFF_UP) && 871 LINK_STATE_IS_UP(kif->k.link_state); 872 873 kif->k.flags = flags; 874 kif->k.link_state = ifd->ifi_link_state; 875 if (sdl) 876 memcpy(kif->k.mac, LLADDR(sdl), sizeof(kif->k.mac)); 877 kif->k.if_type = ifd->ifi_type; 878 kif->k.baudrate = ifd->ifi_baudrate; 879 kif->k.mtu = ifd->ifi_mtu; 880 kif->k.rdomain = ifd->ifi_rdomain; 881 882 if (sdl && sdl->sdl_family == AF_LINK) { 883 if (sdl->sdl_nlen >= sizeof(kif->k.ifname)) 884 memcpy(kif->k.ifname, sdl->sdl_data, 885 sizeof(kif->k.ifname) - 1); 886 else if (sdl->sdl_nlen > 0) 887 memcpy(kif->k.ifname, sdl->sdl_data, 888 sdl->sdl_nlen); 889 /* string already terminated via calloc() */ 890 } 891 892 return (kif); 893} 894 895static struct kroute_priority * 896kroute_match(int af, union ldpd_addr *key) 897{ 898 int i, maxprefixlen; 899 struct kroute_prefix *kp; 900 struct kroute_priority *kprio; 901 union ldpd_addr addr; 902 903 switch (af) { 904 case AF_INET: 905 maxprefixlen = 32; 906 break; 907 case AF_INET6: 908 maxprefixlen = 128; 909 break; 910 default: 911 log_warnx("%s: unknown af", __func__); 912 return (NULL); 913 } 914 915 for (i = maxprefixlen; i >= 0; i--) { 916 ldp_applymask(af, &addr, key, i); 917 918 kp = kroute_find_prefix(af, &addr, i); 919 if (kp == NULL) 920 continue; 921 922 kprio = kroute_find_prio(kp, RTP_ANY); 923 if (kprio != NULL) 924 return (kprio); 925 } 926 927 return (NULL); 928} 929 930/* misc */ 931static uint8_t 932prefixlen_classful(in_addr_t ina) 933{ 934 /* it hurt to write this. */ 935 936 if (ina >= 0xf0000000U) /* class E */ 937 return (32); 938 else if (ina >= 0xe0000000U) /* class D */ 939 return (4); 940 else if (ina >= 0xc0000000U) /* class C */ 941 return (24); 942 else if (ina >= 0x80000000U) /* class B */ 943 return (16); 944 else /* class A */ 945 return (8); 946} 947 948#define ROUNDUP(a) \ 949 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 950 951static void 952get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info) 953{ 954 int i; 955 956 for (i = 0; i < RTAX_MAX; i++) { 957 if (addrs & (1 << i)) { 958 rti_info[i] = sa; 959 sa = (struct sockaddr *)((char *)(sa) + 960 ROUNDUP(sa->sa_len)); 961 } else 962 rti_info[i] = NULL; 963 } 964} 965 966static void 967if_change(unsigned short ifindex, int flags, struct if_data *ifd, 968 struct sockaddr_dl *sdl) 969{ 970 struct kif_node *kif; 971 struct kif_addr *ka; 972 int link_old = 0, link_new; 973 974 kif = kif_update(ifindex, flags, ifd, sdl, &link_old); 975 if (!kif) { 976 log_warn("%s: kif_update(%u)", __func__, ifindex); 977 return; 978 } 979 link_new = (kif->k.flags & IFF_UP) && 980 LINK_STATE_IS_UP(kif->k.link_state); 981 982 if (link_new == link_old) 983 return; 984 985 main_imsg_compose_ldpe(IMSG_IFSTATUS, 0, &kif->k, sizeof(struct kif)); 986 if (link_new) { 987 TAILQ_FOREACH(ka, &kif->addrs, entry) 988 main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, 989 sizeof(ka->a)); 990 } else { 991 TAILQ_FOREACH(ka, &kif->addrs, entry) 992 main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, 993 sizeof(ka->a)); 994 } 995} 996 997static void 998if_newaddr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask, 999 struct sockaddr *brd) 1000{ 1001 struct kif_node *kif; 1002 struct sockaddr_in *ifa4, *mask4, *brd4; 1003 struct sockaddr_in6 *ifa6, *mask6, *brd6; 1004 struct kif_addr *ka; 1005 1006 if (ifa == NULL) 1007 return; 1008 if ((kif = kif_find(ifindex)) == NULL) { 1009 log_warnx("%s: corresponding if %d not found", __func__, 1010 ifindex); 1011 return; 1012 } 1013 1014 switch (ifa->sa_family) { 1015 case AF_INET: 1016 ifa4 = (struct sockaddr_in *) ifa; 1017 mask4 = (struct sockaddr_in *) mask; 1018 brd4 = (struct sockaddr_in *) brd; 1019 1020 /* filter out unwanted addresses */ 1021 if (bad_addr_v4(ifa4->sin_addr)) 1022 return; 1023 1024 if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL) 1025 fatal("if_newaddr"); 1026 ka->a.addr.v4 = ifa4->sin_addr; 1027 if (mask4) 1028 ka->a.prefixlen = 1029 mask2prefixlen(mask4->sin_addr.s_addr); 1030 if (brd4) 1031 ka->a.dstbrd.v4 = brd4->sin_addr; 1032 break; 1033 case AF_INET6: 1034 ifa6 = (struct sockaddr_in6 *) ifa; 1035 mask6 = (struct sockaddr_in6 *) mask; 1036 brd6 = (struct sockaddr_in6 *) brd; 1037 1038 /* We only care about link-local and global-scope. */ 1039 if (bad_addr_v6(&ifa6->sin6_addr)) 1040 return; 1041 1042 clearscope(&ifa6->sin6_addr); 1043 1044 if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL) 1045 fatal("if_newaddr"); 1046 ka->a.addr.v6 = ifa6->sin6_addr; 1047 if (mask6) 1048 ka->a.prefixlen = mask2prefixlen6(mask6); 1049 if (brd6) 1050 ka->a.dstbrd.v6 = brd6->sin6_addr; 1051 break; 1052 default: 1053 return; 1054 } 1055 1056 ka->a.ifindex = ifindex; 1057 ka->a.af = ifa->sa_family; 1058 TAILQ_INSERT_TAIL(&kif->addrs, ka, entry); 1059 1060 /* notify ldpe about new address */ 1061 main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, sizeof(ka->a)); 1062} 1063 1064static void 1065if_deladdr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask, 1066 struct sockaddr *brd) 1067{ 1068 struct kif_node *kif; 1069 struct sockaddr_in *ifa4, *mask4, *brd4; 1070 struct sockaddr_in6 *ifa6, *mask6, *brd6; 1071 struct kaddr k; 1072 struct kif_addr *ka, *nka; 1073 1074 if (ifa == NULL) 1075 return; 1076 if ((kif = kif_find(ifindex)) == NULL) { 1077 log_warnx("%s: corresponding if %d not found", __func__, 1078 ifindex); 1079 return; 1080 } 1081 1082 memset(&k, 0, sizeof(k)); 1083 k.af = ifa->sa_family; 1084 switch (ifa->sa_family) { 1085 case AF_INET: 1086 ifa4 = (struct sockaddr_in *) ifa; 1087 mask4 = (struct sockaddr_in *) mask; 1088 brd4 = (struct sockaddr_in *) brd; 1089 1090 /* filter out unwanted addresses */ 1091 if (bad_addr_v4(ifa4->sin_addr)) 1092 return; 1093 1094 k.addr.v4 = ifa4->sin_addr; 1095 if (mask4) 1096 k.prefixlen = mask2prefixlen(mask4->sin_addr.s_addr); 1097 if (brd4) 1098 k.dstbrd.v4 = brd4->sin_addr; 1099 break; 1100 case AF_INET6: 1101 ifa6 = (struct sockaddr_in6 *) ifa; 1102 mask6 = (struct sockaddr_in6 *) mask; 1103 brd6 = (struct sockaddr_in6 *) brd; 1104 1105 /* We only care about link-local and global-scope. */ 1106 if (bad_addr_v6(&ifa6->sin6_addr)) 1107 return; 1108 1109 clearscope(&ifa6->sin6_addr); 1110 1111 k.addr.v6 = ifa6->sin6_addr; 1112 if (mask6) 1113 k.prefixlen = mask2prefixlen6(mask6); 1114 if (brd6) 1115 k.dstbrd.v6 = brd6->sin6_addr; 1116 break; 1117 default: 1118 return; 1119 } 1120 1121 for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) { 1122 nka = TAILQ_NEXT(ka, entry); 1123 1124 if (ka->a.af != k.af || 1125 ka->a.prefixlen != k.prefixlen || 1126 ldp_addrcmp(ka->a.af, &ka->a.addr, &k.addr)) 1127 continue; 1128 1129 /* notify ldpe about removed address */ 1130 main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); 1131 TAILQ_REMOVE(&kif->addrs, ka, entry); 1132 free(ka); 1133 return; 1134 } 1135} 1136 1137static void 1138if_announce(void *msg) 1139{ 1140 struct if_announcemsghdr *ifan; 1141 struct kif_node *kif; 1142 1143 ifan = msg; 1144 1145 switch (ifan->ifan_what) { 1146 case IFAN_ARRIVAL: 1147 kif = kif_insert(ifan->ifan_index); 1148 if (kif) 1149 strlcpy(kif->k.ifname, ifan->ifan_name, 1150 sizeof(kif->k.ifname)); 1151 break; 1152 case IFAN_DEPARTURE: 1153 kif = kif_find(ifan->ifan_index); 1154 if (kif) 1155 kif_remove(kif); 1156 break; 1157 } 1158} 1159 1160/* rtsock */ 1161static int 1162send_rtmsg(int fd, int action, struct kroute *kr, int family) 1163{ 1164 switch (kr->af) { 1165 case AF_INET: 1166 return (send_rtmsg_v4(fd, action, kr, family)); 1167 case AF_INET6: 1168 return (send_rtmsg_v6(fd, action, kr, family)); 1169 default: 1170 fatalx("send_rtmsg: unknown af"); 1171 } 1172} 1173 1174static int 1175send_rtmsg_v4(int fd, int action, struct kroute *kr, int family) 1176{ 1177 struct iovec iov[5]; 1178 struct rt_msghdr hdr; 1179 struct sockaddr_mpls label_in, label_out; 1180 struct sockaddr_in dst, mask, nexthop; 1181 int iovcnt = 0; 1182 1183 if (kr_state.fib_sync == 0) 1184 return (0); 1185 1186 /* 1187 * Reserved labels (implicit and explicit NULL) should not be added 1188 * to the FIB. 1189 */ 1190 if (family == AF_MPLS && kr->local_label < MPLS_LABEL_RESERVED_MAX) 1191 return (0); 1192 1193 /* initialize header */ 1194 memset(&hdr, 0, sizeof(hdr)); 1195 hdr.rtm_version = RTM_VERSION; 1196 1197 hdr.rtm_type = action; 1198 hdr.rtm_flags = RTF_UP; 1199 hdr.rtm_fmask = RTF_MPLS; 1200 hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ 1201 hdr.rtm_msglen = sizeof(hdr); 1202 hdr.rtm_hdrlen = sizeof(struct rt_msghdr); 1203 hdr.rtm_priority = kr->priority; 1204 hdr.rtm_tableid = kr_state.rdomain; /* rtableid */ 1205 /* adjust iovec */ 1206 iov[iovcnt].iov_base = &hdr; 1207 iov[iovcnt++].iov_len = sizeof(hdr); 1208 1209 if (family == AF_MPLS) { 1210 memset(&label_in, 0, sizeof(label_in)); 1211 label_in.smpls_len = sizeof(label_in); 1212 label_in.smpls_family = AF_MPLS; 1213 label_in.smpls_label = 1214 htonl(kr->local_label << MPLS_LABEL_OFFSET); 1215 /* adjust header */ 1216 hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; 1217 hdr.rtm_addrs |= RTA_DST; 1218 hdr.rtm_msglen += sizeof(label_in); 1219 /* adjust iovec */ 1220 iov[iovcnt].iov_base = &label_in; 1221 iov[iovcnt++].iov_len = sizeof(label_in); 1222 } else { 1223 memset(&dst, 0, sizeof(dst)); 1224 dst.sin_len = sizeof(dst); 1225 dst.sin_family = AF_INET; 1226 dst.sin_addr = kr->prefix.v4; 1227 /* adjust header */ 1228 hdr.rtm_addrs |= RTA_DST; 1229 hdr.rtm_msglen += sizeof(dst); 1230 /* adjust iovec */ 1231 iov[iovcnt].iov_base = &dst; 1232 iov[iovcnt++].iov_len = sizeof(dst); 1233 } 1234 1235 memset(&nexthop, 0, sizeof(nexthop)); 1236 nexthop.sin_len = sizeof(nexthop); 1237 nexthop.sin_family = AF_INET; 1238 nexthop.sin_addr = kr->nexthop.v4; 1239 /* adjust header */ 1240 hdr.rtm_flags |= RTF_GATEWAY; 1241 hdr.rtm_addrs |= RTA_GATEWAY; 1242 hdr.rtm_msglen += sizeof(nexthop); 1243 /* adjust iovec */ 1244 iov[iovcnt].iov_base = &nexthop; 1245 iov[iovcnt++].iov_len = sizeof(nexthop); 1246 1247 if (family == AF_INET) { 1248 memset(&mask, 0, sizeof(mask)); 1249 mask.sin_len = sizeof(mask); 1250 mask.sin_family = AF_INET; 1251 mask.sin_addr.s_addr = prefixlen2mask(kr->prefixlen); 1252 /* adjust header */ 1253 hdr.rtm_addrs |= RTA_NETMASK; 1254 hdr.rtm_msglen += sizeof(mask); 1255 /* adjust iovec */ 1256 iov[iovcnt].iov_base = &mask; 1257 iov[iovcnt++].iov_len = sizeof(mask); 1258 } 1259 1260 /* If action is RTM_DELETE we have to get rid of MPLS infos */ 1261 if (kr->remote_label != NO_LABEL && action != RTM_DELETE) { 1262 memset(&label_out, 0, sizeof(label_out)); 1263 label_out.smpls_len = sizeof(label_out); 1264 label_out.smpls_family = AF_MPLS; 1265 label_out.smpls_label = 1266 htonl(kr->remote_label << MPLS_LABEL_OFFSET); 1267 /* adjust header */ 1268 hdr.rtm_addrs |= RTA_SRC; 1269 hdr.rtm_flags |= RTF_MPLS; 1270 hdr.rtm_msglen += sizeof(label_out); 1271 /* adjust iovec */ 1272 iov[iovcnt].iov_base = &label_out; 1273 iov[iovcnt++].iov_len = sizeof(label_out); 1274 1275 if (kr->remote_label == MPLS_LABEL_IMPLNULL) { 1276 if (family == AF_MPLS) 1277 hdr.rtm_mpls = MPLS_OP_POP; 1278 else 1279 return (0); 1280 } else { 1281 if (family == AF_MPLS) 1282 hdr.rtm_mpls = MPLS_OP_SWAP; 1283 else 1284 hdr.rtm_mpls = MPLS_OP_PUSH; 1285 } 1286 } 1287 1288 retry: 1289 if (writev(fd, iov, iovcnt) == -1) { 1290 if (errno == ESRCH) { 1291 if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) { 1292 hdr.rtm_type = RTM_ADD; 1293 goto retry; 1294 } else if (hdr.rtm_type == RTM_DELETE) { 1295 log_info("route %s/%u vanished before delete", 1296 inet_ntoa(kr->prefix.v4), kr->prefixlen); 1297 return (-1); 1298 } 1299 } 1300 log_warn("%s action %u, af %s, prefix %s/%u", __func__, 1301 hdr.rtm_type, af_name(family), inet_ntoa(kr->prefix.v4), 1302 kr->prefixlen); 1303 return (-1); 1304 } 1305 1306 return (0); 1307} 1308 1309static int 1310send_rtmsg_v6(int fd, int action, struct kroute *kr, int family) 1311{ 1312 struct iovec iov[5]; 1313 struct rt_msghdr hdr; 1314 struct sockaddr_mpls label_in, label_out; 1315 struct sockaddr_in6 dst, mask, nexthop; 1316 int iovcnt = 0; 1317 1318 if (kr_state.fib_sync == 0) 1319 return (0); 1320 1321 /* 1322 * Reserved labels (implicit and explicit NULL) should not be added 1323 * to the FIB. 1324 */ 1325 if (family == AF_MPLS && kr->local_label < MPLS_LABEL_RESERVED_MAX) 1326 return (0); 1327 1328 /* initialize header */ 1329 memset(&hdr, 0, sizeof(hdr)); 1330 hdr.rtm_version = RTM_VERSION; 1331 1332 hdr.rtm_type = action; 1333 hdr.rtm_flags = RTF_UP; 1334 hdr.rtm_fmask = RTF_MPLS; 1335 hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ 1336 hdr.rtm_msglen = sizeof(hdr); 1337 hdr.rtm_hdrlen = sizeof(struct rt_msghdr); 1338 hdr.rtm_priority = kr->priority; 1339 hdr.rtm_tableid = kr_state.rdomain; /* rtableid */ 1340 /* adjust iovec */ 1341 iov[iovcnt].iov_base = &hdr; 1342 iov[iovcnt++].iov_len = sizeof(hdr); 1343 1344 if (family == AF_MPLS) { 1345 memset(&label_in, 0, sizeof(label_in)); 1346 label_in.smpls_len = sizeof(label_in); 1347 label_in.smpls_family = AF_MPLS; 1348 label_in.smpls_label = 1349 htonl(kr->local_label << MPLS_LABEL_OFFSET); 1350 /* adjust header */ 1351 hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; 1352 hdr.rtm_addrs |= RTA_DST; 1353 hdr.rtm_msglen += sizeof(label_in); 1354 /* adjust iovec */ 1355 iov[iovcnt].iov_base = &label_in; 1356 iov[iovcnt++].iov_len = sizeof(label_in); 1357 } else { 1358 memset(&dst, 0, sizeof(dst)); 1359 dst.sin6_len = sizeof(dst); 1360 dst.sin6_family = AF_INET6; 1361 dst.sin6_addr = kr->prefix.v6; 1362 /* adjust header */ 1363 hdr.rtm_addrs |= RTA_DST; 1364 hdr.rtm_msglen += ROUNDUP(sizeof(dst)); 1365 /* adjust iovec */ 1366 iov[iovcnt].iov_base = &dst; 1367 iov[iovcnt++].iov_len = ROUNDUP(sizeof(dst)); 1368 } 1369 1370 memset(&nexthop, 0, sizeof(nexthop)); 1371 nexthop.sin6_len = sizeof(nexthop); 1372 nexthop.sin6_family = AF_INET6; 1373 nexthop.sin6_addr = kr->nexthop.v6; 1374 nexthop.sin6_scope_id = kr->ifindex; 1375 /* 1376 * XXX we should set the sin6_scope_id but the kernel 1377 * XXX does not expect it that way. It must be fiddled 1378 * XXX into the sin6_addr. Welcome to the typical 1379 * XXX IPv6 insanity and all without wine bottles. 1380 */ 1381 embedscope(&nexthop); 1382 1383 /* adjust header */ 1384 hdr.rtm_flags |= RTF_GATEWAY; 1385 hdr.rtm_addrs |= RTA_GATEWAY; 1386 hdr.rtm_msglen += ROUNDUP(sizeof(nexthop)); 1387 /* adjust iovec */ 1388 iov[iovcnt].iov_base = &nexthop; 1389 iov[iovcnt++].iov_len = ROUNDUP(sizeof(nexthop)); 1390 1391 if (family == AF_INET6) { 1392 memset(&mask, 0, sizeof(mask)); 1393 mask.sin6_len = sizeof(mask); 1394 mask.sin6_family = AF_INET6; 1395 mask.sin6_addr = *prefixlen2mask6(kr->prefixlen); 1396 /* adjust header */ 1397 if (kr->prefixlen == 128) 1398 hdr.rtm_flags |= RTF_HOST; 1399 hdr.rtm_addrs |= RTA_NETMASK; 1400 hdr.rtm_msglen += ROUNDUP(sizeof(mask)); 1401 /* adjust iovec */ 1402 iov[iovcnt].iov_base = &mask; 1403 iov[iovcnt++].iov_len = ROUNDUP(sizeof(mask)); 1404 } 1405 1406 /* If action is RTM_DELETE we have to get rid of MPLS infos */ 1407 if (kr->remote_label != NO_LABEL && action != RTM_DELETE) { 1408 memset(&label_out, 0, sizeof(label_out)); 1409 label_out.smpls_len = sizeof(label_out); 1410 label_out.smpls_family = AF_MPLS; 1411 label_out.smpls_label = 1412 htonl(kr->remote_label << MPLS_LABEL_OFFSET); 1413 /* adjust header */ 1414 hdr.rtm_addrs |= RTA_SRC; 1415 hdr.rtm_flags |= RTF_MPLS; 1416 hdr.rtm_msglen += sizeof(label_out); 1417 /* adjust iovec */ 1418 iov[iovcnt].iov_base = &label_out; 1419 iov[iovcnt++].iov_len = sizeof(label_out); 1420 1421 if (kr->remote_label == MPLS_LABEL_IMPLNULL) { 1422 if (family == AF_MPLS) 1423 hdr.rtm_mpls = MPLS_OP_POP; 1424 else 1425 return (0); 1426 } else { 1427 if (family == AF_MPLS) 1428 hdr.rtm_mpls = MPLS_OP_SWAP; 1429 else 1430 hdr.rtm_mpls = MPLS_OP_PUSH; 1431 } 1432 } 1433 1434 retry: 1435 if (writev(fd, iov, iovcnt) == -1) { 1436 if (errno == ESRCH) { 1437 if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) { 1438 hdr.rtm_type = RTM_ADD; 1439 goto retry; 1440 } else if (hdr.rtm_type == RTM_DELETE) { 1441 log_info("route %s/%u vanished before delete", 1442 log_addr(kr->af, &kr->prefix), 1443 kr->prefixlen); 1444 return (-1); 1445 } 1446 } 1447 log_warn("%s action %u, af %s, prefix %s/%u", __func__, 1448 hdr.rtm_type, af_name(family), log_addr(kr->af, 1449 &kr->prefix), kr->prefixlen); 1450 return (-1); 1451 } 1452 return (0); 1453} 1454 1455static int 1456fetchtable(void) 1457{ 1458 size_t len; 1459 int mib[7]; 1460 char *buf; 1461 int rv; 1462 1463 mib[0] = CTL_NET; 1464 mib[1] = PF_ROUTE; 1465 mib[2] = 0; 1466 mib[3] = 0; 1467 mib[4] = NET_RT_DUMP; 1468 mib[5] = 0; 1469 mib[6] = kr_state.rdomain; /* rtableid */ 1470 1471 if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) { 1472 log_warn("sysctl"); 1473 return (-1); 1474 } 1475 if ((buf = malloc(len)) == NULL) { 1476 log_warn(__func__); 1477 return (-1); 1478 } 1479 if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) { 1480 log_warn("sysctl"); 1481 free(buf); 1482 return (-1); 1483 } 1484 1485 rv = rtmsg_process(buf, len); 1486 free(buf); 1487 1488 return (rv); 1489} 1490 1491static int 1492fetchifs(void) 1493{ 1494 size_t len; 1495 int mib[6]; 1496 char *buf; 1497 int rv; 1498 1499 mib[0] = CTL_NET; 1500 mib[1] = PF_ROUTE; 1501 mib[2] = 0; 1502 mib[3] = 0; /* wildcard */ 1503 mib[4] = NET_RT_IFLIST; 1504 mib[5] = 0; 1505 1506 if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) { 1507 log_warn("sysctl"); 1508 return (-1); 1509 } 1510 if ((buf = malloc(len)) == NULL) { 1511 log_warn(__func__); 1512 return (-1); 1513 } 1514 if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) { 1515 log_warn("sysctl"); 1516 free(buf); 1517 return (-1); 1518 } 1519 1520 rv = rtmsg_process(buf, len); 1521 free(buf); 1522 1523 return (rv); 1524} 1525 1526static int 1527dispatch_rtmsg(void) 1528{ 1529 char buf[RT_BUF_SIZE]; 1530 ssize_t n; 1531 1532 if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) { 1533 if (errno == EAGAIN || errno == EINTR) 1534 return (0); 1535 log_warn("%s: read error", __func__); 1536 return (-1); 1537 } 1538 1539 if (n == 0) { 1540 log_warnx("routing socket closed"); 1541 return (-1); 1542 } 1543 1544 return (rtmsg_process(buf, n)); 1545} 1546 1547static int 1548rtmsg_process(char *buf, size_t len) 1549{ 1550 struct rt_msghdr *rtm; 1551 struct if_msghdr ifm; 1552 struct ifa_msghdr *ifam; 1553 struct sockaddr *sa, *rti_info[RTAX_MAX]; 1554 size_t offset; 1555 char *next; 1556 1557 for (offset = 0; offset < len; offset += rtm->rtm_msglen) { 1558 next = buf + offset; 1559 rtm = (struct rt_msghdr *)next; 1560 if (len < offset + sizeof(unsigned short) || 1561 len < offset + rtm->rtm_msglen) 1562 fatalx("rtmsg_process: partial rtm in buffer"); 1563 if (rtm->rtm_version != RTM_VERSION) 1564 continue; 1565 1566 sa = (struct sockaddr *)(next + rtm->rtm_hdrlen); 1567 get_rtaddrs(rtm->rtm_addrs, sa, rti_info); 1568 1569 switch (rtm->rtm_type) { 1570 case RTM_ADD: 1571 case RTM_GET: 1572 case RTM_CHANGE: 1573 case RTM_DELETE: 1574 if (rtm->rtm_errno) /* failed attempts... */ 1575 continue; 1576 1577 if (rtm->rtm_tableid != kr_state.rdomain) 1578 continue; 1579 1580 if (rtm->rtm_type == RTM_GET && 1581 rtm->rtm_pid != kr_state.pid) 1582 continue; 1583 1584 /* Skip ARP/ND cache and broadcast routes. */ 1585 if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST)) 1586 continue; 1587 1588 /* LDP should follow the IGP and ignore BGP routes */ 1589 if (rtm->rtm_priority == RTP_BGP) 1590 continue; 1591 1592 if (rtmsg_process_route(rtm, rti_info) == -1) 1593 return (-1); 1594 } 1595 1596 switch (rtm->rtm_type) { 1597 case RTM_IFINFO: 1598 memcpy(&ifm, next, sizeof(ifm)); 1599 if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data, 1600 (struct sockaddr_dl *)rti_info[RTAX_IFP]); 1601 break; 1602 case RTM_NEWADDR: 1603 ifam = (struct ifa_msghdr *)rtm; 1604 if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA | 1605 RTA_BRD)) == 0) 1606 break; 1607 1608 if_newaddr(ifam->ifam_index, 1609 (struct sockaddr *)rti_info[RTAX_IFA], 1610 (struct sockaddr *)rti_info[RTAX_NETMASK], 1611 (struct sockaddr *)rti_info[RTAX_BRD]); 1612 break; 1613 case RTM_DELADDR: 1614 ifam = (struct ifa_msghdr *)rtm; 1615 if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA | 1616 RTA_BRD)) == 0) 1617 break; 1618 1619 if_deladdr(ifam->ifam_index, 1620 (struct sockaddr *)rti_info[RTAX_IFA], 1621 (struct sockaddr *)rti_info[RTAX_NETMASK], 1622 (struct sockaddr *)rti_info[RTAX_BRD]); 1623 break; 1624 case RTM_IFANNOUNCE: 1625 if_announce(next); 1626 break; 1627 default: 1628 /* ignore for now */ 1629 break; 1630 } 1631 } 1632 1633 return (offset); 1634} 1635 1636static int 1637rtmsg_process_route(struct rt_msghdr *rtm, struct sockaddr *rti_info[RTAX_MAX]) 1638{ 1639 struct sockaddr *sa; 1640 struct sockaddr_in *sa_in; 1641 struct sockaddr_in6 *sa_in6; 1642 struct kroute kr; 1643 struct kroute_prefix *kp; 1644 struct kroute_priority *kprio; 1645 struct kroute_node *kn; 1646 1647 if ((sa = rti_info[RTAX_DST]) == NULL) 1648 return (-1); 1649 1650 memset(&kr, 0, sizeof(kr)); 1651 kr.af = sa->sa_family; 1652 switch (kr.af) { 1653 case AF_INET: 1654 kr.prefix.v4 = ((struct sockaddr_in *)sa)->sin_addr; 1655 sa_in = (struct sockaddr_in *) rti_info[RTAX_NETMASK]; 1656 if (sa_in != NULL && sa_in->sin_len != 0) 1657 kr.prefixlen = mask2prefixlen(sa_in->sin_addr.s_addr); 1658 else if (rtm->rtm_flags & RTF_HOST) 1659 kr.prefixlen = 32; 1660 else if (kr.prefix.v4.s_addr == INADDR_ANY) 1661 kr.prefixlen = 0; 1662 else 1663 kr.prefixlen = prefixlen_classful(kr.prefix.v4.s_addr); 1664 break; 1665 case AF_INET6: 1666 kr.prefix.v6 = ((struct sockaddr_in6 *)sa)->sin6_addr; 1667 sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK]; 1668 if (sa_in6 != NULL && sa_in6->sin6_len != 0) 1669 kr.prefixlen = mask2prefixlen6(sa_in6); 1670 else if (rtm->rtm_flags & RTF_HOST) 1671 kr.prefixlen = 128; 1672 else if (IN6_IS_ADDR_UNSPECIFIED(&kr.prefix.v6)) 1673 kr.prefixlen = 0; 1674 else 1675 fatalx("in6 net addr without netmask"); 1676 break; 1677 default: 1678 return (0); 1679 } 1680 kr.ifindex = rtm->rtm_index; 1681 if ((sa = rti_info[RTAX_GATEWAY]) != NULL) { 1682 switch (sa->sa_family) { 1683 case AF_INET: 1684 kr.nexthop.v4 = ((struct sockaddr_in *)sa)->sin_addr; 1685 break; 1686 case AF_INET6: 1687 sa_in6 = (struct sockaddr_in6 *)sa; 1688 recoverscope(sa_in6); 1689 kr.nexthop.v6 = sa_in6->sin6_addr; 1690 if (sa_in6->sin6_scope_id) 1691 kr.ifindex = sa_in6->sin6_scope_id; 1692 break; 1693 case AF_LINK: 1694 kr.flags |= F_CONNECTED; 1695 break; 1696 } 1697 } 1698 1699 if (rtm->rtm_flags & RTF_STATIC) 1700 kr.flags |= F_STATIC; 1701 if (rtm->rtm_flags & RTF_BLACKHOLE) 1702 kr.flags |= F_BLACKHOLE; 1703 if (rtm->rtm_flags & RTF_REJECT) 1704 kr.flags |= F_REJECT; 1705 if (rtm->rtm_flags & RTF_DYNAMIC) 1706 kr.flags |= F_DYNAMIC; 1707 /* routes attached to connected or loopback interfaces */ 1708 if (rtm->rtm_flags & RTF_CONNECTED || 1709 ldp_addrcmp(kr.af, &kr.prefix, &kr.nexthop) == 0) 1710 kr.flags |= F_CONNECTED; 1711 kr.priority = rtm->rtm_priority; 1712 1713 if (rtm->rtm_type == RTM_CHANGE) { 1714 /* 1715 * The kernel doesn't allow RTM_CHANGE for multipath routes. 1716 * If we got this message we know that the route has only one 1717 * nexthop and we should remove it before installing the same 1718 * route with the new nexthop. 1719 */ 1720 kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen); 1721 if (kp) { 1722 kprio = kroute_find_prio(kp, kr.priority); 1723 if (kprio) { 1724 kn = TAILQ_FIRST(&kprio->nexthops); 1725 if (kn) 1726 kroute_remove(&kn->r); 1727 } 1728 } 1729 } 1730 1731 kn = NULL; 1732 kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen); 1733 if (kp) { 1734 kprio = kroute_find_prio(kp, kr.priority); 1735 if (kprio) 1736 kn = kroute_find_gw(kprio, &kr.nexthop); 1737 } 1738 1739 if (rtm->rtm_type == RTM_DELETE) { 1740 if (kn == NULL) 1741 return (0); 1742 return (kroute_remove(&kr)); 1743 } 1744 1745 if (!ldp_addrisset(kr.af, &kr.nexthop) && !(kr.flags & F_CONNECTED)) { 1746 log_warnx("%s: no nexthop for %s/%u", __func__, 1747 log_addr(kr.af, &kr.prefix), kr.prefixlen); 1748 return (-1); 1749 } 1750 1751 if (kn != NULL) { 1752 /* update route */ 1753 kn->r = kr; 1754 kr_redistribute(kp); 1755 } else { 1756 kr.local_label = NO_LABEL; 1757 kr.remote_label = NO_LABEL; 1758 kroute_insert(&kr); 1759 } 1760 1761 return (0); 1762} 1763 1764int 1765kmpw_set(struct kpw *kpw) 1766{ 1767 struct kif_node *kif; 1768 1769 kif = kif_find(kpw->ifindex); 1770 if (kif == NULL) { 1771 log_warnx("%s: failed to find mpw by index (%u)", __func__, 1772 kpw->ifindex); 1773 return (-1); 1774 } 1775 1776 if (kif->kpw == NULL) 1777 kif->kpw = malloc(sizeof(*kif->kpw)); 1778 *kif->kpw = *kpw; 1779 1780 return (kmpw_install(kif->k.ifname, kpw)); 1781} 1782 1783int 1784kmpw_unset(struct kpw *kpw) 1785{ 1786 struct kif_node *kif; 1787 1788 kif = kif_find(kpw->ifindex); 1789 if (kif == NULL) { 1790 log_warnx("%s: failed to find mpw by index (%u)", __func__, 1791 kpw->ifindex); 1792 return (-1); 1793 } 1794 1795 if (kif->kpw == NULL) { 1796 log_warnx("%s: %s is not set", __func__, kif->k.ifname); 1797 return (-1); 1798 } 1799 1800 free(kif->kpw); 1801 kif->kpw = NULL; 1802 return (kmpw_uninstall(kif->k.ifname)); 1803} 1804 1805static int 1806kmpw_install(const char *ifname, struct kpw *kpw) 1807{ 1808 struct ifreq ifr; 1809 struct ifmpwreq imr; 1810 1811 memset(&imr, 0, sizeof(imr)); 1812 switch (kpw->pw_type) { 1813 case PW_TYPE_ETHERNET: 1814 imr.imr_type = IMR_TYPE_ETHERNET; 1815 break; 1816 case PW_TYPE_ETHERNET_TAGGED: 1817 imr.imr_type = IMR_TYPE_ETHERNET_TAGGED; 1818 break; 1819 default: 1820 log_warnx("%s: unhandled pseudowire type (%#X)", __func__, 1821 kpw->pw_type); 1822 return (-1); 1823 } 1824 1825 if (kpw->flags & F_PW_CWORD) 1826 imr.imr_flags |= IMR_FLAG_CONTROLWORD; 1827 1828 memcpy(&imr.imr_nexthop, addr2sa(kpw->af, &kpw->nexthop, 0), 1829 sizeof(imr.imr_nexthop)); 1830 1831 imr.imr_lshim.shim_label = kpw->local_label; 1832 imr.imr_rshim.shim_label = kpw->remote_label; 1833 1834 memset(&ifr, 0, sizeof(ifr)); 1835 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 1836 ifr.ifr_data = (caddr_t) &imr; 1837 if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr) == -1) { 1838 log_warn("ioctl SIOCSETMPWCFG"); 1839 return (-1); 1840 } 1841 1842 return (0); 1843} 1844 1845static int 1846kmpw_uninstall(const char *ifname) 1847{ 1848 struct ifreq ifr; 1849 struct ifmpwreq imr; 1850 1851 memset(&ifr, 0, sizeof(ifr)); 1852 memset(&imr, 0, sizeof(imr)); 1853 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 1854 ifr.ifr_data = (caddr_t) &imr; 1855 if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr) == -1) { 1856 log_warn("ioctl SIOCSETMPWCFG"); 1857 return (-1); 1858 } 1859 1860 return (0); 1861} 1862 1863int 1864kmpw_find(const char *ifname) 1865{ 1866 struct ifreq ifr; 1867 1868 memset(&ifr, 0, sizeof(ifr)); 1869 if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >= 1870 sizeof(ifr.ifr_name)) { 1871 errno = ENAMETOOLONG; 1872 return (-1); 1873 } 1874 1875 if (ioctl(kr_state.ioctl_fd, SIOCGPWE3, &ifr) == -1) 1876 return (-1); 1877 1878 if (ifr.ifr_pwe3 != IF_PWE3_ETHERNET) { 1879 errno = EPFNOSUPPORT; 1880 return (-1); 1881 } 1882 1883 return (0); 1884} 1885