/*	$OpenBSD: rde_peer.c,v 1.37 2024/05/22 08:41:14 claudio Exp $ */

/*
 * Copyright (c) 2019 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>
#include <sys/queue.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include "bgpd.h"
#include "rde.h"

struct peer_tree	 peertable;
struct rde_peer		*peerself;
static long		 imsg_pending;

CTASSERT(sizeof(peerself->recv_eor) * 8 > AID_MAX);
CTASSERT(sizeof(peerself->sent_eor) * 8 > AID_MAX);

struct iq {
	SIMPLEQ_ENTRY(iq)	entry;
	struct imsg		imsg;
};

/* Return true if the peer negotiated the 4-byte AS number capability. */
int
peer_has_as4byte(struct rde_peer *peer)
{
	return (peer->capa.as4byte);
}

/*
 * Check if ADD_PATH is enabled for aid and mode (rx / tx). If aid is
 * AID_UNSPEC then the function returns true if any aid has mode enabled.
 */
int
peer_has_add_path(struct rde_peer *peer, uint8_t aid, int mode)
{
	if (aid >= AID_MAX)
		return 0;
	return (peer->capa.add_path[aid] & mode);
}

int
peer_accept_no_as_set(struct rde_peer *peer)
{
	return (peer->flags & PEERFLAG_NO_AS_SET);
}

/*
 * Initialize the peer table and create the internal 'self' peer used
 * for locally originated prefixes.
 */
void
peer_init(struct filter_head *rules)
{
	struct peer_config pc;

	RB_INIT(&peertable);

	memset(&pc, 0, sizeof(pc));
	snprintf(pc.descr, sizeof(pc.descr), "LOCAL");
	pc.id = PEER_ID_SELF;

	peerself = peer_add(PEER_ID_SELF, &pc, rules);
	peerself->state = PEER_UP;
}

/*
 * Called at shutdown; the peer table is expected to be empty by now.
 */
void
peer_shutdown(void)
{
	if (!RB_EMPTY(&peertable))
		log_warnx("%s: free non-free table", __func__);
}

/*
 * Traverse all peers, calling callback for each peer.
 */
void
peer_foreach(void (*callback)(struct rde_peer *, void *), void *arg)
{
	struct rde_peer *peer, *np;

	RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
		callback(peer, arg);
}

/*
 * Look up a peer by peer_id, return NULL if not found.
 */
struct rde_peer *
peer_get(uint32_t id)
{
	struct rde_peer needle;

	needle.conf.id = id;
	return RB_FIND(peer_tree, &peertable, &needle);
}

/*
 * Find next peer that matches neighbor options in *n.
 * If peerid was set then pick up the lookup after that peer.
 * Returns NULL if no more peers match.
 */
struct rde_peer *
peer_match(struct ctl_neighbor *n, uint32_t peerid)
{
	struct rde_peer *peer;

	if (peerid != 0) {
		peer = peer_get(peerid);
		if (peer)
			peer = RB_NEXT(peer_tree, &peertable, peer);
	} else
		peer = RB_MIN(peer_tree, &peertable);

	for (; peer != NULL; peer = RB_NEXT(peer_tree, &peertable, peer)) {
		if (rde_match_peer(peer, n))
			return peer;
	}
	return NULL;
}

/*
 * Add a new peer for the given id or, if it already exists, just update
 * its configuration.
 */
struct rde_peer *
peer_add(uint32_t id, struct peer_config *p_conf, struct filter_head *rules)
{
	struct rde_peer *peer;
	int conflict;

	if ((peer = peer_get(id))) {
		memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
		return (peer);
	}

	peer = calloc(1, sizeof(struct rde_peer));
	if (peer == NULL)
		fatal("peer_add");

	memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
	peer->remote_bgpid = 0;
	peer->loc_rib_id = rib_find(peer->conf.rib);
	if (peer->loc_rib_id == RIB_NOTFOUND)
		fatalx("King Bula's new peer met an unknown RIB");
	peer->state = PEER_NONE;
	peer->eval = peer->conf.eval;
	peer->role = peer->conf.role;
	peer->export_type = peer->conf.export_type;
	peer->flags = peer->conf.flags;
	SIMPLEQ_INIT(&peer->imsg_queue);

	peer_apply_out_filter(peer, rules);

	/*
	 * Assign an even random unique transmit path id.
	 * Odd path_id_tx numbers are for peers using add-path recv.
	 */
	do {
		struct rde_peer *p;

		conflict = 0;
		peer->path_id_tx = arc4random() << 1;
		RB_FOREACH(p, peer_tree, &peertable) {
			if (p->path_id_tx == peer->path_id_tx) {
				conflict = 1;
				break;
			}
		}
	} while (conflict);

	if (RB_INSERT(peer_tree, &peertable, peer) != NULL)
		fatalx("rde peer table corrupted");

	return (peer);
}

/*
 * Build the peer-specific output filter list from the global rules,
 * skipping rules that can never match this peer. The previous list is
 * returned so the caller can free it.
 */
struct filter_head *
peer_apply_out_filter(struct rde_peer *peer, struct filter_head *rules)
{
	struct filter_head *old;
	struct filter_rule *fr, *new;

	old = peer->out_rules;
	if ((peer->out_rules = malloc(sizeof(*peer->out_rules))) == NULL)
		fatal(NULL);
	TAILQ_INIT(peer->out_rules);

	TAILQ_FOREACH(fr, rules, entry) {
		if (rde_filter_skip_rule(peer, fr))
			continue;

		if ((new = malloc(sizeof(*new))) == NULL)
			fatal(NULL);
		memcpy(new, fr, sizeof(*new));
		filterset_copy(&fr->set, &new->set);

		TAILQ_INSERT_TAIL(peer->out_rules, new, entry);
	}

	return old;
}

static inline int
peer_cmp(struct rde_peer *a, struct rde_peer *b)
{
	if (a->conf.id > b->conf.id)
		return 1;
	if (a->conf.id < b->conf.id)
		return -1;
	return 0;
}

RB_GENERATE(peer_tree, rde_peer, entry, peer_cmp);

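/*
 * Decide whether the change to this rib_entry needs to be announced to
 * the given peer and dispatch to the matching update generation routine
 * (add-path send vs. regular best-path updates).
 */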
static void
peer_generate_update(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *newpath, struct prefix *oldpath,
    enum eval_mode mode)
{
	uint8_t aid;

	aid = re->prefix->aid;

	/* skip ourself */
	if (peer == peerself)
		return;
	if (peer->state != PEER_UP)
		return;
	/* skip peers using a different rib */
	if (peer->loc_rib_id != re->rib_id)
		return;
	/* check if peer actually supports the address family */
	if (peer->capa.mp[aid] == 0)
		return;
	/* skip peers with special export types */
	if (peer->export_type == EXPORT_NONE ||
	    peer->export_type == EXPORT_DEFAULT_ROUTE)
		return;

	/* if reconf skip peers which don't need to reconfigure */
	if (mode == EVAL_RECONF && peer->reconf_out == 0)
		return;

	/* handle peers with add-path */
	if (peer_has_add_path(peer, aid, CAPA_AP_SEND)) {
		if (peer->eval.mode == ADDPATH_EVAL_ALL)
			up_generate_addpath_all(peer, re, newpath, oldpath);
		else
			up_generate_addpath(peer, re);
		return;
	}

	/* skip regular peers if the best path didn't change */
	if (mode == EVAL_ALL && (peer->flags & PEERFLAG_EVALUATE_ALL) == 0)
		return;
	up_generate_updates(peer, re);
}

void
rde_generate_updates(struct rib_entry *re, struct prefix *newpath,
    struct prefix *oldpath, enum eval_mode mode)
{
	struct rde_peer *peer;

	RB_FOREACH(peer, peer_tree, &peertable)
		peer_generate_update(peer, re, newpath, oldpath, mode);
}

/*
 * Various RIB walker callbacks.
 */
static void
peer_adjout_clear_upcall(struct prefix *p, void *arg)
{
	prefix_adjout_destroy(p);
}

static void
peer_adjout_stale_upcall(struct prefix *p, void *arg)
{
	if (p->flags & PREFIX_FLAG_DEAD) {
		return;
	} else if (p->flags & PREFIX_FLAG_WITHDRAW) {
		/* no need to keep stale withdraws, they miss all attributes */
		prefix_adjout_destroy(p);
		return;
	} else if (p->flags & PREFIX_FLAG_UPDATE) {
		RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
	}
	p->flags |= PREFIX_FLAG_STALE;
}

struct peer_flush {
	struct rde_peer *peer;
	time_t staletime;
};

static void
peer_flush_upcall(struct rib_entry *re, void *arg)
{
	struct rde_peer *peer = ((struct peer_flush *)arg)->peer;
	struct rde_aspath *asp;
	struct bgpd_addr addr;
	struct prefix *p, *np, *rp;
	time_t staletime = ((struct peer_flush *)arg)->staletime;
	uint32_t i;
	uint8_t prefixlen;

	pt_getaddr(re->prefix, &addr);
	prefixlen = re->prefix->prefixlen;
	TAILQ_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
		if (peer != prefix_peer(p))
			continue;
		if (staletime && p->lastchange > staletime)
			continue;

		for (i = RIB_LOC_START; i < rib_size; i++) {
			struct rib *rib = rib_byid(i);
			if (rib == NULL)
				continue;
			rp = prefix_get(rib, peer, p->path_id,
			    &addr, prefixlen);
			if (rp) {
				asp = prefix_aspath(rp);
				if (asp && asp->pftableid)
					rde_pftable_del(asp->pftableid, rp);

				prefix_destroy(rp);
				rde_update_log("flush", i, peer, NULL,
				    &addr, prefixlen);
			}
		}

		prefix_destroy(p);
		peer->stats.prefix_cnt--;
	}
}

static void
rde_up_adjout_force_upcall(struct prefix *p, void *ptr)
{
	if (p->flags & PREFIX_FLAG_STALE) {
		/* remove stale entries */
		prefix_adjout_destroy(p);
	} else if (p->flags & PREFIX_FLAG_DEAD) {
		/* ignore dead prefixes, they will go away soon */
	} else if ((p->flags & PREFIX_FLAG_MASK) == 0) {
		/* put entries on the update queue if not already on a queue */
		p->flags |= PREFIX_FLAG_UPDATE;
		if (RB_INSERT(prefix_tree, &prefix_peer(p)->updates[p->pt->aid],
		    p) != NULL)
			fatalx("%s: RB tree invariant violated", __func__);
	}
}

static void
rde_up_adjout_force_done(void *ptr, uint8_t aid)
{
	struct rde_peer *peer = ptr;

	/* Adj-RIB-Out ready, unthrottle peer and inject EOR */
	peer->throttled = 0;
	if (peer->capa.grestart.restart)
		prefix_add_eor(peer, aid);
}

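/*
 * RIB walker upcall used for the initial table dump: queue an update
 * for this peer if the rib_entry has an eligible best path.
 */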
static void
rde_up_dump_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_peer *peer = ptr;
	struct prefix *p;

	if ((p = prefix_best(re)) == NULL)
		/* no eligible prefix, not even for 'evaluate all' */
		return;

	peer_generate_update(peer, re, NULL, NULL, 0);
}

static void
rde_up_dump_done(void *ptr, uint8_t aid)
{
	struct rde_peer *peer = ptr;

	/* force out all updates of Adj-RIB-Out for this peer */
	if (prefix_dump_new(peer, aid, 0, peer, rde_up_adjout_force_upcall,
	    rde_up_adjout_force_done, NULL) == -1)
		fatal("%s: prefix_dump_new", __func__);
}

/*
 * Session got established, bring peer up, load RIBs and do the initial
 * table dump.
 */
void
peer_up(struct rde_peer *peer, struct session_up *sup)
{
	uint8_t i;

	if (peer->state == PEER_ERR) {
		/*
		 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
		 * So just do a full reset of the peer here.
		 */
		rib_dump_terminate(peer);
		peer_imsg_flush(peer);
		if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
		    peer_adjout_clear_upcall, NULL, NULL) == -1)
			fatal("%s: prefix_dump_new", __func__);
		peer_flush(peer, AID_UNSPEC, 0);
		peer->stats.prefix_cnt = 0;
		peer->stats.prefix_out_cnt = 0;
		peer->state = PEER_DOWN;
	}
	peer->remote_bgpid = sup->remote_bgpid;
	peer->short_as = sup->short_as;
	peer->remote_addr = sup->remote_addr;
	peer->local_v4_addr = sup->local_v4_addr;
	peer->local_v6_addr = sup->local_v6_addr;
	peer->local_if_scope = sup->if_scope;
	memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));

	/* clear eor markers depending on GR flags */
	if (peer->capa.grestart.restart) {
		peer->sent_eor = 0;
		peer->recv_eor = 0;
	} else {
		/* no EOR expected */
		peer->sent_eor = ~0;
		peer->recv_eor = ~0;
	}
	peer->state = PEER_UP;

	for (i = AID_MIN; i < AID_MAX; i++) {
		if (peer->capa.mp[i])
			peer_dump(peer, i);
	}
}

/*
 * Session dropped and no graceful restart is done. Stop everything for
 * this peer and clean up.
 */
void
peer_down(struct rde_peer *peer, void *bula)
{
	peer->remote_bgpid = 0;
	peer->state = PEER_DOWN;
	/*
	 * stop all pending dumps which may depend on this peer
	 * and flush all pending imsg from the SE.
	 */
	rib_dump_terminate(peer);
	peer_imsg_flush(peer);

	/* flush Adj-RIB-Out */
	if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
	    peer_adjout_clear_upcall, NULL, NULL) == -1)
		fatal("%s: prefix_dump_new", __func__);

	/* flush Adj-RIB-In */
	peer_flush(peer, AID_UNSPEC, 0);
	peer->stats.prefix_cnt = 0;
	peer->stats.prefix_out_cnt = 0;

	/* free filters */
	filterlist_free(peer->out_rules);

	RB_REMOVE(peer_tree, &peertable, peer);
	free(peer);
}

/*
 * Flush all routes older than staletime. If staletime is 0 all routes
 * will be flushed.
 */
void
peer_flush(struct rde_peer *peer, uint8_t aid, time_t staletime)
{
	struct peer_flush pf = { peer, staletime };

	/* this dump must run synchronously, too much depends on that right now */
	if (rib_dump_new(RIB_ADJ_IN, aid, 0, &pf, peer_flush_upcall,
	    NULL, NULL) == -1)
		fatal("%s: rib_dump_new", __func__);

	/* every route is gone so reset staletime */
	if (aid == AID_UNSPEC) {
		uint8_t i;
		for (i = AID_MIN; i < AID_MAX; i++)
			peer->staletime[i] = 0;
	} else {
		peer->staletime[aid] = 0;
	}
}

/*
 * During graceful restart mark a peer as stale if the session goes down.
 * For the specified AID the Adj-RIB-Out is marked stale and the staletime
 * is set to the current timestamp for identifying stale routes in Adj-RIB-In.
 */
void
peer_stale(struct rde_peer *peer, uint8_t aid, int flushall)
{
	time_t now;

	/* flush the now even staler routes out */
	if (peer->staletime[aid])
		peer_flush(peer, aid, peer->staletime[aid]);

	peer->staletime[aid] = now = getmonotime();
	peer->state = PEER_DOWN;

	/*
	 * stop all pending dumps which may depend on this peer
	 * and flush all pending imsg from the SE.
	 */
	rib_dump_terminate(peer);
	peer_imsg_flush(peer);

	if (flushall)
		peer_flush(peer, aid, 0);

	/* XXX this is not quite correct */
	/* mark Adj-RIB-Out stale for this peer */
	if (prefix_dump_new(peer, aid, 0, NULL,
	    peer_adjout_stale_upcall, NULL, NULL) == -1)
		fatal("%s: prefix_dump_new", __func__);

	/* make sure new prefixes start on a higher timestamp */
	while (now >= getmonotime())
		sleep(1);
}

/*
 * Load the Adj-RIB-Out of a peer; normally called when a session is
 * established. Once the Adj-RIB-Out is ready stale routes are removed
 * from the Adj-RIB-Out and all routes are put on the update queue so
 * they will be sent out.
 */
void
peer_dump(struct rde_peer *peer, uint8_t aid)
{
	if (peer->capa.enhanced_rr && (peer->sent_eor & (1 << aid)))
		rde_peer_send_rrefresh(peer, aid, ROUTE_REFRESH_BEGIN_RR);

	if (peer->export_type == EXPORT_NONE) {
		/* nothing to send apart from the marker */
		if (peer->capa.grestart.restart)
			prefix_add_eor(peer, aid);
	} else if (peer->export_type == EXPORT_DEFAULT_ROUTE) {
		up_generate_default(peer, aid);
		rde_up_dump_done(peer, aid);
	} else if (aid == AID_FLOWSPECv4 || aid == AID_FLOWSPECv6) {
		prefix_flowspec_dump(aid, peer, rde_up_dump_upcall,
		    rde_up_dump_done);
	} else {
		if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer,
		    rde_up_dump_upcall, rde_up_dump_done, NULL) == -1)
			fatal("%s: rib_dump_new", __func__);
		/* throttle peer until dump is done */
		peer->throttled = 1;
	}
}

/*
 * Start of an enhanced route refresh. Mark all routes as stale.
 * Once the route refresh ends an End of Route Refresh message is sent
 * which calls peer_flush() to remove all stale routes.
 */
void
peer_begin_rrefresh(struct rde_peer *peer, uint8_t aid)
{
	time_t now;

	/* flush the now even staler routes out */
	if (peer->staletime[aid])
		peer_flush(peer, aid, peer->staletime[aid]);

	peer->staletime[aid] = now = getmonotime();

	/* make sure new prefixes start on a higher timestamp */
	while (now >= getmonotime())
		sleep(1);
}

/*
 * move an imsg from src to dst, disconnecting any dynamic memory from src.
 */
static void
imsg_move(struct imsg *dst, struct imsg *src)
{
	*dst = *src;
	memset(src, 0, sizeof(*src));
}

/*
 * push an imsg onto the peer imsg queue.
 */
void
peer_imsg_push(struct rde_peer *peer, struct imsg *imsg)
{
	struct iq *iq;

	if ((iq = calloc(1, sizeof(*iq))) == NULL)
		fatal(NULL);
	imsg_move(&iq->imsg, imsg);
	SIMPLEQ_INSERT_TAIL(&peer->imsg_queue, iq, entry);
	imsg_pending++;
}

/*
 * pop first imsg from peer imsg queue and move it into imsg argument.
 * Returns 1 if an element is returned else 0.
 */
int
peer_imsg_pop(struct rde_peer *peer, struct imsg *imsg)
{
	struct iq *iq;

	iq = SIMPLEQ_FIRST(&peer->imsg_queue);
	if (iq == NULL)
		return 0;

	imsg_move(imsg, &iq->imsg);

	SIMPLEQ_REMOVE_HEAD(&peer->imsg_queue, entry);
	free(iq);
	imsg_pending--;

	return 1;
}

/*
 * Check if any imsg are pending; return 0 if none are pending.
 */
int
peer_imsg_pending(void)
{
	return imsg_pending != 0;
}

/*
 * flush all imsg queued for a peer.
 */
void
peer_imsg_flush(struct rde_peer *peer)
{
	struct iq *iq;

	while ((iq = SIMPLEQ_FIRST(&peer->imsg_queue)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&peer->imsg_queue, entry);
		free(iq);
		imsg_pending--;
	}
}