/*	$OpenBSD: rde_update.c,v 1.168 2024/05/30 08:29:30 claudio Exp $ */

/*
 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/tree.h>

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "bgpd.h"
#include "rde.h"
#include "log.h"

/* Verdict of up_process_prefix() for a single candidate prefix. */
enum up_state {
	UP_OK,		/* prefix added/updated in the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* outbound prefix limit reached, session torn down */
	UP_FILTERED,	/* denied by the outbound filters / open policy */
	UP_EXCLUDED,	/* excluded by up_test_update() */
};

/*
 * Pre-built well-known communities matched against outgoing prefixes
 * in up_test_update().
 */
static struct community comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};

static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);

/*
 * Decide whether prefix p may be announced to peer at all.
 * Enforces the basic announcement rules: never echo a route back to its
 * sender, the route reflector redistribution rules for iBGP, and the
 * well-known NO_ADVERTISE/NO_EXPORT/NO_EXPORT_SUBCONFED communities.
 * Returns 1 if the prefix may be announced, 0 if it must be excluded.
 */
static int
up_test_update(struct rde_peer *peer, struct prefix *p)
{
	struct rde_aspath *asp;
	struct rde_community *comm;
	struct rde_peer *frompeer;

	frompeer = prefix_peer(p);
	asp = prefix_aspath(p);
	comm = prefix_communities(p);

	/* paths with parse errors or loops must never reach this point */
	if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
		fatalx("try to send out a botched path");
	if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
		fatalx("try to send out a looped path");

	if (peer == frompeer)
		/* Do not send routes back to sender */
		return (0);

	if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
		/*
		 * route reflector redistribution rules:
		 * 1. if announce is set -> announce
		 * 2. from non-client, to non-client -> no
		 * 3. from client, to non-client -> yes
		 * 4. from non-client, to client -> yes
		 * 5. from client, to client -> yes
		 */
		if (frompeer->conf.reflector_client == 0 &&
		    peer->conf.reflector_client == 0 &&
		    (asp->flags & F_PREFIX_ANNOUNCED) == 0)
			/* Do not redistribute updates to ibgp peers */
			return (0);
	}

	/* well known communities */
	if (community_match(comm, &comm_no_advertise, NULL))
		return (0);
	if (peer->conf.ebgp) {
		/* NO_EXPORT and NO_EXPORT_SUBCONFED only bind at eBGP edges */
		if (community_match(comm, &comm_no_export, NULL))
			return (0);
		if (community_match(comm, &comm_no_expsubconfed, NULL))
			return (0);
	}

	return (1);
}

/* RFC9234 open policy handling */
static int
up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
    uint8_t aid)
{
	/* only for IPv4 and IPv6 unicast */
	if (aid != AID_INET && aid != AID_INET6)
		return 0;

	/*
	 * do not propagate (consider it filtered) if OTC is present and
	 * local role is peer, customer or rs-client.
	 */
	if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
	    peer->role == ROLE_RS_CLIENT)
		if (state->aspath.flags & F_ATTR_OTC)
			return 1;

	/*
	 * add OTC attribute if not present towards peers, customers and
	 * rs-clients (local roles peer, provider, rs).
	 */
	if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
	    peer->role == ROLE_RS)
		if ((state->aspath.flags & F_ATTR_OTC) == 0) {
			uint32_t tmp;

			tmp = htonl(peer->conf.local_as);
			/* failure to add OTC is logged but not fatal */
			if (attr_optadd(&state->aspath,
			    ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
			    &tmp, sizeof(tmp)) == -1)
				log_peer_warnx(&peer->conf,
				    "failed to add OTC attribute");
			state->aspath.flags |= F_ATTR_OTC;
		}

	return 0;
}

/*
 * Process a single prefix by passing it through the various filter stages
 * and if not filtered out update the Adj-RIB-Out. Returns:
 * - UP_OK if prefix was added
 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
 * - UP_FILTERED if prefix was filtered out
 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
 *
 * p is the matching Adj-RIB-Out entry, NULL if there is none yet, or the
 * magic value (void *)-1 meaning "not looked up yet, fetch it here".
 */
static enum up_state
up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
{
	struct filterstate state;
	struct bgpd_addr addr;
	int excluded = 0;

	/*
	 * up_test_update() needs to run before the output filters
	 * else the well known communities won't work properly.
	 * The output filters would not be able to add well known
	 * communities.
	 */
	if (!up_test_update(peer, new))
		excluded = 1;

	rde_filterstate_prep(&state, new);
	pt_getaddr(new->pt, &addr);
	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
	    new->pt->prefixlen, &state) == ACTION_DENY) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* Open Policy Check: acts like an output filter */
	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	if (excluded) {
		rde_filterstate_clean(&state);
		return UP_EXCLUDED;
	}

	/* from here on we know this is an update */
	if (p == (void *)-1)
		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);

	up_prep_adjout(peer, &state, new->pt->aid);
	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
		return UP_ERR_LIMIT;
	}

	return UP_OK;
}

/*
 * Announce the best path of re to peer, or withdraw the prefix if no
 * acceptable path remains. With "evaluate all" set, filtered paths are
 * skipped and the next eligible path is tried instead.
 */
void
up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix *new, *p;

	p = prefix_adjout_first(peer, re->prefix);

	new = prefix_best(re);
	while (new != NULL) {
		switch (up_process_prefix(peer, new, p)) {
		case UP_OK:
		case UP_ERR_LIMIT:
			return;
		case UP_FILTERED:
			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
				/* try the next eligible path instead */
				new = TAILQ_NEXT(new, entry.list.rib);
				if (new != NULL && prefix_eligible(new))
					continue;
			}
			goto done;
		case UP_EXCLUDED:
			goto done;
		}
	}

done:
	/* withdraw prefix */
	if (p != NULL)
		prefix_adjout_withdraw(p);
}

/*
 * Generate updates for the
add-path send case. Depending on the
 * peer eval settings prefixes are selected and distributed.
 * This highly depends on the Adj-RIB-Out to handle prefixes with no
 * changes gracefully. It may be possible to improve the API so that
 * less churn is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix *head, *new, *p;
	int maxpaths = 0, extrapaths = 0, extra;
	int checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		/*
		 * Decide if this path counts as "extra". Once a path no
		 * longer matches the eval mode all following (worse) paths
		 * are extra as well, so checking can stop (checkmode = 0).
		 */
		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}

/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix *p, *head = NULL;
	int all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw stale paths */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}

/* send a default route to the specified peer */
void
up_generate_default(struct rde_peer *peer, uint8_t aid)
{
	extern struct rde_peer *peerself;
	struct filterstate state;
	struct rde_aspath *asp;
	struct prefix *p;
	struct pt_entry *pte;
	struct bgpd_addr addr;

	/* only if the AFI/SAFI was negotiated with the peer */
	if (peer->capa.mp[aid] == 0)
		return;

	rde_filterstate_init(&state);
	asp = &state.aspath;
	asp->aspath = aspath_get(NULL, 0);	/* empty AS path */
	asp->origin = ORIGIN_IGP;
	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, peerself, peerself, set, af); */

	memset(&addr, 0, sizeof(addr));
	addr.aid = aid;
	p = prefix_adjout_lookup(peer, &addr, 0);

	/* outbound filter as usual */
	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
	    ACTION_DENY) {
		rde_filterstate_clean(&state);
		return;
	}

	up_prep_adjout(peer, &state, addr.aid);
	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
	pte = pt_get(&addr, 0);
	if (pte == NULL)
		pte = pt_add(&addr, 0);
	prefix_adjout_update(p, peer, &state, pte, 0);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
	}
}

/*
 * Select the nexthop address to announce towards peer for this update.
 * Returns a pointer to the address to use, or NULL for flowspec AIDs
 * which carry no nexthop.
 */
static struct bgpd_addr *
up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *peer_local = NULL;

	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (peer->local_v4_addr.aid == AID_INET)
			peer_local = &peer->local_v4_addr;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (peer->local_v6_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* flowspec has no nexthop */
		return (NULL);
	default:
		fatalx("%s, bad AID %s", __func__, aid2str(aid));
	}

	if (state->nhflags & NEXTHOP_SELF) {
		/*
		 * Forcing the nexthop to self is always possible
		 * and has precedence over other flags.
		 */
		return (peer_local);
	} else if (!peer->conf.ebgp) {
		/*
		 * in the ibgp case the nexthop is normally not
		 * modified unless it points at the peer itself.
		 */
		if (state->nexthop == NULL) {
			/* announced networks without explicit nexthop set */
			return (peer_local);
		}
		/*
		 * per RFC: if remote peer address is equal to the nexthop set
		 * the nexthop to our local address. This reduces the risk of
		 * routing loops. This overrides NEXTHOP_NOMODIFY.
		 */
		if (memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
			return (peer_local);
		}
		return (&state->nexthop->exit_nexthop);
	} else if (peer->conf.distance == 1) {
		/*
		 * In the ebgp directly connected case never send
		 * out a nexthop that is outside of the connected
		 * network of the peer. No matter what flags are
		 * set. This follows section 5.1.3 of RFC 4271.
		 * So just checking if the nexthop is in the same net
		 * is enough here.
		 */
		if (state->nexthop != NULL &&
		    state->nexthop->flags & NEXTHOP_CONNECTED &&
		    prefix_compare(&peer->remote_addr,
		    &state->nexthop->nexthop_net,
		    state->nexthop->nexthop_netlen) == 0) {
			/* nexthop and peer are in the same net */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	} else {
		/*
		 * For ebgp multihop make it possible to overrule
		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
		 * Similar to the ibgp case there is no same net check
		 * needed but still ensure that the nexthop is not
		 * pointing to the peer itself.
		 */
		if (state->nhflags & NEXTHOP_NOMODIFY &&
		    state->nexthop != NULL &&
		    memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
			/* no modify flag set and nexthop not peer addr */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	}
}

/*
 * Finalize the filterstate before insertion into the Adj-RIB-Out:
 * prepend the local AS on eBGP sessions (unless transparent-as is set)
 * and resolve the nexthop to announce for this AID.
 */
static void
up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	struct nexthop *nh = NULL;
	u_char *np;
	uint16_t nl;

	/* prepend local AS number for eBGP sessions. */
	if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
		uint32_t prep_as = peer->conf.local_as;
		np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
		aspath_put(state->aspath.aspath);
		state->aspath.aspath = aspath_get(np, nl);
		free(np);
	}

	/* update nexthop */
	nexthop = up_get_nexthop(peer, state, aid);
	if (nexthop != NULL)
		nh = nexthop_get(nexthop);
	nexthop_unref(state->nexthop);
	state->nexthop = nh;
	state->nhflags = 0;
}


/*
 * Write all path attributes for the update described by asp/comm/nh into
 * buf, in ascending attribute type order as required for the UPDATE
 * message. Handles 4-byte AS to 2-byte AS deflation (AS4_PATH and
 * AS4_AGGREGATOR) for peers without the 4-byte AS capability.
 * Returns 0 on success, -1 if an attribute did not fit into buf.
 */
static int
up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
    uint8_t aid)
{
	struct attr *oa = NULL, *newaggr = NULL;
	u_char *pdata;
	uint32_t tmp32;
	int flags, neednewpath = 0, rv;
	uint16_t plen;
	uint8_t oalen = 0, type;

	if (asp->others_len > 0)
		oa = asp->others[oalen++];

	/* dump attributes in ascending order */
	for (type = ATTR_ORIGIN; type < 255; type++) {
		/* advance oa to the first "other" attribute >= type */
		while (oa && oa->type < type) {
			if (oalen < asp->others_len)
				oa = asp->others[oalen++];
			else
				oa = NULL;
		}

		switch (type) {
		/*
		 * Attributes stored in rde_aspath
		 */
		case ATTR_ORIGIN:
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ORIGIN, &asp->origin, 1) == -1)
				return -1;
			break;
		case ATTR_ASPATH:
			plen = aspath_length(asp->aspath);
			pdata = aspath_dump(asp->aspath);

			/* 2-byte AS peers get a deflated AS path copy */
			if (!peer_has_as4byte(peer))
				pdata = aspath_deflate(pdata, &plen,
				    &neednewpath);
			rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ASPATH, pdata, plen);
			if (!peer_has_as4byte(peer))
				free(pdata);

			if (rv == -1)
				return -1;
			break;
		case ATTR_NEXTHOP:
			switch (aid) {
			case AID_INET:
				/* classic NEXTHOP only for plain IPv4 */
				if (nh == NULL)
					return -1;
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
				    sizeof(nh->exit_nexthop.v4)) == -1)
					return -1;
				break;
			default:
				/* all other AIDs use MP_REACH_NLRI */
				break;
			}
			break;
		case ATTR_MED:
			/*
			 * The old MED from other peers MUST not be announced
			 * to others unless the MED is originating from us or
			 * the peer is an IBGP one. Only exception are routers
			 * with "transparent-as yes" set.
			 */
			if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
			    asp->flags & F_ATTR_MED_ANNOUNCE ||
			    peer->flags & PEERFLAG_TRANS_AS)) {
				tmp32 = htonl(asp->med);
				if (attr_writebuf(buf, ATTR_OPTIONAL,
				    ATTR_MED, &tmp32, 4) == -1)
					return -1;
			}
			break;
		case ATTR_LOCALPREF:
			if (!peer->conf.ebgp) {
				/* local preference, only valid for ibgp */
				tmp32 = htonl(asp->lpref);
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_LOCALPREF, &tmp32, 4) == -1)
					return -1;
			}
			break;
		/*
		 * Communities are stored in struct rde_community
		 */
		case ATTR_COMMUNITIES:
		case ATTR_EXT_COMMUNITIES:
		case ATTR_LARGE_COMMUNITIES:
			if (community_writebuf(comm, type, peer->conf.ebgp,
			    buf) == -1)
				return -1;
			break;
		/*
		 * NEW to OLD conversion when sending stuff to a 2byte AS peer
		 */
		case ATTR_AS4_PATH:
			if (neednewpath) {
				plen = aspath_length(asp->aspath);
				pdata = aspath_dump(asp->aspath);

				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (plen != 0)
					if (attr_writebuf(buf, flags,
					    ATTR_AS4_PATH, pdata, plen) == -1)
						return -1;
			}
			break;
		case ATTR_AS4_AGGREGATOR:
			/* newaggr was set while deflating ATTR_AGGREGATOR */
			if (newaggr) {
				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (attr_writebuf(buf, flags,
				    ATTR_AS4_AGGREGATOR, newaggr->data,
				    newaggr->len) == -1)
					return -1;
			}
			break;
		/*
		 * multiprotocol attributes are handled elsewhere
		 */
		case ATTR_MP_REACH_NLRI:
		case ATTR_MP_UNREACH_NLRI:
			break;
		/*
		 * dump all other path attributes. Following rules apply:
		 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
		 *     ATTR_AGGREGATOR pass unmodified (enforce flags
		 *     to correct values). Actually ATTR_AGGREGATOR may be
		 *     deflated for OLD 2-byte peers.
		 *  2. non-transitive attrs: don't re-announce to ebgp peers
		 *  3. transitive known attrs: announce unmodified
		 *  4. transitive unknown attrs: set partial bit and
		 *     re-announce
		 */
		case ATTR_ATOMIC_AGGREGATE:
			if (oa == NULL || oa->type != type)
				break;
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
				return -1;
			break;
		case ATTR_AGGREGATOR:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (!peer_has_as4byte(peer)) {
				/* need to deflate the aggregator */
				uint8_t t[6];
				uint16_t tas;

				/*
				 * NOTE(review): this transitive check
				 * repeats the one above — redundant but
				 * harmless.
				 */
				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp)
					break;

				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					/* AS does not fit in 2 bytes, use
					 * AS_TRANS and emit AS4_AGGREGATOR */
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if (attr_writebuf(buf, oa->flags,
				    oa->type, &t, sizeof(t)) == -1)
					return -1;
			} else {
				if (attr_writebuf(buf, oa->flags, oa->type,
				    oa->data, oa->len) == -1)
					return -1;
			}
			break;
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
		case ATTR_OTC:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (attr_writebuf(buf, oa->flags, oa->type,
			    oa->data, oa->len) == -1)
				return -1;
			break;
		default:
			if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
				/* there is no attribute left to dump */
				return (0);

			if (oa == NULL || oa->type != type)
				break;
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				break;
			}
			if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
			    oa->type, oa->data, oa->len) == -1)
				return -1;
		}
	}
	return 0;
}

/*
 * Check if the pending element is a EoR marker. If so remove it from the
 * tree and return 1.
 */
int
up_is_eor(struct rde_peer *peer, uint8_t aid)
{
	struct prefix *p;

	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
		/*
		 * Need to remove eor from update tree because
		 * prefix_adjout_destroy() can't handle that.
		 */
		RB_REMOVE(prefix_tree, &peer->updates[aid], p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		prefix_adjout_destroy(p);
		return 1;
	}
	return 0;
}

/* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
#define MIN_UPDATE_LEN	16

/*
 * Remove p from the pending update/withdraw tree after it was written out,
 * updating the peer statistics. A withdrawn prefix is destroyed entirely,
 * an updated one stays in the Adj-RIB-Out.
 */
static void
up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
    struct rde_peer *peer, int withdraw)
{
	if (withdraw) {
		/* prefix no longer needed, remove it */
		prefix_adjout_destroy(p);
		peer->stats.prefix_sent_withdraw++;
	} else {
		/* prefix still in Adj-RIB-Out, keep it */
		RB_REMOVE(prefix_tree, prefix_head, p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		peer->stats.pending_update--;
		peer->stats.prefix_sent_update++;
	}
}

/*
 * Write prefixes to buffer until either there is no more space or
 * the next prefix has no longer the same ASPATH attributes.
 * Returns -1 if no prefix was written else 0.
 */
static int
up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
    struct rde_peer *peer, int withdraw)
{
	struct prefix *p, *np;
	int done = 0, has_ap = -1, rv = -1;

	RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
		/* add-path capability is the same for all prefixes here */
		if (has_ap == -1)
			has_ap = peer_has_add_path(peer, p->pt->aid,
			    CAPA_AP_SEND);
		if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
		    -1)
			break;

		/* make sure we only dump prefixes which belong together */
		if (np == NULL ||
		    np->aspath != p->aspath ||
		    np->communities != p->communities ||
		    np->nexthop != p->nexthop ||
		    np->nhflags != p->nhflags ||
		    (np->flags & PREFIX_FLAG_EOR))
			done = 1;

		rv = 0;
		up_prefix_free(prefix_head, p, peer, withdraw);
		if (done)
			break;
	}
	return rv;
}

/*
 * Write the MP_REACH_NLRI attribute (AFI/SAFI, nexthop and NLRI) for aid
 * into buf. Returns 0 on success, -1 on error (including the case that
 * not a single prefix fit into buf).
 */
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	/* remember where the attribute length goes, patched at the end */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;

	switch (aid) {
	case AID_INET6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
			return -1;
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update */
		return (-1);

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}

/*
 * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
 *
 *	+-----------------------------------------------------+
 *	|   Withdrawn Routes Length (2 octets)                |
 *	+-----------------------------------------------------+
 *	|   Withdrawn Routes (variable)                       |
 *	+-----------------------------------------------------+
 *	|   Total Path Attribute Length (2 octets)            |
 *	+-----------------------------------------------------+
 *	|   Path Attributes (variable)                        |
 *	+-----------------------------------------------------+
 *	|   Network Layer Reachability Information (variable) |
 *	+-----------------------------------------------------+
 *
 * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
 * the latter will be the only path attribute in a message.
 */

/*
 * Write UPDATE message for withdrawn routes. The size of buf limits
 * how many routes can be added. Return 0 on success -1 on error which
 * includes generating an empty withdraw message.
 */
int
up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
{
	size_t off;
	uint16_t afi, len;
	uint8_t safi;

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid != AID_INET) {
		/*
		 * Non-IPv4 withdraws go into an MP_UNREACH_NLRI attribute,
		 * so the withdrawn routes field stays 0 and off is moved to
		 * the path attribute length field instead.
		 */
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			return -1;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			return -1;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;

		/* afi & safi */
		if (aid2afi(aid, &afi, &safi))
			fatalx("up_dump_mp_unreach: bad AID");
		if (ibuf_add_n16(buf, afi) == -1)
			return -1;
		if (ibuf_add_n8(buf, safi) == -1)
			return -1;
	}

	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
		return -1;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	if (aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			return -1;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;
	}

	return 0;
}

/*
 * Write UPDATE message for changed and added routes. The size of buf limits
 * how many routes can be added. The function first dumps the path attributes
 * and then tries to add as many prefixes using these attributes.
 * Return 0 on success -1 on error which includes producing an empty message.
 */
int
up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
{
	struct bgpd_addr addr;
	struct prefix *p;
	size_t off;
	uint16_t len;

	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p == NULL)
		return -1;

	/* withdrawn routes length field is 0 */
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	/* reserve space for 2-byte path attribute length */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	/* attributes of the tree minimum; up_dump_prefix() only pulls in
	 * further prefixes sharing these exact attributes */
	if (up_generate_attr(buf, peer, prefix_aspath(p),
	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* write mp attribute including nlri */

		/*
		 * RFC 7606 wants this to be first but then we need
		 * to use multiple buffers with adjusted length to
		 * merge the attributes together in reverse order of
		 * creation.
		 */
		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
		    -1)
			goto fail;
	}

	/* update attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	if (aid == AID_INET) {
		/* last but not least dump the IPv4 nlri */
		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
			goto fail;
	}

	return 0;

fail:
	/* Not enough space. Drop prefix, it will never fit. */
	pt_getaddr(p->pt, &addr);
	log_peer_warnx(&peer->conf, "dump of path attributes failed, "
	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);

	up_prefix_free(&peer->updates[aid], p, peer, 0);
	/* XXX should probably send a withdraw for this prefix */
	return -1;
}