1/* $OpenBSD: session.c,v 1.480 2024/06/10 12:51:25 claudio Exp $ */ 2 3/* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20#include <sys/types.h> 21 22#include <sys/mman.h> 23#include <sys/socket.h> 24#include <sys/time.h> 25#include <sys/resource.h> 26#include <sys/un.h> 27#include <netinet/in.h> 28#include <netinet/ip.h> 29#include <netinet/tcp.h> 30#include <arpa/inet.h> 31#include <limits.h> 32 33#include <err.h> 34#include <errno.h> 35#include <fcntl.h> 36#include <ifaddrs.h> 37#include <poll.h> 38#include <pwd.h> 39#include <signal.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <string.h> 43#include <syslog.h> 44#include <unistd.h> 45 46#include "bgpd.h" 47#include "session.h" 48#include "log.h" 49 50#define PFD_PIPE_MAIN 0 51#define PFD_PIPE_ROUTE 1 52#define PFD_PIPE_ROUTE_CTL 2 53#define PFD_SOCK_CTL 3 54#define PFD_SOCK_RCTL 4 55#define PFD_LISTENERS_START 5 56 57void session_sighdlr(int); 58int setup_listeners(u_int *); 59void init_peer(struct peer *); 60void start_timer_holdtime(struct peer *); 61void start_timer_sendholdtime(struct peer *); 62void start_timer_keepalive(struct peer *); 63void session_close_connection(struct peer *); 64void change_state(struct peer *, enum session_state, enum session_events); 65int session_setup_socket(struct peer *); 66void session_accept(int); 67int session_connect(struct peer *); 68void session_tcp_established(struct peer *); 69int session_capa_add(struct ibuf *, uint8_t, uint8_t); 70int session_capa_add_mp(struct ibuf *, uint8_t); 71int session_capa_add_afi(struct ibuf *, uint8_t, uint8_t); 72struct bgp_msg *session_newmsg(enum msg_type, uint16_t); 73int session_sendmsg(struct bgp_msg *, struct peer *); 74void session_open(struct peer *); 75void session_keepalive(struct peer *); 76void session_update(uint32_t, struct ibuf *); 77void session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *); 78void session_notification_data(struct peer *, uint8_t, uint8_t, void *, 79 size_t); 80void session_rrefresh(struct peer *, uint8_t, uint8_t); 81int session_graceful_restart(struct peer *); 82int session_graceful_stop(struct peer *); 83int session_dispatch_msg(struct pollfd *, struct peer *); 84void session_process_msg(struct peer *); 85int parse_header(struct peer *, u_char *, uint16_t *, uint8_t *); 86int parse_open(struct peer *); 87int parse_update(struct peer *); 88int parse_rrefresh(struct peer *); 89void parse_notification(struct peer *); 90int parse_capabilities(struct peer *, struct ibuf *, uint32_t *); 91int capa_neg_calc(struct peer *); 92void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 93void session_up(struct peer *); 94void session_down(struct peer *); 95int imsg_rde(int, uint32_t, void *, uint16_t); 96void session_demote(struct peer *, int); 97void merge_peers(struct bgpd_config *, struct bgpd_config *); 98 99int la_cmp(struct listen_addr *, struct listen_addr *); 100void session_template_clone(struct peer *, struct sockaddr *, 101 uint32_t, uint32_t); 102int session_match_mask(struct peer *, struct bgpd_addr *); 103 104static struct bgpd_config *conf, *nconf; 105static struct imsgbuf *ibuf_rde; 106static struct imsgbuf *ibuf_rde_ctl; 107static struct imsgbuf *ibuf_main; 108 109struct bgpd_sysdep sysdep; 110volatile sig_atomic_t session_quit; 111int pending_reconf; 112int csock = -1, rcsock = -1; 113u_int peer_cnt; 114 115struct mrt_head mrthead; 116time_t pauseaccept; 117 118static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { 119 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 120 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 121}; 122 123static inline int 124peer_compare(const struct peer *a, const struct peer *b) 125{ 126 return a->conf.id - b->conf.id; 127} 128 129RB_GENERATE(peer_head, peer, entry, peer_compare); 130 131void 132session_sighdlr(int sig) 133{ 134 switch (sig) { 135 case SIGINT: 136 case SIGTERM: 137 session_quit = 1; 138 break; 139 } 140} 141 142int 143setup_listeners(u_int *la_cnt) 144{ 145 int ttl = 255; 146 struct listen_addr *la; 147 u_int cnt = 0; 148 149 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 150 la->reconf = RECONF_NONE; 151 cnt++; 152 153 if (la->flags & LISTENER_LISTENING) 154 continue; 155 156 if (la->fd == -1) { 157 log_warn("cannot establish listener on %s: invalid fd", 158 log_sockaddr((struct sockaddr *)&la->sa, 159 la->sa_len)); 160 continue; 161 } 162 163 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 164 fatal("tcp_md5_prep_listener"); 165 166 /* set ttl to 255 so that ttl-security works */ 167 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 168 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 169 log_warn("setup_listeners setsockopt TTL"); 170 continue; 171 } 172 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 173 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 174 log_warn("setup_listeners setsockopt hoplimit"); 175 continue; 176 } 177 178 if (listen(la->fd, MAX_BACKLOG)) { 179 close(la->fd); 180 fatal("listen"); 181 } 182 183 la->flags |= LISTENER_LISTENING; 184 185 log_info("listening on %s", 186 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 187 } 188 189 *la_cnt = cnt; 190 191 return (0); 192} 193 194void 195session_main(int debug, int verbose) 196{ 197 int timeout; 198 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 199 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 200 u_int listener_cnt, ctl_cnt, mrt_cnt; 201 u_int new_cnt; 202 struct passwd *pw; 203 struct peer *p, **peer_l = NULL, *next; 204 struct mrt *m, *xm, **mrt_l = NULL; 205 struct pollfd *pfd = NULL; 206 struct listen_addr *la; 207 void *newp; 208 time_t now; 209 short events; 210 211 log_init(debug, LOG_DAEMON); 212 log_setverbose(verbose); 213 214 log_procinit(log_procnames[PROC_SE]); 215 216 if ((pw = getpwnam(BGPD_USER)) == NULL) 217 fatal(NULL); 218 219 if (chroot(pw->pw_dir) == -1) 220 fatal("chroot"); 221 if (chdir("/") == -1) 222 fatal("chdir(\"/\")"); 223 224 setproctitle("session engine"); 225 226 if (setgroups(1, &pw->pw_gid) || 227 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 228 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 229 fatal("can't drop privileges"); 230 231 if (pledge("stdio inet recvfd", NULL) == -1) 232 fatal("pledge"); 233 234 signal(SIGTERM, session_sighdlr); 235 signal(SIGINT, session_sighdlr); 236 signal(SIGPIPE, SIG_IGN); 237 signal(SIGHUP, SIG_IGN); 238 signal(SIGALRM, SIG_IGN); 239 signal(SIGUSR1, SIG_IGN); 240 241 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 242 fatal(NULL); 243 imsg_init(ibuf_main, 3); 244 245 LIST_INIT(&mrthead); 246 listener_cnt = 0; 247 peer_cnt = 0; 248 ctl_cnt = 0; 249 250 conf = new_config(); 251 log_info("session engine ready"); 252 253 while (session_quit == 0) { 254 /* check for peers to be initialized or deleted */ 255 if (!pending_reconf) { 256 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 257 /* cloned peer that idled out? */ 258 if (p->template && (p->state == STATE_IDLE || 259 p->state == STATE_ACTIVE) && 260 getmonotime() - p->stats.last_updown >= 261 INTERVAL_HOLD_CLONED) 262 p->reconf_action = RECONF_DELETE; 263 264 /* new peer that needs init? */ 265 if (p->state == STATE_NONE) 266 init_peer(p); 267 268 /* deletion due? */ 269 if (p->reconf_action == RECONF_DELETE) { 270 if (p->demoted) 271 session_demote(p, -1); 272 p->conf.demote_group[0] = 0; 273 session_stop(p, ERR_CEASE_PEER_UNCONF, 274 NULL); 275 timer_remove_all(&p->timers); 276 tcp_md5_del_listener(conf, p); 277 RB_REMOVE(peer_head, &conf->peers, p); 278 log_peer_warnx(&p->conf, "removed"); 279 free(p); 280 peer_cnt--; 281 continue; 282 } 283 p->reconf_action = RECONF_NONE; 284 } 285 } 286 287 if (peer_cnt > peer_l_elms) { 288 if ((newp = reallocarray(peer_l, peer_cnt, 289 sizeof(struct peer *))) == NULL) { 290 /* panic for now */ 291 log_warn("could not resize peer_l from %u -> %u" 292 " entries", peer_l_elms, peer_cnt); 293 fatalx("exiting"); 294 } 295 peer_l = newp; 296 peer_l_elms = peer_cnt; 297 } 298 299 mrt_cnt = 0; 300 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 301 xm = LIST_NEXT(m, entry); 302 if (m->state == MRT_STATE_REMOVE) { 303 mrt_clean(m); 304 LIST_REMOVE(m, entry); 305 free(m); 306 continue; 307 } 308 if (m->wbuf.queued) 309 mrt_cnt++; 310 } 311 312 if (mrt_cnt > mrt_l_elms) { 313 if ((newp = reallocarray(mrt_l, mrt_cnt, 314 sizeof(struct mrt *))) == NULL) { 315 /* panic for now */ 316 log_warn("could not resize mrt_l from %u -> %u" 317 " entries", mrt_l_elms, mrt_cnt); 318 fatalx("exiting"); 319 } 320 mrt_l = newp; 321 mrt_l_elms = mrt_cnt; 322 } 323 324 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 325 ctl_cnt + mrt_cnt; 326 if (new_cnt > pfd_elms) { 327 if ((newp = reallocarray(pfd, new_cnt, 328 sizeof(struct pollfd))) == NULL) { 329 /* panic for now */ 330 log_warn("could not resize pfd from %u -> %u" 331 " entries", pfd_elms, new_cnt); 332 fatalx("exiting"); 333 } 334 pfd = newp; 335 pfd_elms = new_cnt; 336 } 337 338 memset(pfd, 0, sizeof(struct pollfd) * pfd_elms); 339 340 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 341 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 342 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 343 344 if (pauseaccept == 0) { 345 pfd[PFD_SOCK_CTL].fd = csock; 346 pfd[PFD_SOCK_CTL].events = POLLIN; 347 pfd[PFD_SOCK_RCTL].fd = rcsock; 348 pfd[PFD_SOCK_RCTL].events = POLLIN; 349 } else { 350 pfd[PFD_SOCK_CTL].fd = -1; 351 pfd[PFD_SOCK_RCTL].fd = -1; 352 } 353 354 i = PFD_LISTENERS_START; 355 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 356 if (pauseaccept == 0) { 357 pfd[i].fd = la->fd; 358 pfd[i].events = POLLIN; 359 } else 360 pfd[i].fd = -1; 361 i++; 362 } 363 idx_listeners = i; 364 timeout = 240; /* loop every 240s at least */ 365 366 now = getmonotime(); 367 RB_FOREACH(p, peer_head, &conf->peers) { 368 time_t nextaction; 369 struct timer *pt; 370 371 /* check timers */ 372 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 373 switch (pt->type) { 374 case Timer_Hold: 375 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 376 break; 377 case Timer_SendHold: 378 bgp_fsm(p, EVNT_TIMER_SENDHOLD); 379 break; 380 case Timer_ConnectRetry: 381 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 382 break; 383 case Timer_Keepalive: 384 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 385 break; 386 case Timer_IdleHold: 387 bgp_fsm(p, EVNT_START); 388 break; 389 case Timer_IdleHoldReset: 390 p->IdleHoldTime = 391 INTERVAL_IDLE_HOLD_INITIAL; 392 p->errcnt = 0; 393 timer_stop(&p->timers, 394 Timer_IdleHoldReset); 395 break; 396 case Timer_CarpUndemote: 397 timer_stop(&p->timers, 398 Timer_CarpUndemote); 399 if (p->demoted && 400 p->state == STATE_ESTABLISHED) 401 session_demote(p, -1); 402 break; 403 case Timer_RestartTimeout: 404 timer_stop(&p->timers, 405 Timer_RestartTimeout); 406 session_graceful_stop(p); 407 break; 408 default: 409 fatalx("King Bula lost in time"); 410 } 411 } 412 if ((nextaction = timer_nextduein(&p->timers, 413 now)) != -1 && nextaction < timeout) 414 timeout = nextaction; 415 416 /* are we waiting for a write? */ 417 events = POLLIN; 418 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 419 events |= POLLOUT; 420 /* is there still work to do? */ 421 if (p->rpending && p->rbuf && p->rbuf->wpos) 422 timeout = 0; 423 424 /* poll events */ 425 if (p->fd != -1 && events != 0) { 426 pfd[i].fd = p->fd; 427 pfd[i].events = events; 428 peer_l[i - idx_listeners] = p; 429 i++; 430 } 431 } 432 433 idx_peers = i; 434 435 LIST_FOREACH(m, &mrthead, entry) 436 if (m->wbuf.queued) { 437 pfd[i].fd = m->wbuf.fd; 438 pfd[i].events = POLLOUT; 439 mrt_l[i - idx_peers] = m; 440 i++; 441 } 442 443 idx_mrts = i; 444 445 i += control_fill_pfds(pfd + i, pfd_elms -i); 446 447 if (i > pfd_elms) 448 fatalx("poll pfd overflow"); 449 450 if (pauseaccept && timeout > 1) 451 timeout = 1; 452 if (timeout < 0) 453 timeout = 0; 454 if (poll(pfd, i, timeout * 1000) == -1) { 455 if (errno == EINTR) 456 continue; 457 fatal("poll error"); 458 } 459 460 /* 461 * If we previously saw fd exhaustion, we stop accept() 462 * for 1 second to throttle the accept() loop. 463 */ 464 if (pauseaccept && getmonotime() > pauseaccept + 1) 465 pauseaccept = 0; 466 467 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 468 log_warnx("SE: Lost connection to parent"); 469 session_quit = 1; 470 continue; 471 } else 472 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 473 &listener_cnt); 474 475 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 476 log_warnx("SE: Lost connection to RDE"); 477 msgbuf_clear(&ibuf_rde->w); 478 free(ibuf_rde); 479 ibuf_rde = NULL; 480 } else 481 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 482 &listener_cnt); 483 484 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 485 -1) { 486 log_warnx("SE: Lost connection to RDE control"); 487 msgbuf_clear(&ibuf_rde_ctl->w); 488 free(ibuf_rde_ctl); 489 ibuf_rde_ctl = NULL; 490 } else 491 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 492 &listener_cnt); 493 494 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 495 ctl_cnt += control_accept(csock, 0); 496 497 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 498 ctl_cnt += control_accept(rcsock, 1); 499 500 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 501 if (pfd[j].revents & POLLIN) 502 session_accept(pfd[j].fd); 503 504 for (; j < idx_peers; j++) 505 session_dispatch_msg(&pfd[j], 506 peer_l[j - idx_listeners]); 507 508 RB_FOREACH(p, peer_head, &conf->peers) 509 if (p->rbuf && p->rbuf->wpos) 510 session_process_msg(p); 511 512 for (; j < idx_mrts; j++) 513 if (pfd[j].revents & POLLOUT) 514 mrt_write(mrt_l[j - idx_peers]); 515 516 for (; j < i; j++) 517 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 518 } 519 520 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 521 session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down"); 522 timer_remove_all(&p->timers); 523 tcp_md5_del_listener(conf, p); 524 RB_REMOVE(peer_head, &conf->peers, p); 525 free(p); 526 } 527 528 while ((m = LIST_FIRST(&mrthead)) != NULL) { 529 mrt_clean(m); 530 LIST_REMOVE(m, entry); 531 free(m); 532 } 533 534 free_config(conf); 535 free(peer_l); 536 free(mrt_l); 537 free(pfd); 538 539 /* close pipes */ 540 if (ibuf_rde) { 541 msgbuf_write(&ibuf_rde->w); 542 msgbuf_clear(&ibuf_rde->w); 543 close(ibuf_rde->fd); 544 free(ibuf_rde); 545 } 546 if (ibuf_rde_ctl) { 547 msgbuf_clear(&ibuf_rde_ctl->w); 548 close(ibuf_rde_ctl->fd); 549 free(ibuf_rde_ctl); 550 } 551 msgbuf_write(&ibuf_main->w); 552 msgbuf_clear(&ibuf_main->w); 553 close(ibuf_main->fd); 554 free(ibuf_main); 555 556 control_shutdown(csock); 557 control_shutdown(rcsock); 558 log_info("session engine exiting"); 559 exit(0); 560} 561 562void 563init_peer(struct peer *p) 564{ 565 TAILQ_INIT(&p->timers); 566 p->fd = p->wbuf.fd = -1; 567 568 if (p->conf.if_depend[0]) 569 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1, 570 p->conf.if_depend, sizeof(p->conf.if_depend)); 571 else 572 p->depend_ok = 1; 573 574 peer_cnt++; 575 576 change_state(p, STATE_IDLE, EVNT_NONE); 577 if (p->conf.down) 578 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 579 else 580 timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY); 581 582 p->stats.last_updown = getmonotime(); 583 584 /* 585 * on startup, demote if requested. 586 * do not handle new peers. they must reach ESTABLISHED beforehand. 587 * peers added at runtime have reconf_action set to RECONF_REINIT. 588 */ 589 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 590 session_demote(p, +1); 591} 592 593void 594bgp_fsm(struct peer *peer, enum session_events event) 595{ 596 switch (peer->state) { 597 case STATE_NONE: 598 /* nothing */ 599 break; 600 case STATE_IDLE: 601 switch (event) { 602 case EVNT_START: 603 timer_stop(&peer->timers, Timer_Hold); 604 timer_stop(&peer->timers, Timer_SendHold); 605 timer_stop(&peer->timers, Timer_Keepalive); 606 timer_stop(&peer->timers, Timer_IdleHold); 607 608 /* allocate read buffer */ 609 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 610 if (peer->rbuf == NULL) 611 fatal(NULL); 612 613 /* init write buffer */ 614 msgbuf_init(&peer->wbuf); 615 616 if (!peer->depend_ok) 617 timer_stop(&peer->timers, Timer_ConnectRetry); 618 else if (peer->passive || peer->conf.passive || 619 peer->conf.template) { 620 change_state(peer, STATE_ACTIVE, event); 621 timer_stop(&peer->timers, Timer_ConnectRetry); 622 } else { 623 change_state(peer, STATE_CONNECT, event); 624 timer_set(&peer->timers, Timer_ConnectRetry, 625 conf->connectretry); 626 session_connect(peer); 627 } 628 peer->passive = 0; 629 break; 630 case EVNT_STOP: 631 timer_stop(&peer->timers, Timer_IdleHold); 632 break; 633 default: 634 /* ignore */ 635 break; 636 } 637 break; 638 case STATE_CONNECT: 639 switch (event) { 640 case EVNT_START: 641 /* ignore */ 642 break; 643 case EVNT_CON_OPEN: 644 session_tcp_established(peer); 645 session_open(peer); 646 timer_stop(&peer->timers, Timer_ConnectRetry); 647 peer->holdtime = INTERVAL_HOLD_INITIAL; 648 start_timer_holdtime(peer); 649 change_state(peer, STATE_OPENSENT, event); 650 break; 651 case EVNT_CON_OPENFAIL: 652 timer_set(&peer->timers, Timer_ConnectRetry, 653 conf->connectretry); 654 session_close_connection(peer); 655 change_state(peer, STATE_ACTIVE, event); 656 break; 657 case EVNT_TIMER_CONNRETRY: 658 timer_set(&peer->timers, Timer_ConnectRetry, 659 conf->connectretry); 660 session_connect(peer); 661 break; 662 default: 663 change_state(peer, STATE_IDLE, event); 664 break; 665 } 666 break; 667 case STATE_ACTIVE: 668 switch (event) { 669 case EVNT_START: 670 /* ignore */ 671 break; 672 case EVNT_CON_OPEN: 673 session_tcp_established(peer); 674 session_open(peer); 675 timer_stop(&peer->timers, Timer_ConnectRetry); 676 peer->holdtime = INTERVAL_HOLD_INITIAL; 677 start_timer_holdtime(peer); 678 change_state(peer, STATE_OPENSENT, event); 679 break; 680 case EVNT_CON_OPENFAIL: 681 timer_set(&peer->timers, Timer_ConnectRetry, 682 conf->connectretry); 683 session_close_connection(peer); 684 change_state(peer, STATE_ACTIVE, event); 685 break; 686 case EVNT_TIMER_CONNRETRY: 687 timer_set(&peer->timers, Timer_ConnectRetry, 688 peer->holdtime); 689 change_state(peer, STATE_CONNECT, event); 690 session_connect(peer); 691 break; 692 default: 693 change_state(peer, STATE_IDLE, event); 694 break; 695 } 696 break; 697 case STATE_OPENSENT: 698 switch (event) { 699 case EVNT_START: 700 /* ignore */ 701 break; 702 case EVNT_STOP: 703 change_state(peer, STATE_IDLE, event); 704 break; 705 case EVNT_CON_CLOSED: 706 session_close_connection(peer); 707 timer_set(&peer->timers, Timer_ConnectRetry, 708 conf->connectretry); 709 change_state(peer, STATE_ACTIVE, event); 710 break; 711 case EVNT_CON_FATAL: 712 change_state(peer, STATE_IDLE, event); 713 break; 714 case EVNT_TIMER_HOLDTIME: 715 session_notification(peer, ERR_HOLDTIMEREXPIRED, 716 0, NULL); 717 change_state(peer, STATE_IDLE, event); 718 break; 719 case EVNT_TIMER_SENDHOLD: 720 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 721 0, NULL); 722 change_state(peer, STATE_IDLE, event); 723 break; 724 case EVNT_RCVD_OPEN: 725 /* parse_open calls change_state itself on failure */ 726 if (parse_open(peer)) 727 break; 728 session_keepalive(peer); 729 change_state(peer, STATE_OPENCONFIRM, event); 730 break; 731 case EVNT_RCVD_NOTIFICATION: 732 parse_notification(peer); 733 break; 734 default: 735 session_notification(peer, 736 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL); 737 change_state(peer, STATE_IDLE, event); 738 break; 739 } 740 break; 741 case STATE_OPENCONFIRM: 742 switch (event) { 743 case EVNT_START: 744 /* ignore */ 745 break; 746 case EVNT_STOP: 747 change_state(peer, STATE_IDLE, event); 748 break; 749 case EVNT_CON_CLOSED: 750 case EVNT_CON_FATAL: 751 change_state(peer, STATE_IDLE, event); 752 break; 753 case EVNT_TIMER_HOLDTIME: 754 session_notification(peer, ERR_HOLDTIMEREXPIRED, 755 0, NULL); 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_TIMER_SENDHOLD: 759 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 760 0, NULL); 761 change_state(peer, STATE_IDLE, event); 762 break; 763 case EVNT_TIMER_KEEPALIVE: 764 session_keepalive(peer); 765 break; 766 case EVNT_RCVD_KEEPALIVE: 767 start_timer_holdtime(peer); 768 change_state(peer, STATE_ESTABLISHED, event); 769 break; 770 case EVNT_RCVD_NOTIFICATION: 771 parse_notification(peer); 772 break; 773 default: 774 session_notification(peer, 775 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL); 776 change_state(peer, STATE_IDLE, event); 777 break; 778 } 779 break; 780 case STATE_ESTABLISHED: 781 switch (event) { 782 case EVNT_START: 783 /* ignore */ 784 break; 785 case EVNT_STOP: 786 change_state(peer, STATE_IDLE, event); 787 break; 788 case EVNT_CON_CLOSED: 789 case EVNT_CON_FATAL: 790 change_state(peer, STATE_IDLE, event); 791 break; 792 case EVNT_TIMER_HOLDTIME: 793 session_notification(peer, ERR_HOLDTIMEREXPIRED, 794 0, NULL); 795 change_state(peer, STATE_IDLE, event); 796 break; 797 case EVNT_TIMER_SENDHOLD: 798 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 799 0, NULL); 800 change_state(peer, STATE_IDLE, event); 801 break; 802 case EVNT_TIMER_KEEPALIVE: 803 session_keepalive(peer); 804 break; 805 case EVNT_RCVD_KEEPALIVE: 806 start_timer_holdtime(peer); 807 break; 808 case EVNT_RCVD_UPDATE: 809 start_timer_holdtime(peer); 810 if (parse_update(peer)) 811 change_state(peer, STATE_IDLE, event); 812 else 813 start_timer_holdtime(peer); 814 break; 815 case EVNT_RCVD_NOTIFICATION: 816 parse_notification(peer); 817 break; 818 default: 819 session_notification(peer, 820 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL); 821 change_state(peer, STATE_IDLE, event); 822 break; 823 } 824 break; 825 } 826} 827 828void 829start_timer_holdtime(struct peer *peer) 830{ 831 if (peer->holdtime > 0) 832 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 833 else 834 timer_stop(&peer->timers, Timer_Hold); 835} 836 837void 838start_timer_sendholdtime(struct peer *peer) 839{ 840 uint16_t holdtime = INTERVAL_HOLD; 841 842 if (peer->holdtime > INTERVAL_HOLD) 843 holdtime = peer->holdtime; 844 845 if (peer->holdtime > 0) 846 timer_set(&peer->timers, Timer_SendHold, holdtime); 847 else 848 timer_stop(&peer->timers, Timer_SendHold); 849} 850 851void 852start_timer_keepalive(struct peer *peer) 853{ 854 if (peer->holdtime > 0) 855 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 856 else 857 timer_stop(&peer->timers, Timer_Keepalive); 858} 859 860void 861session_close_connection(struct peer *peer) 862{ 863 if (peer->fd != -1) { 864 close(peer->fd); 865 pauseaccept = 0; 866 } 867 peer->fd = peer->wbuf.fd = -1; 868} 869 870void 871change_state(struct peer *peer, enum session_state state, 872 enum session_events event) 873{ 874 struct mrt *mrt; 875 876 switch (state) { 877 case STATE_IDLE: 878 /* carp demotion first. new peers handled in init_peer */ 879 if (peer->state == STATE_ESTABLISHED && 880 peer->conf.demote_group[0] && !peer->demoted) 881 session_demote(peer, +1); 882 883 /* 884 * try to write out what's buffered (maybe a notification), 885 * don't bother if it fails 886 */ 887 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 888 msgbuf_write(&peer->wbuf); 889 890 /* 891 * we must start the timer for the next EVNT_START 892 * if we are coming here due to an error and the 893 * session was not established successfully before, the 894 * starttimerinterval needs to be exponentially increased 895 */ 896 if (peer->IdleHoldTime == 0) 897 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 898 peer->holdtime = INTERVAL_HOLD_INITIAL; 899 timer_stop(&peer->timers, Timer_ConnectRetry); 900 timer_stop(&peer->timers, Timer_Keepalive); 901 timer_stop(&peer->timers, Timer_Hold); 902 timer_stop(&peer->timers, Timer_SendHold); 903 timer_stop(&peer->timers, Timer_IdleHold); 904 timer_stop(&peer->timers, Timer_IdleHoldReset); 905 session_close_connection(peer); 906 msgbuf_clear(&peer->wbuf); 907 free(peer->rbuf); 908 peer->rbuf = NULL; 909 peer->rpending = 0; 910 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 911 if (!peer->template) 912 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 913 peer->conf.id, 0, -1, NULL, 0); 914 915 if (event != EVNT_STOP) { 916 timer_set(&peer->timers, Timer_IdleHold, 917 peer->IdleHoldTime); 918 if (event != EVNT_NONE && 919 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 920 peer->IdleHoldTime *= 2; 921 } 922 if (peer->state == STATE_ESTABLISHED) { 923 if (peer->capa.neg.grestart.restart == 2 && 924 (event == EVNT_CON_CLOSED || 925 event == EVNT_CON_FATAL)) { 926 /* don't punish graceful restart */ 927 timer_set(&peer->timers, Timer_IdleHold, 0); 928 peer->IdleHoldTime /= 2; 929 session_graceful_restart(peer); 930 } else 931 session_down(peer); 932 } 933 if (peer->state == STATE_NONE || 934 peer->state == STATE_ESTABLISHED) { 935 /* initialize capability negotiation structures */ 936 memcpy(&peer->capa.ann, &peer->conf.capabilities, 937 sizeof(peer->capa.ann)); 938 } 939 break; 940 case STATE_CONNECT: 941 if (peer->state == STATE_ESTABLISHED && 942 peer->capa.neg.grestart.restart == 2) { 943 /* do the graceful restart dance */ 944 session_graceful_restart(peer); 945 peer->holdtime = INTERVAL_HOLD_INITIAL; 946 timer_stop(&peer->timers, Timer_ConnectRetry); 947 timer_stop(&peer->timers, Timer_Keepalive); 948 timer_stop(&peer->timers, Timer_Hold); 949 timer_stop(&peer->timers, Timer_SendHold); 950 timer_stop(&peer->timers, Timer_IdleHold); 951 timer_stop(&peer->timers, Timer_IdleHoldReset); 952 session_close_connection(peer); 953 msgbuf_clear(&peer->wbuf); 954 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 955 } 956 break; 957 case STATE_ACTIVE: 958 if (!peer->template) 959 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 960 peer->conf.id, 0, -1, NULL, 0); 961 break; 962 case STATE_OPENSENT: 963 break; 964 case STATE_OPENCONFIRM: 965 break; 966 case STATE_ESTABLISHED: 967 timer_set(&peer->timers, Timer_IdleHoldReset, 968 peer->IdleHoldTime); 969 if (peer->demoted) 970 timer_set(&peer->timers, Timer_CarpUndemote, 971 INTERVAL_HOLD_DEMOTED); 972 session_up(peer); 973 break; 974 default: /* something seriously fucked */ 975 break; 976 } 977 978 log_statechange(peer, state, event); 979 LIST_FOREACH(mrt, &mrthead, entry) { 980 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 981 continue; 982 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 983 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 984 mrt->group_id == peer->conf.groupid)) 985 mrt_dump_state(mrt, peer->state, state, peer); 986 } 987 peer->prev_state = peer->state; 988 peer->state = state; 989} 990 991void 992session_accept(int listenfd) 993{ 994 int connfd; 995 socklen_t len; 996 struct sockaddr_storage cliaddr; 997 struct peer *p = NULL; 998 999 len = sizeof(cliaddr); 1000 if ((connfd = accept4(listenfd, 1001 (struct sockaddr *)&cliaddr, &len, 1002 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 1003 if (errno == ENFILE || errno == EMFILE) 1004 pauseaccept = getmonotime(); 1005 else if (errno != EWOULDBLOCK && errno != EINTR && 1006 errno != ECONNABORTED) 1007 log_warn("accept"); 1008 return; 1009 } 1010 1011 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 1012 1013 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1014 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 1015 /* fast reconnect after clear */ 1016 p->passive = 1; 1017 bgp_fsm(p, EVNT_START); 1018 } 1019 } 1020 1021 if (p != NULL && 1022 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1023 if (p->fd != -1) { 1024 if (p->state == STATE_CONNECT) 1025 session_close_connection(p); 1026 else { 1027 close(connfd); 1028 return; 1029 } 1030 } 1031 1032open: 1033 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1034 log_peer_warnx(&p->conf, 1035 "ipsec or md5sig configured but not available"); 1036 close(connfd); 1037 return; 1038 } 1039 1040 if (tcp_md5_check(connfd, p) == -1) { 1041 close(connfd); 1042 return; 1043 } 1044 p->fd = p->wbuf.fd = connfd; 1045 if (session_setup_socket(p)) { 1046 close(connfd); 1047 return; 1048 } 1049 bgp_fsm(p, EVNT_CON_OPEN); 1050 return; 1051 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1052 p->capa.neg.grestart.restart == 2) { 1053 /* first do the graceful restart dance */ 1054 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1055 /* then do part of the open dance */ 1056 goto open; 1057 } else { 1058 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1059 close(connfd); 1060 } 1061} 1062 1063int 1064session_connect(struct peer *peer) 1065{ 1066 struct sockaddr *sa; 1067 struct bgpd_addr *bind_addr = NULL; 1068 socklen_t sa_len; 1069 1070 /* 1071 * we do not need the overcomplicated collision detection RFC 1771 1072 * describes; we simply make sure there is only ever one concurrent 1073 * tcp connection per peer. 1074 */ 1075 if (peer->fd != -1) 1076 return (-1); 1077 1078 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1079 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1080 log_peer_warn(&peer->conf, "session_connect socket"); 1081 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1082 return (-1); 1083 } 1084 1085 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1086 log_peer_warnx(&peer->conf, 1087 "ipsec or md5sig configured but not available"); 1088 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1089 return (-1); 1090 } 1091 1092 tcp_md5_set(peer->fd, peer); 1093 peer->wbuf.fd = peer->fd; 1094 1095 /* if local-address is set we need to bind() */ 1096 switch (peer->conf.remote_addr.aid) { 1097 case AID_INET: 1098 bind_addr = &peer->conf.local_addr_v4; 1099 break; 1100 case AID_INET6: 1101 bind_addr = &peer->conf.local_addr_v6; 1102 break; 1103 } 1104 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1105 if (bind(peer->fd, sa, sa_len) == -1) { 1106 log_peer_warn(&peer->conf, "session_connect bind"); 1107 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1108 return (-1); 1109 } 1110 } 1111 1112 if (session_setup_socket(peer)) { 1113 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1114 return (-1); 1115 } 1116 1117 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); 1118 if (connect(peer->fd, sa, sa_len) == -1) { 1119 if (errno != EINPROGRESS) { 1120 if (errno != peer->lasterr) 1121 log_peer_warn(&peer->conf, "connect"); 1122 peer->lasterr = errno; 1123 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1124 return (-1); 1125 } 1126 } else 1127 bgp_fsm(peer, EVNT_CON_OPEN); 1128 1129 return (0); 1130} 1131 1132int 1133session_setup_socket(struct peer *p) 1134{ 1135 int ttl = p->conf.distance; 1136 int pre = IPTOS_PREC_INTERNETCONTROL; 1137 int nodelay = 1; 1138 int bsize; 1139 1140 switch (p->conf.remote_addr.aid) { 1141 case AID_INET: 1142 /* set precedence, see RFC 1771 appendix 5 */ 1143 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1144 -1) { 1145 log_peer_warn(&p->conf, 1146 "session_setup_socket setsockopt TOS"); 1147 return (-1); 1148 } 1149 1150 if (p->conf.ebgp) { 1151 /* 1152 * set TTL to foreign router's distance 1153 * 1=direct n=multihop with ttlsec, we always use 255 1154 */ 1155 if (p->conf.ttlsec) { 1156 ttl = 256 - p->conf.distance; 1157 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1158 &ttl, sizeof(ttl)) == -1) { 1159 log_peer_warn(&p->conf, 1160 "session_setup_socket: " 1161 "setsockopt MINTTL"); 1162 return (-1); 1163 } 1164 ttl = 255; 1165 } 1166 1167 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1168 sizeof(ttl)) == -1) { 1169 log_peer_warn(&p->conf, 1170 "session_setup_socket setsockopt TTL"); 1171 return (-1); 1172 } 1173 } 1174 break; 1175 case AID_INET6: 1176 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre, 1177 sizeof(pre)) == -1) { 1178 log_peer_warn(&p->conf, "session_setup_socket " 1179 "setsockopt TCLASS"); 1180 return (-1); 1181 } 1182 1183 if (p->conf.ebgp) { 1184 /* 1185 * set hoplimit to foreign router's distance 1186 * 1=direct n=multihop with ttlsec, we always use 255 1187 */ 1188 if (p->conf.ttlsec) { 1189 ttl = 256 - p->conf.distance; 1190 if (setsockopt(p->fd, IPPROTO_IPV6, 1191 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1192 == -1) { 1193 log_peer_warn(&p->conf, 1194 "session_setup_socket: " 1195 "setsockopt MINHOPCOUNT"); 1196 return (-1); 1197 } 1198 ttl = 255; 1199 } 1200 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1201 &ttl, sizeof(ttl)) == -1) { 1202 log_peer_warn(&p->conf, 1203 "session_setup_socket setsockopt hoplimit"); 1204 return (-1); 1205 } 1206 } 1207 break; 1208 } 1209 1210 /* set TCP_NODELAY */ 1211 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1212 sizeof(nodelay)) == -1) { 1213 log_peer_warn(&p->conf, 1214 "session_setup_socket setsockopt TCP_NODELAY"); 1215 return (-1); 1216 } 1217 1218 /* limit bufsize. no biggie if it fails */ 1219 bsize = 65535; 1220 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize)); 1221 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize)); 1222 1223 return (0); 1224} 1225 1226/* 1227 * compare the bgpd_addr with the sockaddr by converting the latter into 1228 * a bgpd_addr. Return true if the two are equal, including any scope 1229 */ 1230static int 1231sa_equal(struct bgpd_addr *ba, struct sockaddr *b) 1232{ 1233 struct bgpd_addr bb; 1234 1235 sa2addr(b, &bb, NULL); 1236 return (memcmp(ba, &bb, sizeof(*ba)) == 0); 1237} 1238 1239static void 1240get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote, 1241 struct bgpd_addr *alt, unsigned int *scope) 1242{ 1243 struct ifaddrs *ifap, *ifa, *match; 1244 int connected = 0; 1245 u_int8_t plen; 1246 1247 if (getifaddrs(&ifap) == -1) 1248 fatal("getifaddrs"); 1249 1250 for (match = ifap; match != NULL; match = match->ifa_next) { 1251 if (match->ifa_addr == NULL) 1252 continue; 1253 if (match->ifa_addr->sa_family != AF_INET && 1254 match->ifa_addr->sa_family != AF_INET6) 1255 continue; 1256 if (sa_equal(local, match->ifa_addr)) { 1257 if (remote->aid == AID_INET6 && 1258 IN6_IS_ADDR_LINKLOCAL(&remote->v6)) { 1259 /* IPv6 LLA are by definition connected */ 1260 connected = 1; 1261 } else if (match->ifa_flags & IFF_POINTOPOINT && 1262 match->ifa_dstaddr != NULL) { 1263 if (sa_equal(remote, match->ifa_dstaddr)) 1264 connected = 1; 1265 } else if (match->ifa_netmask != NULL) { 1266 plen = mask2prefixlen( 1267 match->ifa_addr->sa_family, 1268 match->ifa_netmask); 1269 if (prefix_compare(local, remote, plen) == 0) 1270 connected = 1; 1271 } 1272 break; 1273 } 1274 } 1275 1276 if (match == NULL) { 1277 log_warnx("%s: local address not found", __func__); 1278 return; 1279 } 1280 if (connected) 1281 *scope = if_nametoindex(match->ifa_name); 1282 else 1283 *scope = 0; 1284 1285 switch (local->aid) { 1286 case AID_INET6: 1287 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1288 if (ifa->ifa_addr != NULL && 1289 ifa->ifa_addr->sa_family == AF_INET && 1290 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1291 sa2addr(ifa->ifa_addr, alt, NULL); 1292 break; 1293 } 1294 } 1295 break; 1296 case AID_INET: 1297 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1298 if (ifa->ifa_addr != NULL && 1299 ifa->ifa_addr->sa_family == AF_INET6 && 1300 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1301 struct sockaddr_in6 *s = 1302 (struct sockaddr_in6 *)ifa->ifa_addr; 1303 1304 /* only accept global scope addresses */ 1305 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1306 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1307 continue; 1308 sa2addr(ifa->ifa_addr, alt, NULL); 1309 break; 1310 } 1311 } 1312 break; 1313 default: 1314 log_warnx("%s: unsupported address family %s", __func__, 1315 aid2str(local->aid)); 1316 break; 1317 } 1318 1319 freeifaddrs(ifap); 1320} 1321 1322void 1323session_tcp_established(struct peer *peer) 1324{ 1325 struct sockaddr_storage ss; 1326 socklen_t len; 1327 1328 len = sizeof(ss); 1329 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1330 log_warn("getsockname"); 1331 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1332 len = sizeof(ss); 1333 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1334 log_warn("getpeername"); 1335 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1336 1337 get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt, 1338 &peer->if_scope); 1339} 1340 1341int 1342session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) 1343{ 1344 int errs = 0; 1345 1346 errs += ibuf_add_n8(opb, capa_code); 1347 errs += ibuf_add_n8(opb, capa_len); 1348 return (errs); 1349} 1350 1351int 1352session_capa_add_mp(struct ibuf *buf, uint8_t aid) 1353{ 1354 uint16_t afi; 1355 uint8_t safi; 1356 int errs = 0; 1357 1358 if (aid2afi(aid, &afi, &safi) == -1) { 1359 log_warn("%s: bad AID", __func__); 1360 return (-1); 1361 } 1362 1363 errs += ibuf_add_n16(buf, afi); 1364 errs += ibuf_add_zero(buf, 1); 1365 errs += ibuf_add_n8(buf, safi); 1366 1367 return (errs); 1368} 1369 1370int 1371session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags) 1372{ 1373 u_int errs = 0; 1374 uint16_t afi; 1375 uint8_t safi; 1376 1377 if (aid2afi(aid, &afi, &safi)) { 1378 log_warn("%s: bad AID", __func__); 1379 return (-1); 1380 } 1381 1382 errs += ibuf_add_n16(b, afi); 1383 errs += ibuf_add_n8(b, safi); 1384 errs += ibuf_add_n8(b, flags); 1385 1386 return (errs); 1387} 1388 1389struct bgp_msg * 1390session_newmsg(enum msg_type msgtype, uint16_t len) 1391{ 1392 struct bgp_msg *msg; 1393 struct ibuf *buf; 1394 int errs = 0; 1395 1396 if ((buf = ibuf_open(len)) == NULL) 1397 return (NULL); 1398 1399 errs += ibuf_add(buf, marker, sizeof(marker)); 1400 errs += ibuf_add_n16(buf, len); 1401 errs += ibuf_add_n8(buf, msgtype); 1402 1403 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1404 ibuf_free(buf); 1405 return (NULL); 1406 } 1407 1408 msg->buf = buf; 1409 msg->type = msgtype; 1410 msg->len = len; 1411 1412 return (msg); 1413} 1414 1415int 1416session_sendmsg(struct bgp_msg *msg, struct peer *p) 1417{ 1418 struct mrt *mrt; 1419 1420 LIST_FOREACH(mrt, &mrthead, entry) { 1421 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1422 mrt->type == MRT_UPDATE_OUT))) 1423 continue; 1424 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1425 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1426 mrt->group_id == p->conf.groupid)) 1427 mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p, 1428 msg->type); 1429 } 1430 1431 ibuf_close(&p->wbuf, msg->buf); 1432 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1433 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1434 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1435 else 1436 p->throttled = 1; 1437 } 1438 1439 free(msg); 1440 return (0); 1441} 1442 1443/* 1444 * Translate between internal roles and the value expected by RFC 9234. 1445 */ 1446static uint8_t 1447role2capa(enum role role) 1448{ 1449 switch (role) { 1450 case ROLE_CUSTOMER: 1451 return CAPA_ROLE_CUSTOMER; 1452 case ROLE_PROVIDER: 1453 return CAPA_ROLE_PROVIDER; 1454 case ROLE_RS: 1455 return CAPA_ROLE_RS; 1456 case ROLE_RS_CLIENT: 1457 return CAPA_ROLE_RS_CLIENT; 1458 case ROLE_PEER: 1459 return CAPA_ROLE_PEER; 1460 default: 1461 fatalx("Unsupported role for role capability"); 1462 } 1463} 1464 1465static enum role 1466capa2role(uint8_t val) 1467{ 1468 switch (val) { 1469 case CAPA_ROLE_PROVIDER: 1470 return ROLE_PROVIDER; 1471 case CAPA_ROLE_RS: 1472 return ROLE_RS; 1473 case CAPA_ROLE_RS_CLIENT: 1474 return ROLE_RS_CLIENT; 1475 case CAPA_ROLE_CUSTOMER: 1476 return ROLE_CUSTOMER; 1477 case CAPA_ROLE_PEER: 1478 return ROLE_PEER; 1479 default: 1480 return ROLE_NONE; 1481 } 1482} 1483 1484void 1485session_open(struct peer *p) 1486{ 1487 struct bgp_msg *buf; 1488 struct ibuf *opb; 1489 size_t len, optparamlen; 1490 uint16_t holdtime; 1491 uint8_t i; 1492 int errs = 0, extlen = 0; 1493 int mpcapa = 0; 1494 1495 1496 if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) { 1497 bgp_fsm(p, EVNT_CON_FATAL); 1498 return; 1499 } 1500 1501 /* multiprotocol extensions, RFC 4760 */ 1502 for (i = AID_MIN; i < AID_MAX; i++) 1503 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1504 errs += session_capa_add(opb, CAPA_MP, 4); 1505 errs += session_capa_add_mp(opb, i); 1506 mpcapa++; 1507 } 1508 1509 /* route refresh, RFC 2918 */ 1510 if (p->capa.ann.refresh) /* no data */ 1511 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1512 1513 /* BGP open policy, RFC 9234, only for ebgp sessions */ 1514 if (p->conf.ebgp && p->capa.ann.policy && 1515 p->conf.role != ROLE_NONE && 1516 (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || 1517 mpcapa == 0)) { 1518 errs += session_capa_add(opb, CAPA_ROLE, 1); 1519 errs += ibuf_add_n8(opb, role2capa(p->conf.role)); 1520 } 1521 1522 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1523 if (p->capa.ann.grestart.restart) { 1524 int rst = 0; 1525 uint16_t hdr = 0; 1526 1527 for (i = AID_MIN; i < AID_MAX; i++) { 1528 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) 1529 rst++; 1530 } 1531 1532 /* Only set the R-flag if no graceful restart is ongoing */ 1533 if (!rst) 1534 hdr |= CAPA_GR_R_FLAG; 1535 errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); 1536 errs += ibuf_add_n16(opb, hdr); 1537 } 1538 1539 /* 4-bytes AS numbers, RFC6793 */ 1540 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1541 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t)); 1542 errs += ibuf_add_n32(opb, p->conf.local_as); 1543 } 1544 1545 /* advertisement of multiple paths, RFC7911 */ 1546 if (p->capa.ann.add_path[AID_MIN]) { /* variable */ 1547 uint8_t aplen; 1548 1549 if (mpcapa) 1550 aplen = 4 * mpcapa; 1551 else /* AID_INET */ 1552 aplen = 4; 1553 errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); 1554 if (mpcapa) { 1555 for (i = AID_MIN; i < AID_MAX; i++) { 1556 if (p->capa.ann.mp[i]) { 1557 errs += session_capa_add_afi(opb, 1558 i, p->capa.ann.add_path[i] & 1559 CAPA_AP_MASK); 1560 } 1561 } 1562 } else { /* AID_INET */ 1563 errs += session_capa_add_afi(opb, AID_INET, 1564 p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK); 1565 } 1566 } 1567 1568 /* enhanced route-refresh, RFC7313 */ 1569 if (p->capa.ann.enhanced_rr) /* no data */ 1570 errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); 1571 1572 if (errs) { 1573 ibuf_free(opb); 1574 bgp_fsm(p, EVNT_CON_FATAL); 1575 return; 1576 } 1577 1578 optparamlen = ibuf_size(opb); 1579 len = MSGSIZE_OPEN_MIN + optparamlen; 1580 if (optparamlen == 0) { 1581 /* nothing */ 1582 } else if (optparamlen + 2 >= 255) { 1583 /* RFC9072: use 255 as magic size and request extra header */ 1584 optparamlen = 255; 1585 extlen = 1; 1586 /* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */ 1587 len += 2 * 3; 1588 } else { 1589 /* regular capabilities header */ 1590 optparamlen += 2; 1591 len += 2; 1592 } 1593 1594 if ((buf = session_newmsg(OPEN, len)) == NULL) { 1595 ibuf_free(opb); 1596 bgp_fsm(p, EVNT_CON_FATAL); 1597 return; 1598 } 1599 1600 if (p->conf.holdtime) 1601 holdtime = p->conf.holdtime; 1602 else 1603 holdtime = conf->holdtime; 1604 1605 errs += ibuf_add_n8(buf->buf, 4); 1606 errs += ibuf_add_n16(buf->buf, p->conf.local_short_as); 1607 errs += ibuf_add_n16(buf->buf, holdtime); 1608 /* is already in network byte order */ 1609 errs += ibuf_add_n32(buf->buf, conf->bgpid); 1610 errs += ibuf_add_n8(buf->buf, optparamlen); 1611 1612 if (extlen) { 1613 /* RFC9072 extra header which spans over the capabilities hdr */ 1614 errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN); 1615 errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2); 1616 } 1617 1618 if (optparamlen) { 1619 errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES); 1620 1621 if (extlen) { 1622 /* RFC9072: 2-byte extended length */ 1623 errs += ibuf_add_n16(buf->buf, ibuf_size(opb)); 1624 } else { 1625 errs += ibuf_add_n8(buf->buf, ibuf_size(opb)); 1626 } 1627 errs += ibuf_add_buf(buf->buf, opb); 1628 } 1629 1630 ibuf_free(opb); 1631 1632 if (errs) { 1633 ibuf_free(buf->buf); 1634 free(buf); 1635 bgp_fsm(p, EVNT_CON_FATAL); 1636 return; 1637 } 1638 1639 if (session_sendmsg(buf, p) == -1) { 1640 bgp_fsm(p, EVNT_CON_FATAL); 1641 return; 1642 } 1643 1644 p->stats.msg_sent_open++; 1645} 1646 1647void 1648session_keepalive(struct peer *p) 1649{ 1650 struct bgp_msg *buf; 1651 1652 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1653 session_sendmsg(buf, p) == -1) { 1654 bgp_fsm(p, EVNT_CON_FATAL); 1655 return; 1656 } 1657 1658 start_timer_keepalive(p); 1659 p->stats.msg_sent_keepalive++; 1660} 1661 1662void 1663session_update(uint32_t peerid, struct ibuf *ibuf) 1664{ 1665 struct peer *p; 1666 struct bgp_msg *buf; 1667 1668 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1669 log_warnx("no such peer: id=%u", peerid); 1670 return; 1671 } 1672 1673 if (p->state != STATE_ESTABLISHED) 1674 return; 1675 1676 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + ibuf_size(ibuf))) == 1677 NULL) { 1678 bgp_fsm(p, EVNT_CON_FATAL); 1679 return; 1680 } 1681 1682 if (ibuf_add_buf(buf->buf, ibuf)) { 1683 ibuf_free(buf->buf); 1684 free(buf); 1685 bgp_fsm(p, EVNT_CON_FATAL); 1686 return; 1687 } 1688 1689 if (session_sendmsg(buf, p) == -1) { 1690 bgp_fsm(p, EVNT_CON_FATAL); 1691 return; 1692 } 1693 1694 start_timer_keepalive(p); 1695 p->stats.msg_sent_update++; 1696} 1697 1698void 1699session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode, 1700 void *data, size_t datalen) 1701{ 1702 struct ibuf ibuf; 1703 1704 ibuf_from_buffer(&ibuf, data, datalen); 1705 session_notification(p, errcode, subcode, &ibuf); 1706} 1707 1708void 1709session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, 1710 struct ibuf *ibuf) 1711{ 1712 struct bgp_msg *buf; 1713 int errs = 0; 1714 size_t datalen = 0; 1715 1716 switch (p->state) { 1717 case STATE_OPENSENT: 1718 case STATE_OPENCONFIRM: 1719 case STATE_ESTABLISHED: 1720 break; 1721 default: 1722 /* session not open, no need to send notification */ 1723 log_notification(p, errcode, subcode, ibuf, "dropping"); 1724 return; 1725 } 1726 1727 log_notification(p, errcode, subcode, ibuf, "sending"); 1728 1729 /* cap to maximum size */ 1730 if (ibuf != NULL) { 1731 if (ibuf_size(ibuf) > 1732 MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) { 1733 log_peer_warnx(&p->conf, 1734 "oversized notification, data trunkated"); 1735 ibuf_truncate(ibuf, MAX_PKTSIZE - 1736 MSGSIZE_NOTIFICATION_MIN); 1737 } 1738 datalen = ibuf_size(ibuf); 1739 } 1740 1741 if ((buf = session_newmsg(NOTIFICATION, 1742 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1743 bgp_fsm(p, EVNT_CON_FATAL); 1744 return; 1745 } 1746 1747 errs += ibuf_add_n8(buf->buf, errcode); 1748 errs += ibuf_add_n8(buf->buf, subcode); 1749 1750 if (ibuf != NULL) 1751 errs += ibuf_add_buf(buf->buf, ibuf); 1752 1753 if (errs) { 1754 ibuf_free(buf->buf); 1755 free(buf); 1756 bgp_fsm(p, EVNT_CON_FATAL); 1757 return; 1758 } 1759 1760 if (session_sendmsg(buf, p) == -1) { 1761 bgp_fsm(p, EVNT_CON_FATAL); 1762 return; 1763 } 1764 1765 p->stats.msg_sent_notification++; 1766 p->stats.last_sent_errcode = errcode; 1767 p->stats.last_sent_suberr = subcode; 1768} 1769 1770int 1771session_neighbor_rrefresh(struct peer *p) 1772{ 1773 uint8_t i; 1774 1775 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1776 return (-1); 1777 1778 for (i = AID_MIN; i < AID_MAX; i++) { 1779 if (p->capa.neg.mp[i] != 0) 1780 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1781 } 1782 1783 return (0); 1784} 1785 1786void 1787session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) 1788{ 1789 struct bgp_msg *buf; 1790 int errs = 0; 1791 uint16_t afi; 1792 uint8_t safi; 1793 1794 switch (subtype) { 1795 case ROUTE_REFRESH_REQUEST: 1796 p->stats.refresh_sent_req++; 1797 break; 1798 case ROUTE_REFRESH_BEGIN_RR: 1799 case ROUTE_REFRESH_END_RR: 1800 /* requires enhanced route refresh */ 1801 if (!p->capa.neg.enhanced_rr) 1802 return; 1803 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1804 p->stats.refresh_sent_borr++; 1805 else 1806 p->stats.refresh_sent_eorr++; 1807 break; 1808 default: 1809 fatalx("session_rrefresh: bad subtype %d", subtype); 1810 } 1811 1812 if (aid2afi(aid, &afi, &safi) == -1) 1813 fatalx("session_rrefresh: bad afi/safi pair"); 1814 1815 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1816 bgp_fsm(p, EVNT_CON_FATAL); 1817 return; 1818 } 1819 1820 errs += ibuf_add_n16(buf->buf, afi); 1821 errs += ibuf_add_n8(buf->buf, subtype); 1822 errs += ibuf_add_n8(buf->buf, safi); 1823 1824 if (errs) { 1825 ibuf_free(buf->buf); 1826 free(buf); 1827 bgp_fsm(p, EVNT_CON_FATAL); 1828 return; 1829 } 1830 1831 if (session_sendmsg(buf, p) == -1) { 1832 bgp_fsm(p, EVNT_CON_FATAL); 1833 return; 1834 } 1835 1836 p->stats.msg_sent_rrefresh++; 1837} 1838 1839int 1840session_graceful_restart(struct peer *p) 1841{ 1842 uint8_t i; 1843 1844 timer_set(&p->timers, Timer_RestartTimeout, 1845 p->capa.neg.grestart.timeout); 1846 1847 for (i = AID_MIN; i < AID_MAX; i++) { 1848 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1849 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1850 &i, sizeof(i)) == -1) 1851 return (-1); 1852 log_peer_warnx(&p->conf, 1853 "graceful restart of %s, keeping routes", 1854 aid2str(i)); 1855 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1856 } else if (p->capa.neg.mp[i]) { 1857 if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, 1858 &i, sizeof(i)) == -1) 1859 return (-1); 1860 log_peer_warnx(&p->conf, 1861 "graceful restart of %s, flushing routes", 1862 aid2str(i)); 1863 } 1864 } 1865 return (0); 1866} 1867 1868int 1869session_graceful_stop(struct peer *p) 1870{ 1871 uint8_t i; 1872 1873 for (i = AID_MIN; i < AID_MAX; i++) { 1874 /* 1875 * Only flush if the peer is restarting and the timeout fired. 1876 * In all other cases the session was already flushed when the 1877 * session went down or when the new open message was parsed. 1878 */ 1879 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1880 log_peer_warnx(&p->conf, "graceful restart of %s, " 1881 "time-out, flushing", aid2str(i)); 1882 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1883 &i, sizeof(i)) == -1) 1884 return (-1); 1885 } 1886 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1887 } 1888 return (0); 1889} 1890 1891int 1892session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1893{ 1894 ssize_t n; 1895 socklen_t len; 1896 int error; 1897 1898 if (p->state == STATE_CONNECT) { 1899 if (pfd->revents & POLLOUT) { 1900 if (pfd->revents & POLLIN) { 1901 /* error occurred */ 1902 len = sizeof(error); 1903 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1904 &error, &len) == -1 || error) { 1905 if (error) 1906 errno = error; 1907 if (errno != p->lasterr) { 1908 log_peer_warn(&p->conf, 1909 "socket error"); 1910 p->lasterr = errno; 1911 } 1912 bgp_fsm(p, EVNT_CON_OPENFAIL); 1913 return (1); 1914 } 1915 } 1916 bgp_fsm(p, EVNT_CON_OPEN); 1917 return (1); 1918 } 1919 if (pfd->revents & POLLHUP) { 1920 bgp_fsm(p, EVNT_CON_OPENFAIL); 1921 return (1); 1922 } 1923 if (pfd->revents & (POLLERR|POLLNVAL)) { 1924 bgp_fsm(p, EVNT_CON_FATAL); 1925 return (1); 1926 } 1927 return (0); 1928 } 1929 1930 if (pfd->revents & POLLHUP) { 1931 bgp_fsm(p, EVNT_CON_CLOSED); 1932 return (1); 1933 } 1934 if (pfd->revents & (POLLERR|POLLNVAL)) { 1935 bgp_fsm(p, EVNT_CON_FATAL); 1936 return (1); 1937 } 1938 1939 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1940 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1941 if (error == 0) 1942 log_peer_warnx(&p->conf, "Connection closed"); 1943 else if (error == -1) 1944 log_peer_warn(&p->conf, "write error"); 1945 bgp_fsm(p, EVNT_CON_FATAL); 1946 return (1); 1947 } 1948 p->stats.last_write = getmonotime(); 1949 start_timer_sendholdtime(p); 1950 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1951 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1952 log_peer_warn(&p->conf, "imsg_compose XON"); 1953 else 1954 p->throttled = 0; 1955 } 1956 if (!(pfd->revents & POLLIN)) 1957 return (1); 1958 } 1959 1960 if (p->rbuf && pfd->revents & POLLIN) { 1961 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1962 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1963 if (errno != EINTR && errno != EAGAIN) { 1964 log_peer_warn(&p->conf, "read error"); 1965 bgp_fsm(p, EVNT_CON_FATAL); 1966 } 1967 return (1); 1968 } 1969 if (n == 0) { /* connection closed */ 1970 bgp_fsm(p, EVNT_CON_CLOSED); 1971 return (1); 1972 } 1973 1974 p->rbuf->wpos += n; 1975 p->stats.last_read = getmonotime(); 1976 return (1); 1977 } 1978 return (0); 1979} 1980 1981void 1982session_process_msg(struct peer *p) 1983{ 1984 struct mrt *mrt; 1985 ssize_t rpos, av, left; 1986 int processed = 0; 1987 uint16_t msglen; 1988 uint8_t msgtype; 1989 1990 rpos = 0; 1991 av = p->rbuf->wpos; 1992 p->rpending = 0; 1993 1994 /* 1995 * session might drop to IDLE -> buffers deallocated 1996 * we MUST check rbuf != NULL before use 1997 */ 1998 for (;;) { 1999 if (p->rbuf == NULL) 2000 return; 2001 if (rpos + MSGSIZE_HEADER > av) 2002 break; 2003 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 2004 &msgtype) == -1) 2005 return; 2006 if (rpos + msglen > av) 2007 break; 2008 p->rbuf->rptr = p->rbuf->buf + rpos; 2009 2010 /* dump to MRT as soon as we have a full packet */ 2011 LIST_FOREACH(mrt, &mrthead, entry) { 2012 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 2013 mrt->type == MRT_UPDATE_IN))) 2014 continue; 2015 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 2016 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 2017 mrt->group_id == p->conf.groupid)) 2018 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p, 2019 msgtype); 2020 } 2021 2022 switch (msgtype) { 2023 case OPEN: 2024 bgp_fsm(p, EVNT_RCVD_OPEN); 2025 p->stats.msg_rcvd_open++; 2026 break; 2027 case UPDATE: 2028 bgp_fsm(p, EVNT_RCVD_UPDATE); 2029 p->stats.msg_rcvd_update++; 2030 break; 2031 case NOTIFICATION: 2032 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 2033 p->stats.msg_rcvd_notification++; 2034 break; 2035 case KEEPALIVE: 2036 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 2037 p->stats.msg_rcvd_keepalive++; 2038 break; 2039 case RREFRESH: 2040 parse_rrefresh(p); 2041 p->stats.msg_rcvd_rrefresh++; 2042 break; 2043 default: /* cannot happen */ 2044 session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE, 2045 &msgtype, 1); 2046 log_warnx("received message with unknown type %u", 2047 msgtype); 2048 bgp_fsm(p, EVNT_CON_FATAL); 2049 } 2050 rpos += msglen; 2051 if (++processed > MSG_PROCESS_LIMIT) { 2052 p->rpending = 1; 2053 break; 2054 } 2055 } 2056 2057 if (p->rbuf == NULL) 2058 return; 2059 if (rpos < av) { 2060 left = av - rpos; 2061 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 2062 p->rbuf->wpos = left; 2063 } else 2064 p->rbuf->wpos = 0; 2065} 2066 2067int 2068parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type) 2069{ 2070 u_char *p; 2071 uint16_t olen; 2072 2073 /* caller MUST make sure we are getting 19 bytes! */ 2074 p = data; 2075 if (memcmp(p, marker, sizeof(marker))) { 2076 log_peer_warnx(&peer->conf, "sync error"); 2077 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL); 2078 bgp_fsm(peer, EVNT_CON_FATAL); 2079 return (-1); 2080 } 2081 p += MSGSIZE_HEADER_MARKER; 2082 2083 memcpy(&olen, p, 2); 2084 *len = ntohs(olen); 2085 p += 2; 2086 memcpy(type, p, 1); 2087 2088 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 2089 log_peer_warnx(&peer->conf, 2090 "received message: illegal length: %u byte", *len); 2091 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2092 &olen, sizeof(olen)); 2093 bgp_fsm(peer, EVNT_CON_FATAL); 2094 return (-1); 2095 } 2096 2097 switch (*type) { 2098 case OPEN: 2099 if (*len < MSGSIZE_OPEN_MIN) { 2100 log_peer_warnx(&peer->conf, 2101 "received OPEN: illegal len: %u byte", *len); 2102 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2103 &olen, sizeof(olen)); 2104 bgp_fsm(peer, EVNT_CON_FATAL); 2105 return (-1); 2106 } 2107 break; 2108 case NOTIFICATION: 2109 if (*len < MSGSIZE_NOTIFICATION_MIN) { 2110 log_peer_warnx(&peer->conf, 2111 "received NOTIFICATION: illegal len: %u byte", 2112 *len); 2113 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2114 &olen, sizeof(olen)); 2115 bgp_fsm(peer, EVNT_CON_FATAL); 2116 return (-1); 2117 } 2118 break; 2119 case UPDATE: 2120 if (*len < MSGSIZE_UPDATE_MIN) { 2121 log_peer_warnx(&peer->conf, 2122 "received UPDATE: illegal len: %u byte", *len); 2123 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2124 &olen, sizeof(olen)); 2125 bgp_fsm(peer, EVNT_CON_FATAL); 2126 return (-1); 2127 } 2128 break; 2129 case KEEPALIVE: 2130 if (*len != MSGSIZE_KEEPALIVE) { 2131 log_peer_warnx(&peer->conf, 2132 "received KEEPALIVE: illegal len: %u byte", *len); 2133 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2134 &olen, sizeof(olen)); 2135 bgp_fsm(peer, EVNT_CON_FATAL); 2136 return (-1); 2137 } 2138 break; 2139 case RREFRESH: 2140 if (*len < MSGSIZE_RREFRESH_MIN) { 2141 log_peer_warnx(&peer->conf, 2142 "received RREFRESH: illegal len: %u byte", *len); 2143 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2144 &olen, sizeof(olen)); 2145 bgp_fsm(peer, EVNT_CON_FATAL); 2146 return (-1); 2147 } 2148 break; 2149 default: 2150 log_peer_warnx(&peer->conf, 2151 "received msg with unknown type %u", *type); 2152 session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE, 2153 type, 1); 2154 bgp_fsm(peer, EVNT_CON_FATAL); 2155 return (-1); 2156 } 2157 return (0); 2158} 2159 2160int 2161parse_open(struct peer *peer) 2162{ 2163 struct ibuf ibuf; 2164 u_char *p; 2165 uint8_t version, rversion; 2166 uint16_t short_as, msglen; 2167 uint16_t holdtime, myholdtime; 2168 uint32_t as, bgpid; 2169 uint8_t optparamlen; 2170 2171 p = peer->rbuf->rptr; 2172 p += MSGSIZE_HEADER_MARKER; 2173 memcpy(&msglen, p, sizeof(msglen)); 2174 msglen = ntohs(msglen); 2175 2176 p = peer->rbuf->rptr; 2177 p += MSGSIZE_HEADER; /* header is already checked */ 2178 msglen -= MSGSIZE_HEADER; 2179 2180 /* XXX */ 2181 ibuf_from_buffer(&ibuf, p, msglen); 2182 2183 if (ibuf_get_n8(&ibuf, &version) == -1 || 2184 ibuf_get_n16(&ibuf, &short_as) == -1 || 2185 ibuf_get_n16(&ibuf, &holdtime) == -1 || 2186 ibuf_get_n32(&ibuf, &bgpid) == -1 || 2187 ibuf_get_n8(&ibuf, &optparamlen) == -1) 2188 goto bad_len; 2189 2190 if (version != BGP_VERSION) { 2191 log_peer_warnx(&peer->conf, 2192 "peer wants unrecognized version %u", version); 2193 if (version > BGP_VERSION) 2194 rversion = version - BGP_VERSION; 2195 else 2196 rversion = BGP_VERSION; 2197 session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION, 2198 &rversion, sizeof(rversion)); 2199 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2200 return (-1); 2201 } 2202 2203 as = peer->short_as = short_as; 2204 if (as == 0) { 2205 log_peer_warnx(&peer->conf, 2206 "peer requests unacceptable AS %u", as); 2207 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); 2208 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2209 return (-1); 2210 } 2211 2212 if (holdtime && holdtime < peer->conf.min_holdtime) { 2213 log_peer_warnx(&peer->conf, 2214 "peer requests unacceptable holdtime %u", holdtime); 2215 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL); 2216 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2217 return (-1); 2218 } 2219 2220 myholdtime = peer->conf.holdtime; 2221 if (!myholdtime) 2222 myholdtime = conf->holdtime; 2223 if (holdtime < myholdtime) 2224 peer->holdtime = holdtime; 2225 else 2226 peer->holdtime = myholdtime; 2227 2228 /* check bgpid for validity - just disallow 0 */ 2229 if (bgpid == 0) { 2230 log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable"); 2231 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); 2232 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2233 return (-1); 2234 } 2235 peer->remote_bgpid = bgpid; 2236 2237 if (optparamlen != 0) { 2238 struct ibuf oparams, op; 2239 uint8_t ext_type, op_type; 2240 uint16_t ext_len, op_len; 2241 2242 ibuf_from_ibuf(&oparams, &ibuf); 2243 2244 /* check for RFC9072 encoding */ 2245 if (ibuf_get_n8(&oparams, &ext_type) == -1) 2246 goto bad_len; 2247 if (ext_type == OPT_PARAM_EXT_LEN) { 2248 if (ibuf_get_n16(&oparams, &ext_len) == -1) 2249 goto bad_len; 2250 /* skip RFC9072 header */ 2251 if (ibuf_skip(&ibuf, 3) == -1) 2252 goto bad_len; 2253 } else { 2254 ext_len = optparamlen; 2255 ibuf_rewind(&oparams); 2256 } 2257 2258 if (ibuf_truncate(&oparams, ext_len) == -1 || 2259 ibuf_skip(&ibuf, ext_len) == -1) 2260 goto bad_len; 2261 2262 while (ibuf_size(&oparams) > 0) { 2263 if (ibuf_get_n8(&oparams, &op_type) == -1) 2264 goto bad_len; 2265 2266 if (ext_type == OPT_PARAM_EXT_LEN) { 2267 if (ibuf_get_n16(&oparams, &op_len) == -1) 2268 goto bad_len; 2269 } else { 2270 uint8_t tmp; 2271 if (ibuf_get_n8(&oparams, &tmp) == -1) 2272 goto bad_len; 2273 op_len = tmp; 2274 } 2275 2276 if (ibuf_get_ibuf(&oparams, op_len, &op) == -1) 2277 goto bad_len; 2278 2279 switch (op_type) { 2280 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2281 if (parse_capabilities(peer, &op, &as) == -1) { 2282 session_notification(peer, ERR_OPEN, 0, 2283 NULL); 2284 change_state(peer, STATE_IDLE, 2285 EVNT_RCVD_OPEN); 2286 return (-1); 2287 } 2288 break; 2289 case OPT_PARAM_AUTH: /* deprecated */ 2290 default: 2291 /* 2292 * unsupported type 2293 * the RFCs tell us to leave the data section 2294 * empty and notify the peer with ERR_OPEN, 2295 * ERR_OPEN_OPT. How the peer should know 2296 * _which_ optional parameter we don't support 2297 * is beyond me. 2298 */ 2299 log_peer_warnx(&peer->conf, 2300 "received OPEN message with unsupported " 2301 "optional parameter: type %u", op_type); 2302 session_notification(peer, ERR_OPEN, 2303 ERR_OPEN_OPT, NULL); 2304 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2305 return (-1); 2306 } 2307 } 2308 } 2309 2310 if (ibuf_size(&ibuf) != 0) { 2311 bad_len: 2312 log_peer_warnx(&peer->conf, 2313 "corrupt OPEN message received: length mismatch"); 2314 session_notification(peer, ERR_OPEN, 0, NULL); 2315 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2316 return (-1); 2317 } 2318 2319 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2320 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2321 peer->conf.remote_as = as; 2322 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2323 if (!peer->conf.ebgp) 2324 /* force enforce_as off for iBGP sessions */ 2325 peer->conf.enforce_as = ENFORCE_AS_OFF; 2326 } 2327 2328 if (peer->conf.remote_as != as) { 2329 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2330 log_as(as)); 2331 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); 2332 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2333 return (-1); 2334 } 2335 2336 /* on iBGP sessions check for bgpid collision */ 2337 if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) { 2338 struct in_addr ina; 2339 ina.s_addr = htonl(bgpid); 2340 log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours", 2341 inet_ntoa(ina)); 2342 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); 2343 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2344 return (-1); 2345 } 2346 2347 if (capa_neg_calc(peer) == -1) { 2348 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2349 return (-1); 2350 } 2351 2352 return (0); 2353} 2354 2355int 2356parse_update(struct peer *peer) 2357{ 2358 u_char *p; 2359 uint16_t datalen; 2360 2361 /* 2362 * we pass the message verbatim to the rde. 2363 * in case of errors the whole session is reset with a 2364 * notification anyway, we only need to know the peer 2365 */ 2366 p = peer->rbuf->rptr; 2367 p += MSGSIZE_HEADER_MARKER; 2368 memcpy(&datalen, p, sizeof(datalen)); 2369 datalen = ntohs(datalen); 2370 2371 p = peer->rbuf->rptr; 2372 p += MSGSIZE_HEADER; /* header is already checked */ 2373 datalen -= MSGSIZE_HEADER; 2374 2375 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2376 return (-1); 2377 2378 return (0); 2379} 2380 2381int 2382parse_rrefresh(struct peer *peer) 2383{ 2384 struct route_refresh rr; 2385 struct ibuf ibuf; 2386 uint16_t afi, datalen; 2387 uint8_t aid, safi, subtype; 2388 u_char *p; 2389 2390 p = peer->rbuf->rptr; 2391 p += MSGSIZE_HEADER_MARKER; 2392 memcpy(&datalen, p, sizeof(datalen)); 2393 datalen = ntohs(datalen); 2394 2395 p = peer->rbuf->rptr; 2396 p += MSGSIZE_HEADER; /* header is already checked */ 2397 datalen -= MSGSIZE_HEADER; 2398 2399 /* XXX */ 2400 ibuf_from_buffer(&ibuf, p, datalen); 2401 2402 if (ibuf_get_n16(&ibuf, &afi) == -1 || 2403 ibuf_get_n8(&ibuf, &subtype) == -1 || 2404 ibuf_get_n8(&ibuf, &safi) == -1) { 2405 /* minimum size checked in session_process_msg() */ 2406 fatalx("%s: message too small", __func__); 2407 } 2408 2409 /* check subtype if peer announced enhanced route refresh */ 2410 if (peer->capa.neg.enhanced_rr) { 2411 switch (subtype) { 2412 case ROUTE_REFRESH_REQUEST: 2413 /* no ORF support, so no oversized RREFRESH msgs */ 2414 if (datalen != MSGSIZE_RREFRESH) { 2415 log_peer_warnx(&peer->conf, 2416 "received RREFRESH: illegal len: %u byte", 2417 datalen); 2418 datalen = htons(datalen); 2419 session_notification_data(peer, ERR_HEADER, 2420 ERR_HDR_LEN, &datalen, sizeof(datalen)); 2421 bgp_fsm(peer, EVNT_CON_FATAL); 2422 return (-1); 2423 } 2424 peer->stats.refresh_rcvd_req++; 2425 break; 2426 case ROUTE_REFRESH_BEGIN_RR: 2427 case ROUTE_REFRESH_END_RR: 2428 /* special handling for RFC7313 */ 2429 if (datalen != MSGSIZE_RREFRESH) { 2430 log_peer_warnx(&peer->conf, 2431 "received RREFRESH: illegal len: %u byte", 2432 datalen); 2433 ibuf_rewind(&ibuf); 2434 session_notification(peer, ERR_RREFRESH, 2435 ERR_RR_INV_LEN, &ibuf); 2436 bgp_fsm(peer, EVNT_CON_FATAL); 2437 return (-1); 2438 } 2439 if (subtype == ROUTE_REFRESH_BEGIN_RR) 2440 peer->stats.refresh_rcvd_borr++; 2441 else 2442 peer->stats.refresh_rcvd_eorr++; 2443 break; 2444 default: 2445 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2446 "bad subtype %d", subtype); 2447 return (0); 2448 } 2449 } else { 2450 /* force subtype to default */ 2451 subtype = ROUTE_REFRESH_REQUEST; 2452 peer->stats.refresh_rcvd_req++; 2453 } 2454 2455 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2456 if (afi2aid(afi, safi, &aid) == -1) { 2457 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2458 "invalid afi/safi pair"); 2459 return (0); 2460 } 2461 2462 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { 2463 log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); 2464 return (0); 2465 } 2466 2467 rr.aid = aid; 2468 rr.subtype = subtype; 2469 2470 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) 2471 return (-1); 2472 2473 return (0); 2474} 2475 2476void 2477parse_notification(struct peer *peer) 2478{ 2479 struct ibuf ibuf; 2480 u_char *p; 2481 uint16_t datalen; 2482 uint8_t errcode, subcode; 2483 uint8_t reason_len; 2484 2485 /* just log */ 2486 p = peer->rbuf->rptr; 2487 p += MSGSIZE_HEADER_MARKER; 2488 memcpy(&datalen, p, sizeof(datalen)); 2489 datalen = ntohs(datalen); 2490 2491 p = peer->rbuf->rptr; 2492 p += MSGSIZE_HEADER; /* header is already checked */ 2493 datalen -= MSGSIZE_HEADER; 2494 2495 /* XXX */ 2496 ibuf_from_buffer(&ibuf, p, datalen); 2497 2498 if (ibuf_get_n8(&ibuf, &errcode) == -1 || 2499 ibuf_get_n8(&ibuf, &subcode) == -1) { 2500 log_peer_warnx(&peer->conf, "received bad notification"); 2501 goto done; 2502 } 2503 2504 peer->errcnt++; 2505 peer->stats.last_rcvd_errcode = errcode; 2506 peer->stats.last_rcvd_suberr = subcode; 2507 2508 log_notification(peer, errcode, subcode, &ibuf, "received"); 2509 2510 CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX); 2511 memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason)); 2512 if (errcode == ERR_CEASE && 2513 (subcode == ERR_CEASE_ADMIN_DOWN || 2514 subcode == ERR_CEASE_ADMIN_RESET)) { 2515 /* check if shutdown reason is included */ 2516 if (ibuf_get_n8(&ibuf, &reason_len) != -1 && reason_len != 0) { 2517 if (ibuf_get(&ibuf, peer->stats.last_reason, 2518 reason_len) == -1) 2519 log_peer_warnx(&peer->conf, 2520 "received truncated shutdown reason"); 2521 } 2522 } 2523 2524done: 2525 change_state(peer, STATE_IDLE, EVNT_RCVD_NOTIFICATION); 2526} 2527 2528int 2529parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as) 2530{ 2531 struct ibuf capabuf; 2532 uint16_t afi, gr_header; 2533 uint8_t capa_code, capa_len; 2534 uint8_t safi, aid, role, flags; 2535 2536 while (ibuf_size(buf) > 0) { 2537 if (ibuf_get_n8(buf, &capa_code) == -1 || 2538 ibuf_get_n8(buf, &capa_len) == -1) { 2539 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2540 "length: too short"); 2541 return (-1); 2542 } 2543 if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) { 2544 log_peer_warnx(&peer->conf, 2545 "Received bad capabilities attr length: " 2546 "len %zu smaller than capa_len %u", 2547 ibuf_size(buf), capa_len); 2548 return (-1); 2549 } 2550 2551 switch (capa_code) { 2552 case CAPA_MP: /* RFC 4760 */ 2553 if (capa_len != 4 || 2554 ibuf_get_n16(&capabuf, &afi) == -1 || 2555 ibuf_skip(&capabuf, 1) == -1 || 2556 ibuf_get_n8(&capabuf, &safi) == -1) { 2557 log_peer_warnx(&peer->conf, 2558 "Received bad multi protocol capability"); 2559 break; 2560 } 2561 if (afi2aid(afi, safi, &aid) == -1) { 2562 log_peer_warnx(&peer->conf, 2563 "Received multi protocol capability: " 2564 " unknown AFI %u, safi %u pair", 2565 afi, safi); 2566 peer->capa.peer.mp[AID_UNSPEC] = 1; 2567 break; 2568 } 2569 peer->capa.peer.mp[aid] = 1; 2570 break; 2571 case CAPA_REFRESH: 2572 peer->capa.peer.refresh = 1; 2573 break; 2574 case CAPA_ROLE: 2575 if (capa_len != 1 || 2576 ibuf_get_n8(&capabuf, &role) == -1) { 2577 log_peer_warnx(&peer->conf, 2578 "Received bad role capability"); 2579 break; 2580 } 2581 if (!peer->conf.ebgp) { 2582 log_peer_warnx(&peer->conf, 2583 "Received role capability on iBGP session"); 2584 break; 2585 } 2586 peer->capa.peer.policy = 1; 2587 peer->remote_role = capa2role(role); 2588 break; 2589 case CAPA_RESTART: 2590 if (capa_len == 2) { 2591 /* peer only supports EoR marker */ 2592 peer->capa.peer.grestart.restart = 1; 2593 peer->capa.peer.grestart.timeout = 0; 2594 break; 2595 } else if (capa_len % 4 != 2) { 2596 log_peer_warnx(&peer->conf, 2597 "Bad graceful restart capability"); 2598 peer->capa.peer.grestart.restart = 0; 2599 peer->capa.peer.grestart.timeout = 0; 2600 break; 2601 } 2602 2603 if (ibuf_get_n16(&capabuf, &gr_header) == -1) { 2604 bad_gr_restart: 2605 log_peer_warnx(&peer->conf, 2606 "Bad graceful restart capability"); 2607 peer->capa.peer.grestart.restart = 0; 2608 peer->capa.peer.grestart.timeout = 0; 2609 break; 2610 } 2611 2612 peer->capa.peer.grestart.timeout = 2613 gr_header & CAPA_GR_TIMEMASK; 2614 if (peer->capa.peer.grestart.timeout == 0) { 2615 log_peer_warnx(&peer->conf, "Received " 2616 "graceful restart with zero timeout"); 2617 peer->capa.peer.grestart.restart = 0; 2618 break; 2619 } 2620 2621 while (ibuf_size(&capabuf) > 0) { 2622 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2623 ibuf_get_n8(&capabuf, &safi) == -1 || 2624 ibuf_get_n8(&capabuf, &flags) == -1) 2625 goto bad_gr_restart; 2626 if (afi2aid(afi, safi, &aid) == -1) { 2627 log_peer_warnx(&peer->conf, 2628 "Received graceful restart capa: " 2629 " unknown AFI %u, safi %u pair", 2630 afi, safi); 2631 continue; 2632 } 2633 peer->capa.peer.grestart.flags[aid] |= 2634 CAPA_GR_PRESENT; 2635 if (flags & CAPA_GR_F_FLAG) 2636 peer->capa.peer.grestart.flags[aid] |= 2637 CAPA_GR_FORWARD; 2638 if (gr_header & CAPA_GR_R_FLAG) 2639 peer->capa.peer.grestart.flags[aid] |= 2640 CAPA_GR_RESTART; 2641 peer->capa.peer.grestart.restart = 2; 2642 } 2643 break; 2644 case CAPA_AS4BYTE: 2645 if (capa_len != 4 || 2646 ibuf_get_n32(&capabuf, as) == -1) { 2647 log_peer_warnx(&peer->conf, 2648 "Received bad AS4BYTE capability"); 2649 peer->capa.peer.as4byte = 0; 2650 break; 2651 } 2652 if (*as == 0) { 2653 log_peer_warnx(&peer->conf, 2654 "peer requests unacceptable AS %u", *as); 2655 session_notification(peer, ERR_OPEN, 2656 ERR_OPEN_AS, NULL); 2657 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2658 return (-1); 2659 } 2660 peer->capa.peer.as4byte = 1; 2661 break; 2662 case CAPA_ADD_PATH: 2663 if (capa_len % 4 != 0) { 2664 bad_add_path: 2665 log_peer_warnx(&peer->conf, 2666 "Received bad ADD-PATH capability"); 2667 memset(peer->capa.peer.add_path, 0, 2668 sizeof(peer->capa.peer.add_path)); 2669 break; 2670 } 2671 while (ibuf_size(&capabuf) > 0) { 2672 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2673 ibuf_get_n8(&capabuf, &safi) == -1 || 2674 ibuf_get_n8(&capabuf, &flags) == -1) 2675 goto bad_add_path; 2676 if (afi2aid(afi, safi, &aid) == -1) { 2677 log_peer_warnx(&peer->conf, 2678 "Received ADD-PATH capa: " 2679 " unknown AFI %u, safi %u pair", 2680 afi, safi); 2681 memset(peer->capa.peer.add_path, 0, 2682 sizeof(peer->capa.peer.add_path)); 2683 break; 2684 } 2685 if (flags & ~CAPA_AP_BIDIR) { 2686 log_peer_warnx(&peer->conf, 2687 "Received ADD-PATH capa: " 2688 " bad flags %x", flags); 2689 memset(peer->capa.peer.add_path, 0, 2690 sizeof(peer->capa.peer.add_path)); 2691 break; 2692 } 2693 peer->capa.peer.add_path[aid] = flags; 2694 } 2695 break; 2696 case CAPA_ENHANCED_RR: 2697 peer->capa.peer.enhanced_rr = 1; 2698 break; 2699 default: 2700 break; 2701 } 2702 } 2703 2704 return (0); 2705} 2706 2707int 2708capa_neg_calc(struct peer *p) 2709{ 2710 struct ibuf *ebuf; 2711 uint8_t i, hasmp = 0, capa_code, capa_len, capa_aid = 0; 2712 2713 /* a capability is accepted only if both sides announced it */ 2714 2715 p->capa.neg.refresh = 2716 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2717 p->capa.neg.enhanced_rr = 2718 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2719 p->capa.neg.as4byte = 2720 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2721 2722 /* MP: both side must agree on the AFI,SAFI pair */ 2723 if (p->capa.peer.mp[AID_UNSPEC]) 2724 hasmp = 1; 2725 for (i = AID_MIN; i < AID_MAX; i++) { 2726 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2727 p->capa.neg.mp[i] = 1; 2728 else 2729 p->capa.neg.mp[i] = 0; 2730 if (p->capa.ann.mp[i] || p->capa.peer.mp[i]) 2731 hasmp = 1; 2732 } 2733 /* if no MP capability present default to IPv4 unicast mode */ 2734 if (!hasmp) 2735 p->capa.neg.mp[AID_INET] = 1; 2736 2737 /* 2738 * graceful restart: the peer capabilities are of interest here. 2739 * It is necessary to compare the new values with the previous ones 2740 * and act accordingly. AFI/SAFI that are not part in the MP capability 2741 * are treated as not being present. 2742 * Also make sure that a flush happens if the session stopped 2743 * supporting graceful restart. 2744 */ 2745 2746 for (i = AID_MIN; i < AID_MAX; i++) { 2747 int8_t negflags; 2748 2749 /* disable GR if the AFI/SAFI is not present */ 2750 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2751 p->capa.neg.mp[i] == 0)) 2752 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2753 /* look at current GR state and decide what to do */ 2754 negflags = p->capa.neg.grestart.flags[i]; 2755 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2756 if (negflags & CAPA_GR_RESTARTING) { 2757 if (p->capa.ann.grestart.restart != 0 && 2758 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2759 p->capa.neg.grestart.flags[i] |= 2760 CAPA_GR_RESTARTING; 2761 } else { 2762 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2763 &i, sizeof(i)) == -1) { 2764 log_peer_warnx(&p->conf, 2765 "imsg send failed"); 2766 return (-1); 2767 } 2768 log_peer_warnx(&p->conf, "graceful restart of " 2769 "%s, not restarted, flushing", aid2str(i)); 2770 } 2771 } 2772 } 2773 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2774 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2775 if (p->capa.ann.grestart.restart == 0) 2776 p->capa.neg.grestart.restart = 0; 2777 2778 /* 2779 * ADD-PATH: set only those bits where both sides agree. 2780 * For this compare our send bit with the recv bit from the peer 2781 * and vice versa. 2782 * The flags are stored from this systems view point. 2783 * At index 0 the flags are set if any per-AID flag is set. 2784 */ 2785 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2786 for (i = AID_MIN; i < AID_MAX; i++) { 2787 if (p->capa.neg.mp[i] == 0) 2788 continue; 2789 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2790 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2791 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2792 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2793 } 2794 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2795 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2796 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2797 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2798 } 2799 } 2800 2801 /* 2802 * Open policy: check that the policy is sensible. 2803 * 2804 * Make sure that the roles match and set the negotiated capability 2805 * to the role of the peer. So the RDE can inject the OTC attribute. 2806 * See RFC 9234, section 4.2. 2807 * These checks should only happen on ebgp sessions. 2808 */ 2809 if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 && 2810 p->conf.ebgp) { 2811 switch (p->conf.role) { 2812 case ROLE_PROVIDER: 2813 if (p->remote_role != ROLE_CUSTOMER) 2814 goto policyfail; 2815 break; 2816 case ROLE_RS: 2817 if (p->remote_role != ROLE_RS_CLIENT) 2818 goto policyfail; 2819 break; 2820 case ROLE_RS_CLIENT: 2821 if (p->remote_role != ROLE_RS) 2822 goto policyfail; 2823 break; 2824 case ROLE_CUSTOMER: 2825 if (p->remote_role != ROLE_PROVIDER) 2826 goto policyfail; 2827 break; 2828 case ROLE_PEER: 2829 if (p->remote_role != ROLE_PEER) 2830 goto policyfail; 2831 break; 2832 default: 2833 policyfail: 2834 log_peer_warnx(&p->conf, "open policy role mismatch: " 2835 "our role %s, their role %s", 2836 log_policy(p->conf.role), 2837 log_policy(p->remote_role)); 2838 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); 2839 return (-1); 2840 } 2841 p->capa.neg.policy = 1; 2842 } 2843 2844 /* enforce presence of open policy role capability */ 2845 if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 && 2846 p->conf.ebgp) { 2847 log_peer_warnx(&p->conf, "open policy role enforced but " 2848 "not present"); 2849 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); 2850 return (-1); 2851 } 2852 2853 /* enforce presence of other capabilities */ 2854 if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) { 2855 capa_code = CAPA_REFRESH; 2856 capa_len = 0; 2857 goto fail; 2858 } 2859 if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) { 2860 capa_code = CAPA_ENHANCED_RR; 2861 capa_len = 0; 2862 goto fail; 2863 } 2864 if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) { 2865 capa_code = CAPA_AS4BYTE; 2866 capa_len = 4; 2867 goto fail; 2868 } 2869 if (p->capa.ann.grestart.restart == 2 && 2870 p->capa.neg.grestart.restart == 0) { 2871 capa_code = CAPA_RESTART; 2872 capa_len = 2; 2873 goto fail; 2874 } 2875 for (i = AID_MIN; i < AID_MAX; i++) { 2876 if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) { 2877 capa_code = CAPA_MP; 2878 capa_len = 4; 2879 capa_aid = i; 2880 goto fail; 2881 } 2882 } 2883 2884 for (i = AID_MIN; i < AID_MAX; i++) { 2885 if (p->capa.neg.mp[i] == 0) 2886 continue; 2887 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) && 2888 (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) { 2889 capa_code = CAPA_ADD_PATH; 2890 capa_len = 4; 2891 capa_aid = i; 2892 goto fail; 2893 } 2894 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) && 2895 (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) { 2896 capa_code = CAPA_ADD_PATH; 2897 capa_len = 4; 2898 capa_aid = i; 2899 goto fail; 2900 } 2901 } 2902 2903 return (0); 2904 2905 fail: 2906 if ((ebuf = ibuf_dynamic(2, 256)) == NULL) 2907 return (-1); 2908 /* best effort, no problem if it fails */ 2909 session_capa_add(ebuf, capa_code, capa_len); 2910 if (capa_code == CAPA_MP) 2911 session_capa_add_mp(ebuf, capa_aid); 2912 else if (capa_code == CAPA_ADD_PATH) 2913 session_capa_add_afi(ebuf, capa_aid, 0); 2914 else if (capa_len > 0) 2915 ibuf_add_zero(ebuf, capa_len); 2916 2917 session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf); 2918 ibuf_free(ebuf); 2919 return (-1); 2920} 2921 2922void 2923session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt) 2924{ 2925 struct imsg imsg; 2926 struct ibuf ibuf; 2927 struct mrt xmrt; 2928 struct route_refresh rr; 2929 struct mrt *mrt; 2930 struct imsgbuf *i; 2931 struct peer *p; 2932 struct listen_addr *la, *next, nla; 2933 struct session_dependon sdon; 2934 struct bgpd_config tconf; 2935 size_t len; 2936 uint32_t peerid; 2937 int n, fd, depend_ok, restricted; 2938 uint16_t t; 2939 uint8_t aid, errcode, subcode; 2940 2941 while (imsgbuf) { 2942 if ((n = imsg_get(imsgbuf, &imsg)) == -1) 2943 fatal("session_dispatch_imsg: imsg_get error"); 2944 2945 if (n == 0) 2946 break; 2947 2948 peerid = imsg_get_id(&imsg); 2949 switch (imsg_get_type(&imsg)) { 2950 case IMSG_SOCKET_CONN: 2951 case IMSG_SOCKET_CONN_CTL: 2952 if (idx != PFD_PIPE_MAIN) 2953 fatalx("reconf request not from parent"); 2954 if ((fd = imsg_get_fd(&imsg)) == -1) { 2955 log_warnx("expected to receive imsg fd to " 2956 "RDE but didn't receive any"); 2957 break; 2958 } 2959 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2960 fatal(NULL); 2961 imsg_init(i, fd); 2962 if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) { 2963 if (ibuf_rde) { 2964 log_warnx("Unexpected imsg connection " 2965 "to RDE received"); 2966 msgbuf_clear(&ibuf_rde->w); 2967 free(ibuf_rde); 2968 } 2969 ibuf_rde = i; 2970 } else { 2971 if (ibuf_rde_ctl) { 2972 log_warnx("Unexpected imsg ctl " 2973 "connection to RDE received"); 2974 msgbuf_clear(&ibuf_rde_ctl->w); 2975 free(ibuf_rde_ctl); 2976 } 2977 ibuf_rde_ctl = i; 2978 } 2979 break; 2980 case IMSG_RECONF_CONF: 2981 if (idx != PFD_PIPE_MAIN) 2982 fatalx("reconf request not from parent"); 2983 if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1) 2984 fatal("imsg_get_data"); 2985 2986 nconf = new_config(); 2987 copy_config(nconf, &tconf); 2988 pending_reconf = 1; 2989 break; 2990 case IMSG_RECONF_PEER: 2991 if (idx != PFD_PIPE_MAIN) 2992 fatalx("reconf request not from parent"); 2993 if ((p = calloc(1, sizeof(struct peer))) == NULL) 2994 fatal("new_peer"); 2995 if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) == 2996 -1) 2997 fatal("imsg_get_data"); 2998 p->state = p->prev_state = STATE_NONE; 2999 p->reconf_action = RECONF_REINIT; 3000 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 3001 fatalx("%s: peer tree is corrupt", __func__); 3002 break; 3003 case IMSG_RECONF_LISTENER: 3004 if (idx != PFD_PIPE_MAIN) 3005 fatalx("reconf request not from parent"); 3006 if (nconf == NULL) 3007 fatalx("IMSG_RECONF_LISTENER but no config"); 3008 if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1) 3009 fatal("imsg_get_data"); 3010 TAILQ_FOREACH(la, conf->listen_addrs, entry) 3011 if (!la_cmp(la, &nla)) 3012 break; 3013 3014 if (la == NULL) { 3015 if (nla.reconf != RECONF_REINIT) 3016 fatalx("king bula sez: " 3017 "expected REINIT"); 3018 3019 if ((nla.fd = imsg_get_fd(&imsg)) == -1) 3020 log_warnx("expected to receive fd for " 3021 "%s but didn't receive any", 3022 log_sockaddr((struct sockaddr *) 3023 &nla.sa, nla.sa_len)); 3024 3025 la = calloc(1, sizeof(struct listen_addr)); 3026 if (la == NULL) 3027 fatal(NULL); 3028 memcpy(&la->sa, &nla.sa, sizeof(la->sa)); 3029 la->flags = nla.flags; 3030 la->fd = nla.fd; 3031 la->reconf = RECONF_REINIT; 3032 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 3033 entry); 3034 } else { 3035 if (nla.reconf != RECONF_KEEP) 3036 fatalx("king bula sez: expected KEEP"); 3037 la->reconf = RECONF_KEEP; 3038 } 3039 3040 break; 3041 case IMSG_RECONF_CTRL: 3042 if (idx != PFD_PIPE_MAIN) 3043 fatalx("reconf request not from parent"); 3044 3045 if (imsg_get_data(&imsg, &restricted, 3046 sizeof(restricted)) == -1) 3047 fatal("imsg_get_data"); 3048 if ((fd = imsg_get_fd(&imsg)) == -1) { 3049 log_warnx("expected to receive fd for control " 3050 "socket but didn't receive any"); 3051 break; 3052 } 3053 if (restricted) { 3054 control_shutdown(rcsock); 3055 rcsock = fd; 3056 } else { 3057 control_shutdown(csock); 3058 csock = fd; 3059 } 3060 break; 3061 case IMSG_RECONF_DRAIN: 3062 switch (idx) { 3063 case PFD_PIPE_ROUTE: 3064 if (nconf != NULL) 3065 fatalx("got unexpected %s from RDE", 3066 "IMSG_RECONF_DONE"); 3067 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 3068 -1, NULL, 0); 3069 break; 3070 case PFD_PIPE_MAIN: 3071 if (nconf == NULL) 3072 fatalx("got unexpected %s from parent", 3073 "IMSG_RECONF_DONE"); 3074 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 3075 -1, NULL, 0); 3076 break; 3077 default: 3078 fatalx("reconf request not from parent or RDE"); 3079 } 3080 break; 3081 case IMSG_RECONF_DONE: 3082 if (idx != PFD_PIPE_MAIN) 3083 fatalx("reconf request not from parent"); 3084 if (nconf == NULL) 3085 fatalx("got IMSG_RECONF_DONE but no config"); 3086 copy_config(conf, nconf); 3087 merge_peers(conf, nconf); 3088 3089 /* delete old listeners */ 3090 TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry, 3091 next) { 3092 if (la->reconf == RECONF_NONE) { 3093 log_info("not listening on %s any more", 3094 log_sockaddr((struct sockaddr *) 3095 &la->sa, la->sa_len)); 3096 TAILQ_REMOVE(conf->listen_addrs, la, 3097 entry); 3098 close(la->fd); 3099 free(la); 3100 } 3101 } 3102 3103 /* add new listeners */ 3104 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 3105 entry); 3106 3107 setup_listeners(listener_cnt); 3108 free_config(nconf); 3109 nconf = NULL; 3110 pending_reconf = 0; 3111 log_info("SE reconfigured"); 3112 /* 3113 * IMSG_RECONF_DONE is sent when the RDE drained 3114 * the peer config sent in merge_peers(). 3115 */ 3116 break; 3117 case IMSG_SESSION_DEPENDON: 3118 if (idx != PFD_PIPE_MAIN) 3119 fatalx("IFINFO message not from parent"); 3120 if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1) 3121 fatalx("DEPENDON imsg with wrong len"); 3122 depend_ok = sdon.depend_state; 3123 3124 RB_FOREACH(p, peer_head, &conf->peers) 3125 if (!strcmp(p->conf.if_depend, sdon.ifname)) { 3126 if (depend_ok && !p->depend_ok) { 3127 p->depend_ok = depend_ok; 3128 bgp_fsm(p, EVNT_START); 3129 } else if (!depend_ok && p->depend_ok) { 3130 p->depend_ok = depend_ok; 3131 session_stop(p, 3132 ERR_CEASE_OTHER_CHANGE, 3133 NULL); 3134 } 3135 } 3136 break; 3137 case IMSG_MRT_OPEN: 3138 case IMSG_MRT_REOPEN: 3139 if (idx != PFD_PIPE_MAIN) 3140 fatalx("mrt request not from parent"); 3141 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) { 3142 log_warnx("mrt open, wrong imsg len"); 3143 break; 3144 } 3145 3146 if ((xmrt.wbuf.fd = imsg_get_fd(&imsg)) == -1) { 3147 log_warnx("expected to receive fd for mrt dump " 3148 "but didn't receive any"); 3149 break; 3150 } 3151 3152 mrt = mrt_get(&mrthead, &xmrt); 3153 if (mrt == NULL) { 3154 /* new dump */ 3155 mrt = calloc(1, sizeof(struct mrt)); 3156 if (mrt == NULL) 3157 fatal("session_dispatch_imsg"); 3158 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3159 TAILQ_INIT(&mrt->wbuf.bufs); 3160 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3161 } else { 3162 /* old dump reopened */ 3163 close(mrt->wbuf.fd); 3164 mrt->wbuf.fd = xmrt.wbuf.fd; 3165 } 3166 break; 3167 case IMSG_MRT_CLOSE: 3168 if (idx != PFD_PIPE_MAIN) 3169 fatalx("mrt request not from parent"); 3170 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) { 3171 log_warnx("mrt close, wrong imsg len"); 3172 break; 3173 } 3174 3175 mrt = mrt_get(&mrthead, &xmrt); 3176 if (mrt != NULL) 3177 mrt_done(mrt); 3178 break; 3179 case IMSG_CTL_KROUTE: 3180 case IMSG_CTL_KROUTE_ADDR: 3181 case IMSG_CTL_SHOW_NEXTHOP: 3182 case IMSG_CTL_SHOW_INTERFACE: 3183 case IMSG_CTL_SHOW_FIB_TABLES: 3184 case IMSG_CTL_SHOW_RTR: 3185 case IMSG_CTL_SHOW_TIMER: 3186 if (idx != PFD_PIPE_MAIN) 3187 fatalx("ctl kroute request not from parent"); 3188 control_imsg_relay(&imsg, NULL); 3189 break; 3190 case IMSG_CTL_SHOW_NEIGHBOR: 3191 if (idx != PFD_PIPE_ROUTE_CTL) 3192 fatalx("ctl rib request not from RDE"); 3193 p = getpeerbyid(conf, peerid); 3194 control_imsg_relay(&imsg, p); 3195 break; 3196 case IMSG_CTL_SHOW_RIB: 3197 case IMSG_CTL_SHOW_RIB_PREFIX: 3198 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3199 case IMSG_CTL_SHOW_RIB_ATTR: 3200 case IMSG_CTL_SHOW_RIB_MEM: 3201 case IMSG_CTL_SHOW_NETWORK: 3202 case IMSG_CTL_SHOW_FLOWSPEC: 3203 case IMSG_CTL_SHOW_SET: 3204 if (idx != PFD_PIPE_ROUTE_CTL) 3205 fatalx("ctl rib request not from RDE"); 3206 control_imsg_relay(&imsg, NULL); 3207 break; 3208 case IMSG_CTL_END: 3209 case IMSG_CTL_RESULT: 3210 control_imsg_relay(&imsg, NULL); 3211 break; 3212 case IMSG_UPDATE: 3213 if (idx != PFD_PIPE_ROUTE) 3214 fatalx("update request not from RDE"); 3215 len = imsg_get_len(&imsg); 3216 if (imsg_get_ibuf(&imsg, &ibuf) == -1 || 3217 len > MAX_PKTSIZE - MSGSIZE_HEADER || 3218 len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 3219 log_warnx("RDE sent invalid update"); 3220 else 3221 session_update(peerid, &ibuf); 3222 break; 3223 case IMSG_UPDATE_ERR: 3224 if (idx != PFD_PIPE_ROUTE) 3225 fatalx("update request not from RDE"); 3226 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3227 log_warnx("no such peer: id=%u", peerid); 3228 break; 3229 } 3230 if (imsg_get_ibuf(&imsg, &ibuf) == -1 || 3231 ibuf_get_n8(&ibuf, &errcode) == -1 || 3232 ibuf_get_n8(&ibuf, &subcode) == -1) { 3233 log_warnx("RDE sent invalid notification"); 3234 break; 3235 } 3236 3237 session_notification(p, errcode, subcode, &ibuf); 3238 switch (errcode) { 3239 case ERR_CEASE: 3240 switch (subcode) { 3241 case ERR_CEASE_MAX_PREFIX: 3242 case ERR_CEASE_MAX_SENT_PREFIX: 3243 t = p->conf.max_out_prefix_restart; 3244 if (subcode == ERR_CEASE_MAX_PREFIX) 3245 t = p->conf.max_prefix_restart; 3246 3247 bgp_fsm(p, EVNT_STOP); 3248 if (t) 3249 timer_set(&p->timers, 3250 Timer_IdleHold, 60 * t); 3251 break; 3252 default: 3253 bgp_fsm(p, EVNT_CON_FATAL); 3254 break; 3255 } 3256 break; 3257 default: 3258 bgp_fsm(p, EVNT_CON_FATAL); 3259 break; 3260 } 3261 break; 3262 case IMSG_REFRESH: 3263 if (idx != PFD_PIPE_ROUTE) 3264 fatalx("route refresh request not from RDE"); 3265 if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) { 3266 log_warnx("RDE sent invalid refresh msg"); 3267 break; 3268 } 3269 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3270 log_warnx("no such peer: id=%u", peerid); 3271 break; 3272 } 3273 if (rr.aid < AID_MIN || rr.aid >= AID_MAX) 3274 fatalx("IMSG_REFRESH: bad AID"); 3275 session_rrefresh(p, rr.aid, rr.subtype); 3276 break; 3277 case IMSG_SESSION_RESTARTED: 3278 if (idx != PFD_PIPE_ROUTE) 3279 fatalx("session restart not from RDE"); 3280 if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) { 3281 log_warnx("RDE sent invalid restart msg"); 3282 break; 3283 } 3284 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3285 log_warnx("no such peer: id=%u", peerid); 3286 break; 3287 } 3288 if (aid < AID_MIN || aid >= AID_MAX) 3289 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3290 if (p->capa.neg.grestart.flags[aid] & 3291 CAPA_GR_RESTARTING) { 3292 log_peer_warnx(&p->conf, 3293 "graceful restart of %s finished", 3294 aid2str(aid)); 3295 p->capa.neg.grestart.flags[aid] &= 3296 ~CAPA_GR_RESTARTING; 3297 timer_stop(&p->timers, Timer_RestartTimeout); 3298 3299 /* signal back to RDE to cleanup stale routes */ 3300 if (imsg_rde(IMSG_SESSION_RESTARTED, 3301 peerid, &aid, sizeof(aid)) == -1) 3302 fatal("imsg_compose: " 3303 "IMSG_SESSION_RESTARTED"); 3304 } 3305 break; 3306 default: 3307 break; 3308 } 3309 imsg_free(&imsg); 3310 } 3311} 3312 3313int 3314la_cmp(struct listen_addr *a, struct listen_addr *b) 3315{ 3316 struct sockaddr_in *in_a, *in_b; 3317 struct sockaddr_in6 *in6_a, *in6_b; 3318 3319 if (a->sa.ss_family != b->sa.ss_family) 3320 return (1); 3321 3322 switch (a->sa.ss_family) { 3323 case AF_INET: 3324 in_a = (struct sockaddr_in *)&a->sa; 3325 in_b = (struct sockaddr_in *)&b->sa; 3326 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3327 return (1); 3328 if (in_a->sin_port != in_b->sin_port) 3329 return (1); 3330 break; 3331 case AF_INET6: 3332 in6_a = (struct sockaddr_in6 *)&a->sa; 3333 in6_b = (struct sockaddr_in6 *)&b->sa; 3334 if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3335 sizeof(struct in6_addr))) 3336 return (1); 3337 if (in6_a->sin6_port != in6_b->sin6_port) 3338 return (1); 3339 break; 3340 default: 3341 fatal("king bula sez: unknown address family"); 3342 /* NOTREACHED */ 3343 } 3344 3345 return (0); 3346} 3347 3348struct peer * 3349getpeerbydesc(struct bgpd_config *c, const char *descr) 3350{ 3351 struct peer *p, *res = NULL; 3352 int match = 0; 3353 3354 RB_FOREACH(p, peer_head, &c->peers) 3355 if (!strcmp(p->conf.descr, descr)) { 3356 res = p; 3357 match++; 3358 } 3359 3360 if (match > 1) 3361 log_info("neighbor description \"%s\" not unique, request " 3362 "aborted", descr); 3363 3364 if (match == 1) 3365 return (res); 3366 else 3367 return (NULL); 3368} 3369 3370struct peer * 3371getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3372{ 3373 struct bgpd_addr addr; 3374 struct peer *p, *newpeer, *loose = NULL; 3375 uint32_t id; 3376 3377 sa2addr(ip, &addr, NULL); 3378 3379 /* we might want a more effective way to find peers by IP */ 3380 RB_FOREACH(p, peer_head, &c->peers) 3381 if (!p->conf.template && 3382 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3383 return (p); 3384 3385 /* try template matching */ 3386 RB_FOREACH(p, peer_head, &c->peers) 3387 if (p->conf.template && 3388 p->conf.remote_addr.aid == addr.aid && 3389 session_match_mask(p, &addr)) 3390 if (loose == NULL || loose->conf.remote_masklen < 3391 p->conf.remote_masklen) 3392 loose = p; 3393 3394 if (loose != NULL) { 3395 /* clone */ 3396 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3397 fatal(NULL); 3398 memcpy(newpeer, loose, sizeof(struct peer)); 3399 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3400 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3401 break; 3402 } 3403 newpeer->template = loose; 3404 session_template_clone(newpeer, ip, id, 0); 3405 newpeer->state = newpeer->prev_state = STATE_NONE; 3406 newpeer->reconf_action = RECONF_KEEP; 3407 newpeer->rbuf = NULL; 3408 newpeer->rpending = 0; 3409 init_peer(newpeer); 3410 bgp_fsm(newpeer, EVNT_START); 3411 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3412 fatalx("%s: peer tree is corrupt", __func__); 3413 return (newpeer); 3414 } 3415 3416 return (NULL); 3417} 3418 3419struct peer * 3420getpeerbyid(struct bgpd_config *c, uint32_t peerid) 3421{ 3422 static struct peer lookup; 3423 3424 lookup.conf.id = peerid; 3425 3426 return RB_FIND(peer_head, &c->peers, &lookup); 3427} 3428 3429int 3430peer_matched(struct peer *p, struct ctl_neighbor *n) 3431{ 3432 char *s; 3433 3434 if (n && n->addr.aid) { 3435 if (memcmp(&p->conf.remote_addr, &n->addr, 3436 sizeof(p->conf.remote_addr))) 3437 return 0; 3438 } else if (n && n->descr[0]) { 3439 s = n->is_group ? p->conf.group : p->conf.descr; 3440 /* cannot trust n->descr to be properly terminated */ 3441 if (strncmp(s, n->descr, sizeof(n->descr))) 3442 return 0; 3443 } 3444 return 1; 3445} 3446 3447void 3448session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id, 3449 uint32_t as) 3450{ 3451 struct bgpd_addr remote_addr; 3452 3453 if (ip) 3454 sa2addr(ip, &remote_addr, NULL); 3455 else 3456 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3457 3458 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3459 3460 p->conf.id = id; 3461 3462 if (as) { 3463 p->conf.remote_as = as; 3464 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3465 if (!p->conf.ebgp) 3466 /* force enforce_as off for iBGP sessions */ 3467 p->conf.enforce_as = ENFORCE_AS_OFF; 3468 } 3469 3470 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3471 switch (p->conf.remote_addr.aid) { 3472 case AID_INET: 3473 p->conf.remote_masklen = 32; 3474 break; 3475 case AID_INET6: 3476 p->conf.remote_masklen = 128; 3477 break; 3478 } 3479 p->conf.template = 0; 3480} 3481 3482int 3483session_match_mask(struct peer *p, struct bgpd_addr *a) 3484{ 3485 struct bgpd_addr masked; 3486 3487 applymask(&masked, a, p->conf.remote_masklen); 3488 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0) 3489 return (1); 3490 return (0); 3491} 3492 3493void 3494session_down(struct peer *peer) 3495{ 3496 memset(&peer->capa.neg, 0, sizeof(peer->capa.neg)); 3497 peer->stats.last_updown = getmonotime(); 3498 /* 3499 * session_down is called in the exit code path so check 3500 * if the RDE is still around, if not there is no need to 3501 * send the message. 3502 */ 3503 if (ibuf_rde == NULL) 3504 return; 3505 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3506 fatalx("imsg_compose error"); 3507} 3508 3509void 3510session_up(struct peer *p) 3511{ 3512 struct session_up sup; 3513 3514 /* clear last errors, now that the session is up */ 3515 p->stats.last_sent_errcode = 0; 3516 p->stats.last_sent_suberr = 0; 3517 p->stats.last_rcvd_errcode = 0; 3518 p->stats.last_rcvd_suberr = 0; 3519 memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason)); 3520 3521 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3522 &p->conf, sizeof(p->conf)) == -1) 3523 fatalx("imsg_compose error"); 3524 3525 if (p->local.aid == AID_INET) { 3526 sup.local_v4_addr = p->local; 3527 sup.local_v6_addr = p->local_alt; 3528 } else { 3529 sup.local_v6_addr = p->local; 3530 sup.local_v4_addr = p->local_alt; 3531 } 3532 sup.remote_addr = p->remote; 3533 sup.if_scope = p->if_scope; 3534 3535 sup.remote_bgpid = p->remote_bgpid; 3536 sup.short_as = p->short_as; 3537 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3538 p->stats.last_updown = getmonotime(); 3539 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3540 fatalx("imsg_compose error"); 3541} 3542 3543int 3544imsg_ctl_parent(struct imsg *imsg) 3545{ 3546 return imsg_forward(ibuf_main, imsg); 3547} 3548 3549int 3550imsg_ctl_rde(struct imsg *imsg) 3551{ 3552 if (ibuf_rde_ctl == NULL) 3553 return (0); 3554 /* 3555 * Use control socket to talk to RDE to bypass the queue of the 3556 * regular imsg socket. 3557 */ 3558 return imsg_forward(ibuf_rde_ctl, imsg); 3559} 3560 3561int 3562imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid) 3563{ 3564 if (ibuf_rde_ctl == NULL) 3565 return (0); 3566 3567 /* 3568 * Use control socket to talk to RDE to bypass the queue of the 3569 * regular imsg socket. 3570 */ 3571 return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0); 3572} 3573 3574int 3575imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen) 3576{ 3577 if (ibuf_rde == NULL) 3578 return (0); 3579 3580 return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen); 3581} 3582 3583void 3584session_demote(struct peer *p, int level) 3585{ 3586 struct demote_msg msg; 3587 3588 strlcpy(msg.demote_group, p->conf.demote_group, 3589 sizeof(msg.demote_group)); 3590 msg.level = level; 3591 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3592 &msg, sizeof(msg)) == -1) 3593 fatalx("imsg_compose error"); 3594 3595 p->demoted += level; 3596} 3597 3598void 3599session_stop(struct peer *peer, uint8_t subcode, const char *reason) 3600{ 3601 struct ibuf *ibuf; 3602 3603 if (reason != NULL) 3604 strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason)); 3605 3606 ibuf = ibuf_dynamic(0, REASON_LEN); 3607 3608 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3609 subcode == ERR_CEASE_ADMIN_RESET) && 3610 reason != NULL && *reason != '\0' && 3611 ibuf != NULL) { 3612 if (ibuf_add_n8(ibuf, strlen(reason)) == -1 || 3613 ibuf_add(ibuf, reason, strlen(reason))) { 3614 log_peer_warnx(&peer->conf, 3615 "trying to send overly long shutdown reason"); 3616 ibuf_free(ibuf); 3617 ibuf = NULL; 3618 } 3619 } 3620 switch (peer->state) { 3621 case STATE_OPENSENT: 3622 case STATE_OPENCONFIRM: 3623 case STATE_ESTABLISHED: 3624 session_notification(peer, ERR_CEASE, subcode, ibuf); 3625 break; 3626 default: 3627 /* session not open, no need to send notification */ 3628 if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) || 3629 suberr_cease_names[subcode] == NULL) 3630 log_peer_warnx(&peer->conf, "session stop: %s, " 3631 "unknown subcode %u", errnames[ERR_CEASE], subcode); 3632 else 3633 log_peer_warnx(&peer->conf, "session stop: %s, %s", 3634 errnames[ERR_CEASE], suberr_cease_names[subcode]); 3635 break; 3636 } 3637 ibuf_free(ibuf); 3638 bgp_fsm(peer, EVNT_STOP); 3639} 3640 3641void 3642merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3643{ 3644 struct peer *p, *np, *next; 3645 3646 RB_FOREACH(p, peer_head, &c->peers) { 3647 /* templates are handled specially */ 3648 if (p->template != NULL) 3649 continue; 3650 np = getpeerbyid(nc, p->conf.id); 3651 if (np == NULL) { 3652 p->reconf_action = RECONF_DELETE; 3653 continue; 3654 } 3655 3656 /* peer no longer uses TCP MD5SIG so deconfigure */ 3657 if (p->conf.auth.method == AUTH_MD5SIG && 3658 np->conf.auth.method != AUTH_MD5SIG) 3659 tcp_md5_del_listener(c, p); 3660 else if (np->conf.auth.method == AUTH_MD5SIG) 3661 tcp_md5_add_listener(c, np); 3662 3663 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3664 RB_REMOVE(peer_head, &nc->peers, np); 3665 free(np); 3666 3667 p->reconf_action = RECONF_KEEP; 3668 3669 /* had demotion, is demoted, demote removed? */ 3670 if (p->demoted && !p->conf.demote_group[0]) 3671 session_demote(p, -1); 3672 3673 /* if session is not open then refresh pfkey data */ 3674 if (p->state < STATE_OPENSENT && !p->template) 3675 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3676 p->conf.id, 0, -1, NULL, 0); 3677 3678 /* sync the RDE in case we keep the peer */ 3679 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3680 &p->conf, sizeof(struct peer_config)) == -1) 3681 fatalx("imsg_compose error"); 3682 3683 /* apply the config to all clones of a template */ 3684 if (p->conf.template) { 3685 struct peer *xp; 3686 RB_FOREACH(xp, peer_head, &c->peers) { 3687 if (xp->template != p) 3688 continue; 3689 session_template_clone(xp, NULL, xp->conf.id, 3690 xp->conf.remote_as); 3691 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3692 &xp->conf, sizeof(xp->conf)) == -1) 3693 fatalx("imsg_compose error"); 3694 } 3695 } 3696 } 3697 3698 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3699 fatalx("imsg_compose error"); 3700 3701 /* pfkeys of new peers already loaded by the parent process */ 3702 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3703 RB_REMOVE(peer_head, &nc->peers, np); 3704 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3705 fatalx("%s: peer tree is corrupt", __func__); 3706 if (np->conf.auth.method == AUTH_MD5SIG) 3707 tcp_md5_add_listener(c, np); 3708 } 3709} 3710