1251881Speter/*	$OpenBSD: session.c,v 1.480 2024/06/10 12:51:25 claudio Exp $ */
2251881Speter
3251881Speter/*
4251881Speter * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5251881Speter * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6251881Speter *
7251881Speter * Permission to use, copy, modify, and distribute this software for any
8251881Speter * purpose with or without fee is hereby granted, provided that the above
9251881Speter * copyright notice and this permission notice appear in all copies.
10251881Speter *
11251881Speter * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12251881Speter * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13251881Speter * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14251881Speter * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15251881Speter * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16251881Speter * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17251881Speter * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18251881Speter */
19251881Speter
20251881Speter#include <sys/types.h>
21251881Speter
22251881Speter#include <sys/mman.h>
23251881Speter#include <sys/socket.h>
24251881Speter#include <sys/time.h>
25251881Speter#include <sys/resource.h>
26251881Speter#include <sys/un.h>
27251881Speter#include <netinet/in.h>
28251881Speter#include <netinet/ip.h>
29251881Speter#include <netinet/tcp.h>
30251881Speter#include <arpa/inet.h>
31251881Speter#include <limits.h>
32251881Speter
33251881Speter#include <err.h>
34251881Speter#include <errno.h>
35251881Speter#include <fcntl.h>
36251881Speter#include <ifaddrs.h>
37251881Speter#include <poll.h>
38251881Speter#include <pwd.h>
39251881Speter#include <signal.h>
40251881Speter#include <stdio.h>
41251881Speter#include <stdlib.h>
42251881Speter#include <string.h>
43251881Speter#include <syslog.h>
44251881Speter#include <unistd.h>
45251881Speter
46251881Speter#include "bgpd.h"
47251881Speter#include "session.h"
48251881Speter#include "log.h"
49251881Speter
/*
 * Fixed slots at the front of the pollfd array built by session_main():
 * three imsg pipes followed by the two control sockets.  The per-listener
 * entries start at PFD_LISTENERS_START; peers, mrt writers and control
 * connections are appended after them on every loop iteration.
 */
#define PFD_PIPE_MAIN		0
#define PFD_PIPE_ROUTE		1
#define PFD_PIPE_ROUTE_CTL	2
#define PFD_SOCK_CTL		3
#define PFD_SOCK_RCTL		4
#define PFD_LISTENERS_START	5
56251881Speter
/*
 * Forward declarations.  Some of these functions are defined further down
 * in this part of the file; the remaining ones are presumably defined
 * later in the same file -- TODO confirm.
 */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_sendholdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
int	session_capa_add_mp(struct ibuf *, uint8_t);
int	session_capa_add_afi(struct ibuf *, uint8_t, uint8_t);
struct bgp_msg	*session_newmsg(enum msg_type, uint16_t);
int	session_sendmsg(struct bgp_msg *, struct peer *);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(uint32_t, struct ibuf *);
void	session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *);
void	session_notification_data(struct peer *, uint8_t, uint8_t, void *,
	    size_t);
void	session_rrefresh(struct peer *, uint8_t, uint8_t);
int	session_graceful_restart(struct peer *);
int	session_graceful_stop(struct peer *);
int	session_dispatch_msg(struct pollfd *, struct peer *);
void	session_process_msg(struct peer *);
int	parse_header(struct peer *, u_char *, uint16_t *, uint8_t *);
int	parse_open(struct peer *);
int	parse_update(struct peer *);
int	parse_rrefresh(struct peer *);
void	parse_notification(struct peer *);
int	parse_capabilities(struct peer *, struct ibuf *, uint32_t *);
int	capa_neg_calc(struct peer *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
int	imsg_rde(int, uint32_t, void *, uint16_t);
void	session_demote(struct peer *, int);
void	merge_peers(struct bgpd_config *, struct bgpd_config *);

int		 la_cmp(struct listen_addr *, struct listen_addr *);
void		 session_template_clone(struct peer *, struct sockaddr *,
		    uint32_t, uint32_t);
int		 session_match_mask(struct peer *, struct bgpd_addr *);
103251881Speter
/*
 * conf is the configuration the session engine runs on (peers and listen
 * addresses are reached through it).  nconf presumably holds a new
 * configuration while a reload is in progress -- TODO confirm.
 */
static struct bgpd_config	*conf, *nconf;
/* imsg channels polled in the PFD_PIPE_ROUTE, _ROUTE_CTL and _MAIN slots */
static struct imsgbuf		*ibuf_rde;
static struct imsgbuf		*ibuf_rde_ctl;
static struct imsgbuf		*ibuf_main;

struct bgpd_sysdep	 sysdep;
/* set on SIGINT/SIGTERM or loss of the parent pipe; ends the main loop */
volatile sig_atomic_t	 session_quit;
/* while non-zero, session_main() skips peer initialization and removal */
int			 pending_reconf;
/* control sockets polled for new connections; -1 until assigned */
int			 csock = -1, rcsock = -1;
/* number of initialized peers: bumped by init_peer(), dropped on removal */
u_int			 peer_cnt;

/* list of mrt entries serviced from session_main() */
struct mrt_head		 mrthead;
/* non-zero while accepting is paused; compared against getmonotime() */
time_t			 pauseaccept;

/*
 * 0xff-filled marker; presumably the marker field of the BGP message
 * header -- its users are outside this part of the file.
 */
static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
122251881Speter
123251881Speterstatic inline int
124251881Speterpeer_compare(const struct peer *a, const struct peer *b)
125251881Speter{
126251881Speter	return a->conf.id - b->conf.id;
127251881Speter}
128251881Speter
/* instantiate the peer_head tree functions, ordered by peer_compare() */
RB_GENERATE(peer_head, peer, entry, peer_compare);
130251881Speter
131251881Spetervoid
132251881Spetersession_sighdlr(int sig)
133251881Speter{
134251881Speter	switch (sig) {
135251881Speter	case SIGINT:
136251881Speter	case SIGTERM:
137251881Speter		session_quit = 1;
138251881Speter		break;
139251881Speter	}
140251881Speter}
141251881Speter
142251881Speterint
143251881Spetersetup_listeners(u_int *la_cnt)
144251881Speter{
145251881Speter	int			 ttl = 255;
146251881Speter	struct listen_addr	*la;
147251881Speter	u_int			 cnt = 0;
148251881Speter
149251881Speter	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
150251881Speter		la->reconf = RECONF_NONE;
151251881Speter		cnt++;
152251881Speter
153251881Speter		if (la->flags & LISTENER_LISTENING)
154251881Speter			continue;
155251881Speter
156251881Speter		if (la->fd == -1) {
157251881Speter			log_warn("cannot establish listener on %s: invalid fd",
158251881Speter			    log_sockaddr((struct sockaddr *)&la->sa,
159251881Speter			    la->sa_len));
160251881Speter			continue;
161251881Speter		}
162251881Speter
163251881Speter		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
164251881Speter			fatal("tcp_md5_prep_listener");
165251881Speter
166251881Speter		/* set ttl to 255 so that ttl-security works */
167251881Speter		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
168251881Speter		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
169251881Speter			log_warn("setup_listeners setsockopt TTL");
170251881Speter			continue;
171251881Speter		}
172251881Speter		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
173251881Speter		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
174251881Speter			log_warn("setup_listeners setsockopt hoplimit");
175251881Speter			continue;
176251881Speter		}
177251881Speter
178251881Speter		if (listen(la->fd, MAX_BACKLOG)) {
179251881Speter			close(la->fd);
180251881Speter			fatal("listen");
181251881Speter		}
182251881Speter
183251881Speter		la->flags |= LISTENER_LISTENING;
184251881Speter
185251881Speter		log_info("listening on %s",
186251881Speter		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
187251881Speter	}
188251881Speter
189251881Speter	*la_cnt = cnt;
190251881Speter
191251881Speter	return (0);
192251881Speter}
193251881Speter
/*
 * Entry point and main loop of the session engine process.
 *
 * debug and verbose are handed to the logging setup.  The function
 * chroots into the home directory of BGPD_USER, drops privileges, calls
 * pledge(2), installs the signal handlers and then runs a poll(2) loop
 * until session_quit is set.  Each iteration:
 *   - initializes new peers and removes peers marked RECONF_DELETE
 *     (skipped while pending_reconf is set),
 *   - reaps mrt entries in MRT_STATE_REMOVE,
 *   - grows the peer_l, mrt_l and pfd arrays when needed,
 *   - fills the pollfd array: imsg pipes, control sockets, listeners,
 *     peers, mrt writers, control connections,
 *   - runs one due timer per peer and derives the poll timeout,
 *   - dispatches whatever poll(2) reported.
 * After the loop all peers, mrt entries, arrays and pipes are released
 * and the process ends with exit(0).
 */
void
session_main(int debug, int verbose)
{
	int			 timeout;
	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
	u_int			 new_cnt;
	struct passwd		*pw;
	struct peer		*p, **peer_l = NULL, *next;
	struct mrt		*m, *xm, **mrt_l = NULL;
	struct pollfd		*pfd = NULL;
	struct listen_addr	*la;
	void			*newp;
	time_t			 now;
	short			 events;

	log_init(debug, LOG_DAEMON);
	log_setverbose(verbose);

	log_procinit(log_procnames[PROC_SE]);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal(NULL);

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("session engine");

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	if (pledge("stdio inet recvfd", NULL) == -1)
		fatal("pledge");

	/* only SIGTERM and SIGINT are handled; the rest is ignored */
	signal(SIGTERM, session_sighdlr);
	signal(SIGINT, session_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_main, 3);

	LIST_INIT(&mrthead);
	listener_cnt = 0;
	peer_cnt = 0;
	ctl_cnt = 0;

	conf = new_config();
	log_info("session engine ready");

	while (session_quit == 0) {
		/* check for peers to be initialized or deleted */
		if (!pending_reconf) {
			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
				/* cloned peer that idled out? */
				if (p->template && (p->state == STATE_IDLE ||
				    p->state == STATE_ACTIVE) &&
				    getmonotime() - p->stats.last_updown >=
				    INTERVAL_HOLD_CLONED)
					p->reconf_action = RECONF_DELETE;

				/* new peer that needs init? */
				if (p->state == STATE_NONE)
					init_peer(p);

				/* deletion due? */
				if (p->reconf_action == RECONF_DELETE) {
					if (p->demoted)
						session_demote(p, -1);
					p->conf.demote_group[0] = 0;
					session_stop(p, ERR_CEASE_PEER_UNCONF,
					    NULL);
					timer_remove_all(&p->timers);
					tcp_md5_del_listener(conf, p);
					RB_REMOVE(peer_head, &conf->peers, p);
					log_peer_warnx(&p->conf, "removed");
					free(p);
					peer_cnt--;
					continue;
				}
				p->reconf_action = RECONF_NONE;
			}
		}

		/* peer_l only ever grows; it maps pollfd slots to peers */
		if (peer_cnt > peer_l_elms) {
			if ((newp = reallocarray(peer_l, peer_cnt,
			    sizeof(struct peer *))) == NULL) {
				/* panic for now */
				log_warn("could not resize peer_l from %u -> %u"
				    " entries", peer_l_elms, peer_cnt);
				fatalx("exiting");
			}
			peer_l = newp;
			peer_l_elms = peer_cnt;
		}

		/* reap removed mrt entries, count those with queued output */
		mrt_cnt = 0;
		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
			xm = LIST_NEXT(m, entry);
			if (m->state == MRT_STATE_REMOVE) {
				mrt_clean(m);
				LIST_REMOVE(m, entry);
				free(m);
				continue;
			}
			if (m->wbuf.queued)
				mrt_cnt++;
		}

		if (mrt_cnt > mrt_l_elms) {
			if ((newp = reallocarray(mrt_l, mrt_cnt,
			    sizeof(struct mrt *))) == NULL) {
				/* panic for now */
				log_warn("could not resize mrt_l from %u -> %u"
				    " entries", mrt_l_elms, mrt_cnt);
				fatalx("exiting");
			}
			mrt_l = newp;
			mrt_l_elms = mrt_cnt;
		}

		/* one pollfd per fixed slot, listener, peer, control, mrt */
		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
		    ctl_cnt + mrt_cnt;
		if (new_cnt > pfd_elms) {
			if ((newp = reallocarray(pfd, new_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, new_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = new_cnt;
		}

		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);

		/* while accepting is paused the accept sockets are not polled */
		if (pauseaccept == 0) {
			pfd[PFD_SOCK_CTL].fd = csock;
			pfd[PFD_SOCK_CTL].events = POLLIN;
			pfd[PFD_SOCK_RCTL].fd = rcsock;
			pfd[PFD_SOCK_RCTL].events = POLLIN;
		} else {
			pfd[PFD_SOCK_CTL].fd = -1;
			pfd[PFD_SOCK_RCTL].fd = -1;
		}

		i = PFD_LISTENERS_START;
		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
			if (pauseaccept == 0) {
				pfd[i].fd = la->fd;
				pfd[i].events = POLLIN;
			} else
				pfd[i].fd = -1;
			i++;
		}
		/* first pollfd slot after the listeners, i.e. the first peer */
		idx_listeners = i;
		timeout = 240;	/* loop every 240s at least */

		now = getmonotime();
		RB_FOREACH(p, peer_head, &conf->peers) {
			time_t	nextaction;
			struct timer *pt;

			/* check timers */
			if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
				switch (pt->type) {
				case Timer_Hold:
					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
					break;
				case Timer_SendHold:
					bgp_fsm(p, EVNT_TIMER_SENDHOLD);
					break;
				case Timer_ConnectRetry:
					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
					break;
				case Timer_Keepalive:
					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
					break;
				case Timer_IdleHold:
					bgp_fsm(p, EVNT_START);
					break;
				case Timer_IdleHoldReset:
					p->IdleHoldTime =
					    INTERVAL_IDLE_HOLD_INITIAL;
					p->errcnt = 0;
					timer_stop(&p->timers,
					    Timer_IdleHoldReset);
					break;
				case Timer_CarpUndemote:
					timer_stop(&p->timers,
					    Timer_CarpUndemote);
					if (p->demoted &&
					    p->state == STATE_ESTABLISHED)
						session_demote(p, -1);
					break;
				case Timer_RestartTimeout:
					timer_stop(&p->timers,
					    Timer_RestartTimeout);
					session_graceful_stop(p);
					break;
				default:
					fatalx("King Bula lost in time");
				}
			}
			/* wake up no later than this peer's next timer */
			if ((nextaction = timer_nextduein(&p->timers,
			    now)) != -1 && nextaction < timeout)
				timeout = nextaction;

			/* are we waiting for a write? */
			events = POLLIN;
			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
				events |= POLLOUT;
			/* is there still work to do? */
			if (p->rpending && p->rbuf && p->rbuf->wpos)
				timeout = 0;

			/* poll events */
			if (p->fd != -1 && events != 0) {
				pfd[i].fd = p->fd;
				pfd[i].events = events;
				/* peer_l is indexed relative to idx_listeners */
				peer_l[i - idx_listeners] = p;
				i++;
			}
		}

		/* first pollfd slot after the peers, i.e. the first mrt */
		idx_peers = i;

		LIST_FOREACH(m, &mrthead, entry)
			if (m->wbuf.queued) {
				pfd[i].fd = m->wbuf.fd;
				pfd[i].events = POLLOUT;
				mrt_l[i - idx_peers] = m;
				i++;
			}

		/* first pollfd slot after the mrts: control connections */
		idx_mrts = i;

		i += control_fill_pfds(pfd + i, pfd_elms -i);

		if (i > pfd_elms)
			fatalx("poll pfd overflow");

		if (pauseaccept && timeout > 1)
			timeout = 1;
		if (timeout < 0)
			timeout = 0;
		/* timeout is kept in seconds, poll(2) takes milliseconds */
		if (poll(pfd, i, timeout * 1000) == -1) {
			if (errno == EINTR)
				continue;
			fatal("poll error");
		}

		/*
		 * If we previously saw fd exhaustion, we stop accept()
		 * for 1 second to throttle the accept() loop.
		 */
		if (pauseaccept && getmonotime() > pauseaccept + 1)
			pauseaccept = 0;

		/* losing the parent ends the session engine */
		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
			log_warnx("SE: Lost connection to parent");
			session_quit = 1;
			continue;
		} else
			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
			    &listener_cnt);

		/* losing an RDE pipe only drops that imsg buffer */
		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
			log_warnx("SE: Lost connection to RDE");
			msgbuf_clear(&ibuf_rde->w);
			free(ibuf_rde);
			ibuf_rde = NULL;
		} else
			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
			    &listener_cnt);

		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
		    -1) {
			log_warnx("SE: Lost connection to RDE control");
			msgbuf_clear(&ibuf_rde_ctl->w);
			free(ibuf_rde_ctl);
			ibuf_rde_ctl = NULL;
		} else
			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
			    &listener_cnt);

		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
			ctl_cnt += control_accept(csock, 0);

		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
			ctl_cnt += control_accept(rcsock, 1);

		/*
		 * j walks the pollfd array once: listeners, then peers,
		 * then mrts, then control connections.
		 */
		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
			if (pfd[j].revents & POLLIN)
				session_accept(pfd[j].fd);

		for (; j < idx_peers; j++)
			session_dispatch_msg(&pfd[j],
			    peer_l[j - idx_listeners]);

		/* process buffered input for every peer that has some */
		RB_FOREACH(p, peer_head, &conf->peers)
			if (p->rbuf && p->rbuf->wpos)
				session_process_msg(p);

		for (; j < idx_mrts; j++)
			if (pfd[j].revents & POLLOUT)
				mrt_write(mrt_l[j - idx_peers]);

		for (; j < i; j++)
			ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
	}

	/* shutdown: stop and free every peer */
	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
		session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down");
		timer_remove_all(&p->timers);
		tcp_md5_del_listener(conf, p);
		RB_REMOVE(peer_head, &conf->peers, p);
		free(p);
	}

	while ((m = LIST_FIRST(&mrthead)) != NULL) {
		mrt_clean(m);
		LIST_REMOVE(m, entry);
		free(m);
	}

	free_config(conf);
	free(peer_l);
	free(mrt_l);
	free(pfd);

	/* close pipes */
	if (ibuf_rde) {
		msgbuf_write(&ibuf_rde->w);
		msgbuf_clear(&ibuf_rde->w);
		close(ibuf_rde->fd);
		free(ibuf_rde);
	}
	if (ibuf_rde_ctl) {
		msgbuf_clear(&ibuf_rde_ctl->w);
		close(ibuf_rde_ctl->fd);
		free(ibuf_rde_ctl);
	}
	msgbuf_write(&ibuf_main->w);
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	control_shutdown(csock);
	control_shutdown(rcsock);
	log_info("session engine exiting");
	exit(0);
}
561251881Speter
562251881Spetervoid
563251881Speterinit_peer(struct peer *p)
564251881Speter{
565251881Speter	TAILQ_INIT(&p->timers);
566251881Speter	p->fd = p->wbuf.fd = -1;
567251881Speter
568251881Speter	if (p->conf.if_depend[0])
569251881Speter		imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
570251881Speter		    p->conf.if_depend, sizeof(p->conf.if_depend));
571251881Speter	else
572251881Speter		p->depend_ok = 1;
573251881Speter
574251881Speter	peer_cnt++;
575251881Speter
576251881Speter	change_state(p, STATE_IDLE, EVNT_NONE);
577251881Speter	if (p->conf.down)
578251881Speter		timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
579251881Speter	else
580251881Speter		timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY);
581251881Speter
582251881Speter	p->stats.last_updown = getmonotime();
583251881Speter
584251881Speter	/*
585251881Speter	 * on startup, demote if requested.
586251881Speter	 * do not handle new peers. they must reach ESTABLISHED beforehand.
587251881Speter	 * peers added at runtime have reconf_action set to RECONF_REINIT.
588251881Speter	 */
589251881Speter	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
590251881Speter		session_demote(p, +1);
591251881Speter}
592251881Speter
/*
 * BGP finite state machine: apply event to peer according to the peer's
 * current state.  Depending on state and event this starts or stops
 * timers, opens or closes the connection, sends OPEN, KEEPALIVE or
 * NOTIFICATION messages and moves the peer to its next state through
 * change_state().  A peer in STATE_NONE ignores every event.
 */
void
bgp_fsm(struct peer *peer, enum session_events event)
{
	switch (peer->state) {
	case STATE_NONE:
		/* nothing */
		break;
	case STATE_IDLE:
		switch (event) {
		case EVNT_START:
			timer_stop(&peer->timers, Timer_Hold);
			timer_stop(&peer->timers, Timer_SendHold);
			timer_stop(&peer->timers, Timer_Keepalive);
			timer_stop(&peer->timers, Timer_IdleHold);

			/* allocate read buffer */
			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
			if (peer->rbuf == NULL)
				fatal(NULL);

			/* init write buffer */
			msgbuf_init(&peer->wbuf);

			/*
			 * Unmet dependency: stay idle.  Passive or template
			 * peers wait in ACTIVE without a retry timer.  All
			 * others connect right away.
			 */
			if (!peer->depend_ok)
				timer_stop(&peer->timers, Timer_ConnectRetry);
			else if (peer->passive || peer->conf.passive ||
			    peer->conf.template) {
				change_state(peer, STATE_ACTIVE, event);
				timer_stop(&peer->timers, Timer_ConnectRetry);
			} else {
				change_state(peer, STATE_CONNECT, event);
				timer_set(&peer->timers, Timer_ConnectRetry,
				    conf->connectretry);
				session_connect(peer);
			}
			/* the runtime passive flag applies to one start only */
			peer->passive = 0;
			break;
		case EVNT_STOP:
			timer_stop(&peer->timers, Timer_IdleHold);
			break;
		default:
			/* ignore */
			break;
		}
		break;
	case STATE_CONNECT:
		switch (event) {
		case EVNT_START:
			/* ignore */
			break;
		case EVNT_CON_OPEN:
			session_tcp_established(peer);
			session_open(peer);
			timer_stop(&peer->timers, Timer_ConnectRetry);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			start_timer_holdtime(peer);
			change_state(peer, STATE_OPENSENT, event);
			break;
		case EVNT_CON_OPENFAIL:
			timer_set(&peer->timers, Timer_ConnectRetry,
			    conf->connectretry);
			session_close_connection(peer);
			change_state(peer, STATE_ACTIVE, event);
			break;
		case EVNT_TIMER_CONNRETRY:
			timer_set(&peer->timers, Timer_ConnectRetry,
			    conf->connectretry);
			session_connect(peer);
			break;
		default:
			change_state(peer, STATE_IDLE, event);
			break;
		}
		break;
	case STATE_ACTIVE:
		switch (event) {
		case EVNT_START:
			/* ignore */
			break;
		case EVNT_CON_OPEN:
			session_tcp_established(peer);
			session_open(peer);
			timer_stop(&peer->timers, Timer_ConnectRetry);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			start_timer_holdtime(peer);
			change_state(peer, STATE_OPENSENT, event);
			break;
		case EVNT_CON_OPENFAIL:
			timer_set(&peer->timers, Timer_ConnectRetry,
			    conf->connectretry);
			session_close_connection(peer);
			change_state(peer, STATE_ACTIVE, event);
			break;
		case EVNT_TIMER_CONNRETRY:
			/*
			 * NOTE(review): every other ConnectRetry restart in
			 * this function uses conf->connectretry; confirm that
			 * peer->holdtime is intended here.
			 */
			timer_set(&peer->timers, Timer_ConnectRetry,
			    peer->holdtime);
			change_state(peer, STATE_CONNECT, event);
			session_connect(peer);
			break;
		default:
			change_state(peer, STATE_IDLE, event);
			break;
		}
		break;
	case STATE_OPENSENT:
		switch (event) {
		case EVNT_START:
			/* ignore */
			break;
		case EVNT_STOP:
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_CON_CLOSED:
			/* fall back to ACTIVE and retry later */
			session_close_connection(peer);
			timer_set(&peer->timers, Timer_ConnectRetry,
			    conf->connectretry);
			change_state(peer, STATE_ACTIVE, event);
			break;
		case EVNT_CON_FATAL:
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_HOLDTIME:
			session_notification(peer, ERR_HOLDTIMEREXPIRED,
			    0, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_SENDHOLD:
			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
			    0, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_RCVD_OPEN:
			/* parse_open calls change_state itself on failure */
			if (parse_open(peer))
				break;
			session_keepalive(peer);
			change_state(peer, STATE_OPENCONFIRM, event);
			break;
		case EVNT_RCVD_NOTIFICATION:
			/* no state change here; presumably done by the callee */
			parse_notification(peer);
			break;
		default:
			session_notification(peer,
			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		}
		break;
	case STATE_OPENCONFIRM:
		switch (event) {
		case EVNT_START:
			/* ignore */
			break;
		case EVNT_STOP:
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_CON_CLOSED:
		case EVNT_CON_FATAL:
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_HOLDTIME:
			session_notification(peer, ERR_HOLDTIMEREXPIRED,
			    0, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_SENDHOLD:
			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
			    0, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_KEEPALIVE:
			session_keepalive(peer);
			break;
		case EVNT_RCVD_KEEPALIVE:
			/* the peer confirmed our OPEN: session is up */
			start_timer_holdtime(peer);
			change_state(peer, STATE_ESTABLISHED, event);
			break;
		case EVNT_RCVD_NOTIFICATION:
			parse_notification(peer);
			break;
		default:
			session_notification(peer,
			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		}
		break;
	case STATE_ESTABLISHED:
		switch (event) {
		case EVNT_START:
			/* ignore */
			break;
		case EVNT_STOP:
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_CON_CLOSED:
		case EVNT_CON_FATAL:
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_HOLDTIME:
			session_notification(peer, ERR_HOLDTIMEREXPIRED,
			    0, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_SENDHOLD:
			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
			    0, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		case EVNT_TIMER_KEEPALIVE:
			session_keepalive(peer);
			break;
		case EVNT_RCVD_KEEPALIVE:
			start_timer_holdtime(peer);
			break;
		case EVNT_RCVD_UPDATE:
			/*
			 * The hold timer is restarted before parsing and
			 * once more after a successful parse.
			 */
			start_timer_holdtime(peer);
			if (parse_update(peer))
				change_state(peer, STATE_IDLE, event);
			else
				start_timer_holdtime(peer);
			break;
		case EVNT_RCVD_NOTIFICATION:
			parse_notification(peer);
			break;
		default:
			session_notification(peer,
			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL);
			change_state(peer, STATE_IDLE, event);
			break;
		}
		break;
	}
}
827251881Speter
828251881Spetervoid
829251881Speterstart_timer_holdtime(struct peer *peer)
830251881Speter{
831251881Speter	if (peer->holdtime > 0)
832251881Speter		timer_set(&peer->timers, Timer_Hold, peer->holdtime);
833251881Speter	else
834251881Speter		timer_stop(&peer->timers, Timer_Hold);
835251881Speter}
836251881Speter
837251881Spetervoid
838251881Speterstart_timer_sendholdtime(struct peer *peer)
839251881Speter{
840251881Speter	uint16_t holdtime = INTERVAL_HOLD;
841251881Speter
842251881Speter	if (peer->holdtime > INTERVAL_HOLD)
843251881Speter		holdtime = peer->holdtime;
844251881Speter
845251881Speter	if (peer->holdtime > 0)
846251881Speter		timer_set(&peer->timers, Timer_SendHold, holdtime);
847251881Speter	else
848251881Speter		timer_stop(&peer->timers, Timer_SendHold);
849251881Speter}
850251881Speter
851251881Spetervoid
852251881Speterstart_timer_keepalive(struct peer *peer)
853251881Speter{
854251881Speter	if (peer->holdtime > 0)
855251881Speter		timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
856251881Speter	else
857251881Speter		timer_stop(&peer->timers, Timer_Keepalive);
858251881Speter}
859251881Speter
860251881Spetervoid
861251881Spetersession_close_connection(struct peer *peer)
862251881Speter{
863251881Speter	if (peer->fd != -1) {
864251881Speter		close(peer->fd);
865251881Speter		pauseaccept = 0;
866251881Speter	}
867251881Speter	peer->fd = peer->wbuf.fd = -1;
868251881Speter}
869251881Speter
870251881Spetervoid
871251881Speterchange_state(struct peer *peer, enum session_state state,
872251881Speter    enum session_events event)
873251881Speter{
874251881Speter	struct mrt	*mrt;
875251881Speter
876251881Speter	switch (state) {
877251881Speter	case STATE_IDLE:
878251881Speter		/* carp demotion first. new peers handled in init_peer */
879251881Speter		if (peer->state == STATE_ESTABLISHED &&
880251881Speter		    peer->conf.demote_group[0] && !peer->demoted)
881251881Speter			session_demote(peer, +1);
882251881Speter
883251881Speter		/*
884251881Speter		 * try to write out what's buffered (maybe a notification),
885251881Speter		 * don't bother if it fails
886251881Speter		 */
887251881Speter		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
888251881Speter			msgbuf_write(&peer->wbuf);
889251881Speter
890251881Speter		/*
891251881Speter		 * we must start the timer for the next EVNT_START
892251881Speter		 * if we are coming here due to an error and the
893251881Speter		 * session was not established successfully before, the
894251881Speter		 * starttimerinterval needs to be exponentially increased
895251881Speter		 */
896251881Speter		if (peer->IdleHoldTime == 0)
897251881Speter			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
898251881Speter		peer->holdtime = INTERVAL_HOLD_INITIAL;
899251881Speter		timer_stop(&peer->timers, Timer_ConnectRetry);
900251881Speter		timer_stop(&peer->timers, Timer_Keepalive);
901251881Speter		timer_stop(&peer->timers, Timer_Hold);
902251881Speter		timer_stop(&peer->timers, Timer_SendHold);
903251881Speter		timer_stop(&peer->timers, Timer_IdleHold);
904251881Speter		timer_stop(&peer->timers, Timer_IdleHoldReset);
905251881Speter		session_close_connection(peer);
906251881Speter		msgbuf_clear(&peer->wbuf);
907251881Speter		free(peer->rbuf);
908251881Speter		peer->rbuf = NULL;
909251881Speter		peer->rpending = 0;
910251881Speter		memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
911251881Speter		if (!peer->template)
912251881Speter			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
913251881Speter			    peer->conf.id, 0, -1, NULL, 0);
914251881Speter
915251881Speter		if (event != EVNT_STOP) {
916251881Speter			timer_set(&peer->timers, Timer_IdleHold,
917251881Speter			    peer->IdleHoldTime);
918251881Speter			if (event != EVNT_NONE &&
919251881Speter			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
920251881Speter				peer->IdleHoldTime *= 2;
921251881Speter		}
922251881Speter		if (peer->state == STATE_ESTABLISHED) {
923251881Speter			if (peer->capa.neg.grestart.restart == 2 &&
924251881Speter			    (event == EVNT_CON_CLOSED ||
925251881Speter			    event == EVNT_CON_FATAL)) {
926251881Speter				/* don't punish graceful restart */
927251881Speter				timer_set(&peer->timers, Timer_IdleHold, 0);
928251881Speter				peer->IdleHoldTime /= 2;
929251881Speter				session_graceful_restart(peer);
930251881Speter			} else
931251881Speter				session_down(peer);
932251881Speter		}
933251881Speter		if (peer->state == STATE_NONE ||
934251881Speter		    peer->state == STATE_ESTABLISHED) {
935251881Speter			/* initialize capability negotiation structures */
936251881Speter			memcpy(&peer->capa.ann, &peer->conf.capabilities,
937251881Speter			    sizeof(peer->capa.ann));
938251881Speter		}
939251881Speter		break;
940251881Speter	case STATE_CONNECT:
941251881Speter		if (peer->state == STATE_ESTABLISHED &&
942251881Speter		    peer->capa.neg.grestart.restart == 2) {
943251881Speter			/* do the graceful restart dance */
944251881Speter			session_graceful_restart(peer);
945251881Speter			peer->holdtime = INTERVAL_HOLD_INITIAL;
946251881Speter			timer_stop(&peer->timers, Timer_ConnectRetry);
947251881Speter			timer_stop(&peer->timers, Timer_Keepalive);
948251881Speter			timer_stop(&peer->timers, Timer_Hold);
949251881Speter			timer_stop(&peer->timers, Timer_SendHold);
950251881Speter			timer_stop(&peer->timers, Timer_IdleHold);
951251881Speter			timer_stop(&peer->timers, Timer_IdleHoldReset);
952251881Speter			session_close_connection(peer);
953251881Speter			msgbuf_clear(&peer->wbuf);
954251881Speter			memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
955251881Speter		}
956251881Speter		break;
957251881Speter	case STATE_ACTIVE:
958251881Speter		if (!peer->template)
959251881Speter			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
960251881Speter			    peer->conf.id, 0, -1, NULL, 0);
961251881Speter		break;
962251881Speter	case STATE_OPENSENT:
963251881Speter		break;
964251881Speter	case STATE_OPENCONFIRM:
965251881Speter		break;
966251881Speter	case STATE_ESTABLISHED:
967251881Speter		timer_set(&peer->timers, Timer_IdleHoldReset,
968251881Speter		    peer->IdleHoldTime);
969251881Speter		if (peer->demoted)
970251881Speter			timer_set(&peer->timers, Timer_CarpUndemote,
971251881Speter			    INTERVAL_HOLD_DEMOTED);
972251881Speter		session_up(peer);
973251881Speter		break;
974251881Speter	default:		/* something seriously fucked */
975251881Speter		break;
976251881Speter	}
977251881Speter
978251881Speter	log_statechange(peer, state, event);
979251881Speter	LIST_FOREACH(mrt, &mrthead, entry) {
980251881Speter		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
981251881Speter			continue;
982251881Speter		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
983251881Speter		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
984251881Speter		    mrt->group_id == peer->conf.groupid))
985251881Speter			mrt_dump_state(mrt, peer->state, state, peer);
986251881Speter	}
987251881Speter	peer->prev_state = peer->state;
988251881Speter	peer->state = state;
989251881Speter}
990251881Speter
991251881Spetervoid
992251881Spetersession_accept(int listenfd)
993251881Speter{
994251881Speter	int			 connfd;
995251881Speter	socklen_t		 len;
996251881Speter	struct sockaddr_storage	 cliaddr;
997251881Speter	struct peer		*p = NULL;
998251881Speter
999251881Speter	len = sizeof(cliaddr);
1000251881Speter	if ((connfd = accept4(listenfd,
1001251881Speter	    (struct sockaddr *)&cliaddr, &len,
1002251881Speter	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
1003251881Speter		if (errno == ENFILE || errno == EMFILE)
1004251881Speter			pauseaccept = getmonotime();
1005251881Speter		else if (errno != EWOULDBLOCK && errno != EINTR &&
1006251881Speter		    errno != ECONNABORTED)
1007251881Speter			log_warn("accept");
1008251881Speter		return;
1009251881Speter	}
1010251881Speter
1011251881Speter	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
1012251881Speter
1013251881Speter	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1014251881Speter		if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
1015251881Speter			/* fast reconnect after clear */
1016251881Speter			p->passive = 1;
1017251881Speter			bgp_fsm(p, EVNT_START);
1018251881Speter		}
1019251881Speter	}
1020251881Speter
1021251881Speter	if (p != NULL &&
1022251881Speter	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1023251881Speter		if (p->fd != -1) {
1024251881Speter			if (p->state == STATE_CONNECT)
1025251881Speter				session_close_connection(p);
1026251881Speter			else {
1027251881Speter				close(connfd);
1028251881Speter				return;
1029251881Speter			}
1030251881Speter		}
1031251881Speter
1032251881Speteropen:
1033251881Speter		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1034251881Speter			log_peer_warnx(&p->conf,
1035251881Speter			    "ipsec or md5sig configured but not available");
1036251881Speter			close(connfd);
1037251881Speter			return;
1038251881Speter		}
1039251881Speter
1040251881Speter		if (tcp_md5_check(connfd, p) == -1) {
1041251881Speter			close(connfd);
1042251881Speter			return;
1043251881Speter		}
1044251881Speter		p->fd = p->wbuf.fd = connfd;
1045251881Speter		if (session_setup_socket(p)) {
1046251881Speter			close(connfd);
1047251881Speter			return;
1048251881Speter		}
1049251881Speter		bgp_fsm(p, EVNT_CON_OPEN);
1050251881Speter		return;
1051251881Speter	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1052251881Speter	    p->capa.neg.grestart.restart == 2) {
1053251881Speter		/* first do the graceful restart dance */
1054251881Speter		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1055251881Speter		/* then do part of the open dance */
1056251881Speter		goto open;
1057251881Speter	} else {
1058251881Speter		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1059251881Speter		close(connfd);
1060251881Speter	}
1061251881Speter}
1062251881Speter
1063251881Speterint
1064251881Spetersession_connect(struct peer *peer)
1065251881Speter{
1066251881Speter	struct sockaddr		*sa;
1067251881Speter	struct bgpd_addr	*bind_addr = NULL;
1068251881Speter	socklen_t		 sa_len;
1069251881Speter
1070251881Speter	/*
1071251881Speter	 * we do not need the overcomplicated collision detection RFC 1771
1072251881Speter	 * describes; we simply make sure there is only ever one concurrent
1073251881Speter	 * tcp connection per peer.
1074251881Speter	 */
1075251881Speter	if (peer->fd != -1)
1076251881Speter		return (-1);
1077251881Speter
1078251881Speter	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1079251881Speter	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1080251881Speter		log_peer_warn(&peer->conf, "session_connect socket");
1081251881Speter		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1082251881Speter		return (-1);
1083251881Speter	}
1084251881Speter
1085251881Speter	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1086251881Speter		log_peer_warnx(&peer->conf,
1087251881Speter		    "ipsec or md5sig configured but not available");
1088251881Speter		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1089251881Speter		return (-1);
1090251881Speter	}
1091251881Speter
1092251881Speter	tcp_md5_set(peer->fd, peer);
1093251881Speter	peer->wbuf.fd = peer->fd;
1094251881Speter
1095251881Speter	/* if local-address is set we need to bind() */
1096251881Speter	switch (peer->conf.remote_addr.aid) {
1097251881Speter	case AID_INET:
1098251881Speter		bind_addr = &peer->conf.local_addr_v4;
1099251881Speter		break;
1100251881Speter	case AID_INET6:
1101251881Speter		bind_addr = &peer->conf.local_addr_v6;
1102251881Speter		break;
1103251881Speter	}
1104251881Speter	if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1105251881Speter		if (bind(peer->fd, sa, sa_len) == -1) {
1106251881Speter			log_peer_warn(&peer->conf, "session_connect bind");
1107251881Speter			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1108251881Speter			return (-1);
1109251881Speter		}
1110251881Speter	}
1111251881Speter
1112251881Speter	if (session_setup_socket(peer)) {
1113251881Speter		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1114251881Speter		return (-1);
1115251881Speter	}
1116251881Speter
1117251881Speter	sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1118251881Speter	if (connect(peer->fd, sa, sa_len) == -1) {
1119251881Speter		if (errno != EINPROGRESS) {
1120251881Speter			if (errno != peer->lasterr)
1121251881Speter				log_peer_warn(&peer->conf, "connect");
1122251881Speter			peer->lasterr = errno;
1123251881Speter			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1124251881Speter			return (-1);
1125251881Speter		}
1126251881Speter	} else
1127251881Speter		bgp_fsm(peer, EVNT_CON_OPEN);
1128251881Speter
1129251881Speter	return (0);
1130251881Speter}
1131251881Speter
1132251881Speterint
1133251881Spetersession_setup_socket(struct peer *p)
1134251881Speter{
1135251881Speter	int	ttl = p->conf.distance;
1136251881Speter	int	pre = IPTOS_PREC_INTERNETCONTROL;
1137251881Speter	int	nodelay = 1;
1138251881Speter	int	bsize;
1139251881Speter
1140251881Speter	switch (p->conf.remote_addr.aid) {
1141251881Speter	case AID_INET:
1142251881Speter		/* set precedence, see RFC 1771 appendix 5 */
1143251881Speter		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1144251881Speter		    -1) {
1145251881Speter			log_peer_warn(&p->conf,
1146251881Speter			    "session_setup_socket setsockopt TOS");
1147251881Speter			return (-1);
1148251881Speter		}
1149251881Speter
1150251881Speter		if (p->conf.ebgp) {
1151251881Speter			/*
1152251881Speter			 * set TTL to foreign router's distance
1153251881Speter			 * 1=direct n=multihop with ttlsec, we always use 255
1154251881Speter			 */
1155251881Speter			if (p->conf.ttlsec) {
1156251881Speter				ttl = 256 - p->conf.distance;
1157251881Speter				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1158251881Speter				    &ttl, sizeof(ttl)) == -1) {
1159251881Speter					log_peer_warn(&p->conf,
1160251881Speter					    "session_setup_socket: "
1161251881Speter					    "setsockopt MINTTL");
1162251881Speter					return (-1);
1163251881Speter				}
1164251881Speter				ttl = 255;
1165251881Speter			}
1166251881Speter
1167251881Speter			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1168251881Speter			    sizeof(ttl)) == -1) {
1169251881Speter				log_peer_warn(&p->conf,
1170251881Speter				    "session_setup_socket setsockopt TTL");
1171251881Speter				return (-1);
1172251881Speter			}
1173251881Speter		}
1174251881Speter		break;
1175251881Speter	case AID_INET6:
1176251881Speter		if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre,
1177251881Speter		    sizeof(pre)) == -1) {
1178251881Speter			log_peer_warn(&p->conf, "session_setup_socket "
1179251881Speter			    "setsockopt TCLASS");
1180251881Speter			return (-1);
1181251881Speter		}
1182251881Speter
1183251881Speter		if (p->conf.ebgp) {
1184251881Speter			/*
1185251881Speter			 * set hoplimit to foreign router's distance
1186251881Speter			 * 1=direct n=multihop with ttlsec, we always use 255
1187251881Speter			 */
1188251881Speter			if (p->conf.ttlsec) {
1189251881Speter				ttl = 256 - p->conf.distance;
1190251881Speter				if (setsockopt(p->fd, IPPROTO_IPV6,
1191251881Speter				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1192251881Speter				    == -1) {
1193251881Speter					log_peer_warn(&p->conf,
1194251881Speter					    "session_setup_socket: "
1195251881Speter					    "setsockopt MINHOPCOUNT");
1196251881Speter					return (-1);
1197251881Speter				}
1198251881Speter				ttl = 255;
1199251881Speter			}
1200251881Speter			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1201251881Speter			    &ttl, sizeof(ttl)) == -1) {
1202251881Speter				log_peer_warn(&p->conf,
1203251881Speter				    "session_setup_socket setsockopt hoplimit");
1204251881Speter				return (-1);
1205251881Speter			}
1206251881Speter		}
1207251881Speter		break;
1208251881Speter	}
1209251881Speter
1210251881Speter	/* set TCP_NODELAY */
1211251881Speter	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1212251881Speter	    sizeof(nodelay)) == -1) {
1213251881Speter		log_peer_warn(&p->conf,
1214251881Speter		    "session_setup_socket setsockopt TCP_NODELAY");
1215251881Speter		return (-1);
1216251881Speter	}
1217251881Speter
1218251881Speter	/* limit bufsize. no biggie if it fails */
1219251881Speter	bsize = 65535;
1220251881Speter	setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize));
1221251881Speter	setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize));
1222251881Speter
1223251881Speter	return (0);
1224251881Speter}
1225251881Speter
1226251881Speter/*
1227251881Speter * compare the bgpd_addr with the sockaddr by converting the latter into
1228251881Speter * a bgpd_addr. Return true if the two are equal, including any scope
1229251881Speter */
1230251881Speterstatic int
1231251881Spetersa_equal(struct bgpd_addr *ba, struct sockaddr *b)
1232251881Speter{
1233251881Speter	struct bgpd_addr bb;
1234251881Speter
1235251881Speter	sa2addr(b, &bb, NULL);
1236251881Speter	return (memcmp(ba, &bb, sizeof(*ba)) == 0);
1237251881Speter}
1238251881Speter
1239251881Speterstatic void
1240251881Speterget_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote,
1241251881Speter    struct bgpd_addr *alt, unsigned int *scope)
1242251881Speter{
1243251881Speter	struct ifaddrs	*ifap, *ifa, *match;
1244251881Speter	int connected = 0;
1245251881Speter	u_int8_t plen;
1246251881Speter
1247251881Speter	if (getifaddrs(&ifap) == -1)
1248251881Speter		fatal("getifaddrs");
1249251881Speter
1250251881Speter	for (match = ifap; match != NULL; match = match->ifa_next) {
1251251881Speter		if (match->ifa_addr == NULL)
1252251881Speter			continue;
1253251881Speter		if (match->ifa_addr->sa_family != AF_INET &&
1254251881Speter		    match->ifa_addr->sa_family != AF_INET6)
1255251881Speter			continue;
1256251881Speter		if (sa_equal(local, match->ifa_addr)) {
1257251881Speter			if (remote->aid == AID_INET6 &&
1258251881Speter			    IN6_IS_ADDR_LINKLOCAL(&remote->v6)) {
1259251881Speter				/* IPv6 LLA are by definition connected */
1260251881Speter				connected = 1;
1261251881Speter			} else if (match->ifa_flags & IFF_POINTOPOINT &&
1262251881Speter			    match->ifa_dstaddr != NULL) {
1263251881Speter				if (sa_equal(remote, match->ifa_dstaddr))
1264251881Speter					connected = 1;
1265251881Speter			} else if (match->ifa_netmask != NULL) {
1266251881Speter				plen = mask2prefixlen(
1267251881Speter				    match->ifa_addr->sa_family,
1268251881Speter				    match->ifa_netmask);
1269251881Speter				if (prefix_compare(local, remote, plen) == 0)
1270251881Speter					connected = 1;
1271251881Speter			}
1272251881Speter			break;
1273251881Speter		}
1274251881Speter	}
1275251881Speter
1276251881Speter	if (match == NULL) {
1277251881Speter		log_warnx("%s: local address not found", __func__);
1278251881Speter		return;
1279251881Speter	}
1280251881Speter	if (connected)
1281251881Speter		*scope = if_nametoindex(match->ifa_name);
1282251881Speter	else
1283251881Speter		*scope = 0;
1284251881Speter
1285251881Speter	switch (local->aid) {
1286251881Speter	case AID_INET6:
1287251881Speter		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1288251881Speter			if (ifa->ifa_addr != NULL &&
1289251881Speter			    ifa->ifa_addr->sa_family == AF_INET &&
1290251881Speter			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1291251881Speter				sa2addr(ifa->ifa_addr, alt, NULL);
1292251881Speter				break;
1293251881Speter			}
1294251881Speter		}
1295251881Speter		break;
1296251881Speter	case AID_INET:
1297251881Speter		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1298251881Speter			if (ifa->ifa_addr != NULL &&
1299251881Speter			    ifa->ifa_addr->sa_family == AF_INET6 &&
1300251881Speter			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1301251881Speter				struct sockaddr_in6 *s =
1302251881Speter				    (struct sockaddr_in6 *)ifa->ifa_addr;
1303251881Speter
1304251881Speter				/* only accept global scope addresses */
1305251881Speter				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
1306251881Speter				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
1307251881Speter					continue;
1308251881Speter				sa2addr(ifa->ifa_addr, alt, NULL);
1309251881Speter				break;
1310251881Speter			}
1311251881Speter		}
1312251881Speter		break;
1313251881Speter	default:
1314251881Speter		log_warnx("%s: unsupported address family %s", __func__,
1315251881Speter		    aid2str(local->aid));
1316251881Speter		break;
1317251881Speter	}
1318251881Speter
1319251881Speter	freeifaddrs(ifap);
1320251881Speter}
1321251881Speter
1322251881Spetervoid
1323251881Spetersession_tcp_established(struct peer *peer)
1324251881Speter{
1325251881Speter	struct sockaddr_storage	ss;
1326251881Speter	socklen_t		len;
1327251881Speter
1328251881Speter	len = sizeof(ss);
1329251881Speter	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1330251881Speter		log_warn("getsockname");
1331251881Speter	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1332251881Speter	len = sizeof(ss);
1333251881Speter	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1334251881Speter		log_warn("getpeername");
1335251881Speter	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1336251881Speter
1337251881Speter	get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt,
1338251881Speter	    &peer->if_scope);
1339251881Speter}
1340251881Speter
/*
 * Append a capability header, i.e. the one byte capability code followed
 * by the one byte capability length, to opb.  Returns the sum of the
 * ibuf_add_n8() results, which is 0 if both bytes were added.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int rv;

	rv = ibuf_add_n8(opb, capa_code);
	rv += ibuf_add_n8(opb, capa_len);

	return (rv);
}
1350251881Speter
/*
 * Append the payload of a multiprotocol capability for the given AID to
 * buf: the 16 bit AFI, one zero byte and the 8 bit SAFI.  Returns -1 if
 * the AID can not be mapped to an AFI/SAFI pair, otherwise the sum of the
 * ibuf_add_*() results (0 if everything was added).
 */
int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	afi;
	uint8_t		safi;
	int		rv;

	if (aid2afi(aid, &afi, &safi) == -1) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	rv = ibuf_add_n16(buf, afi);
	rv += ibuf_add_zero(buf, 1);
	rv += ibuf_add_n8(buf, safi);

	return (rv);
}
1369251881Speter
/*
 * Append an AFI/SAFI/flags triple for the given AID to b: the 16 bit AFI,
 * the 8 bit SAFI and the 8 bit flags.  Returns -1 if the AID can not be
 * mapped to an AFI/SAFI pair, otherwise the sum of the ibuf_add_*()
 * results (0 if everything was added).
 */
int
session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
{
	/*
	 * Signed like in the sibling helpers: the accumulated value is
	 * returned as an int, so it must not take a detour through an
	 * unsigned type.
	 */
	int		errs = 0;
	uint16_t	afi;
	uint8_t		safi;

	if (aid2afi(aid, &afi, &safi)) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	errs += ibuf_add_n16(b, afi);
	errs += ibuf_add_n8(b, safi);
	errs += ibuf_add_n8(b, flags);

	return (errs);
}
1388251881Speter
1389251881Speterstruct bgp_msg *
1390251881Spetersession_newmsg(enum msg_type msgtype, uint16_t len)
1391251881Speter{
1392251881Speter	struct bgp_msg		*msg;
1393251881Speter	struct ibuf		*buf;
1394251881Speter	int			 errs = 0;
1395251881Speter
1396251881Speter	if ((buf = ibuf_open(len)) == NULL)
1397251881Speter		return (NULL);
1398251881Speter
1399251881Speter	errs += ibuf_add(buf, marker, sizeof(marker));
1400251881Speter	errs += ibuf_add_n16(buf, len);
1401251881Speter	errs += ibuf_add_n8(buf, msgtype);
1402251881Speter
1403251881Speter	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1404251881Speter		ibuf_free(buf);
1405251881Speter		return (NULL);
1406251881Speter	}
1407251881Speter
1408251881Speter	msg->buf = buf;
1409251881Speter	msg->type = msgtype;
1410251881Speter	msg->len = len;
1411251881Speter
1412251881Speter	return (msg);
1413251881Speter}
1414251881Speter
1415251881Speterint
1416251881Spetersession_sendmsg(struct bgp_msg *msg, struct peer *p)
1417251881Speter{
1418251881Speter	struct mrt		*mrt;
1419251881Speter
1420251881Speter	LIST_FOREACH(mrt, &mrthead, entry) {
1421251881Speter		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1422251881Speter		    mrt->type == MRT_UPDATE_OUT)))
1423251881Speter			continue;
1424251881Speter		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1425251881Speter		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1426251881Speter		    mrt->group_id == p->conf.groupid))
1427251881Speter			mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p,
1428251881Speter			    msg->type);
1429251881Speter	}
1430251881Speter
1431251881Speter	ibuf_close(&p->wbuf, msg->buf);
1432251881Speter	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1433251881Speter		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1434251881Speter			log_peer_warn(&p->conf, "imsg_compose XOFF");
1435251881Speter		else
1436251881Speter			p->throttled = 1;
1437251881Speter	}
1438251881Speter
1439251881Speter	free(msg);
1440251881Speter	return (0);
1441251881Speter}
1442251881Speter
1443251881Speter/*
1444251881Speter * Translate between internal roles and the value expected by RFC 9234.
1445251881Speter */
1446251881Speterstatic uint8_t
1447251881Speterrole2capa(enum role role)
1448251881Speter{
1449251881Speter	switch (role) {
1450251881Speter	case ROLE_CUSTOMER:
1451251881Speter		return CAPA_ROLE_CUSTOMER;
1452251881Speter	case ROLE_PROVIDER:
1453251881Speter		return CAPA_ROLE_PROVIDER;
1454251881Speter	case ROLE_RS:
1455251881Speter		return CAPA_ROLE_RS;
1456251881Speter	case ROLE_RS_CLIENT:
1457251881Speter		return CAPA_ROLE_RS_CLIENT;
1458251881Speter	case ROLE_PEER:
1459251881Speter		return CAPA_ROLE_PEER;
1460251881Speter	default:
1461251881Speter		fatalx("Unsupported role for role capability");
1462251881Speter	}
1463251881Speter}
1464251881Speter
1465251881Speterstatic enum role
1466251881Spetercapa2role(uint8_t val)
1467251881Speter{
1468251881Speter	switch (val) {
1469251881Speter	case CAPA_ROLE_PROVIDER:
1470251881Speter		return ROLE_PROVIDER;
1471251881Speter	case CAPA_ROLE_RS:
1472251881Speter		return ROLE_RS;
1473251881Speter	case CAPA_ROLE_RS_CLIENT:
1474251881Speter		return ROLE_RS_CLIENT;
1475251881Speter	case CAPA_ROLE_CUSTOMER:
1476251881Speter		return ROLE_CUSTOMER;
1477251881Speter	case CAPA_ROLE_PEER:
1478251881Speter		return ROLE_PEER;
1479251881Speter	default:
1480251881Speter		return ROLE_NONE;
1481251881Speter	}
1482251881Speter}
1483251881Speter
1484251881Spetervoid
1485251881Spetersession_open(struct peer *p)
1486251881Speter{
1487251881Speter	struct bgp_msg		*buf;
1488251881Speter	struct ibuf		*opb;
1489251881Speter	size_t			 len, optparamlen;
1490251881Speter	uint16_t		 holdtime;
1491251881Speter	uint8_t			 i;
1492251881Speter	int			 errs = 0, extlen = 0;
1493251881Speter	int			 mpcapa = 0;
1494251881Speter
1495251881Speter
1496251881Speter	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
1497251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1498251881Speter		return;
1499251881Speter	}
1500251881Speter
1501251881Speter	/* multiprotocol extensions, RFC 4760 */
1502251881Speter	for (i = AID_MIN; i < AID_MAX; i++)
1503251881Speter		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1504251881Speter			errs += session_capa_add(opb, CAPA_MP, 4);
1505251881Speter			errs += session_capa_add_mp(opb, i);
1506251881Speter			mpcapa++;
1507251881Speter		}
1508251881Speter
1509251881Speter	/* route refresh, RFC 2918 */
1510251881Speter	if (p->capa.ann.refresh)	/* no data */
1511251881Speter		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1512251881Speter
1513251881Speter	/* BGP open policy, RFC 9234, only for ebgp sessions */
1514251881Speter	if (p->conf.ebgp && p->capa.ann.policy &&
1515251881Speter	    p->conf.role != ROLE_NONE &&
1516251881Speter	    (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
1517251881Speter	    mpcapa == 0)) {
1518251881Speter		errs += session_capa_add(opb, CAPA_ROLE, 1);
1519251881Speter		errs += ibuf_add_n8(opb, role2capa(p->conf.role));
1520251881Speter	}
1521251881Speter
1522251881Speter	/* graceful restart and End-of-RIB marker, RFC 4724 */
1523251881Speter	if (p->capa.ann.grestart.restart) {
1524251881Speter		int		rst = 0;
1525251881Speter		uint16_t	hdr = 0;
1526251881Speter
1527251881Speter		for (i = AID_MIN; i < AID_MAX; i++) {
1528251881Speter			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
1529251881Speter				rst++;
1530251881Speter		}
1531251881Speter
1532251881Speter		/* Only set the R-flag if no graceful restart is ongoing */
1533251881Speter		if (!rst)
1534251881Speter			hdr |= CAPA_GR_R_FLAG;
1535251881Speter		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
1536251881Speter		errs += ibuf_add_n16(opb, hdr);
1537251881Speter	}
1538251881Speter
1539251881Speter	/* 4-bytes AS numbers, RFC6793 */
1540251881Speter	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1541251881Speter		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
1542251881Speter		errs += ibuf_add_n32(opb, p->conf.local_as);
1543251881Speter	}
1544251881Speter
1545251881Speter	/* advertisement of multiple paths, RFC7911 */
1546251881Speter	if (p->capa.ann.add_path[AID_MIN]) {	/* variable */
1547251881Speter		uint8_t	aplen;
1548251881Speter
1549251881Speter		if (mpcapa)
1550251881Speter			aplen = 4 * mpcapa;
1551251881Speter		else	/* AID_INET */
1552251881Speter			aplen = 4;
1553251881Speter		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
1554251881Speter		if (mpcapa) {
1555251881Speter			for (i = AID_MIN; i < AID_MAX; i++) {
1556251881Speter				if (p->capa.ann.mp[i]) {
1557251881Speter					errs += session_capa_add_afi(opb,
1558251881Speter					    i, p->capa.ann.add_path[i] &
1559251881Speter					    CAPA_AP_MASK);
1560251881Speter				}
1561251881Speter			}
1562251881Speter		} else {	/* AID_INET */
1563251881Speter			errs += session_capa_add_afi(opb, AID_INET,
1564251881Speter			    p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK);
1565251881Speter		}
1566251881Speter	}
1567251881Speter
1568251881Speter	/* enhanced route-refresh, RFC7313 */
1569251881Speter	if (p->capa.ann.enhanced_rr)	/* no data */
1570251881Speter		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);
1571251881Speter
1572251881Speter	if (errs) {
1573251881Speter		ibuf_free(opb);
1574251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1575251881Speter		return;
1576251881Speter	}
1577251881Speter
1578251881Speter	optparamlen = ibuf_size(opb);
1579251881Speter	len = MSGSIZE_OPEN_MIN + optparamlen;
1580251881Speter	if (optparamlen == 0) {
1581251881Speter		/* nothing */
1582251881Speter	} else if (optparamlen + 2 >= 255) {
1583251881Speter		/* RFC9072: use 255 as magic size and request extra header */
1584251881Speter		optparamlen = 255;
1585251881Speter		extlen = 1;
1586251881Speter		/* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */
1587251881Speter		len += 2 * 3;
1588251881Speter	} else {
1589251881Speter		/* regular capabilities header */
1590251881Speter		optparamlen += 2;
1591251881Speter		len += 2;
1592251881Speter	}
1593251881Speter
1594251881Speter	if ((buf = session_newmsg(OPEN, len)) == NULL) {
1595251881Speter		ibuf_free(opb);
1596251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1597251881Speter		return;
1598251881Speter	}
1599251881Speter
1600251881Speter	if (p->conf.holdtime)
1601251881Speter		holdtime = p->conf.holdtime;
1602251881Speter	else
1603251881Speter		holdtime = conf->holdtime;
1604251881Speter
1605251881Speter	errs += ibuf_add_n8(buf->buf, 4);
1606251881Speter	errs += ibuf_add_n16(buf->buf, p->conf.local_short_as);
1607251881Speter	errs += ibuf_add_n16(buf->buf, holdtime);
1608251881Speter	/* is already in network byte order */
1609251881Speter	errs += ibuf_add_n32(buf->buf, conf->bgpid);
1610251881Speter	errs += ibuf_add_n8(buf->buf, optparamlen);
1611251881Speter
1612251881Speter	if (extlen) {
1613251881Speter		/* RFC9072 extra header which spans over the capabilities hdr */
1614251881Speter		errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN);
1615251881Speter		errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2);
1616251881Speter	}
1617251881Speter
1618251881Speter	if (optparamlen) {
1619251881Speter		errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES);
1620251881Speter
1621251881Speter		if (extlen) {
1622251881Speter			/* RFC9072: 2-byte extended length */
1623251881Speter			errs += ibuf_add_n16(buf->buf, ibuf_size(opb));
1624251881Speter		} else {
1625251881Speter			errs += ibuf_add_n8(buf->buf, ibuf_size(opb));
1626251881Speter		}
1627251881Speter		errs += ibuf_add_buf(buf->buf, opb);
1628251881Speter	}
1629251881Speter
1630251881Speter	ibuf_free(opb);
1631251881Speter
1632251881Speter	if (errs) {
1633251881Speter		ibuf_free(buf->buf);
1634251881Speter		free(buf);
1635251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1636251881Speter		return;
1637251881Speter	}
1638251881Speter
1639251881Speter	if (session_sendmsg(buf, p) == -1) {
1640251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1641251881Speter		return;
1642251881Speter	}
1643251881Speter
1644251881Speter	p->stats.msg_sent_open++;
1645251881Speter}
1646251881Speter
1647251881Spetervoid
1648251881Spetersession_keepalive(struct peer *p)
1649251881Speter{
1650251881Speter	struct bgp_msg		*buf;
1651251881Speter
1652251881Speter	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1653251881Speter	    session_sendmsg(buf, p) == -1) {
1654251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1655251881Speter		return;
1656251881Speter	}
1657251881Speter
1658251881Speter	start_timer_keepalive(p);
1659251881Speter	p->stats.msg_sent_keepalive++;
1660251881Speter}
1661251881Speter
1662251881Spetervoid
1663251881Spetersession_update(uint32_t peerid, struct ibuf *ibuf)
1664251881Speter{
1665251881Speter	struct peer		*p;
1666251881Speter	struct bgp_msg		*buf;
1667251881Speter
1668251881Speter	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1669251881Speter		log_warnx("no such peer: id=%u", peerid);
1670251881Speter		return;
1671251881Speter	}
1672251881Speter
1673251881Speter	if (p->state != STATE_ESTABLISHED)
1674251881Speter		return;
1675251881Speter
1676251881Speter	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + ibuf_size(ibuf))) ==
1677251881Speter	    NULL) {
1678251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1679251881Speter		return;
1680251881Speter	}
1681251881Speter
1682251881Speter	if (ibuf_add_buf(buf->buf, ibuf)) {
1683251881Speter		ibuf_free(buf->buf);
1684251881Speter		free(buf);
1685251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1686251881Speter		return;
1687251881Speter	}
1688251881Speter
1689251881Speter	if (session_sendmsg(buf, p) == -1) {
1690251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1691251881Speter		return;
1692251881Speter	}
1693251881Speter
1694251881Speter	start_timer_keepalive(p);
1695251881Speter	p->stats.msg_sent_update++;
1696251881Speter}
1697251881Speter
1698251881Spetervoid
1699251881Spetersession_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode,
1700251881Speter    void *data, size_t datalen)
1701251881Speter{
1702251881Speter	struct ibuf ibuf;
1703251881Speter
1704251881Speter	ibuf_from_buffer(&ibuf, data, datalen);
1705251881Speter	session_notification(p, errcode, subcode, &ibuf);
1706251881Speter}
1707251881Speter
1708251881Spetervoid
1709251881Spetersession_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1710251881Speter    struct ibuf *ibuf)
1711251881Speter{
1712251881Speter	struct bgp_msg		*buf;
1713251881Speter	int			 errs = 0;
1714251881Speter	size_t			 datalen = 0;
1715251881Speter
1716251881Speter	switch (p->state) {
1717251881Speter	case STATE_OPENSENT:
1718251881Speter	case STATE_OPENCONFIRM:
1719251881Speter	case STATE_ESTABLISHED:
1720251881Speter		break;
1721251881Speter	default:
1722251881Speter		/* session not open, no need to send notification */
1723251881Speter		log_notification(p, errcode, subcode, ibuf, "dropping");
1724251881Speter		return;
1725251881Speter	}
1726251881Speter
1727251881Speter	log_notification(p, errcode, subcode, ibuf, "sending");
1728251881Speter
1729251881Speter	/* cap to maximum size */
1730251881Speter	if (ibuf != NULL) {
1731251881Speter		if (ibuf_size(ibuf) >
1732251881Speter		    MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) {
1733251881Speter			log_peer_warnx(&p->conf,
1734251881Speter			    "oversized notification, data trunkated");
1735251881Speter			ibuf_truncate(ibuf, MAX_PKTSIZE -
1736251881Speter			    MSGSIZE_NOTIFICATION_MIN);
1737251881Speter		}
1738251881Speter		datalen = ibuf_size(ibuf);
1739251881Speter	}
1740251881Speter
1741251881Speter	if ((buf = session_newmsg(NOTIFICATION,
1742251881Speter	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1743251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1744251881Speter		return;
1745251881Speter	}
1746251881Speter
1747251881Speter	errs += ibuf_add_n8(buf->buf, errcode);
1748251881Speter	errs += ibuf_add_n8(buf->buf, subcode);
1749251881Speter
1750251881Speter	if (ibuf != NULL)
1751251881Speter		errs += ibuf_add_buf(buf->buf, ibuf);
1752251881Speter
1753251881Speter	if (errs) {
1754251881Speter		ibuf_free(buf->buf);
1755251881Speter		free(buf);
1756251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1757251881Speter		return;
1758251881Speter	}
1759251881Speter
1760251881Speter	if (session_sendmsg(buf, p) == -1) {
1761251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1762251881Speter		return;
1763251881Speter	}
1764251881Speter
1765251881Speter	p->stats.msg_sent_notification++;
1766251881Speter	p->stats.last_sent_errcode = errcode;
1767251881Speter	p->stats.last_sent_suberr = subcode;
1768251881Speter}
1769251881Speter
1770251881Speterint
1771251881Spetersession_neighbor_rrefresh(struct peer *p)
1772251881Speter{
1773251881Speter	uint8_t	i;
1774251881Speter
1775251881Speter	if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1776251881Speter		return (-1);
1777251881Speter
1778251881Speter	for (i = AID_MIN; i < AID_MAX; i++) {
1779251881Speter		if (p->capa.neg.mp[i] != 0)
1780251881Speter			session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1781251881Speter	}
1782251881Speter
1783251881Speter	return (0);
1784251881Speter}
1785251881Speter
1786251881Spetervoid
1787251881Spetersession_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1788251881Speter{
1789251881Speter	struct bgp_msg		*buf;
1790251881Speter	int			 errs = 0;
1791251881Speter	uint16_t		 afi;
1792251881Speter	uint8_t			 safi;
1793251881Speter
1794251881Speter	switch (subtype) {
1795251881Speter	case ROUTE_REFRESH_REQUEST:
1796251881Speter		p->stats.refresh_sent_req++;
1797251881Speter		break;
1798251881Speter	case ROUTE_REFRESH_BEGIN_RR:
1799251881Speter	case ROUTE_REFRESH_END_RR:
1800251881Speter		/* requires enhanced route refresh */
1801251881Speter		if (!p->capa.neg.enhanced_rr)
1802251881Speter			return;
1803251881Speter		if (subtype == ROUTE_REFRESH_BEGIN_RR)
1804251881Speter			p->stats.refresh_sent_borr++;
1805251881Speter		else
1806251881Speter			p->stats.refresh_sent_eorr++;
1807251881Speter		break;
1808251881Speter	default:
1809251881Speter		fatalx("session_rrefresh: bad subtype %d", subtype);
1810251881Speter	}
1811251881Speter
1812251881Speter	if (aid2afi(aid, &afi, &safi) == -1)
1813251881Speter		fatalx("session_rrefresh: bad afi/safi pair");
1814251881Speter
1815251881Speter	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1816251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1817251881Speter		return;
1818251881Speter	}
1819251881Speter
1820251881Speter	errs += ibuf_add_n16(buf->buf, afi);
1821251881Speter	errs += ibuf_add_n8(buf->buf, subtype);
1822251881Speter	errs += ibuf_add_n8(buf->buf, safi);
1823251881Speter
1824251881Speter	if (errs) {
1825251881Speter		ibuf_free(buf->buf);
1826251881Speter		free(buf);
1827251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1828251881Speter		return;
1829251881Speter	}
1830251881Speter
1831251881Speter	if (session_sendmsg(buf, p) == -1) {
1832251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1833251881Speter		return;
1834251881Speter	}
1835251881Speter
1836251881Speter	p->stats.msg_sent_rrefresh++;
1837251881Speter}
1838251881Speter
1839251881Speterint
1840251881Spetersession_graceful_restart(struct peer *p)
1841251881Speter{
1842251881Speter	uint8_t	i;
1843251881Speter
1844251881Speter	timer_set(&p->timers, Timer_RestartTimeout,
1845251881Speter	    p->capa.neg.grestart.timeout);
1846251881Speter
1847251881Speter	for (i = AID_MIN; i < AID_MAX; i++) {
1848251881Speter		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1849251881Speter			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1850251881Speter			    &i, sizeof(i)) == -1)
1851251881Speter				return (-1);
1852251881Speter			log_peer_warnx(&p->conf,
1853251881Speter			    "graceful restart of %s, keeping routes",
1854251881Speter			    aid2str(i));
1855251881Speter			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1856251881Speter		} else if (p->capa.neg.mp[i]) {
1857251881Speter			if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
1858251881Speter			    &i, sizeof(i)) == -1)
1859251881Speter				return (-1);
1860251881Speter			log_peer_warnx(&p->conf,
1861251881Speter			    "graceful restart of %s, flushing routes",
1862251881Speter			    aid2str(i));
1863251881Speter		}
1864251881Speter	}
1865251881Speter	return (0);
1866251881Speter}
1867251881Speter
1868251881Speterint
1869251881Spetersession_graceful_stop(struct peer *p)
1870251881Speter{
1871251881Speter	uint8_t	i;
1872251881Speter
1873262253Speter	for (i = AID_MIN; i < AID_MAX; i++) {
1874251881Speter		/*
1875251881Speter		 * Only flush if the peer is restarting and the timeout fired.
1876262253Speter		 * In all other cases the session was already flushed when the
1877251881Speter		 * session went down or when the new open message was parsed.
1878251881Speter		 */
1879262253Speter		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1880251881Speter			log_peer_warnx(&p->conf, "graceful restart of %s, "
1881251881Speter			    "time-out, flushing", aid2str(i));
1882262253Speter			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1883251881Speter			    &i, sizeof(i)) == -1)
1884251881Speter				return (-1);
1885262253Speter		}
1886251881Speter		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1887251881Speter	}
1888262253Speter	return (0);
1889251881Speter}
1890251881Speter
1891262253Speterint
1892251881Spetersession_dispatch_msg(struct pollfd *pfd, struct peer *p)
1893251881Speter{
1894262253Speter	ssize_t		n;
1895251881Speter	socklen_t	len;
1896251881Speter	int		error;
1897262253Speter
1898251881Speter	if (p->state == STATE_CONNECT) {
1899251881Speter		if (pfd->revents & POLLOUT) {
1900251881Speter			if (pfd->revents & POLLIN) {
1901251881Speter				/* error occurred */
1902251881Speter				len = sizeof(error);
1903262253Speter				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1904251881Speter				    &error, &len) == -1 || error) {
1905251881Speter					if (error)
1906262253Speter						errno = error;
1907251881Speter					if (errno != p->lasterr) {
1908251881Speter						log_peer_warn(&p->conf,
1909262253Speter						    "socket error");
1910251881Speter						p->lasterr = errno;
1911251881Speter					}
1912262253Speter					bgp_fsm(p, EVNT_CON_OPENFAIL);
1913251881Speter					return (1);
1914251881Speter				}
1915262253Speter			}
1916251881Speter			bgp_fsm(p, EVNT_CON_OPEN);
1917251881Speter			return (1);
1918262253Speter		}
1919251881Speter		if (pfd->revents & POLLHUP) {
1920251881Speter			bgp_fsm(p, EVNT_CON_OPENFAIL);
1921262253Speter			return (1);
1922251881Speter		}
1923251881Speter		if (pfd->revents & (POLLERR|POLLNVAL)) {
1924262253Speter			bgp_fsm(p, EVNT_CON_FATAL);
1925251881Speter			return (1);
1926251881Speter		}
1927251881Speter		return (0);
1928251881Speter	}
1929251881Speter
1930262253Speter	if (pfd->revents & POLLHUP) {
1931251881Speter		bgp_fsm(p, EVNT_CON_CLOSED);
1932251881Speter		return (1);
1933262253Speter	}
1934251881Speter	if (pfd->revents & (POLLERR|POLLNVAL)) {
1935251881Speter		bgp_fsm(p, EVNT_CON_FATAL);
1936262253Speter		return (1);
1937251881Speter	}
1938251881Speter
1939262253Speter	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1940251881Speter		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1941251881Speter			if (error == 0)
1942262253Speter				log_peer_warnx(&p->conf, "Connection closed");
1943251881Speter			else if (error == -1)
1944251881Speter				log_peer_warn(&p->conf, "write error");
1945262253Speter			bgp_fsm(p, EVNT_CON_FATAL);
1946251881Speter			return (1);
1947251881Speter		}
1948262253Speter		p->stats.last_write = getmonotime();
1949251881Speter		start_timer_sendholdtime(p);
1950251881Speter		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1951251881Speter			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1952251881Speter				log_peer_warn(&p->conf, "imsg_compose XON");
1953251881Speter			else
1954251881Speter				p->throttled = 0;
1955251881Speter		}
1956251881Speter		if (!(pfd->revents & POLLIN))
1957251881Speter			return (1);
1958251881Speter	}
1959251881Speter
1960251881Speter	if (p->rbuf && pfd->revents & POLLIN) {
1961251881Speter		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1962251881Speter		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1963251881Speter			if (errno != EINTR && errno != EAGAIN) {
1964251881Speter				log_peer_warn(&p->conf, "read error");
1965251881Speter				bgp_fsm(p, EVNT_CON_FATAL);
1966251881Speter			}
1967251881Speter			return (1);
1968251881Speter		}
1969251881Speter		if (n == 0) {	/* connection closed */
1970251881Speter			bgp_fsm(p, EVNT_CON_CLOSED);
1971251881Speter			return (1);
1972251881Speter		}
1973251881Speter
1974251881Speter		p->rbuf->wpos += n;
1975251881Speter		p->stats.last_read = getmonotime();
1976251881Speter		return (1);
1977251881Speter	}
1978251881Speter	return (0);
1979251881Speter}
1980262253Speter
1981251881Spetervoid
1982251881Spetersession_process_msg(struct peer *p)
1983251881Speter{
1984251881Speter	struct mrt	*mrt;
1985251881Speter	ssize_t		rpos, av, left;
1986251881Speter	int		processed = 0;
1987251881Speter	uint16_t	msglen;
1988262253Speter	uint8_t		msgtype;
1989251881Speter
1990251881Speter	rpos = 0;
1991251881Speter	av = p->rbuf->wpos;
1992251881Speter	p->rpending = 0;
1993251881Speter
1994262253Speter	/*
1995251881Speter	 * session might drop to IDLE -> buffers deallocated
1996251881Speter	 * we MUST check rbuf != NULL before use
1997251881Speter	 */
1998262253Speter	for (;;) {
1999251881Speter		if (p->rbuf == NULL)
2000251881Speter			return;
2001251881Speter		if (rpos + MSGSIZE_HEADER > av)
2002251881Speter			break;
2003251881Speter		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
2004251881Speter		    &msgtype) == -1)
2005251881Speter			return;
2006251881Speter		if (rpos + msglen > av)
2007251881Speter			break;
2008251881Speter		p->rbuf->rptr = p->rbuf->buf + rpos;
2009251881Speter
2010262253Speter		/* dump to MRT as soon as we have a full packet */
2011251881Speter		LIST_FOREACH(mrt, &mrthead, entry) {
2012251881Speter			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
2013251881Speter			    mrt->type == MRT_UPDATE_IN)))
2014262253Speter				continue;
2015251881Speter			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
2016251881Speter			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
2017251881Speter			    mrt->group_id == p->conf.groupid))
2018251881Speter				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p,
2019251881Speter				    msgtype);
2020251881Speter		}
2021251881Speter
2022262253Speter		switch (msgtype) {
2023251881Speter		case OPEN:
2024251881Speter			bgp_fsm(p, EVNT_RCVD_OPEN);
2025251881Speter			p->stats.msg_rcvd_open++;
2026251881Speter			break;
2027251881Speter		case UPDATE:
2028251881Speter			bgp_fsm(p, EVNT_RCVD_UPDATE);
2029251881Speter			p->stats.msg_rcvd_update++;
2030251881Speter			break;
2031251881Speter		case NOTIFICATION:
2032251881Speter			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
2033251881Speter			p->stats.msg_rcvd_notification++;
2034251881Speter			break;
2035251881Speter		case KEEPALIVE:
2036251881Speter			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
2037251881Speter			p->stats.msg_rcvd_keepalive++;
2038251881Speter			break;
2039251881Speter		case RREFRESH:
2040251881Speter			parse_rrefresh(p);
2041251881Speter			p->stats.msg_rcvd_rrefresh++;
2042251881Speter			break;
2043251881Speter		default:	/* cannot happen */
2044251881Speter			session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE,
2045251881Speter			    &msgtype, 1);
2046251881Speter			log_warnx("received message with unknown type %u",
2047251881Speter			    msgtype);
2048251881Speter			bgp_fsm(p, EVNT_CON_FATAL);
2049251881Speter		}
2050251881Speter		rpos += msglen;
2051251881Speter		if (++processed > MSG_PROCESS_LIMIT) {
2052251881Speter			p->rpending = 1;
2053251881Speter			break;
2054251881Speter		}
2055251881Speter	}
2056251881Speter
2057251881Speter	if (p->rbuf == NULL)
2058251881Speter		return;
2059251881Speter	if (rpos < av) {
2060251881Speter		left = av - rpos;
2061251881Speter		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
2062251881Speter		p->rbuf->wpos = left;
2063251881Speter	} else
2064251881Speter		p->rbuf->wpos = 0;
2065251881Speter}
2066251881Speter
2067251881Speterint
2068251881Speterparse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type)
2069251881Speter{
2070251881Speter	u_char			*p;
2071251881Speter	uint16_t		 olen;
2072251881Speter
2073251881Speter	/* caller MUST make sure we are getting 19 bytes! */
2074251881Speter	p = data;
2075251881Speter	if (memcmp(p, marker, sizeof(marker))) {
2076251881Speter		log_peer_warnx(&peer->conf, "sync error");
2077251881Speter		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL);
2078251881Speter		bgp_fsm(peer, EVNT_CON_FATAL);
2079251881Speter		return (-1);
2080251881Speter	}
2081251881Speter	p += MSGSIZE_HEADER_MARKER;
2082251881Speter
2083251881Speter	memcpy(&olen, p, 2);
2084251881Speter	*len = ntohs(olen);
2085251881Speter	p += 2;
2086251881Speter	memcpy(type, p, 1);
2087251881Speter
2088251881Speter	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
2089251881Speter		log_peer_warnx(&peer->conf,
2090251881Speter		    "received message: illegal length: %u byte", *len);
2091251881Speter		session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2092251881Speter		    &olen, sizeof(olen));
2093251881Speter		bgp_fsm(peer, EVNT_CON_FATAL);
2094251881Speter		return (-1);
2095251881Speter	}
2096251881Speter
2097251881Speter	switch (*type) {
2098251881Speter	case OPEN:
2099251881Speter		if (*len < MSGSIZE_OPEN_MIN) {
2100251881Speter			log_peer_warnx(&peer->conf,
2101251881Speter			    "received OPEN: illegal len: %u byte", *len);
2102251881Speter			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2103251881Speter			    &olen, sizeof(olen));
2104251881Speter			bgp_fsm(peer, EVNT_CON_FATAL);
2105251881Speter			return (-1);
2106251881Speter		}
2107251881Speter		break;
2108251881Speter	case NOTIFICATION:
2109251881Speter		if (*len < MSGSIZE_NOTIFICATION_MIN) {
2110251881Speter			log_peer_warnx(&peer->conf,
2111251881Speter			    "received NOTIFICATION: illegal len: %u byte",
2112251881Speter			    *len);
2113251881Speter			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2114251881Speter			    &olen, sizeof(olen));
2115251881Speter			bgp_fsm(peer, EVNT_CON_FATAL);
2116251881Speter			return (-1);
2117251881Speter		}
2118251881Speter		break;
2119251881Speter	case UPDATE:
2120251881Speter		if (*len < MSGSIZE_UPDATE_MIN) {
2121251881Speter			log_peer_warnx(&peer->conf,
2122251881Speter			    "received UPDATE: illegal len: %u byte", *len);
2123251881Speter			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2124251881Speter			    &olen, sizeof(olen));
2125251881Speter			bgp_fsm(peer, EVNT_CON_FATAL);
2126251881Speter			return (-1);
2127251881Speter		}
2128251881Speter		break;
2129251881Speter	case KEEPALIVE:
2130251881Speter		if (*len != MSGSIZE_KEEPALIVE) {
2131251881Speter			log_peer_warnx(&peer->conf,
2132251881Speter			    "received KEEPALIVE: illegal len: %u byte", *len);
2133251881Speter			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2134251881Speter			    &olen, sizeof(olen));
2135251881Speter			bgp_fsm(peer, EVNT_CON_FATAL);
2136262253Speter			return (-1);
2137251881Speter		}
2138251881Speter		break;
2139251881Speter	case RREFRESH:
2140251881Speter		if (*len < MSGSIZE_RREFRESH_MIN) {
2141251881Speter			log_peer_warnx(&peer->conf,
2142262253Speter			    "received RREFRESH: illegal len: %u byte", *len);
2143251881Speter			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2144251881Speter			    &olen, sizeof(olen));
2145251881Speter			bgp_fsm(peer, EVNT_CON_FATAL);
2146251881Speter			return (-1);
2147251881Speter		}
2148251881Speter		break;
2149251881Speter	default:
2150251881Speter		log_peer_warnx(&peer->conf,
2151251881Speter		    "received msg with unknown type %u", *type);
2152251881Speter		session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE,
2153251881Speter		    type, 1);
2154262253Speter		bgp_fsm(peer, EVNT_CON_FATAL);
2155251881Speter		return (-1);
2156251881Speter	}
2157251881Speter	return (0);
2158262253Speter}
2159251881Speter
2160251881Speterint
2161251881Speterparse_open(struct peer *peer)
2162262253Speter{
2163251881Speter	struct ibuf	 ibuf;
2164251881Speter	u_char		*p;
2165251881Speter	uint8_t		 version, rversion;
2166251881Speter	uint16_t	 short_as, msglen;
2167251881Speter	uint16_t	 holdtime, myholdtime;
2168251881Speter	uint32_t	 as, bgpid;
2169251881Speter	uint8_t		 optparamlen;
2170251881Speter
2171251881Speter	p = peer->rbuf->rptr;
2172251881Speter	p += MSGSIZE_HEADER_MARKER;
2173251881Speter	memcpy(&msglen, p, sizeof(msglen));
2174262253Speter	msglen = ntohs(msglen);
2175251881Speter
2176262253Speter	p = peer->rbuf->rptr;
2177251881Speter	p += MSGSIZE_HEADER;	/* header is already checked */
2178251881Speter	msglen -= MSGSIZE_HEADER;
2179251881Speter
2180251881Speter	/* XXX */
2181251881Speter	ibuf_from_buffer(&ibuf, p, msglen);
2182251881Speter
2183251881Speter	if (ibuf_get_n8(&ibuf, &version) == -1 ||
2184251881Speter	    ibuf_get_n16(&ibuf, &short_as) == -1 ||
2185251881Speter	    ibuf_get_n16(&ibuf, &holdtime) == -1 ||
2186251881Speter	    ibuf_get_n32(&ibuf, &bgpid) == -1 ||
2187251881Speter	    ibuf_get_n8(&ibuf, &optparamlen) == -1)
2188251881Speter		goto bad_len;
2189251881Speter
2190251881Speter	if (version != BGP_VERSION) {
2191251881Speter		log_peer_warnx(&peer->conf,
2192262253Speter		    "peer wants unrecognized version %u", version);
2193251881Speter		if (version > BGP_VERSION)
2194251881Speter			rversion = version - BGP_VERSION;
2195251881Speter		else
2196251881Speter			rversion = BGP_VERSION;
2197251881Speter		session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION,
2198251881Speter		    &rversion, sizeof(rversion));
2199251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2200251881Speter		return (-1);
2201251881Speter	}
2202251881Speter
2203251881Speter	as = peer->short_as = short_as;
2204251881Speter	if (as == 0) {
2205251881Speter		log_peer_warnx(&peer->conf,
2206251881Speter		    "peer requests unacceptable AS %u", as);
2207251881Speter		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2208251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2209251881Speter		return (-1);
2210251881Speter	}
2211251881Speter
2212251881Speter	if (holdtime && holdtime < peer->conf.min_holdtime) {
2213251881Speter		log_peer_warnx(&peer->conf,
2214251881Speter		    "peer requests unacceptable holdtime %u", holdtime);
2215251881Speter		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL);
2216251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2217251881Speter		return (-1);
2218251881Speter	}
2219251881Speter
2220251881Speter	myholdtime = peer->conf.holdtime;
2221251881Speter	if (!myholdtime)
2222251881Speter		myholdtime = conf->holdtime;
2223251881Speter	if (holdtime < myholdtime)
2224251881Speter		peer->holdtime = holdtime;
2225251881Speter	else
2226251881Speter		peer->holdtime = myholdtime;
2227251881Speter
2228251881Speter	/* check bgpid for validity - just disallow 0 */
2229251881Speter	if (bgpid == 0) {
2230251881Speter		log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable");
2231251881Speter		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2232251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2233251881Speter		return (-1);
2234269847Speter	}
2235251881Speter	peer->remote_bgpid = bgpid;
2236251881Speter
2237251881Speter	if (optparamlen != 0) {
2238251881Speter		struct ibuf oparams, op;
2239251881Speter		uint8_t ext_type, op_type;
2240262253Speter		uint16_t ext_len, op_len;
2241251881Speter
2242251881Speter		ibuf_from_ibuf(&oparams, &ibuf);
2243251881Speter
2244251881Speter		/* check for RFC9072 encoding */
2245251881Speter		if (ibuf_get_n8(&oparams, &ext_type) == -1)
2246251881Speter			goto bad_len;
2247251881Speter		if (ext_type == OPT_PARAM_EXT_LEN) {
2248251881Speter			if (ibuf_get_n16(&oparams, &ext_len) == -1)
2249251881Speter				goto bad_len;
2250251881Speter			/* skip RFC9072 header */
2251251881Speter			if (ibuf_skip(&ibuf, 3) == -1)
2252251881Speter				goto bad_len;
2253251881Speter		} else {
2254251881Speter			ext_len = optparamlen;
2255251881Speter			ibuf_rewind(&oparams);
2256251881Speter		}
2257251881Speter
2258251881Speter		if (ibuf_truncate(&oparams, ext_len) == -1 ||
2259251881Speter		    ibuf_skip(&ibuf, ext_len) == -1)
2260251881Speter			goto bad_len;
2261251881Speter
2262251881Speter		while (ibuf_size(&oparams) > 0) {
2263251881Speter			if (ibuf_get_n8(&oparams, &op_type) == -1)
2264251881Speter				goto bad_len;
2265251881Speter
2266251881Speter			if (ext_type == OPT_PARAM_EXT_LEN) {
2267251881Speter				if (ibuf_get_n16(&oparams, &op_len) == -1)
2268251881Speter					goto bad_len;
2269251881Speter			} else {
2270251881Speter				uint8_t tmp;
2271251881Speter				if (ibuf_get_n8(&oparams, &tmp) == -1)
2272251881Speter					goto bad_len;
2273251881Speter				op_len = tmp;
2274251881Speter			}
2275251881Speter
2276262253Speter			if (ibuf_get_ibuf(&oparams, op_len, &op) == -1)
2277251881Speter				goto bad_len;
2278251881Speter
2279251881Speter			switch (op_type) {
2280251881Speter			case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2281251881Speter				if (parse_capabilities(peer, &op, &as) == -1) {
2282251881Speter					session_notification(peer, ERR_OPEN, 0,
2283251881Speter					    NULL);
2284262253Speter					change_state(peer, STATE_IDLE,
2285251881Speter					    EVNT_RCVD_OPEN);
2286251881Speter					return (-1);
2287251881Speter				}
2288251881Speter				break;
2289251881Speter			case OPT_PARAM_AUTH:			/* deprecated */
2290251881Speter			default:
2291251881Speter				/*
2292251881Speter				 * unsupported type
2293251881Speter				 * the RFCs tell us to leave the data section
2294262253Speter				 * empty and notify the peer with ERR_OPEN,
2295251881Speter				 * ERR_OPEN_OPT. How the peer should know
2296262253Speter				 * _which_ optional parameter we don't support
2297251881Speter				 * is beyond me.
2298251881Speter				 */
2299251881Speter				log_peer_warnx(&peer->conf,
2300251881Speter				    "received OPEN message with unsupported "
2301251881Speter				    "optional parameter: type %u", op_type);
2302251881Speter				session_notification(peer, ERR_OPEN,
2303251881Speter				    ERR_OPEN_OPT, NULL);
2304251881Speter				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2305251881Speter				return (-1);
2306262253Speter			}
2307251881Speter		}
2308262253Speter	}
2309251881Speter
2310251881Speter	if (ibuf_size(&ibuf) != 0) {
2311251881Speter bad_len:
2312251881Speter		log_peer_warnx(&peer->conf,
2313251881Speter		    "corrupt OPEN message received: length mismatch");
2314251881Speter		session_notification(peer, ERR_OPEN, 0, NULL);
2315251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2316251881Speter		return (-1);
2317251881Speter	}
2318251881Speter
2319251881Speter	/* if remote-as is zero and it's a cloned neighbor, accept any */
2320251881Speter	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2321251881Speter		peer->conf.remote_as = as;
2322251881Speter		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2323251881Speter		if (!peer->conf.ebgp)
2324251881Speter			/* force enforce_as off for iBGP sessions */
2325251881Speter			peer->conf.enforce_as = ENFORCE_AS_OFF;
2326251881Speter	}
2327251881Speter
2328251881Speter	if (peer->conf.remote_as != as) {
2329251881Speter		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2330262253Speter		    log_as(as));
2331251881Speter		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2332269847Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2333251881Speter		return (-1);
2334251881Speter	}
2335251881Speter
2336251881Speter	/* on iBGP sessions check for bgpid collision */
2337251881Speter	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2338251881Speter		struct in_addr ina;
2339251881Speter		ina.s_addr = htonl(bgpid);
2340251881Speter		log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours",
2341251881Speter		    inet_ntoa(ina));
2342262253Speter		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2343251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2344251881Speter		return (-1);
2345251881Speter	}
2346251881Speter
2347251881Speter	if (capa_neg_calc(peer) == -1) {
2348251881Speter		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2349251881Speter		return (-1);
2350251881Speter	}
2351251881Speter
2352251881Speter	return (0);
2353251881Speter}
2354251881Speter
2355251881Speterint
2356251881Speterparse_update(struct peer *peer)
2357251881Speter{
2358262253Speter	u_char		*p;
2359251881Speter	uint16_t	 datalen;
2360251881Speter
2361251881Speter	/*
2362251881Speter	 * we pass the message verbatim to the rde.
2363251881Speter	 * in case of errors the whole session is reset with a
2364251881Speter	 * notification anyway, we only need to know the peer
2365251881Speter	 */
2366251881Speter	p = peer->rbuf->rptr;
2367251881Speter	p += MSGSIZE_HEADER_MARKER;
2368251881Speter	memcpy(&datalen, p, sizeof(datalen));
2369251881Speter	datalen = ntohs(datalen);
2370251881Speter
2371251881Speter	p = peer->rbuf->rptr;
2372251881Speter	p += MSGSIZE_HEADER;	/* header is already checked */
2373251881Speter	datalen -= MSGSIZE_HEADER;
2374262253Speter
2375251881Speter	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2376251881Speter		return (-1);
2377251881Speter
2378251881Speter	return (0);
2379251881Speter}
2380251881Speter
2381251881Speterint
2382251881Speterparse_rrefresh(struct peer *peer)
2383251881Speter{
2384251881Speter	struct route_refresh rr;
2385251881Speter	struct ibuf ibuf;
2386251881Speter	uint16_t afi, datalen;
2387251881Speter	uint8_t aid, safi, subtype;
2388262253Speter	u_char *p;
2389251881Speter
2390251881Speter	p = peer->rbuf->rptr;
2391251881Speter	p += MSGSIZE_HEADER_MARKER;
2392251881Speter	memcpy(&datalen, p, sizeof(datalen));
2393251881Speter	datalen = ntohs(datalen);
2394251881Speter
2395251881Speter	p = peer->rbuf->rptr;
2396251881Speter	p += MSGSIZE_HEADER;	/* header is already checked */
2397251881Speter	datalen -= MSGSIZE_HEADER;
2398251881Speter
2399251881Speter	/* XXX */
2400262253Speter	ibuf_from_buffer(&ibuf, p, datalen);
2401251881Speter
2402251881Speter	if (ibuf_get_n16(&ibuf, &afi) == -1 ||
2403251881Speter	    ibuf_get_n8(&ibuf, &subtype) == -1 ||
2404251881Speter	    ibuf_get_n8(&ibuf, &safi) == -1) {
2405251881Speter		/* minimum size checked in session_process_msg() */
2406251881Speter		fatalx("%s: message too small", __func__);
2407251881Speter	}
2408251881Speter
2409251881Speter	/* check subtype if peer announced enhanced route refresh */
2410251881Speter	if (peer->capa.neg.enhanced_rr) {
2411251881Speter		switch (subtype) {
2412251881Speter		case ROUTE_REFRESH_REQUEST:
2413251881Speter			/* no ORF support, so no oversized RREFRESH msgs */
2414251881Speter			if (datalen != MSGSIZE_RREFRESH) {
2415251881Speter				log_peer_warnx(&peer->conf,
2416251881Speter				    "received RREFRESH: illegal len: %u byte",
2417251881Speter				    datalen);
2418251881Speter				datalen = htons(datalen);
2419251881Speter				session_notification_data(peer, ERR_HEADER,
2420251881Speter				    ERR_HDR_LEN, &datalen, sizeof(datalen));
2421251881Speter				bgp_fsm(peer, EVNT_CON_FATAL);
2422251881Speter				return (-1);
2423251881Speter			}
2424251881Speter			peer->stats.refresh_rcvd_req++;
2425251881Speter			break;
2426251881Speter		case ROUTE_REFRESH_BEGIN_RR:
2427251881Speter		case ROUTE_REFRESH_END_RR:
2428251881Speter			/* special handling for RFC7313 */
2429251881Speter			if (datalen != MSGSIZE_RREFRESH) {
2430251881Speter				log_peer_warnx(&peer->conf,
2431251881Speter				    "received RREFRESH: illegal len: %u byte",
2432251881Speter				    datalen);
2433251881Speter				ibuf_rewind(&ibuf);
2434251881Speter				session_notification(peer, ERR_RREFRESH,
2435251881Speter				    ERR_RR_INV_LEN, &ibuf);
2436251881Speter				bgp_fsm(peer, EVNT_CON_FATAL);
2437251881Speter				return (-1);
2438251881Speter			}
2439251881Speter			if (subtype == ROUTE_REFRESH_BEGIN_RR)
2440251881Speter				peer->stats.refresh_rcvd_borr++;
2441251881Speter			else
2442251881Speter				peer->stats.refresh_rcvd_eorr++;
2443251881Speter			break;
2444262253Speter		default:
2445251881Speter			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2446251881Speter			    "bad subtype %d", subtype);
2447251881Speter			return (0);
2448251881Speter		}
2449251881Speter	} else {
2450251881Speter		/* force subtype to default */
2451251881Speter		subtype = ROUTE_REFRESH_REQUEST;
2452251881Speter		peer->stats.refresh_rcvd_req++;
2453251881Speter	}
2454251881Speter
2455251881Speter	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2456251881Speter	if (afi2aid(afi, safi, &aid) == -1) {
2457251881Speter		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2458251881Speter		    "invalid afi/safi pair");
2459251881Speter		return (0);
2460262253Speter	}
2461251881Speter
2462262253Speter	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2463251881Speter		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2464251881Speter		return (0);
2465251881Speter	}
2466251881Speter
2467251881Speter	rr.aid = aid;
2468262253Speter	rr.subtype = subtype;
2469251881Speter
2470251881Speter	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2471251881Speter		return (-1);
2472251881Speter
2473251881Speter	return (0);
2474251881Speter}
2475251881Speter
2476251881Spetervoid
2477251881Speterparse_notification(struct peer *peer)
2478251881Speter{
2479251881Speter	struct ibuf	 ibuf;
2480262253Speter	u_char		*p;
2481251881Speter	uint16_t	 datalen;
2482251881Speter	uint8_t		 errcode, subcode;
2483251881Speter	uint8_t		 reason_len;
2484251881Speter
2485251881Speter	/* just log */
2486251881Speter	p = peer->rbuf->rptr;
2487251881Speter	p += MSGSIZE_HEADER_MARKER;
2488251881Speter	memcpy(&datalen, p, sizeof(datalen));
2489251881Speter	datalen = ntohs(datalen);
2490251881Speter
2491251881Speter	p = peer->rbuf->rptr;
2492251881Speter	p += MSGSIZE_HEADER;	/* header is already checked */
2493251881Speter	datalen -= MSGSIZE_HEADER;
2494251881Speter
2495251881Speter	/* XXX */
2496262253Speter	ibuf_from_buffer(&ibuf, p, datalen);
2497251881Speter
2498251881Speter	if (ibuf_get_n8(&ibuf, &errcode) == -1 ||
2499251881Speter	    ibuf_get_n8(&ibuf, &subcode) == -1) {
2500251881Speter		log_peer_warnx(&peer->conf, "received bad notification");
2501251881Speter		goto done;
2502251881Speter	}
2503251881Speter
2504251881Speter	peer->errcnt++;
2505251881Speter	peer->stats.last_rcvd_errcode = errcode;
2506251881Speter	peer->stats.last_rcvd_suberr = subcode;
2507251881Speter
2508251881Speter	log_notification(peer, errcode, subcode, &ibuf, "received");
2509251881Speter
2510251881Speter	CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX);
2511251881Speter	memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason));
2512251881Speter	if (errcode == ERR_CEASE &&
2513251881Speter	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2514251881Speter	     subcode == ERR_CEASE_ADMIN_RESET)) {
2515251881Speter		/* check if shutdown reason is included */
2516251881Speter		if (ibuf_get_n8(&ibuf, &reason_len) != -1 && reason_len != 0) {
2517251881Speter			if (ibuf_get(&ibuf, peer->stats.last_reason,
2518251881Speter			    reason_len) == -1)
2519251881Speter				log_peer_warnx(&peer->conf,
2520251881Speter				    "received truncated shutdown reason");
2521251881Speter		}
2522251881Speter	}
2523251881Speter
2524251881Speterdone:
2525251881Speter	change_state(peer, STATE_IDLE, EVNT_RCVD_NOTIFICATION);
2526251881Speter}
2527251881Speter
2528251881Speterint
2529251881Speterparse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
2530251881Speter{
2531251881Speter	struct ibuf	 capabuf;
2532251881Speter	uint16_t	 afi, gr_header;
2533251881Speter	uint8_t		 capa_code, capa_len;
2534251881Speter	uint8_t		 safi, aid, role, flags;
2535251881Speter
2536251881Speter	while (ibuf_size(buf) > 0) {
2537251881Speter		if (ibuf_get_n8(buf, &capa_code) == -1 ||
2538251881Speter		    ibuf_get_n8(buf, &capa_len) == -1) {
2539251881Speter			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2540251881Speter			    "length: too short");
2541251881Speter			return (-1);
2542251881Speter		}
2543251881Speter		if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) {
2544251881Speter			log_peer_warnx(&peer->conf,
2545251881Speter			    "Received bad capabilities attr length: "
2546251881Speter			    "len %zu smaller than capa_len %u",
2547251881Speter			    ibuf_size(buf), capa_len);
2548251881Speter			return (-1);
2549251881Speter		}
2550251881Speter
2551251881Speter		switch (capa_code) {
2552251881Speter		case CAPA_MP:			/* RFC 4760 */
2553251881Speter			if (capa_len != 4 ||
2554251881Speter			    ibuf_get_n16(&capabuf, &afi) == -1 ||
2555251881Speter			    ibuf_skip(&capabuf, 1) == -1 ||
2556251881Speter			    ibuf_get_n8(&capabuf, &safi) == -1) {
2557251881Speter				log_peer_warnx(&peer->conf,
2558251881Speter				    "Received bad multi protocol capability");
2559251881Speter				break;
2560251881Speter			}
2561251881Speter			if (afi2aid(afi, safi, &aid) == -1) {
2562251881Speter				log_peer_warnx(&peer->conf,
2563251881Speter				    "Received multi protocol capability: "
2564251881Speter				    " unknown AFI %u, safi %u pair",
2565251881Speter				    afi, safi);
2566251881Speter				peer->capa.peer.mp[AID_UNSPEC] = 1;
2567251881Speter				break;
2568251881Speter			}
2569251881Speter			peer->capa.peer.mp[aid] = 1;
2570251881Speter			break;
2571251881Speter		case CAPA_REFRESH:
2572251881Speter			peer->capa.peer.refresh = 1;
2573251881Speter			break;
2574251881Speter		case CAPA_ROLE:
2575251881Speter			if (capa_len != 1 ||
2576251881Speter			    ibuf_get_n8(&capabuf, &role) == -1) {
2577251881Speter				log_peer_warnx(&peer->conf,
2578251881Speter				    "Received bad role capability");
2579251881Speter				break;
2580251881Speter			}
2581251881Speter			if (!peer->conf.ebgp) {
2582251881Speter				log_peer_warnx(&peer->conf,
2583251881Speter				    "Received role capability on iBGP session");
2584251881Speter				break;
2585251881Speter			}
2586251881Speter			peer->capa.peer.policy = 1;
2587251881Speter			peer->remote_role = capa2role(role);
2588251881Speter			break;
2589251881Speter		case CAPA_RESTART:
2590251881Speter			if (capa_len == 2) {
2591251881Speter				/* peer only supports EoR marker */
2592251881Speter				peer->capa.peer.grestart.restart = 1;
2593251881Speter				peer->capa.peer.grestart.timeout = 0;
2594251881Speter				break;
2595251881Speter			} else if (capa_len % 4 != 2) {
2596251881Speter				log_peer_warnx(&peer->conf,
2597251881Speter				    "Bad graceful restart capability");
2598251881Speter				peer->capa.peer.grestart.restart = 0;
2599251881Speter				peer->capa.peer.grestart.timeout = 0;
2600251881Speter				break;
2601251881Speter			}
2602251881Speter
2603251881Speter			if (ibuf_get_n16(&capabuf, &gr_header) == -1) {
2604251881Speter bad_gr_restart:
2605251881Speter				log_peer_warnx(&peer->conf,
2606251881Speter				    "Bad graceful restart capability");
2607251881Speter				peer->capa.peer.grestart.restart = 0;
2608251881Speter				peer->capa.peer.grestart.timeout = 0;
2609251881Speter				break;
2610251881Speter			}
2611251881Speter
2612251881Speter			peer->capa.peer.grestart.timeout =
2613251881Speter			    gr_header & CAPA_GR_TIMEMASK;
2614251881Speter			if (peer->capa.peer.grestart.timeout == 0) {
2615251881Speter				log_peer_warnx(&peer->conf, "Received "
2616251881Speter				    "graceful restart with zero timeout");
2617251881Speter				peer->capa.peer.grestart.restart = 0;
2618251881Speter				break;
2619251881Speter			}
2620251881Speter
2621251881Speter			while (ibuf_size(&capabuf) > 0) {
2622251881Speter				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2623251881Speter				    ibuf_get_n8(&capabuf, &safi) == -1 ||
2624251881Speter				    ibuf_get_n8(&capabuf, &flags) == -1)
2625251881Speter					goto bad_gr_restart;
2626251881Speter				if (afi2aid(afi, safi, &aid) == -1) {
2627251881Speter					log_peer_warnx(&peer->conf,
2628251881Speter					    "Received graceful restart capa: "
2629251881Speter					    " unknown AFI %u, safi %u pair",
2630251881Speter					    afi, safi);
2631251881Speter					continue;
2632251881Speter				}
2633251881Speter				peer->capa.peer.grestart.flags[aid] |=
2634251881Speter				    CAPA_GR_PRESENT;
2635251881Speter				if (flags & CAPA_GR_F_FLAG)
2636251881Speter					peer->capa.peer.grestart.flags[aid] |=
2637251881Speter					    CAPA_GR_FORWARD;
2638251881Speter				if (gr_header & CAPA_GR_R_FLAG)
2639251881Speter					peer->capa.peer.grestart.flags[aid] |=
2640251881Speter					    CAPA_GR_RESTART;
2641251881Speter				peer->capa.peer.grestart.restart = 2;
2642251881Speter			}
2643251881Speter			break;
2644251881Speter		case CAPA_AS4BYTE:
2645251881Speter			if (capa_len != 4 ||
2646251881Speter			    ibuf_get_n32(&capabuf, as) == -1) {
2647251881Speter				log_peer_warnx(&peer->conf,
2648251881Speter				    "Received bad AS4BYTE capability");
2649251881Speter				peer->capa.peer.as4byte = 0;
2650251881Speter				break;
2651251881Speter			}
2652251881Speter			if (*as == 0) {
2653251881Speter				log_peer_warnx(&peer->conf,
2654251881Speter				    "peer requests unacceptable AS %u", *as);
2655251881Speter				session_notification(peer, ERR_OPEN,
2656251881Speter				    ERR_OPEN_AS, NULL);
2657251881Speter				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2658251881Speter				return (-1);
2659251881Speter			}
2660251881Speter			peer->capa.peer.as4byte = 1;
2661251881Speter			break;
2662262253Speter		case CAPA_ADD_PATH:
2663251881Speter			if (capa_len % 4 != 0) {
2664251881Speter bad_add_path:
2665251881Speter				log_peer_warnx(&peer->conf,
2666251881Speter				    "Received bad ADD-PATH capability");
2667251881Speter				memset(peer->capa.peer.add_path, 0,
2668251881Speter				    sizeof(peer->capa.peer.add_path));
2669251881Speter				break;
2670251881Speter			}
2671251881Speter			while (ibuf_size(&capabuf) > 0) {
2672251881Speter				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2673251881Speter				    ibuf_get_n8(&capabuf, &safi) == -1 ||
2674251881Speter				    ibuf_get_n8(&capabuf, &flags) == -1)
2675251881Speter					goto bad_add_path;
2676251881Speter				if (afi2aid(afi, safi, &aid) == -1) {
2677251881Speter					log_peer_warnx(&peer->conf,
2678251881Speter					    "Received ADD-PATH capa: "
2679251881Speter					    " unknown AFI %u, safi %u pair",
2680251881Speter					    afi, safi);
2681251881Speter					memset(peer->capa.peer.add_path, 0,
2682251881Speter					    sizeof(peer->capa.peer.add_path));
2683251881Speter					break;
2684251881Speter				}
2685251881Speter				if (flags & ~CAPA_AP_BIDIR) {
2686251881Speter					log_peer_warnx(&peer->conf,
2687251881Speter					    "Received ADD-PATH capa: "
2688251881Speter					    " bad flags %x", flags);
2689251881Speter					memset(peer->capa.peer.add_path, 0,
2690262253Speter					    sizeof(peer->capa.peer.add_path));
2691251881Speter					break;
2692251881Speter				}
2693251881Speter				peer->capa.peer.add_path[aid] = flags;
2694251881Speter			}
2695251881Speter			break;
2696251881Speter		case CAPA_ENHANCED_RR:
2697251881Speter			peer->capa.peer.enhanced_rr = 1;
2698251881Speter			break;
2699251881Speter		default:
2700251881Speter			break;
2701251881Speter		}
2702251881Speter	}
2703251881Speter
2704262253Speter	return (0);
2705251881Speter}
2706262253Speter
2707251881Speterint
2708262253Spetercapa_neg_calc(struct peer *p)
2709251881Speter{
2710262253Speter	struct ibuf *ebuf;
2711251881Speter	uint8_t	i, hasmp = 0, capa_code, capa_len, capa_aid = 0;
2712251881Speter
2713251881Speter	/* a capability is accepted only if both sides announced it */
2714251881Speter
2715251881Speter	p->capa.neg.refresh =
2716251881Speter	    (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2717251881Speter	p->capa.neg.enhanced_rr =
2718251881Speter	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2719251881Speter	p->capa.neg.as4byte =
2720251881Speter	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2721251881Speter
2722251881Speter	/* MP: both side must agree on the AFI,SAFI pair */
2723251881Speter	if (p->capa.peer.mp[AID_UNSPEC])
2724262253Speter		hasmp = 1;
2725251881Speter	for (i = AID_MIN; i < AID_MAX; i++) {
2726251881Speter		if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2727251881Speter			p->capa.neg.mp[i] = 1;
2728262253Speter		else
2729251881Speter			p->capa.neg.mp[i] = 0;
2730251881Speter		if (p->capa.ann.mp[i] || p->capa.peer.mp[i])
2731251881Speter			hasmp = 1;
2732262253Speter	}
2733251881Speter	/* if no MP capability present default to IPv4 unicast mode */
2734251881Speter	if (!hasmp)
2735251881Speter		p->capa.neg.mp[AID_INET] = 1;
2736251881Speter
2737251881Speter	/*
2738251881Speter	 * graceful restart: the peer capabilities are of interest here.
2739251881Speter	 * It is necessary to compare the new values with the previous ones
2740251881Speter	 * and act accordingly. AFI/SAFI that are not part in the MP capability
2741251881Speter	 * are treated as not being present.
2742251881Speter	 * Also make sure that a flush happens if the session stopped
2743251881Speter	 * supporting graceful restart.
2744251881Speter	 */
2745251881Speter
2746262253Speter	for (i = AID_MIN; i < AID_MAX; i++) {
2747251881Speter		int8_t	negflags;
2748251881Speter
2749251881Speter		/* disable GR if the AFI/SAFI is not present */
2750251881Speter		if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2751251881Speter		    p->capa.neg.mp[i] == 0))
2752253734Speter			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2753251881Speter		/* look at current GR state and decide what to do */
2754251881Speter		negflags = p->capa.neg.grestart.flags[i];
2755251881Speter		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2756251881Speter		if (negflags & CAPA_GR_RESTARTING) {
2757251881Speter			if (p->capa.ann.grestart.restart != 0 &&
2758251881Speter			    p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2759251881Speter				p->capa.neg.grestart.flags[i] |=
2760251881Speter				    CAPA_GR_RESTARTING;
2761251881Speter			} else {
2762251881Speter				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2763251881Speter				    &i, sizeof(i)) == -1) {
2764251881Speter					log_peer_warnx(&p->conf,
2765251881Speter					    "imsg send failed");
2766251881Speter					return (-1);
2767251881Speter				}
2768251881Speter				log_peer_warnx(&p->conf, "graceful restart of "
2769251881Speter				    "%s, not restarted, flushing", aid2str(i));
2770251881Speter			}
2771251881Speter		}
2772251881Speter	}
2773251881Speter	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2774251881Speter	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2775251881Speter	if (p->capa.ann.grestart.restart == 0)
2776251881Speter		p->capa.neg.grestart.restart = 0;
2777251881Speter
2778251881Speter	/*
2779251881Speter	 * ADD-PATH: set only those bits where both sides agree.
2780251881Speter	 * For this compare our send bit with the recv bit from the peer
2781251881Speter	 * and vice versa.
2782251881Speter	 * The flags are stored from this systems view point.
2783251881Speter	 * At index 0 the flags are set if any per-AID flag is set.
2784269847Speter	 */
2785251881Speter	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2786251881Speter	for (i = AID_MIN; i < AID_MAX; i++) {
2787251881Speter		if (p->capa.neg.mp[i] == 0)
2788251881Speter			continue;
2789251881Speter		if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2790251881Speter		    (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2791251881Speter			p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2792251881Speter			p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2793251881Speter		}
2794251881Speter		if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2795251881Speter		    (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2796251881Speter			p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2797251881Speter			p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2798251881Speter		}
2799251881Speter	}
2800251881Speter
2801251881Speter	/*
2802251881Speter	 * Open policy: check that the policy is sensible.
2803251881Speter	 *
2804251881Speter	 * Make sure that the roles match and set the negotiated capability
2805251881Speter	 * to the role of the peer. So the RDE can inject the OTC attribute.
2806251881Speter	 * See RFC 9234, section 4.2.
2807251881Speter	 * These checks should only happen on ebgp sessions.
2808262253Speter	 */
2809251881Speter	if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 &&
2810251881Speter	    p->conf.ebgp) {
2811251881Speter		switch (p->conf.role) {
2812251881Speter		case ROLE_PROVIDER:
2813251881Speter			if (p->remote_role != ROLE_CUSTOMER)
2814251881Speter				goto policyfail;
2815251881Speter			break;
2816251881Speter		case ROLE_RS:
2817251881Speter			if (p->remote_role != ROLE_RS_CLIENT)
2818251881Speter				goto policyfail;
2819251881Speter			break;
2820251881Speter		case ROLE_RS_CLIENT:
2821251881Speter			if (p->remote_role != ROLE_RS)
2822251881Speter				goto policyfail;
2823251881Speter			break;
2824251881Speter		case ROLE_CUSTOMER:
2825251881Speter			if (p->remote_role != ROLE_PROVIDER)
2826251881Speter				goto policyfail;
2827251881Speter			break;
2828251881Speter		case ROLE_PEER:
2829251881Speter			if (p->remote_role != ROLE_PEER)
2830251881Speter				goto policyfail;
2831251881Speter			break;
2832269847Speter		default:
2833251881Speter policyfail:
2834269847Speter			log_peer_warnx(&p->conf, "open policy role mismatch: "
2835251881Speter			    "our role %s, their role %s",
2836269847Speter			    log_policy(p->conf.role),
2837251881Speter			    log_policy(p->remote_role));
2838269847Speter			session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2839251881Speter			return (-1);
2840269847Speter		}
2841251881Speter		p->capa.neg.policy = 1;
2842269847Speter	}
2843251881Speter
2844251881Speter	/* enforce presence of open policy role capability */
2845251881Speter	if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 &&
2846251881Speter	    p->conf.ebgp) {
2847251881Speter		log_peer_warnx(&p->conf, "open policy role enforced but "
2848251881Speter		    "not present");
2849251881Speter		session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2850251881Speter		return (-1);
2851251881Speter	}
2852253734Speter
2853251881Speter	/* enforce presence of other capabilities */
2854262253Speter	if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) {
2855251881Speter		capa_code = CAPA_REFRESH;
2856251881Speter		capa_len = 0;
2857251881Speter		goto fail;
2858251881Speter	}
2859251881Speter	if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) {
2860251881Speter		capa_code = CAPA_ENHANCED_RR;
2861251881Speter		capa_len = 0;
2862251881Speter		goto fail;
2863251881Speter	}
2864251881Speter	if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) {
2865251881Speter		capa_code = CAPA_AS4BYTE;
2866251881Speter		capa_len = 4;
2867251881Speter		goto fail;
2868251881Speter	}
2869251881Speter	if (p->capa.ann.grestart.restart == 2 &&
2870251881Speter	    p->capa.neg.grestart.restart == 0) {
2871251881Speter		capa_code = CAPA_RESTART;
2872262253Speter		capa_len = 2;
2873251881Speter		goto fail;
2874251881Speter	}
2875251881Speter	for (i = AID_MIN; i < AID_MAX; i++) {
2876251881Speter		if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) {
2877251881Speter			capa_code = CAPA_MP;
2878251881Speter			capa_len = 4;
2879251881Speter			capa_aid = i;
2880251881Speter			goto fail;
2881251881Speter		}
2882262253Speter	}
2883251881Speter
2884262253Speter	for (i = AID_MIN; i < AID_MAX; i++) {
2885251881Speter		if (p->capa.neg.mp[i] == 0)
2886251881Speter			continue;
2887251881Speter		if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) &&
2888251881Speter		    (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) {
2889251881Speter			capa_code = CAPA_ADD_PATH;
2890251881Speter			capa_len = 4;
2891251881Speter			capa_aid = i;
2892251881Speter			goto fail;
2893251881Speter		}
2894251881Speter		if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) &&
2895		    (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) {
2896			capa_code = CAPA_ADD_PATH;
2897			capa_len = 4;
2898			capa_aid = i;
2899			goto fail;
2900		}
2901	}
2902
2903	return (0);
2904
2905 fail:
2906	if ((ebuf = ibuf_dynamic(2, 256)) == NULL)
2907		return (-1);
2908	/* best effort, no problem if it fails */
2909	session_capa_add(ebuf, capa_code, capa_len);
2910	if (capa_code == CAPA_MP)
2911		session_capa_add_mp(ebuf, capa_aid);
2912	else if (capa_code == CAPA_ADD_PATH)
2913		session_capa_add_afi(ebuf, capa_aid, 0);
2914	else if (capa_len > 0)
2915		ibuf_add_zero(ebuf, capa_len);
2916
2917	session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf);
2918	ibuf_free(ebuf);
2919	return (-1);
2920}
2921
2922void
2923session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt)
2924{
2925	struct imsg		 imsg;
2926	struct ibuf		 ibuf;
2927	struct mrt		 xmrt;
2928	struct route_refresh	 rr;
2929	struct mrt		*mrt;
2930	struct imsgbuf		*i;
2931	struct peer		*p;
2932	struct listen_addr	*la, *next, nla;
2933	struct session_dependon	 sdon;
2934	struct bgpd_config	 tconf;
2935	size_t			 len;
2936	uint32_t		 peerid;
2937	int			 n, fd, depend_ok, restricted;
2938	uint16_t		 t;
2939	uint8_t			 aid, errcode, subcode;
2940
2941	while (imsgbuf) {
2942		if ((n = imsg_get(imsgbuf, &imsg)) == -1)
2943			fatal("session_dispatch_imsg: imsg_get error");
2944
2945		if (n == 0)
2946			break;
2947
2948		peerid = imsg_get_id(&imsg);
2949		switch (imsg_get_type(&imsg)) {
2950		case IMSG_SOCKET_CONN:
2951		case IMSG_SOCKET_CONN_CTL:
2952			if (idx != PFD_PIPE_MAIN)
2953				fatalx("reconf request not from parent");
2954			if ((fd = imsg_get_fd(&imsg)) == -1) {
2955				log_warnx("expected to receive imsg fd to "
2956				    "RDE but didn't receive any");
2957				break;
2958			}
2959			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2960				fatal(NULL);
2961			imsg_init(i, fd);
2962			if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) {
2963				if (ibuf_rde) {
2964					log_warnx("Unexpected imsg connection "
2965					    "to RDE received");
2966					msgbuf_clear(&ibuf_rde->w);
2967					free(ibuf_rde);
2968				}
2969				ibuf_rde = i;
2970			} else {
2971				if (ibuf_rde_ctl) {
2972					log_warnx("Unexpected imsg ctl "
2973					    "connection to RDE received");
2974					msgbuf_clear(&ibuf_rde_ctl->w);
2975					free(ibuf_rde_ctl);
2976				}
2977				ibuf_rde_ctl = i;
2978			}
2979			break;
2980		case IMSG_RECONF_CONF:
2981			if (idx != PFD_PIPE_MAIN)
2982				fatalx("reconf request not from parent");
2983			if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1)
2984				fatal("imsg_get_data");
2985
2986			nconf = new_config();
2987			copy_config(nconf, &tconf);
2988			pending_reconf = 1;
2989			break;
2990		case IMSG_RECONF_PEER:
2991			if (idx != PFD_PIPE_MAIN)
2992				fatalx("reconf request not from parent");
2993			if ((p = calloc(1, sizeof(struct peer))) == NULL)
2994				fatal("new_peer");
2995			if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) ==
2996			    -1)
2997				fatal("imsg_get_data");
2998			p->state = p->prev_state = STATE_NONE;
2999			p->reconf_action = RECONF_REINIT;
3000			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
3001				fatalx("%s: peer tree is corrupt", __func__);
3002			break;
3003		case IMSG_RECONF_LISTENER:
3004			if (idx != PFD_PIPE_MAIN)
3005				fatalx("reconf request not from parent");
3006			if (nconf == NULL)
3007				fatalx("IMSG_RECONF_LISTENER but no config");
3008			if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1)
3009				fatal("imsg_get_data");
3010			TAILQ_FOREACH(la, conf->listen_addrs, entry)
3011				if (!la_cmp(la, &nla))
3012					break;
3013
3014			if (la == NULL) {
3015				if (nla.reconf != RECONF_REINIT)
3016					fatalx("king bula sez: "
3017					    "expected REINIT");
3018
3019				if ((nla.fd = imsg_get_fd(&imsg)) == -1)
3020					log_warnx("expected to receive fd for "
3021					    "%s but didn't receive any",
3022					    log_sockaddr((struct sockaddr *)
3023					    &nla.sa, nla.sa_len));
3024
3025				la = calloc(1, sizeof(struct listen_addr));
3026				if (la == NULL)
3027					fatal(NULL);
3028				memcpy(&la->sa, &nla.sa, sizeof(la->sa));
3029				la->flags = nla.flags;
3030				la->fd = nla.fd;
3031				la->reconf = RECONF_REINIT;
3032				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
3033				    entry);
3034			} else {
3035				if (nla.reconf != RECONF_KEEP)
3036					fatalx("king bula sez: expected KEEP");
3037				la->reconf = RECONF_KEEP;
3038			}
3039
3040			break;
3041		case IMSG_RECONF_CTRL:
3042			if (idx != PFD_PIPE_MAIN)
3043				fatalx("reconf request not from parent");
3044
3045			if (imsg_get_data(&imsg, &restricted,
3046			    sizeof(restricted)) == -1)
3047				fatal("imsg_get_data");
3048			if ((fd = imsg_get_fd(&imsg)) == -1) {
3049				log_warnx("expected to receive fd for control "
3050				    "socket but didn't receive any");
3051				break;
3052			}
3053			if (restricted) {
3054				control_shutdown(rcsock);
3055				rcsock = fd;
3056			} else {
3057				control_shutdown(csock);
3058				csock = fd;
3059			}
3060			break;
3061		case IMSG_RECONF_DRAIN:
3062			switch (idx) {
3063			case PFD_PIPE_ROUTE:
3064				if (nconf != NULL)
3065					fatalx("got unexpected %s from RDE",
3066					    "IMSG_RECONF_DONE");
3067				imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3068				    -1, NULL, 0);
3069				break;
3070			case PFD_PIPE_MAIN:
3071				if (nconf == NULL)
3072					fatalx("got unexpected %s from parent",
3073					    "IMSG_RECONF_DONE");
3074				imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3075				    -1, NULL, 0);
3076				break;
3077			default:
3078				fatalx("reconf request not from parent or RDE");
3079			}
3080			break;
3081		case IMSG_RECONF_DONE:
3082			if (idx != PFD_PIPE_MAIN)
3083				fatalx("reconf request not from parent");
3084			if (nconf == NULL)
3085				fatalx("got IMSG_RECONF_DONE but no config");
3086			copy_config(conf, nconf);
3087			merge_peers(conf, nconf);
3088
3089			/* delete old listeners */
3090			TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry,
3091			    next) {
3092				if (la->reconf == RECONF_NONE) {
3093					log_info("not listening on %s any more",
3094					    log_sockaddr((struct sockaddr *)
3095					    &la->sa, la->sa_len));
3096					TAILQ_REMOVE(conf->listen_addrs, la,
3097					    entry);
3098					close(la->fd);
3099					free(la);
3100				}
3101			}
3102
3103			/* add new listeners */
3104			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3105			    entry);
3106
3107			setup_listeners(listener_cnt);
3108			free_config(nconf);
3109			nconf = NULL;
3110			pending_reconf = 0;
3111			log_info("SE reconfigured");
3112			/*
3113			 * IMSG_RECONF_DONE is sent when the RDE drained
3114			 * the peer config sent in merge_peers().
3115			 */
3116			break;
3117		case IMSG_SESSION_DEPENDON:
3118			if (idx != PFD_PIPE_MAIN)
3119				fatalx("IFINFO message not from parent");
3120			if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1)
3121				fatalx("DEPENDON imsg with wrong len");
3122			depend_ok = sdon.depend_state;
3123
3124			RB_FOREACH(p, peer_head, &conf->peers)
3125				if (!strcmp(p->conf.if_depend, sdon.ifname)) {
3126					if (depend_ok && !p->depend_ok) {
3127						p->depend_ok = depend_ok;
3128						bgp_fsm(p, EVNT_START);
3129					} else if (!depend_ok && p->depend_ok) {
3130						p->depend_ok = depend_ok;
3131						session_stop(p,
3132						    ERR_CEASE_OTHER_CHANGE,
3133						    NULL);
3134					}
3135				}
3136			break;
3137		case IMSG_MRT_OPEN:
3138		case IMSG_MRT_REOPEN:
3139			if (idx != PFD_PIPE_MAIN)
3140				fatalx("mrt request not from parent");
3141			if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3142				log_warnx("mrt open, wrong imsg len");
3143				break;
3144			}
3145
3146			if ((xmrt.wbuf.fd = imsg_get_fd(&imsg)) == -1) {
3147				log_warnx("expected to receive fd for mrt dump "
3148				    "but didn't receive any");
3149				break;
3150			}
3151
3152			mrt = mrt_get(&mrthead, &xmrt);
3153			if (mrt == NULL) {
3154				/* new dump */
3155				mrt = calloc(1, sizeof(struct mrt));
3156				if (mrt == NULL)
3157					fatal("session_dispatch_imsg");
3158				memcpy(mrt, &xmrt, sizeof(struct mrt));
3159				TAILQ_INIT(&mrt->wbuf.bufs);
3160				LIST_INSERT_HEAD(&mrthead, mrt, entry);
3161			} else {
3162				/* old dump reopened */
3163				close(mrt->wbuf.fd);
3164				mrt->wbuf.fd = xmrt.wbuf.fd;
3165			}
3166			break;
3167		case IMSG_MRT_CLOSE:
3168			if (idx != PFD_PIPE_MAIN)
3169				fatalx("mrt request not from parent");
3170			if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3171				log_warnx("mrt close, wrong imsg len");
3172				break;
3173			}
3174
3175			mrt = mrt_get(&mrthead, &xmrt);
3176			if (mrt != NULL)
3177				mrt_done(mrt);
3178			break;
3179		case IMSG_CTL_KROUTE:
3180		case IMSG_CTL_KROUTE_ADDR:
3181		case IMSG_CTL_SHOW_NEXTHOP:
3182		case IMSG_CTL_SHOW_INTERFACE:
3183		case IMSG_CTL_SHOW_FIB_TABLES:
3184		case IMSG_CTL_SHOW_RTR:
3185		case IMSG_CTL_SHOW_TIMER:
3186			if (idx != PFD_PIPE_MAIN)
3187				fatalx("ctl kroute request not from parent");
3188			control_imsg_relay(&imsg, NULL);
3189			break;
3190		case IMSG_CTL_SHOW_NEIGHBOR:
3191			if (idx != PFD_PIPE_ROUTE_CTL)
3192				fatalx("ctl rib request not from RDE");
3193			p = getpeerbyid(conf, peerid);
3194			control_imsg_relay(&imsg, p);
3195			break;
3196		case IMSG_CTL_SHOW_RIB:
3197		case IMSG_CTL_SHOW_RIB_PREFIX:
3198		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3199		case IMSG_CTL_SHOW_RIB_ATTR:
3200		case IMSG_CTL_SHOW_RIB_MEM:
3201		case IMSG_CTL_SHOW_NETWORK:
3202		case IMSG_CTL_SHOW_FLOWSPEC:
3203		case IMSG_CTL_SHOW_SET:
3204			if (idx != PFD_PIPE_ROUTE_CTL)
3205				fatalx("ctl rib request not from RDE");
3206			control_imsg_relay(&imsg, NULL);
3207			break;
3208		case IMSG_CTL_END:
3209		case IMSG_CTL_RESULT:
3210			control_imsg_relay(&imsg, NULL);
3211			break;
3212		case IMSG_UPDATE:
3213			if (idx != PFD_PIPE_ROUTE)
3214				fatalx("update request not from RDE");
3215			len = imsg_get_len(&imsg);
3216			if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
3217			    len > MAX_PKTSIZE - MSGSIZE_HEADER ||
3218			    len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
3219				log_warnx("RDE sent invalid update");
3220			else
3221				session_update(peerid, &ibuf);
3222			break;
3223		case IMSG_UPDATE_ERR:
3224			if (idx != PFD_PIPE_ROUTE)
3225				fatalx("update request not from RDE");
3226			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3227				log_warnx("no such peer: id=%u", peerid);
3228				break;
3229			}
3230			if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
3231			    ibuf_get_n8(&ibuf, &errcode) == -1 ||
3232			    ibuf_get_n8(&ibuf, &subcode) == -1) {
3233				log_warnx("RDE sent invalid notification");
3234				break;
3235			}
3236
3237			session_notification(p, errcode, subcode, &ibuf);
3238			switch (errcode) {
3239			case ERR_CEASE:
3240				switch (subcode) {
3241				case ERR_CEASE_MAX_PREFIX:
3242				case ERR_CEASE_MAX_SENT_PREFIX:
3243					t = p->conf.max_out_prefix_restart;
3244					if (subcode == ERR_CEASE_MAX_PREFIX)
3245						t = p->conf.max_prefix_restart;
3246
3247					bgp_fsm(p, EVNT_STOP);
3248					if (t)
3249						timer_set(&p->timers,
3250						    Timer_IdleHold, 60 * t);
3251					break;
3252				default:
3253					bgp_fsm(p, EVNT_CON_FATAL);
3254					break;
3255				}
3256				break;
3257			default:
3258				bgp_fsm(p, EVNT_CON_FATAL);
3259				break;
3260			}
3261			break;
3262		case IMSG_REFRESH:
3263			if (idx != PFD_PIPE_ROUTE)
3264				fatalx("route refresh request not from RDE");
3265			if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) {
3266				log_warnx("RDE sent invalid refresh msg");
3267				break;
3268			}
3269			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3270				log_warnx("no such peer: id=%u", peerid);
3271				break;
3272			}
3273			if (rr.aid < AID_MIN || rr.aid >= AID_MAX)
3274				fatalx("IMSG_REFRESH: bad AID");
3275			session_rrefresh(p, rr.aid, rr.subtype);
3276			break;
3277		case IMSG_SESSION_RESTARTED:
3278			if (idx != PFD_PIPE_ROUTE)
3279				fatalx("session restart not from RDE");
3280			if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) {
3281				log_warnx("RDE sent invalid restart msg");
3282				break;
3283			}
3284			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3285				log_warnx("no such peer: id=%u", peerid);
3286				break;
3287			}
3288			if (aid < AID_MIN || aid >= AID_MAX)
3289				fatalx("IMSG_SESSION_RESTARTED: bad AID");
3290			if (p->capa.neg.grestart.flags[aid] &
3291			    CAPA_GR_RESTARTING) {
3292				log_peer_warnx(&p->conf,
3293				    "graceful restart of %s finished",
3294				    aid2str(aid));
3295				p->capa.neg.grestart.flags[aid] &=
3296				    ~CAPA_GR_RESTARTING;
3297				timer_stop(&p->timers, Timer_RestartTimeout);
3298
3299				/* signal back to RDE to cleanup stale routes */
3300				if (imsg_rde(IMSG_SESSION_RESTARTED,
3301				    peerid, &aid, sizeof(aid)) == -1)
3302					fatal("imsg_compose: "
3303					    "IMSG_SESSION_RESTARTED");
3304			}
3305			break;
3306		default:
3307			break;
3308		}
3309		imsg_free(&imsg);
3310	}
3311}
3312
3313int
3314la_cmp(struct listen_addr *a, struct listen_addr *b)
3315{
3316	struct sockaddr_in	*in_a, *in_b;
3317	struct sockaddr_in6	*in6_a, *in6_b;
3318
3319	if (a->sa.ss_family != b->sa.ss_family)
3320		return (1);
3321
3322	switch (a->sa.ss_family) {
3323	case AF_INET:
3324		in_a = (struct sockaddr_in *)&a->sa;
3325		in_b = (struct sockaddr_in *)&b->sa;
3326		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3327			return (1);
3328		if (in_a->sin_port != in_b->sin_port)
3329			return (1);
3330		break;
3331	case AF_INET6:
3332		in6_a = (struct sockaddr_in6 *)&a->sa;
3333		in6_b = (struct sockaddr_in6 *)&b->sa;
3334		if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3335		    sizeof(struct in6_addr)))
3336			return (1);
3337		if (in6_a->sin6_port != in6_b->sin6_port)
3338			return (1);
3339		break;
3340	default:
3341		fatal("king bula sez: unknown address family");
3342		/* NOTREACHED */
3343	}
3344
3345	return (0);
3346}
3347
3348struct peer *
3349getpeerbydesc(struct bgpd_config *c, const char *descr)
3350{
3351	struct peer	*p, *res = NULL;
3352	int		 match = 0;
3353
3354	RB_FOREACH(p, peer_head, &c->peers)
3355		if (!strcmp(p->conf.descr, descr)) {
3356			res = p;
3357			match++;
3358		}
3359
3360	if (match > 1)
3361		log_info("neighbor description \"%s\" not unique, request "
3362		    "aborted", descr);
3363
3364	if (match == 1)
3365		return (res);
3366	else
3367		return (NULL);
3368}
3369
3370struct peer *
3371getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3372{
3373	struct bgpd_addr addr;
3374	struct peer	*p, *newpeer, *loose = NULL;
3375	uint32_t	 id;
3376
3377	sa2addr(ip, &addr, NULL);
3378
3379	/* we might want a more effective way to find peers by IP */
3380	RB_FOREACH(p, peer_head, &c->peers)
3381		if (!p->conf.template &&
3382		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3383			return (p);
3384
3385	/* try template matching */
3386	RB_FOREACH(p, peer_head, &c->peers)
3387		if (p->conf.template &&
3388		    p->conf.remote_addr.aid == addr.aid &&
3389		    session_match_mask(p, &addr))
3390			if (loose == NULL || loose->conf.remote_masklen <
3391			    p->conf.remote_masklen)
3392				loose = p;
3393
3394	if (loose != NULL) {
3395		/* clone */
3396		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3397			fatal(NULL);
3398		memcpy(newpeer, loose, sizeof(struct peer));
3399		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3400			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3401				break;
3402		}
3403		newpeer->template = loose;
3404		session_template_clone(newpeer, ip, id, 0);
3405		newpeer->state = newpeer->prev_state = STATE_NONE;
3406		newpeer->reconf_action = RECONF_KEEP;
3407		newpeer->rbuf = NULL;
3408		newpeer->rpending = 0;
3409		init_peer(newpeer);
3410		bgp_fsm(newpeer, EVNT_START);
3411		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3412			fatalx("%s: peer tree is corrupt", __func__);
3413		return (newpeer);
3414	}
3415
3416	return (NULL);
3417}
3418
3419struct peer *
3420getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3421{
3422	static struct peer lookup;
3423
3424	lookup.conf.id = peerid;
3425
3426	return RB_FIND(peer_head, &c->peers, &lookup);
3427}
3428
3429int
3430peer_matched(struct peer *p, struct ctl_neighbor *n)
3431{
3432	char *s;
3433
3434	if (n && n->addr.aid) {
3435		if (memcmp(&p->conf.remote_addr, &n->addr,
3436		    sizeof(p->conf.remote_addr)))
3437			return 0;
3438	} else if (n && n->descr[0]) {
3439		s = n->is_group ? p->conf.group : p->conf.descr;
3440		/* cannot trust n->descr to be properly terminated */
3441		if (strncmp(s, n->descr, sizeof(n->descr)))
3442			return 0;
3443	}
3444	return 1;
3445}
3446
3447void
3448session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3449    uint32_t as)
3450{
3451	struct bgpd_addr	remote_addr;
3452
3453	if (ip)
3454		sa2addr(ip, &remote_addr, NULL);
3455	else
3456		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3457
3458	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3459
3460	p->conf.id = id;
3461
3462	if (as) {
3463		p->conf.remote_as = as;
3464		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3465		if (!p->conf.ebgp)
3466			/* force enforce_as off for iBGP sessions */
3467			p->conf.enforce_as = ENFORCE_AS_OFF;
3468	}
3469
3470	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3471	switch (p->conf.remote_addr.aid) {
3472	case AID_INET:
3473		p->conf.remote_masklen = 32;
3474		break;
3475	case AID_INET6:
3476		p->conf.remote_masklen = 128;
3477		break;
3478	}
3479	p->conf.template = 0;
3480}
3481
3482int
3483session_match_mask(struct peer *p, struct bgpd_addr *a)
3484{
3485	struct bgpd_addr masked;
3486
3487	applymask(&masked, a, p->conf.remote_masklen);
3488	if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3489		return (1);
3490	return (0);
3491}
3492
3493void
3494session_down(struct peer *peer)
3495{
3496	memset(&peer->capa.neg, 0, sizeof(peer->capa.neg));
3497	peer->stats.last_updown = getmonotime();
3498	/*
3499	 * session_down is called in the exit code path so check
3500	 * if the RDE is still around, if not there is no need to
3501	 * send the message.
3502	 */
3503	if (ibuf_rde == NULL)
3504		return;
3505	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3506		fatalx("imsg_compose error");
3507}
3508
3509void
3510session_up(struct peer *p)
3511{
3512	struct session_up	 sup;
3513
3514	/* clear last errors, now that the session is up */
3515	p->stats.last_sent_errcode = 0;
3516	p->stats.last_sent_suberr = 0;
3517	p->stats.last_rcvd_errcode = 0;
3518	p->stats.last_rcvd_suberr = 0;
3519	memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason));
3520
3521	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3522	    &p->conf, sizeof(p->conf)) == -1)
3523		fatalx("imsg_compose error");
3524
3525	if (p->local.aid == AID_INET) {
3526		sup.local_v4_addr = p->local;
3527		sup.local_v6_addr = p->local_alt;
3528	} else {
3529		sup.local_v6_addr = p->local;
3530		sup.local_v4_addr = p->local_alt;
3531	}
3532	sup.remote_addr = p->remote;
3533	sup.if_scope = p->if_scope;
3534
3535	sup.remote_bgpid = p->remote_bgpid;
3536	sup.short_as = p->short_as;
3537	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3538	p->stats.last_updown = getmonotime();
3539	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3540		fatalx("imsg_compose error");
3541}
3542
3543int
3544imsg_ctl_parent(struct imsg *imsg)
3545{
3546	return imsg_forward(ibuf_main, imsg);
3547}
3548
3549int
3550imsg_ctl_rde(struct imsg *imsg)
3551{
3552	if (ibuf_rde_ctl == NULL)
3553		return (0);
3554	/*
3555	 * Use control socket to talk to RDE to bypass the queue of the
3556	 * regular imsg socket.
3557	 */
3558	return imsg_forward(ibuf_rde_ctl, imsg);
3559}
3560
3561int
3562imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid)
3563{
3564	if (ibuf_rde_ctl == NULL)
3565		return (0);
3566
3567	/*
3568	 * Use control socket to talk to RDE to bypass the queue of the
3569	 * regular imsg socket.
3570	 */
3571	return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0);
3572}
3573
3574int
3575imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3576{
3577	if (ibuf_rde == NULL)
3578		return (0);
3579
3580	return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen);
3581}
3582
3583void
3584session_demote(struct peer *p, int level)
3585{
3586	struct demote_msg	msg;
3587
3588	strlcpy(msg.demote_group, p->conf.demote_group,
3589	    sizeof(msg.demote_group));
3590	msg.level = level;
3591	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3592	    &msg, sizeof(msg)) == -1)
3593		fatalx("imsg_compose error");
3594
3595	p->demoted += level;
3596}
3597
3598void
3599session_stop(struct peer *peer, uint8_t subcode, const char *reason)
3600{
3601	struct ibuf *ibuf;
3602
3603	if (reason != NULL)
3604		strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason));
3605
3606	ibuf = ibuf_dynamic(0, REASON_LEN);
3607
3608	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3609	    subcode == ERR_CEASE_ADMIN_RESET) &&
3610	    reason != NULL && *reason != '\0' &&
3611	    ibuf != NULL) {
3612		if (ibuf_add_n8(ibuf, strlen(reason)) == -1 ||
3613		    ibuf_add(ibuf, reason, strlen(reason))) {
3614			log_peer_warnx(&peer->conf,
3615			    "trying to send overly long shutdown reason");
3616			ibuf_free(ibuf);
3617			ibuf = NULL;
3618		}
3619	}
3620	switch (peer->state) {
3621	case STATE_OPENSENT:
3622	case STATE_OPENCONFIRM:
3623	case STATE_ESTABLISHED:
3624		session_notification(peer, ERR_CEASE, subcode, ibuf);
3625		break;
3626	default:
3627		/* session not open, no need to send notification */
3628		if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) ||
3629		    suberr_cease_names[subcode] == NULL)
3630			log_peer_warnx(&peer->conf, "session stop: %s, "
3631			    "unknown subcode %u", errnames[ERR_CEASE], subcode);
3632		else
3633			log_peer_warnx(&peer->conf, "session stop: %s, %s",
3634			    errnames[ERR_CEASE], suberr_cease_names[subcode]);
3635		break;
3636	}
3637	ibuf_free(ibuf);
3638	bgp_fsm(peer, EVNT_STOP);
3639}
3640
3641void
3642merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3643{
3644	struct peer *p, *np, *next;
3645
3646	RB_FOREACH(p, peer_head, &c->peers) {
3647		/* templates are handled specially */
3648		if (p->template != NULL)
3649			continue;
3650		np = getpeerbyid(nc, p->conf.id);
3651		if (np == NULL) {
3652			p->reconf_action = RECONF_DELETE;
3653			continue;
3654		}
3655
3656		/* peer no longer uses TCP MD5SIG so deconfigure */
3657		if (p->conf.auth.method == AUTH_MD5SIG &&
3658		    np->conf.auth.method != AUTH_MD5SIG)
3659			tcp_md5_del_listener(c, p);
3660		else if (np->conf.auth.method == AUTH_MD5SIG)
3661			tcp_md5_add_listener(c, np);
3662
3663		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3664		RB_REMOVE(peer_head, &nc->peers, np);
3665		free(np);
3666
3667		p->reconf_action = RECONF_KEEP;
3668
3669		/* had demotion, is demoted, demote removed? */
3670		if (p->demoted && !p->conf.demote_group[0])
3671			session_demote(p, -1);
3672
3673		/* if session is not open then refresh pfkey data */
3674		if (p->state < STATE_OPENSENT && !p->template)
3675			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3676			    p->conf.id, 0, -1, NULL, 0);
3677
3678		/* sync the RDE in case we keep the peer */
3679		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3680		    &p->conf, sizeof(struct peer_config)) == -1)
3681			fatalx("imsg_compose error");
3682
3683		/* apply the config to all clones of a template */
3684		if (p->conf.template) {
3685			struct peer *xp;
3686			RB_FOREACH(xp, peer_head, &c->peers) {
3687				if (xp->template != p)
3688					continue;
3689				session_template_clone(xp, NULL, xp->conf.id,
3690				    xp->conf.remote_as);
3691				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3692				    &xp->conf, sizeof(xp->conf)) == -1)
3693					fatalx("imsg_compose error");
3694			}
3695		}
3696	}
3697
3698	if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3699		fatalx("imsg_compose error");
3700
3701	/* pfkeys of new peers already loaded by the parent process */
3702	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3703		RB_REMOVE(peer_head, &nc->peers, np);
3704		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3705			fatalx("%s: peer tree is corrupt", __func__);
3706		if (np->conf.auth.method == AUTH_MD5SIG)
3707			tcp_md5_add_listener(c, np);
3708	}
3709}
3710