bgpd.c revision 1.255
1/*	$OpenBSD: bgpd.c,v 1.255 2022/11/18 10:17:23 claudio Exp $ */
2
3/*
4 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/socket.h>
21#include <sys/wait.h>
22#include <netinet/in.h>
23#include <arpa/inet.h>
24#include <err.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <poll.h>
28#include <pwd.h>
29#include <signal.h>
30#include <stddef.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <syslog.h>
35#include <unistd.h>
36
37#include "bgpd.h"
38#include "session.h"
39#include "log.h"
40#include "version.h"
41
42void		sighdlr(int);
43__dead void	usage(void);
44int		main(int, char *[]);
45pid_t		start_child(enum bgpd_process, char *, int, int, int);
46int		send_filterset(struct imsgbuf *, struct filter_set_head *);
47int		reconfigure(char *, struct bgpd_config *);
48int		send_config(struct bgpd_config *);
49int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
50int		control_setup(struct bgpd_config *);
51static void	getsockpair(int [2]);
52int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
53		    struct imsgbuf *);
54void		bgpd_rtr_connect(struct rtr_config *);
55void		bgpd_rtr_connect_done(int, struct bgpd_config *);
56
57int			 cflags;
58volatile sig_atomic_t	 mrtdump;
59volatile sig_atomic_t	 quit;
60volatile sig_atomic_t	 reconfig;
61pid_t			 reconfpid;
62int			 reconfpending;
63struct imsgbuf		*ibuf_se;
64struct imsgbuf		*ibuf_rde;
65struct imsgbuf		*ibuf_rtr;
66struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
67char			*cname;
68char			*rcname;
69
70struct connect_elm {
71	TAILQ_ENTRY(connect_elm)	entry;
72	uint32_t			id;
73	int				fd;
74};
75
76TAILQ_HEAD( ,connect_elm)	connect_queue = \
77				    TAILQ_HEAD_INITIALIZER(connect_queue);
78u_int				connect_cnt;
79#define MAX_CONNECT_CNT		32
80
81void
82sighdlr(int sig)
83{
84	switch (sig) {
85	case SIGTERM:
86	case SIGINT:
87		quit = 1;
88		break;
89	case SIGHUP:
90		reconfig = 1;
91		break;
92	case SIGALRM:
93	case SIGUSR1:
94		mrtdump = 1;
95		break;
96	}
97}
98
99__dead void
100usage(void)
101{
102	extern char *__progname;
103
104	fprintf(stderr, "usage: %s [-cdnvV] [-D macro=value] [-f file]\n",
105	    __progname);
106	exit(1);
107}
108
/*
 * Fixed slots in the pollfd array used by main().  Slots from
 * PFD_CONNECT_START upwards are filled from connect_queue.
 */
#define PFD_PIPE_SESSION	0	/* imsg pipe to the session engine */
#define PFD_PIPE_RDE		1	/* imsg pipe to the RDE */
#define PFD_PIPE_RTR		2	/* imsg pipe to the RTR process */
#define PFD_SOCK_ROUTE		3	/* fd returned by kr_init() */
#define PFD_SOCK_PFKEY		4	/* fd returned by pfkey_init() */
#define PFD_CONNECT_START	5	/* first connect_queue slot */

/* Upper bound, in seconds, for the poll() timeout in main(). */
#define MAX_TIMEOUT		3600

/* BGPD_OPT_* bits collected from the command line in main(). */
int	 cmd_opts;
118
119int
120main(int argc, char *argv[])
121{
122	struct bgpd_config	*conf;
123	enum bgpd_process	 proc = PROC_MAIN;
124	struct rde_rib		*rr;
125	struct peer		*p;
126	struct pollfd		*pfd = NULL;
127	struct connect_elm	*ce;
128	time_t			 timeout;
129	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
130	char			*conffile;
131	char			*saved_argv0;
132	u_int			 pfd_elms = 0, npfd, i;
133	int			 debug = 0;
134	int			 rfd, keyfd;
135	int			 ch, status;
136	int			 pipe_m2s[2];
137	int			 pipe_m2r[2];
138	int			 pipe_m2roa[2];
139
140	conffile = CONFFILE;
141
142	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
143	log_procinit(log_procnames[PROC_MAIN]);
144	log_setverbose(1);
145
146	saved_argv0 = argv[0];
147	if (saved_argv0 == NULL)
148		saved_argv0 = "bgpd";
149
150	while ((ch = getopt(argc, argv, "cdD:f:nRSTvV")) != -1) {
151		switch (ch) {
152		case 'c':
153			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
154			break;
155		case 'd':
156			debug = 1;
157			break;
158		case 'D':
159			if (cmdline_symset(optarg) < 0)
160				log_warnx("could not parse macro definition %s",
161				    optarg);
162			break;
163		case 'f':
164			conffile = optarg;
165			break;
166		case 'n':
167			cmd_opts |= BGPD_OPT_NOACTION;
168			break;
169		case 'v':
170			if (cmd_opts & BGPD_OPT_VERBOSE)
171				cmd_opts |= BGPD_OPT_VERBOSE2;
172			cmd_opts |= BGPD_OPT_VERBOSE;
173			break;
174		case 'R':
175			proc = PROC_RDE;
176			break;
177		case 'S':
178			proc = PROC_SE;
179			break;
180		case 'T':
181			proc = PROC_RTR;
182			break;
183		case 'V':
184			fprintf(stderr, "OpenBGPD %s\n", BGPD_VERSION);
185			return 0;
186		default:
187			usage();
188			/* NOTREACHED */
189		}
190	}
191
192	argc -= optind;
193	argv += optind;
194	if (argc > 0)
195		usage();
196
197	if (cmd_opts & BGPD_OPT_NOACTION) {
198		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
199			exit(1);
200
201		if (cmd_opts & BGPD_OPT_VERBOSE)
202			print_config(conf, &ribnames);
203		else
204			fprintf(stderr, "configuration OK\n");
205
206		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
207			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
208			free(rr);
209		}
210		free_config(conf);
211		exit(0);
212	}
213
214	switch (proc) {
215	case PROC_MAIN:
216		break;
217	case PROC_RDE:
218		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
219		/* NOTREACHED */
220	case PROC_SE:
221		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
222		/* NOTREACHED */
223	case PROC_RTR:
224		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
225		/* NOTREACHED */
226	}
227
228	if (geteuid())
229		errx(1, "need root privileges");
230
231	if (getpwnam(BGPD_USER) == NULL)
232		errx(1, "unknown user %s", BGPD_USER);
233
234	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
235		log_warnx("config file %s has errors", conffile);
236		exit(1);
237	}
238
239	if (prepare_listeners(conf) == -1)
240		exit(1);
241
242	log_init(debug, LOG_DAEMON);
243	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
244
245	if (!debug)
246		daemon(1, 0);
247
248	log_info("startup");
249
250	getsockpair(pipe_m2s);
251	getsockpair(pipe_m2r);
252	getsockpair(pipe_m2roa);
253
254	/* fork children */
255	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
256	    cmd_opts & BGPD_OPT_VERBOSE);
257	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
258	    cmd_opts & BGPD_OPT_VERBOSE);
259	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
260	    cmd_opts & BGPD_OPT_VERBOSE);
261
262	signal(SIGTERM, sighdlr);
263	signal(SIGINT, sighdlr);
264	signal(SIGHUP, sighdlr);
265	signal(SIGALRM, sighdlr);
266	signal(SIGUSR1, sighdlr);
267	signal(SIGPIPE, SIG_IGN);
268
269	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
270	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
271	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
272		fatal(NULL);
273	imsg_init(ibuf_se, pipe_m2s[0]);
274	imsg_init(ibuf_rde, pipe_m2r[0]);
275	imsg_init(ibuf_rtr, pipe_m2roa[0]);
276	mrt_init(ibuf_rde, ibuf_se);
277	if (kr_init(&rfd, conf->fib_priority) == -1)
278		quit = 1;
279	keyfd = pfkey_init();
280
281	/*
282	 * rpath, read config file
283	 * cpath, unlink control socket
284	 * fattr, chmod on control socket
285	 * wpath, needed if we are doing mrt dumps
286	 *
287	 * pledge placed here because kr_init() does a setsockopt on the
288	 * routing socket thats not allowed at all.
289	 */
290#if 0
291	/*
292	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
293	 * this needs some redesign of bgpd to be fixed.
294	 */
295BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
296	    NULL) == -1)
297		fatal("pledge");
298#endif
299
300	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
301		fatal("could not establish imsg links");
302	/* control setup needs to happen late since it sends imsgs */
303	if (control_setup(conf) == -1)
304		quit = 1;
305	if (send_config(conf) != 0)
306		quit = 1;
307	if (pftable_clear_all() != 0)
308		quit = 1;
309
310	while (quit == 0) {
311		if (pfd_elms < PFD_CONNECT_START + connect_cnt) {
312			struct pollfd *newp;
313
314			if ((newp = reallocarray(pfd,
315			    PFD_CONNECT_START + connect_cnt,
316			    sizeof(struct pollfd))) == NULL) {
317				log_warn("could not resize pfd from %u -> %u"
318				    " entries", pfd_elms, PFD_CONNECT_START +
319				    connect_cnt);
320				fatalx("exiting");
321			}
322			pfd = newp;
323			pfd_elms = PFD_CONNECT_START + connect_cnt;
324		}
325		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
326
327		timeout = mrt_timeout(conf->mrt);
328
329		pfd[PFD_SOCK_ROUTE].fd = rfd;
330		pfd[PFD_SOCK_ROUTE].events = POLLIN;
331
332		pfd[PFD_SOCK_PFKEY].fd = keyfd;
333		pfd[PFD_SOCK_PFKEY].events = POLLIN;
334
335		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
336		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
337		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
338
339		npfd = PFD_CONNECT_START;
340		TAILQ_FOREACH(ce, &connect_queue, entry) {
341			pfd[npfd].fd = ce->fd;
342			pfd[npfd++].events = POLLOUT;
343			if (npfd > pfd_elms)
344				fatalx("polli pfd overflow");
345		}
346
347		if (timeout < 0 || timeout > MAX_TIMEOUT)
348			timeout = MAX_TIMEOUT;
349		if (poll(pfd, npfd, timeout * 1000) == -1) {
350			if (errno != EINTR) {
351				log_warn("poll error");
352				quit = 1;
353			}
354			goto next_loop;
355		}
356
357		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
358			log_warnx("main: Lost connection to SE");
359			msgbuf_clear(&ibuf_se->w);
360			free(ibuf_se);
361			ibuf_se = NULL;
362			quit = 1;
363		} else {
364			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
365			    -1)
366				quit = 1;
367		}
368
369		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
370			log_warnx("main: Lost connection to RDE");
371			msgbuf_clear(&ibuf_rde->w);
372			free(ibuf_rde);
373			ibuf_rde = NULL;
374			quit = 1;
375		} else {
376			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
377				quit = 1;
378		}
379
380		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
381			log_warnx("main: Lost connection to RTR");
382			msgbuf_clear(&ibuf_rtr->w);
383			free(ibuf_rtr);
384			ibuf_rtr = NULL;
385			quit = 1;
386		} else {
387			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
388				quit = 1;
389		}
390
391		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
392			if (kr_dispatch_msg() == -1)
393				quit = 1;
394		}
395
396		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
397			if (pfkey_read(keyfd, NULL) == -1) {
398				log_warnx("pfkey_read failed, exiting...");
399				quit = 1;
400			}
401		}
402
403		for (i = PFD_CONNECT_START; i < npfd; i++)
404			if (pfd[i].revents != 0)
405				bgpd_rtr_connect_done(pfd[i].fd, conf);
406
407 next_loop:
408		if (reconfig) {
409			u_int	error;
410
411			reconfig = 0;
412			switch (reconfigure(conffile, conf)) {
413			case -1:	/* fatal error */
414				quit = 1;
415				break;
416			case 0:		/* all OK */
417				error = 0;
418				break;
419			case 2:
420				log_info("previous reload still running");
421				error = CTL_RES_PENDING;
422				break;
423			default:	/* parse error */
424				log_warnx("config file %s has errors, "
425				    "not reloading", conffile);
426				error = CTL_RES_PARSE_ERROR;
427				break;
428			}
429			if (reconfpid != 0) {
430				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
431				    &error, sizeof(error));
432				reconfpid = 0;
433			}
434		}
435
436		if (mrtdump) {
437			mrtdump = 0;
438			mrt_handler(conf->mrt);
439		}
440	}
441
442	/* close pipes */
443	if (ibuf_se) {
444		msgbuf_clear(&ibuf_se->w);
445		close(ibuf_se->fd);
446		free(ibuf_se);
447		ibuf_se = NULL;
448	}
449	if (ibuf_rde) {
450		msgbuf_clear(&ibuf_rde->w);
451		close(ibuf_rde->fd);
452		free(ibuf_rde);
453		ibuf_rde = NULL;
454	}
455	if (ibuf_rtr) {
456		msgbuf_clear(&ibuf_rtr->w);
457		close(ibuf_rtr->fd);
458		free(ibuf_rtr);
459		ibuf_rtr = NULL;
460	}
461
462	/* cleanup kernel data structures */
463	carp_demote_shutdown();
464	kr_shutdown();
465	pftable_clear_all();
466
467	RB_FOREACH(p, peer_head, &conf->peers)
468		pfkey_remove(p);
469
470	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
471		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
472		free(rr);
473	}
474	free_config(conf);
475
476	log_debug("waiting for children to terminate");
477	do {
478		pid = wait(&status);
479		if (pid == -1) {
480			if (errno != EINTR && errno != ECHILD)
481				fatal("wait");
482		} else if (WIFSIGNALED(status)) {
483			char *name = "unknown process";
484			if (pid == rde_pid)
485				name = "route decision engine";
486			else if (pid == se_pid)
487				name = "session engine";
488			else if (pid == rtr_pid)
489				name = "rtr engine";
490			log_warnx("%s terminated; signal %d", name,
491				WTERMSIG(status));
492		}
493	} while (pid != -1 || (pid == -1 && errno == EINTR));
494
495	free(rcname);
496	free(cname);
497
498	log_info("terminating");
499	return (0);
500}
501
502pid_t
503start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
504{
505	char *argv[5];
506	int argc = 0;
507	pid_t pid;
508
509	switch (pid = fork()) {
510	case -1:
511		fatal("cannot fork");
512	case 0:
513		break;
514	default:
515		close(fd);
516		return (pid);
517	}
518
519	if (fd != 3) {
520		if (dup2(fd, 3) == -1)
521			fatal("cannot setup imsg fd");
522	} else if (fcntl(fd, F_SETFD, 0) == -1)
523		fatal("cannot setup imsg fd");
524
525	argv[argc++] = argv0;
526	switch (p) {
527	case PROC_MAIN:
528		fatalx("Can not start main process");
529	case PROC_RDE:
530		argv[argc++] = "-R";
531		break;
532	case PROC_SE:
533		argv[argc++] = "-S";
534		break;
535	case PROC_RTR:
536		argv[argc++] = "-T";
537		break;
538	}
539	if (debug)
540		argv[argc++] = "-d";
541	if (verbose)
542		argv[argc++] = "-v";
543	argv[argc++] = NULL;
544
545	execvp(argv0, argv);
546	fatal("execvp");
547}
548
549int
550send_filterset(struct imsgbuf *i, struct filter_set_head *set)
551{
552	struct filter_set	*s;
553
554	TAILQ_FOREACH(s, set, entry)
555		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
556		    sizeof(struct filter_set)) == -1)
557			return (-1);
558	return (0);
559}
560
561int
562reconfigure(char *conffile, struct bgpd_config *conf)
563{
564	struct bgpd_config	*new_conf;
565
566	if (reconfpending)
567		return (2);
568
569	log_info("rereading config");
570	if ((new_conf = parse_config(conffile, &conf->peers,
571	    &conf->rtrs)) == NULL)
572		return (1);
573
574	merge_config(conf, new_conf);
575
576	if (prepare_listeners(conf) == -1) {
577		return (1);
578	}
579
580	if (control_setup(conf) == -1) {
581		return (1);
582	}
583
584	return send_config(conf);
585}
586
587int
588send_config(struct bgpd_config *conf)
589{
590	struct peer		*p;
591	struct filter_rule	*r;
592	struct listen_addr	*la;
593	struct rde_rib		*rr;
594	struct l3vpn		*vpn;
595	struct as_set		*aset;
596	struct prefixset	*ps;
597	struct prefixset_item	*psi, *npsi;
598	struct roa		*roa;
599	struct aspa_set		*aspa;
600	struct rtr_config	*rtr;
601
602	reconfpending = 3;	/* one per child */
603
604	expand_networks(conf, &conf->networks);
605	SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry)
606		expand_networks(conf, &vpn->net_l);
607
608	cflags = conf->flags;
609
610	/* start reconfiguration */
611	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
612	    conf, sizeof(*conf)) == -1)
613		return (-1);
614	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
615	    conf, sizeof(*conf)) == -1)
616		return (-1);
617	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
618	    conf, sizeof(*conf)) == -1)
619		return (-1);
620
621	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
622		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
623		    la, sizeof(*la)) == -1)
624			return (-1);
625		la->fd = -1;
626	}
627
628	/* adjust fib syncing on reload */
629	ktable_preload();
630
631	/* RIBs for the RDE */
632	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
633		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
634		if (ktable_update(rr->rtableid, rr->name, rr->flags) == -1) {
635			log_warnx("failed to load routing table %d",
636			    rr->rtableid);
637			return (-1);
638		}
639		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
640		    rr, sizeof(*rr)) == -1)
641			return (-1);
642		free(rr);
643	}
644
645	/* send peer list to the SE */
646	RB_FOREACH(p, peer_head, &conf->peers) {
647		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
648		    &p->conf, sizeof(p->conf)) == -1)
649			return (-1);
650
651		if (p->reconf_action == RECONF_REINIT)
652			if (pfkey_establish(p) == -1)
653				log_peer_warnx(&p->conf, "pfkey setup failed");
654	}
655
656	/* networks go via kroute to the RDE */
657	kr_net_reload(conf->default_tableid, 0, &conf->networks);
658
659	/* prefixsets for filters in the RDE */
660	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
661		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
662		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
663		    ps->name, sizeof(ps->name)) == -1)
664			return (-1);
665		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
666			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
667			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
668			    0, 0, -1, psi, sizeof(*psi)) == -1)
669				return (-1);
670			free(psi);
671		}
672		free(ps);
673	}
674
675	/* originsets for filters in the RDE */
676	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
677		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
678		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
679		    ps->name, sizeof(ps->name)) == -1)
680			return (-1);
681		RB_FOREACH(roa, roa_tree, &ps->roaitems) {
682			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
683			    -1, roa, sizeof(*roa)) == -1)
684				return (-1);
685		}
686		free_roatree(&ps->roaitems);
687		free(ps);
688	}
689
690	/* roa table, aspa table and rtr config are sent to the RTR engine */
691	RB_FOREACH(roa, roa_tree, &conf->roa) {
692		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
693		    -1, roa, sizeof(*roa)) == -1)
694			return (-1);
695	}
696	free_roatree(&conf->roa);
697	RB_FOREACH(aspa, aspa_tree, &conf->aspa) {
698		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA, 0, 0,
699		    -1, aspa, offsetof(struct aspa_set, tas)) == -1)
700			return (-1);
701		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_TAS, 0, 0,
702		    -1, aspa->tas, sizeof(*aspa->tas) * aspa->num) == -1)
703			return (-1);
704		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_TAS_AID,
705		    0, 0, -1, aspa->tas_aid, aspa->num) == -1)
706			return (-1);
707		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_DONE, 0, 0, -1,
708		    NULL, 0) == -1)
709			return -1;
710	}
711	free_aspatree(&conf->aspa);
712	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
713		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
714		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
715			return (-1);
716	}
717
718	/* as-sets for filters in the RDE */
719	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
720		struct ibuf *wbuf;
721		uint32_t *as;
722		size_t i, l, n;
723
724		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);
725
726		as = set_get(aset->set, &n);
727		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
728		    sizeof(n) + sizeof(aset->name))) == NULL)
729			return -1;
730		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
731		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
732			return -1;
733		imsg_close(ibuf_rde, wbuf);
734
735		for (i = 0; i < n; i += l) {
736			l = (n - i > 1024 ? 1024 : n - i);
737			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
738			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
739				return -1;
740		}
741
742		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
743		    NULL, 0) == -1)
744			return -1;
745
746		set_free(aset->set);
747		free(aset);
748	}
749
750	/* filters for the RDE */
751	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
752		TAILQ_REMOVE(conf->filters, r, entry);
753		if (send_filterset(ibuf_rde, &r->set) == -1)
754			return (-1);
755		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
756		    r, sizeof(struct filter_rule)) == -1)
757			return (-1);
758		filterset_free(&r->set);
759		free(r);
760	}
761
762	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
763		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
764		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags) ==
765		    -1) {
766			log_warnx("failed to load routing table %d",
767			    vpn->rtableid);
768			return (-1);
769		}
770		/* networks go via kroute to the RDE */
771		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);
772
773		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
774		    vpn, sizeof(*vpn)) == -1)
775			return (-1);
776
777		/* export targets */
778		if (send_filterset(ibuf_rde, &vpn->export) == -1)
779			return (-1);
780		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
781		    -1, NULL, 0) == -1)
782			return (-1);
783		filterset_free(&vpn->export);
784
785		/* import targets */
786		if (send_filterset(ibuf_rde, &vpn->import) == -1)
787			return (-1);
788		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
789		    -1, NULL, 0) == -1)
790			return (-1);
791		filterset_free(&vpn->import);
792
793		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
794		    -1, NULL, 0) == -1)
795			return (-1);
796
797		free(vpn);
798	}
799
800	/* send a drain message to know when all messages where processed */
801	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
802		return (-1);
803	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
804		return (-1);
805	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
806		return (-1);
807
808	/* mrt changes can be sent out of bound */
809	mrt_reconfigure(conf->mrt);
810	return (0);
811}
812
813int
814dispatch_imsg(struct imsgbuf *ibuf, int idx, struct bgpd_config *conf)
815{
816	struct imsg		 imsg;
817	struct peer		*p;
818	struct rtr_config	*r;
819	ssize_t			 n;
820	u_int			 rtableid;
821	int			 rv, verbose;
822
823	rv = 0;
824	while (ibuf) {
825		if ((n = imsg_get(ibuf, &imsg)) == -1)
826			return (-1);
827
828		if (n == 0)
829			break;
830
831		switch (imsg.hdr.type) {
832		case IMSG_KROUTE_CHANGE:
833			if (idx != PFD_PIPE_RDE)
834				log_warnx("route request not from RDE");
835			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
836			    sizeof(struct kroute_full))
837				log_warnx("wrong imsg len");
838			else if (kr_change(imsg.hdr.peerid, imsg.data))
839				rv = -1;
840			break;
841		case IMSG_KROUTE_DELETE:
842			if (idx != PFD_PIPE_RDE)
843				log_warnx("route request not from RDE");
844			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
845			    sizeof(struct kroute_full))
846				log_warnx("wrong imsg len");
847			else if (kr_delete(imsg.hdr.peerid, imsg.data))
848				rv = -1;
849			break;
850		case IMSG_KROUTE_FLUSH:
851			if (idx != PFD_PIPE_RDE)
852				log_warnx("route request not from RDE");
853			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
854				log_warnx("wrong imsg len");
855			else if (kr_flush(imsg.hdr.peerid))
856				rv = -1;
857			break;
858		case IMSG_NEXTHOP_ADD:
859			if (idx != PFD_PIPE_RDE)
860				log_warnx("nexthop request not from RDE");
861			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
862			    sizeof(struct bgpd_addr))
863				log_warnx("wrong imsg len");
864			else {
865				rtableid = conf->default_tableid;
866				if (kr_nexthop_add(rtableid, imsg.data) == -1)
867					rv = -1;
868			}
869			break;
870		case IMSG_NEXTHOP_REMOVE:
871			if (idx != PFD_PIPE_RDE)
872				log_warnx("nexthop request not from RDE");
873			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
874			    sizeof(struct bgpd_addr))
875				log_warnx("wrong imsg len");
876			else {
877				rtableid = conf->default_tableid;
878				kr_nexthop_delete(rtableid, imsg.data);
879			}
880			break;
881		case IMSG_PFTABLE_ADD:
882			if (idx != PFD_PIPE_RDE)
883				log_warnx("pftable request not from RDE");
884			else
885				if (imsg.hdr.len != IMSG_HEADER_SIZE +
886				    sizeof(struct pftable_msg))
887					log_warnx("wrong imsg len");
888				else if (pftable_addr_add(imsg.data) != 0)
889					rv = -1;
890			break;
891		case IMSG_PFTABLE_REMOVE:
892			if (idx != PFD_PIPE_RDE)
893				log_warnx("pftable request not from RDE");
894			else
895				if (imsg.hdr.len != IMSG_HEADER_SIZE +
896				    sizeof(struct pftable_msg))
897					log_warnx("wrong imsg len");
898				else if (pftable_addr_remove(imsg.data) != 0)
899					rv = -1;
900			break;
901		case IMSG_PFTABLE_COMMIT:
902			if (idx != PFD_PIPE_RDE)
903				log_warnx("pftable request not from RDE");
904			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
905				log_warnx("wrong imsg len");
906			else if (pftable_commit() != 0)
907				rv = -1;
908			break;
909		case IMSG_PFKEY_RELOAD:
910			if (idx != PFD_PIPE_SESSION) {
911				log_warnx("pfkey reload request not from SE");
912				break;
913			}
914			p = getpeerbyid(conf, imsg.hdr.peerid);
915			if (p != NULL) {
916				if (pfkey_establish(p) == -1)
917					log_peer_warnx(&p->conf,
918					    "pfkey setup failed");
919			}
920			break;
921		case IMSG_CTL_RELOAD:
922			if (idx != PFD_PIPE_SESSION)
923				log_warnx("reload request not from SE");
924			else {
925				reconfig = 1;
926				reconfpid = imsg.hdr.pid;
927				if (imsg.hdr.len == IMSG_HEADER_SIZE +
928				    REASON_LEN && ((char *)imsg.data)[0])
929					log_info("reload due to: %s",
930					    log_reason(imsg.data));
931			}
932			break;
933		case IMSG_CTL_FIB_COUPLE:
934			if (idx != PFD_PIPE_SESSION)
935				log_warnx("couple request not from SE");
936			else
937				kr_fib_couple(imsg.hdr.peerid);
938			break;
939		case IMSG_CTL_FIB_DECOUPLE:
940			if (idx != PFD_PIPE_SESSION)
941				log_warnx("decouple request not from SE");
942			else
943				kr_fib_decouple(imsg.hdr.peerid);
944			break;
945		case IMSG_CTL_KROUTE:
946		case IMSG_CTL_KROUTE_ADDR:
947		case IMSG_CTL_SHOW_NEXTHOP:
948		case IMSG_CTL_SHOW_INTERFACE:
949		case IMSG_CTL_SHOW_FIB_TABLES:
950			if (idx != PFD_PIPE_SESSION)
951				log_warnx("kroute request not from SE");
952			else
953				kr_show_route(&imsg);
954			break;
955		case IMSG_SESSION_DEPENDON:
956			if (idx != PFD_PIPE_SESSION)
957				log_warnx("DEPENDON request not from SE");
958			else if (imsg.hdr.len != IMSG_HEADER_SIZE + IFNAMSIZ)
959				log_warnx("DEPENDON request with wrong len");
960			else
961				kr_ifinfo(imsg.data);
962			break;
963		case IMSG_DEMOTE:
964			if (idx != PFD_PIPE_SESSION)
965				log_warnx("demote request not from SE");
966			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
967			    sizeof(struct demote_msg))
968				log_warnx("DEMOTE request with wrong len");
969			else {
970				struct demote_msg	*msg;
971
972				msg = imsg.data;
973				carp_demote_set(msg->demote_group, msg->level);
974			}
975			break;
976		case IMSG_CTL_LOG_VERBOSE:
977			/* already checked by SE */
978			memcpy(&verbose, imsg.data, sizeof(verbose));
979			log_setverbose(verbose);
980			break;
981		case IMSG_RECONF_DONE:
982			if (reconfpending == 0) {
983				log_warnx("unexpected RECONF_DONE received");
984				break;
985			}
986			if (idx == PFD_PIPE_SESSION) {
987				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
988				    0, -1, NULL, 0);
989			} else if (idx == PFD_PIPE_RTR) {
990				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
991				    0, -1, NULL, 0);
992
993				/* finally fix kroute information */
994				ktable_postload();
995
996				/* redistribute list needs to be reloaded too */
997				kr_reload();
998			}
999			reconfpending--;
1000			break;
1001		case IMSG_RECONF_DRAIN:
1002			if (reconfpending == 0) {
1003				log_warnx("unexpected RECONF_DRAIN received");
1004				break;
1005			}
1006			reconfpending--;
1007			if (reconfpending == 0) {
1008				/*
1009				 * SE goes first to bring templated neighbors
1010				 * in sync.
1011				 */
1012				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
1013				    0, -1, NULL, 0);
1014				reconfpending = 3; /* expecting 2 DONE msg */
1015			}
1016			break;
1017		case IMSG_SOCKET_CONN:
1018			if (idx != PFD_PIPE_RTR) {
1019				log_warnx("connect request not from RTR");
1020			} else {
1021				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1022					if (imsg.hdr.peerid == r->id)
1023						break;
1024				}
1025				if (r == NULL)
1026					log_warnx("unknown rtr id %d",
1027					    imsg.hdr.peerid);
1028				else
1029					bgpd_rtr_connect(r);
1030			}
1031			break;
1032		case IMSG_CTL_SHOW_RTR:
1033			if (idx == PFD_PIPE_SESSION) {
1034				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1035					imsg_compose(ibuf_rtr, imsg.hdr.type,
1036					    r->id, imsg.hdr.pid, -1, NULL, 0);
1037				}
1038				imsg_compose(ibuf_rtr, IMSG_CTL_END,
1039				    0, imsg.hdr.pid, -1, NULL, 0);
1040			} else if (imsg.hdr.len != IMSG_HEADER_SIZE +
1041			    sizeof(struct ctl_show_rtr)) {
1042				log_warnx("IMSG_CTL_SHOW_RTR with wrong len");
1043			} else if (idx == PFD_PIPE_RTR) {
1044				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1045					if (imsg.hdr.peerid == r->id)
1046						break;
1047				}
1048				if (r != NULL) {
1049					struct ctl_show_rtr *msg;
1050					msg = imsg.data;
1051					strlcpy(msg->descr, r->descr,
1052					    sizeof(msg->descr));
1053					msg->local_addr = r->local_addr;
1054					msg->remote_addr = r->remote_addr;
1055					msg->remote_port = r->remote_port;
1056
1057					imsg_compose(ibuf_se, imsg.hdr.type,
1058					    imsg.hdr.peerid, imsg.hdr.pid,
1059					    -1, imsg.data,
1060					    imsg.hdr.len - IMSG_HEADER_SIZE);
1061				}
1062			}
1063			break;
1064		case IMSG_CTL_END:
1065		case IMSG_CTL_SHOW_TIMER:
1066			if (idx != PFD_PIPE_RTR) {
1067				log_warnx("connect request not from RTR");
1068				break;
1069			}
1070			imsg_compose(ibuf_se, imsg.hdr.type, imsg.hdr.peerid,
1071			    imsg.hdr.pid, -1, imsg.data,
1072			    imsg.hdr.len - IMSG_HEADER_SIZE);
1073			break;
1074		default:
1075			break;
1076		}
1077		imsg_free(&imsg);
1078		if (rv != 0)
1079			return (rv);
1080	}
1081	return (0);
1082}
1083
1084void
1085send_nexthop_update(struct kroute_nexthop *msg)
1086{
1087	char	*gw = NULL;
1088
1089	if (msg->gateway.aid)
1090		if (asprintf(&gw, ": via %s",
1091		    log_addr(&msg->gateway)) == -1) {
1092			log_warn("send_nexthop_update");
1093			quit = 1;
1094		}
1095
1096	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1097	    msg->valid ? "valid" : "invalid",
1098	    msg->connected ? ": directly connected" : "",
1099	    msg->gateway.aid ? gw : "");
1100
1101	free(gw);
1102
1103	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1104	    msg, sizeof(struct kroute_nexthop)) == -1)
1105		quit = 1;
1106}
1107
1108void
1109send_imsg_session(int type, pid_t pid, void *data, uint16_t datalen)
1110{
1111	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1112}
1113
1114int
1115send_network(int type, struct network_config *net, struct filter_set_head *h)
1116{
1117	if (quit)
1118		return (0);
1119	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1120	    sizeof(struct network_config)) == -1)
1121		return (-1);
1122	/* networks that get deleted don't need to send the filter set */
1123	if (type == IMSG_NETWORK_REMOVE)
1124		return (0);
1125	if (send_filterset(ibuf_rde, h) == -1)
1126		return (-1);
1127	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1128		return (-1);
1129
1130	return (0);
1131}
1132
1133/*
1134 * Return true if a route can be used for nexthop resolution.
1135 */
1136int
1137bgpd_oknexthop(struct kroute_full *kf)
1138{
1139	if (kf->flags & F_BGPD)
1140		return ((cflags & BGPD_FLAG_NEXTHOP_BGP) != 0);
1141
1142	if (kf->prefixlen == 0)
1143		return ((cflags & BGPD_FLAG_NEXTHOP_DEFAULT) != 0);
1144
1145	/* any other route is fine */
1146	return (1);
1147}
1148
1149int
1150control_setup(struct bgpd_config *conf)
1151{
1152	int fd, restricted;
1153
1154	/* control socket is outside chroot */
1155	if (!cname || strcmp(cname, conf->csock)) {
1156		if (cname) {
1157			free(cname);
1158		}
1159		if ((cname = strdup(conf->csock)) == NULL)
1160			fatal("strdup");
1161		if (control_check(cname) == -1)
1162			return (-1);
1163		if ((fd = control_init(0, cname)) == -1)
1164			fatalx("control socket setup failed");
1165		if (control_listen(fd) == -1)
1166			fatalx("control socket setup failed");
1167		restricted = 0;
1168		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1169		    &restricted, sizeof(restricted)) == -1)
1170			return (-1);
1171	}
1172	if (!conf->rcsock) {
1173		/* remove restricted socket */
1174		free(rcname);
1175		rcname = NULL;
1176	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1177		if (rcname) {
1178			free(rcname);
1179		}
1180		if ((rcname = strdup(conf->rcsock)) == NULL)
1181			fatal("strdup");
1182		if (control_check(rcname) == -1)
1183			return (-1);
1184		if ((fd = control_init(1, rcname)) == -1)
1185			fatalx("control socket setup failed");
1186		if (control_listen(fd) == -1)
1187			fatalx("control socket setup failed");
1188		restricted = 1;
1189		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1190		    &restricted, sizeof(restricted)) == -1)
1191			return (-1);
1192	}
1193	return (0);
1194}
1195
1196void
1197set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1198{
1199	if (i == NULL || i->fd == -1) {
1200		pfd->fd = -1;
1201		return;
1202	}
1203	pfd->fd = i->fd;
1204	pfd->events = POLLIN;
1205	if (i->w.queued > 0)
1206		pfd->events |= POLLOUT;
1207}
1208
1209int
1210handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1211{
1212	ssize_t n;
1213
1214	if (i == NULL)
1215		return (0);
1216
1217	if (pfd->revents & POLLOUT)
1218		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1219			log_warn("imsg write error");
1220			close(i->fd);
1221			i->fd = -1;
1222			return (-1);
1223		}
1224
1225	if (pfd->revents & POLLIN) {
1226		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1227			log_warn("imsg read error");
1228			close(i->fd);
1229			i->fd = -1;
1230			return (-1);
1231		}
1232		if (n == 0) {
1233			log_warnx("peer closed imsg connection");
1234			close(i->fd);
1235			i->fd = -1;
1236			return (-1);
1237		}
1238	}
1239	return (0);
1240}
1241
1242static void
1243getsockpair(int pipe[2])
1244{
1245	int bsize, i;
1246
1247	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1248	    PF_UNSPEC, pipe) == -1)
1249		fatal("socketpair");
1250
1251	for (i = 0; i < 2; i++) {
1252		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1253			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1254			    &bsize, sizeof(bsize)) == -1) {
1255				if (errno != ENOBUFS)
1256					fatal("setsockopt(SO_RCVBUF, %d)",
1257					    bsize);
1258				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1259				continue;
1260			}
1261			break;
1262		}
1263	}
1264	for (i = 0; i < 2; i++) {
1265		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1266			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1267			    &bsize, sizeof(bsize)) == -1) {
1268				if (errno != ENOBUFS)
1269					fatal("setsockopt(SO_SNDBUF, %d)",
1270					    bsize);
1271				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1272				continue;
1273			}
1274			break;
1275		}
1276	}
1277}
1278
1279int
1280imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *roa)
1281{
1282	int pipe_s2r[2];
1283	int pipe_s2r_ctl[2];
1284	int pipe_r2r[2];
1285
1286	getsockpair(pipe_s2r);
1287	getsockpair(pipe_s2r_ctl);
1288	getsockpair(pipe_r2r);
1289
1290	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1291	    NULL, 0) == -1)
1292		return (-1);
1293	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1294	    NULL, 0) == -1)
1295		return (-1);
1296
1297	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1298	    NULL, 0) == -1)
1299		return (-1);
1300	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1301	    NULL, 0) == -1)
1302		return (-1);
1303
1304	if (imsg_compose(roa, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1305	    NULL, 0) == -1)
1306		return (-1);
1307	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1308	    NULL, 0) == -1)
1309		return (-1);
1310
1311	return (0);
1312}
1313
1314void
1315bgpd_rtr_connect(struct rtr_config *r)
1316{
1317	struct connect_elm *ce;
1318	struct sockaddr *sa;
1319	socklen_t len;
1320
1321	if (connect_cnt >= MAX_CONNECT_CNT) {
1322		log_warnx("rtr %s: too many concurrent connection requests",
1323		    r->descr);
1324		return;
1325	}
1326
1327	if ((ce = calloc(1, sizeof(*ce))) == NULL) {
1328		log_warn("rtr %s", r->descr);
1329		return;
1330	}
1331
1332	ce->id = r->id;
1333	ce->fd = socket(aid2af(r->remote_addr.aid),
1334	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP);
1335	if (ce->fd == -1) {
1336		log_warn("rtr %s", r->descr);
1337		free(ce);
1338		return;
1339	}
1340
1341	if ((sa = addr2sa(&r->local_addr, 0, &len)) != NULL) {
1342		if (bind(ce->fd, sa, len) == -1) {
1343			log_warn("rtr %s: bind to %s", r->descr,
1344			    log_addr(&r->local_addr));
1345			close(ce->fd);
1346			free(ce);
1347			return;
1348		}
1349	}
1350
1351	sa = addr2sa(&r->remote_addr, r->remote_port, &len);
1352	if (connect(ce->fd, sa, len) == -1) {
1353		if (errno != EINPROGRESS) {
1354			log_warn("rtr %s: connect to %s:%u", r->descr,
1355			    log_addr(&r->remote_addr), r->remote_port);
1356			close(ce->fd);
1357			free(ce);
1358			return;
1359		}
1360		TAILQ_INSERT_TAIL(&connect_queue, ce, entry);
1361		connect_cnt++;
1362		return;
1363	}
1364
1365	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1366	free(ce);
1367}
1368
1369void
1370bgpd_rtr_connect_done(int fd, struct bgpd_config *conf)
1371{
1372	struct rtr_config *r;
1373	struct connect_elm *ce;
1374	int error = 0;
1375	socklen_t len;
1376
1377	TAILQ_FOREACH(ce, &connect_queue, entry) {
1378		if (ce->fd == fd)
1379			break;
1380	}
1381	if (ce == NULL)
1382		fatalx("connect entry not found");
1383
1384	TAILQ_REMOVE(&connect_queue, ce, entry);
1385	connect_cnt--;
1386
1387	SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1388		if (ce->id == r->id)
1389			break;
1390	}
1391	if (r == NULL) {
1392		log_warnx("rtr id %d no longer exists", ce->id);
1393		goto fail;
1394	}
1395
1396	len = sizeof(error);
1397	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
1398		log_warn("rtr %s: getsockopt SO_ERROR", r->descr);
1399		goto fail;
1400	}
1401
1402	if (error != 0) {
1403		errno = error;
1404		log_warn("rtr %s: connect to %s:%u", r->descr,
1405		    log_addr(&r->remote_addr), r->remote_port);
1406		goto fail;
1407	}
1408
1409	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1410	free(ce);
1411	return;
1412
1413fail:
1414	close(fd);
1415	free(ce);
1416}
1417