bgpd.c revision 1.234
1/*	$OpenBSD: bgpd.c,v 1.234 2021/02/16 08:29:16 claudio Exp $ */
2
3/*
4 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/socket.h>
21#include <sys/wait.h>
22#include <netinet/in.h>
23#include <arpa/inet.h>
24#include <err.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <poll.h>
28#include <pwd.h>
29#include <signal.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <syslog.h>
34#include <unistd.h>
35
36#include "bgpd.h"
37#include "session.h"
38#include "log.h"
39
40void		sighdlr(int);
41__dead void	usage(void);
42int		main(int, char *[]);
43pid_t		start_child(enum bgpd_process, char *, int, int, int);
44int		send_filterset(struct imsgbuf *, struct filter_set_head *);
45int		reconfigure(char *, struct bgpd_config *);
46int		send_config(struct bgpd_config *);
47int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
48int		control_setup(struct bgpd_config *);
49static void	getsockpair(int [2]);
50int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
51		    struct imsgbuf *);
52void		bgpd_rtr_connect(struct rtr_config *);
53
54int			 cflags;
55volatile sig_atomic_t	 mrtdump;
56volatile sig_atomic_t	 quit;
57volatile sig_atomic_t	 reconfig;
58pid_t			 reconfpid;
59int			 reconfpending;
60struct imsgbuf		*ibuf_se;
61struct imsgbuf		*ibuf_rde;
62struct imsgbuf		*ibuf_rtr;
63struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
64char			*cname;
65char			*rcname;
66
67void
68sighdlr(int sig)
69{
70	switch (sig) {
71	case SIGTERM:
72	case SIGINT:
73		quit = 1;
74		break;
75	case SIGHUP:
76		reconfig = 1;
77		break;
78	case SIGALRM:
79	case SIGUSR1:
80		mrtdump = 1;
81		break;
82	}
83}
84
85__dead void
86usage(void)
87{
88	extern char *__progname;
89
90	fprintf(stderr, "usage: %s [-cdnv] [-D macro=value] [-f file]\n",
91	    __progname);
92	exit(1);
93}
94
/* Slots of the pollfd array serviced by the main loop. */
#define PFD_PIPE_SESSION	0	/* imsg pipe to the session engine */
#define PFD_PIPE_RDE		1	/* imsg pipe to the RDE */
#define PFD_PIPE_RTR		2	/* imsg pipe to the RTR engine */
#define PFD_SOCK_ROUTE		3	/* fd handed back by kr_init() */
#define PFD_SOCK_PFKEY		4	/* fd returned by pfkey_init() */
#define POLL_MAX		5	/* number of slots above */
/* longest time, in seconds, the main loop sleeps in poll() */
#define MAX_TIMEOUT		3600

/* BGPD_OPT_* bits collected from the command line by main() */
int	 cmd_opts;
104
105int
106main(int argc, char *argv[])
107{
108	struct bgpd_config	*conf;
109	enum bgpd_process	 proc = PROC_MAIN;
110	struct rde_rib		*rr;
111	struct peer		*p;
112	struct pollfd		 pfd[POLL_MAX];
113	time_t			 timeout;
114	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
115	char			*conffile;
116	char			*saved_argv0;
117	int			 debug = 0;
118	int			 rfd, keyfd;
119	int			 ch, status;
120	int			 pipe_m2s[2];
121	int			 pipe_m2r[2];
122	int			 pipe_m2roa[2];
123
124	conffile = CONFFILE;
125
126	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
127	log_procinit(log_procnames[PROC_MAIN]);
128	log_setverbose(1);
129
130	saved_argv0 = argv[0];
131	if (saved_argv0 == NULL)
132		saved_argv0 = "bgpd";
133
134	while ((ch = getopt(argc, argv, "cdD:f:nRSTv")) != -1) {
135		switch (ch) {
136		case 'c':
137			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
138			break;
139		case 'd':
140			debug = 1;
141			break;
142		case 'D':
143			if (cmdline_symset(optarg) < 0)
144				log_warnx("could not parse macro definition %s",
145				    optarg);
146			break;
147		case 'f':
148			conffile = optarg;
149			break;
150		case 'n':
151			cmd_opts |= BGPD_OPT_NOACTION;
152			break;
153		case 'v':
154			if (cmd_opts & BGPD_OPT_VERBOSE)
155				cmd_opts |= BGPD_OPT_VERBOSE2;
156			cmd_opts |= BGPD_OPT_VERBOSE;
157			break;
158		case 'R':
159			proc = PROC_RDE;
160			break;
161		case 'S':
162			proc = PROC_SE;
163			break;
164		case 'T':
165			proc = PROC_RTR;
166			break;
167		default:
168			usage();
169			/* NOTREACHED */
170		}
171	}
172
173	argc -= optind;
174	argv += optind;
175	if (argc > 0)
176		usage();
177
178	if (cmd_opts & BGPD_OPT_NOACTION) {
179		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
180			exit(1);
181
182		if (cmd_opts & BGPD_OPT_VERBOSE)
183			print_config(conf, &ribnames);
184		else
185			fprintf(stderr, "configuration OK\n");
186
187		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
188			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
189			free(rr);
190		}
191		free_config(conf);
192		exit(0);
193	}
194
195	switch (proc) {
196	case PROC_MAIN:
197		break;
198	case PROC_RDE:
199		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
200		/* NOTREACHED */
201	case PROC_SE:
202		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
203		/* NOTREACHED */
204	case PROC_RTR:
205		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
206		/* NOTREACHED */
207	}
208
209	if (geteuid())
210		errx(1, "need root privileges");
211
212	if (getpwnam(BGPD_USER) == NULL)
213		errx(1, "unknown user %s", BGPD_USER);
214
215	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
216		log_warnx("config file %s has errors", conffile);
217		exit(1);
218	}
219
220	if (prepare_listeners(conf) == -1)
221		exit(1);
222
223	log_init(debug, LOG_DAEMON);
224	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
225
226	if (!debug)
227		daemon(1, 0);
228
229	log_info("startup");
230
231	getsockpair(pipe_m2s);
232	getsockpair(pipe_m2r);
233	getsockpair(pipe_m2roa);
234
235	/* fork children */
236	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
237	    cmd_opts & BGPD_OPT_VERBOSE);
238	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
239	    cmd_opts & BGPD_OPT_VERBOSE);
240	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
241	    cmd_opts & BGPD_OPT_VERBOSE);
242
243	signal(SIGTERM, sighdlr);
244	signal(SIGINT, sighdlr);
245	signal(SIGHUP, sighdlr);
246	signal(SIGALRM, sighdlr);
247	signal(SIGUSR1, sighdlr);
248	signal(SIGPIPE, SIG_IGN);
249
250	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
251	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
252	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
253		fatal(NULL);
254	imsg_init(ibuf_se, pipe_m2s[0]);
255	imsg_init(ibuf_rde, pipe_m2r[0]);
256	imsg_init(ibuf_rtr, pipe_m2roa[0]);
257	mrt_init(ibuf_rde, ibuf_se);
258	if (kr_init(&rfd) == -1)
259		quit = 1;
260	keyfd = pfkey_init();
261
262	/*
263	 * rpath, read config file
264	 * cpath, unlink control socket
265	 * fattr, chmod on control socket
266	 * wpath, needed if we are doing mrt dumps
267	 *
268	 * pledge placed here because kr_init() does a setsockopt on the
269	 * routing socket thats not allowed at all.
270	 */
271#if 0
272	/*
273	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
274	 * this needs some redesign of bgpd to be fixed.
275	 */
276BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
277	    NULL) == -1)
278		fatal("pledge");
279#endif
280
281	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
282		fatal("could not establish imsg links");
283	/* control setup needs to happen late since it sends imsgs */
284	if (control_setup(conf) == -1)
285		quit = 1;
286	if (send_config(conf) != 0)
287		quit = 1;
288	if (pftable_clear_all() != 0)
289		quit = 1;
290
291	while (quit == 0) {
292		bzero(pfd, sizeof(pfd));
293
294		timeout = mrt_timeout(conf->mrt);
295
296		pfd[PFD_SOCK_ROUTE].fd = rfd;
297		pfd[PFD_SOCK_ROUTE].events = POLLIN;
298
299		pfd[PFD_SOCK_PFKEY].fd = keyfd;
300		pfd[PFD_SOCK_PFKEY].events = POLLIN;
301
302		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
303		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
304		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
305
306		if (timeout < 0 || timeout > MAX_TIMEOUT)
307			timeout = MAX_TIMEOUT;
308		if (poll(pfd, POLL_MAX, timeout * 1000) == -1)
309			if (errno != EINTR) {
310				log_warn("poll error");
311				quit = 1;
312			}
313
314		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
315			log_warnx("main: Lost connection to SE");
316			msgbuf_clear(&ibuf_se->w);
317			free(ibuf_se);
318			ibuf_se = NULL;
319			quit = 1;
320		} else {
321			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
322			    -1)
323				quit = 1;
324		}
325
326		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
327			log_warnx("main: Lost connection to RDE");
328			msgbuf_clear(&ibuf_rde->w);
329			free(ibuf_rde);
330			ibuf_rde = NULL;
331			quit = 1;
332		} else {
333			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
334				quit = 1;
335		}
336
337		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
338			log_warnx("main: Lost connection to RTR");
339			msgbuf_clear(&ibuf_rtr->w);
340			free(ibuf_rtr);
341			ibuf_rtr = NULL;
342			quit = 1;
343		} else {
344			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
345				quit = 1;
346		}
347
348		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
349			if (kr_dispatch_msg(conf->default_tableid) == -1)
350				quit = 1;
351		}
352
353		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
354			if (pfkey_read(keyfd, NULL) == -1) {
355				log_warnx("pfkey_read failed, exiting...");
356				quit = 1;
357			}
358		}
359
360		if (reconfig) {
361			u_int	error;
362
363			reconfig = 0;
364			switch (reconfigure(conffile, conf)) {
365			case -1:	/* fatal error */
366				quit = 1;
367				break;
368			case 0:		/* all OK */
369				error = 0;
370				break;
371			case 2:
372				log_info("previous reload still running");
373				error = CTL_RES_PENDING;
374				break;
375			default:	/* parse error */
376				log_warnx("config file %s has errors, "
377				    "not reloading", conffile);
378				error = CTL_RES_PARSE_ERROR;
379				break;
380			}
381			if (reconfpid != 0) {
382				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
383				    &error, sizeof(error));
384				reconfpid = 0;
385			}
386		}
387
388		if (mrtdump) {
389			mrtdump = 0;
390			mrt_handler(conf->mrt);
391		}
392	}
393
394	/* close pipes */
395	if (ibuf_se) {
396		msgbuf_clear(&ibuf_se->w);
397		close(ibuf_se->fd);
398		free(ibuf_se);
399		ibuf_se = NULL;
400	}
401	if (ibuf_rde) {
402		msgbuf_clear(&ibuf_rde->w);
403		close(ibuf_rde->fd);
404		free(ibuf_rde);
405		ibuf_rde = NULL;
406	}
407	if (ibuf_rtr) {
408		msgbuf_clear(&ibuf_rtr->w);
409		close(ibuf_rtr->fd);
410		free(ibuf_rtr);
411		ibuf_rtr = NULL;
412	}
413
414	/* cleanup kernel data structures */
415	carp_demote_shutdown();
416	kr_shutdown(conf->fib_priority, conf->default_tableid);
417	pftable_clear_all();
418
419	RB_FOREACH(p, peer_head, &conf->peers)
420		pfkey_remove(p);
421
422	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
423		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
424		free(rr);
425	}
426	free_config(conf);
427
428	log_debug("waiting for children to terminate");
429	do {
430		pid = wait(&status);
431		if (pid == -1) {
432			if (errno != EINTR && errno != ECHILD)
433				fatal("wait");
434		} else if (WIFSIGNALED(status)) {
435			char *name = "unknown process";
436			if (pid == rde_pid)
437				name = "route decision engine";
438			else if (pid == se_pid)
439				name = "session engine";
440			else if (pid == rtr_pid)
441				name = "rtr engine";
442			log_warnx("%s terminated; signal %d", name,
443				WTERMSIG(status));
444		}
445	} while (pid != -1 || (pid == -1 && errno == EINTR));
446
447	free(rcname);
448	free(cname);
449
450	log_info("terminating");
451	return (0);
452}
453
454pid_t
455start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
456{
457	char *argv[5];
458	int argc = 0;
459	pid_t pid;
460
461	switch (pid = fork()) {
462	case -1:
463		fatal("cannot fork");
464	case 0:
465		break;
466	default:
467		close(fd);
468		return (pid);
469	}
470
471	if (fd != 3) {
472		if (dup2(fd, 3) == -1)
473			fatal("cannot setup imsg fd");
474	} else if (fcntl(fd, F_SETFD, 0) == -1)
475		fatal("cannot setup imsg fd");
476
477	argv[argc++] = argv0;
478	switch (p) {
479	case PROC_MAIN:
480		fatalx("Can not start main process");
481	case PROC_RDE:
482		argv[argc++] = "-R";
483		break;
484	case PROC_SE:
485		argv[argc++] = "-S";
486		break;
487	case PROC_RTR:
488		argv[argc++] = "-T";
489		break;
490	}
491	if (debug)
492		argv[argc++] = "-d";
493	if (verbose)
494		argv[argc++] = "-v";
495	argv[argc++] = NULL;
496
497	execvp(argv0, argv);
498	fatal("execvp");
499}
500
501int
502send_filterset(struct imsgbuf *i, struct filter_set_head *set)
503{
504	struct filter_set	*s;
505
506	TAILQ_FOREACH(s, set, entry)
507		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
508		    sizeof(struct filter_set)) == -1)
509			return (-1);
510	return (0);
511}
512
513int
514reconfigure(char *conffile, struct bgpd_config *conf)
515{
516	struct bgpd_config	*new_conf;
517
518	if (reconfpending)
519		return (2);
520
521	log_info("rereading config");
522	if ((new_conf = parse_config(conffile, &conf->peers,
523	    &conf->rtrs)) == NULL)
524		return (1);
525
526	merge_config(conf, new_conf);
527
528	if (prepare_listeners(conf) == -1) {
529		return (1);
530	}
531
532	if (control_setup(conf) == -1) {
533		return (1);
534	}
535
536	return send_config(conf);
537}
538
539int
540send_config(struct bgpd_config *conf)
541{
542	struct peer		*p;
543	struct filter_rule	*r;
544	struct listen_addr	*la;
545	struct rde_rib		*rr;
546	struct l3vpn		*vpn;
547	struct as_set		*aset;
548	struct prefixset	*ps;
549	struct prefixset_item	*psi, *npsi;
550	struct roa		*roa, *nroa;
551	struct rtr_config	*rtr;
552
553	reconfpending = 3;	/* one per child */
554
555	expand_networks(conf);
556
557	cflags = conf->flags;
558
559	/* start reconfiguration */
560	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
561	    conf, sizeof(*conf)) == -1)
562		return (-1);
563	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
564	    conf, sizeof(*conf)) == -1)
565		return (-1);
566	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
567	    conf, sizeof(*conf)) == -1)
568		return (-1);
569
570	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
571		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
572		    la, sizeof(*la)) == -1)
573			return (-1);
574		la->fd = -1;
575	}
576
577	/* adjust fib syncing on reload */
578	ktable_preload();
579
580	/* RIBs for the RDE */
581	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
582		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
583		if (ktable_update(rr->rtableid, rr->name, rr->flags,
584		    conf->fib_priority) == -1) {
585			log_warnx("failed to load rdomain %d",
586			    rr->rtableid);
587			return (-1);
588		}
589		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
590		    rr, sizeof(*rr)) == -1)
591			return (-1);
592		free(rr);
593	}
594
595	/* send peer list to the SE */
596	RB_FOREACH(p, peer_head, &conf->peers) {
597		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
598		    &p->conf, sizeof(p->conf)) == -1)
599			return (-1);
600
601		if (p->reconf_action == RECONF_REINIT)
602			if (pfkey_establish(p) == -1)
603				log_peer_warnx(&p->conf, "pfkey setup failed");
604	}
605
606	/* networks go via kroute to the RDE */
607	kr_net_reload(conf->default_tableid, 0, &conf->networks);
608
609	/* prefixsets for filters in the RDE */
610	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
611		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
612		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
613		    ps->name, sizeof(ps->name)) == -1)
614			return (-1);
615		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
616			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
617			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
618			    0, 0, -1, psi, sizeof(*psi)) == -1)
619				return (-1);
620			free(psi);
621		}
622		free(ps);
623	}
624
625	/* originsets for filters in the RDE */
626	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
627		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
628		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
629		    ps->name, sizeof(ps->name)) == -1)
630			return (-1);
631		RB_FOREACH_SAFE(roa, roa_tree, &ps->roaitems, nroa) {
632			RB_REMOVE(roa_tree, &ps->roaitems, roa);
633			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
634			    -1, roa, sizeof(*roa)) == -1)
635				return (-1);
636			free(roa);
637		}
638		free(ps);
639	}
640
641	/* roa table and rtr config are sent to the RTR engine */
642	RB_FOREACH_SAFE(roa, roa_tree, &conf->roa, nroa) {
643		RB_REMOVE(roa_tree, &conf->roa, roa);
644		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
645		    -1, roa, sizeof(*roa)) == -1)
646			return (-1);
647		free(roa);
648	}
649	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
650		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
651		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
652			return (-1);
653	}
654
655	/* as-sets for filters in the RDE */
656	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
657		struct ibuf *wbuf;
658		u_int32_t *as;
659		size_t i, l, n;
660
661		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);
662
663		as = set_get(aset->set, &n);
664		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
665		    sizeof(n) + sizeof(aset->name))) == NULL)
666			return -1;
667		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
668		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
669			return -1;
670		imsg_close(ibuf_rde, wbuf);
671
672		for (i = 0; i < n; i += l) {
673			l = (n - i > 1024 ? 1024 : n - i);
674			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
675			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
676				return -1;
677		}
678
679		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
680		    NULL, 0) == -1)
681			return -1;
682
683		set_free(aset->set);
684		free(aset);
685	}
686
687	/* filters for the RDE */
688	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
689		TAILQ_REMOVE(conf->filters, r, entry);
690		if (send_filterset(ibuf_rde, &r->set) == -1)
691			return (-1);
692		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
693		    r, sizeof(struct filter_rule)) == -1)
694			return (-1);
695		filterset_free(&r->set);
696		free(r);
697	}
698
699	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
700		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
701		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags,
702		    conf->fib_priority) == -1) {
703			log_warnx("failed to load rdomain %d",
704			    vpn->rtableid);
705			return (-1);
706		}
707		/* networks go via kroute to the RDE */
708		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);
709
710		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
711		    vpn, sizeof(*vpn)) == -1)
712			return (-1);
713
714		/* export targets */
715		if (send_filterset(ibuf_rde, &vpn->export) == -1)
716			return (-1);
717		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
718		    -1, NULL, 0) == -1)
719			return (-1);
720		filterset_free(&vpn->export);
721
722		/* import targets */
723		if (send_filterset(ibuf_rde, &vpn->import) == -1)
724			return (-1);
725		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
726		    -1, NULL, 0) == -1)
727			return (-1);
728		filterset_free(&vpn->import);
729
730		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
731		    -1, NULL, 0) == -1)
732			return (-1);
733
734		free(vpn);
735	}
736
737	/* send a drain message to know when all messages where processed */
738	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
739		return (-1);
740	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
741		return (-1);
742	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
743		return (-1);
744
745	/* mrt changes can be sent out of bound */
746	mrt_reconfigure(conf->mrt);
747	return (0);
748}
749
750int
751dispatch_imsg(struct imsgbuf *ibuf, int idx, struct bgpd_config *conf)
752{
753	struct imsg		 imsg;
754	struct peer		*p;
755	struct rtr_config	*r;
756	ssize_t			 n;
757	int			 rv, verbose;
758
759	rv = 0;
760	while (ibuf) {
761		if ((n = imsg_get(ibuf, &imsg)) == -1)
762			return (-1);
763
764		if (n == 0)
765			break;
766
767		switch (imsg.hdr.type) {
768		case IMSG_KROUTE_CHANGE:
769			if (idx != PFD_PIPE_RDE)
770				log_warnx("route request not from RDE");
771			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
772			    sizeof(struct kroute_full))
773				log_warnx("wrong imsg len");
774			else if (kr_change(imsg.hdr.peerid, imsg.data,
775			    conf->fib_priority))
776				rv = -1;
777			break;
778		case IMSG_KROUTE_DELETE:
779			if (idx != PFD_PIPE_RDE)
780				log_warnx("route request not from RDE");
781			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
782			    sizeof(struct kroute_full))
783				log_warnx("wrong imsg len");
784			else if (kr_delete(imsg.hdr.peerid, imsg.data,
785			    conf->fib_priority))
786				rv = -1;
787			break;
788		case IMSG_KROUTE_FLUSH:
789			if (idx != PFD_PIPE_RDE)
790				log_warnx("route request not from RDE");
791			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
792				log_warnx("wrong imsg len");
793			else if (kr_flush(imsg.hdr.peerid))
794				rv = -1;
795			break;
796		case IMSG_NEXTHOP_ADD:
797			if (idx != PFD_PIPE_RDE)
798				log_warnx("nexthop request not from RDE");
799			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
800			    sizeof(struct bgpd_addr))
801				log_warnx("wrong imsg len");
802			else if (kr_nexthop_add(imsg.hdr.peerid, imsg.data,
803			    conf) == -1)
804				rv = -1;
805			break;
806		case IMSG_NEXTHOP_REMOVE:
807			if (idx != PFD_PIPE_RDE)
808				log_warnx("nexthop request not from RDE");
809			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
810			    sizeof(struct bgpd_addr))
811				log_warnx("wrong imsg len");
812			else
813				kr_nexthop_delete(imsg.hdr.peerid, imsg.data,
814				    conf);
815			break;
816		case IMSG_PFTABLE_ADD:
817			if (idx != PFD_PIPE_RDE)
818				log_warnx("pftable request not from RDE");
819			else
820				if (imsg.hdr.len != IMSG_HEADER_SIZE +
821				    sizeof(struct pftable_msg))
822					log_warnx("wrong imsg len");
823				else if (pftable_addr_add(imsg.data) != 0)
824					rv = -1;
825			break;
826		case IMSG_PFTABLE_REMOVE:
827			if (idx != PFD_PIPE_RDE)
828				log_warnx("pftable request not from RDE");
829			else
830				if (imsg.hdr.len != IMSG_HEADER_SIZE +
831				    sizeof(struct pftable_msg))
832					log_warnx("wrong imsg len");
833				else if (pftable_addr_remove(imsg.data) != 0)
834					rv = -1;
835			break;
836		case IMSG_PFTABLE_COMMIT:
837			if (idx != PFD_PIPE_RDE)
838				log_warnx("pftable request not from RDE");
839			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
840				log_warnx("wrong imsg len");
841			else if (pftable_commit() != 0)
842				rv = -1;
843			break;
844		case IMSG_PFKEY_RELOAD:
845			if (idx != PFD_PIPE_SESSION) {
846				log_warnx("pfkey reload request not from SE");
847				break;
848			}
849			p = getpeerbyid(conf, imsg.hdr.peerid);
850			if (p != NULL) {
851				if (pfkey_establish(p) == -1)
852					log_peer_warnx(&p->conf,
853					    "pfkey setup failed");
854			}
855			break;
856		case IMSG_CTL_RELOAD:
857			if (idx != PFD_PIPE_SESSION)
858				log_warnx("reload request not from SE");
859			else {
860				reconfig = 1;
861				reconfpid = imsg.hdr.pid;
862				if (imsg.hdr.len == IMSG_HEADER_SIZE +
863				    REASON_LEN && ((char *)imsg.data)[0])
864					log_info("reload due to: %s",
865					    log_reason(imsg.data));
866			}
867			break;
868		case IMSG_CTL_FIB_COUPLE:
869			if (idx != PFD_PIPE_SESSION)
870				log_warnx("couple request not from SE");
871			else
872				kr_fib_couple(imsg.hdr.peerid,
873				    conf->fib_priority);
874			break;
875		case IMSG_CTL_FIB_DECOUPLE:
876			if (idx != PFD_PIPE_SESSION)
877				log_warnx("decouple request not from SE");
878			else
879				kr_fib_decouple(imsg.hdr.peerid,
880				    conf->fib_priority);
881			break;
882		case IMSG_CTL_KROUTE:
883		case IMSG_CTL_KROUTE_ADDR:
884		case IMSG_CTL_SHOW_NEXTHOP:
885		case IMSG_CTL_SHOW_INTERFACE:
886		case IMSG_CTL_SHOW_FIB_TABLES:
887			if (idx != PFD_PIPE_SESSION)
888				log_warnx("kroute request not from SE");
889			else
890				kr_show_route(&imsg);
891			break;
892		case IMSG_IFINFO:
893			if (idx != PFD_PIPE_SESSION)
894				log_warnx("IFINFO request not from SE");
895			else if (imsg.hdr.len != IMSG_HEADER_SIZE + IFNAMSIZ)
896				log_warnx("IFINFO request with wrong len");
897			else
898				kr_ifinfo(imsg.data);
899			break;
900		case IMSG_DEMOTE:
901			if (idx != PFD_PIPE_SESSION)
902				log_warnx("demote request not from SE");
903			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
904			    sizeof(struct demote_msg))
905				log_warnx("DEMOTE request with wrong len");
906			else {
907				struct demote_msg	*msg;
908
909				msg = imsg.data;
910				carp_demote_set(msg->demote_group, msg->level);
911			}
912			break;
913		case IMSG_CTL_LOG_VERBOSE:
914			/* already checked by SE */
915			memcpy(&verbose, imsg.data, sizeof(verbose));
916			log_setverbose(verbose);
917			break;
918		case IMSG_RECONF_DONE:
919			if (reconfpending == 0) {
920				log_warnx("unexpected RECONF_DONE received");
921				break;
922			}
923			if (idx == PFD_PIPE_SESSION) {
924				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
925				    0, -1, NULL, 0);
926			} else if (idx == PFD_PIPE_RTR) {
927				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
928				    0, -1, NULL, 0);
929
930				/* finally fix kroute information */
931				ktable_postload(conf->fib_priority);
932
933				/* redistribute list needs to be reloaded too */
934				kr_reload();
935			}
936			reconfpending--;
937			break;
938		case IMSG_RECONF_DRAIN:
939			if (reconfpending == 0) {
940				log_warnx("unexpected RECONF_DRAIN received");
941				break;
942			}
943			reconfpending--;
944			if (reconfpending == 0) {
945				/*
946				 * SE goes first to bring templated neighbors
947				 * in sync.
948				 */
949				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
950				    0, -1, NULL, 0);
951				reconfpending = 3; /* expecting 2 DONE msg */
952			}
953			break;
954		case IMSG_SOCKET_CONN:
955			if (idx != PFD_PIPE_RTR) {
956				log_warnx("connect request not from RTR");
957			} else {
958				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
959					if (imsg.hdr.peerid == r->id)
960						break;
961				}
962				if (r == NULL)
963					log_warnx("unknown rtr id %d",
964					    imsg.hdr.peerid);
965				else
966					bgpd_rtr_connect(r);
967			}
968			break;
969		case IMSG_CTL_SHOW_RTR:
970			if (idx == PFD_PIPE_SESSION) {
971				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
972					imsg_compose(ibuf_rtr, imsg.hdr.type,
973					    r->id, imsg.hdr.pid, -1, NULL, 0);
974				}
975				imsg_compose(ibuf_rtr, IMSG_CTL_END,
976				    0, imsg.hdr.pid, -1, NULL, 0);
977			} else if (imsg.hdr.len != IMSG_HEADER_SIZE +
978			    sizeof(struct ctl_show_rtr)) {
979				log_warnx("IMSG_CTL_SHOW_RTR with wrong len");
980			} else if (idx == PFD_PIPE_RTR) {
981				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
982					if (imsg.hdr.peerid == r->id)
983						break;
984				}
985				if (r != NULL) {
986					struct ctl_show_rtr *msg;
987					msg = imsg.data;
988					strlcpy(msg->descr, r->descr,
989					    sizeof(msg->descr));
990					msg->local_addr = r->local_addr;
991					msg->remote_addr = r->remote_addr;
992					msg->remote_port = r->remote_port;
993
994					imsg_compose(ibuf_se, imsg.hdr.type,
995					    imsg.hdr.peerid, imsg.hdr.pid,
996					    -1, imsg.data,
997					    imsg.hdr.len - IMSG_HEADER_SIZE);
998				}
999			}
1000			break;
1001		case IMSG_CTL_END:
1002		case IMSG_CTL_SHOW_TIMER:
1003			if (idx != PFD_PIPE_RTR) {
1004				log_warnx("connect request not from RTR");
1005				break;
1006			}
1007			imsg_compose(ibuf_se, imsg.hdr.type, imsg.hdr.peerid,
1008			    imsg.hdr.pid, -1, imsg.data,
1009			    imsg.hdr.len - IMSG_HEADER_SIZE);
1010			break;
1011		default:
1012			break;
1013		}
1014		imsg_free(&imsg);
1015		if (rv != 0)
1016			return (rv);
1017	}
1018	return (0);
1019}
1020
1021void
1022send_nexthop_update(struct kroute_nexthop *msg)
1023{
1024	char	*gw = NULL;
1025
1026	if (msg->gateway.aid)
1027		if (asprintf(&gw, ": via %s",
1028		    log_addr(&msg->gateway)) == -1) {
1029			log_warn("send_nexthop_update");
1030			quit = 1;
1031		}
1032
1033	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1034	    msg->valid ? "valid" : "invalid",
1035	    msg->connected ? ": directly connected" : "",
1036	    msg->gateway.aid ? gw : "");
1037
1038	free(gw);
1039
1040	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1041	    msg, sizeof(struct kroute_nexthop)) == -1)
1042		quit = 1;
1043}
1044
1045void
1046send_imsg_session(int type, pid_t pid, void *data, u_int16_t datalen)
1047{
1048	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1049}
1050
1051int
1052send_network(int type, struct network_config *net, struct filter_set_head *h)
1053{
1054	if (quit)
1055		return (0);
1056	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1057	    sizeof(struct network_config)) == -1)
1058		return (-1);
1059	/* networks that get deleted don't need to send the filter set */
1060	if (type == IMSG_NETWORK_REMOVE)
1061		return (0);
1062	if (send_filterset(ibuf_rde, h) == -1)
1063		return (-1);
1064	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1065		return (-1);
1066
1067	return (0);
1068}
1069
1070int
1071bgpd_filternexthop(struct kroute *kr, struct kroute6 *kr6)
1072{
1073	/* kernel routes are never filtered */
1074	if (kr && kr->flags & F_KERNEL && kr->prefixlen != 0)
1075		return (0);
1076	if (kr6 && kr6->flags & F_KERNEL && kr6->prefixlen != 0)
1077		return (0);
1078
1079	if (cflags & BGPD_FLAG_NEXTHOP_BGP) {
1080		if (kr && kr->flags & F_BGPD_INSERTED)
1081			return (0);
1082		if (kr6 && kr6->flags & F_BGPD_INSERTED)
1083			return (0);
1084	}
1085
1086	if (cflags & BGPD_FLAG_NEXTHOP_DEFAULT) {
1087		if (kr && kr->prefixlen == 0)
1088			return (0);
1089		if (kr6 && kr6->prefixlen == 0)
1090			return (0);
1091	}
1092
1093	return (1);
1094}
1095
1096int
1097control_setup(struct bgpd_config *conf)
1098{
1099	int fd, restricted;
1100
1101	/* control socket is outside chroot */
1102	if (!cname || strcmp(cname, conf->csock)) {
1103		if (cname) {
1104			free(cname);
1105		}
1106		if ((cname = strdup(conf->csock)) == NULL)
1107			fatal("strdup");
1108		if (control_check(cname) == -1)
1109			return (-1);
1110		if ((fd = control_init(0, cname)) == -1)
1111			fatalx("control socket setup failed");
1112		if (control_listen(fd) == -1)
1113			fatalx("control socket setup failed");
1114		restricted = 0;
1115		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1116		    &restricted, sizeof(restricted)) == -1)
1117			return (-1);
1118	}
1119	if (!conf->rcsock) {
1120		/* remove restricted socket */
1121		free(rcname);
1122		rcname = NULL;
1123	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1124		if (rcname) {
1125			free(rcname);
1126		}
1127		if ((rcname = strdup(conf->rcsock)) == NULL)
1128			fatal("strdup");
1129		if (control_check(rcname) == -1)
1130			return (-1);
1131		if ((fd = control_init(1, rcname)) == -1)
1132			fatalx("control socket setup failed");
1133		if (control_listen(fd) == -1)
1134			fatalx("control socket setup failed");
1135		restricted = 1;
1136		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1137		    &restricted, sizeof(restricted)) == -1)
1138			return (-1);
1139	}
1140	return (0);
1141}
1142
1143void
1144set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1145{
1146	if (i == NULL || i->fd == -1) {
1147		pfd->fd = -1;
1148		return;
1149	}
1150	pfd->fd = i->fd;
1151	pfd->events = POLLIN;
1152	if (i->w.queued > 0)
1153		pfd->events |= POLLOUT;
1154}
1155
1156int
1157handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1158{
1159	ssize_t n;
1160
1161	if (i == NULL)
1162		return (0);
1163
1164	if (pfd->revents & POLLOUT)
1165		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1166			log_warn("imsg write error");
1167			close(i->fd);
1168			i->fd = -1;
1169			return (-1);
1170		}
1171
1172	if (pfd->revents & POLLIN) {
1173		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1174			log_warn("imsg read error");
1175			close(i->fd);
1176			i->fd = -1;
1177			return (-1);
1178		}
1179		if (n == 0) {
1180			log_warnx("peer closed imsg connection");
1181			close(i->fd);
1182			i->fd = -1;
1183			return (-1);
1184		}
1185	}
1186	return (0);
1187}
1188
1189static void
1190getsockpair(int pipe[2])
1191{
1192	int bsize, i;
1193
1194	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1195	    PF_UNSPEC, pipe) == -1)
1196		fatal("socketpair");
1197
1198	for (i = 0; i < 2; i++) {
1199		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1200			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1201			    &bsize, sizeof(bsize)) == -1) {
1202				if (errno != ENOBUFS)
1203					fatal("setsockopt(SO_RCVBUF, %d)",
1204					    bsize);
1205				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1206				continue;
1207			}
1208			break;
1209		}
1210	}
1211	for (i = 0; i < 2; i++) {
1212		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1213			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1214			    &bsize, sizeof(bsize)) == -1) {
1215				if (errno != ENOBUFS)
1216					fatal("setsockopt(SO_SNDBUF, %d)",
1217					    bsize);
1218				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1219				continue;
1220			}
1221			break;
1222		}
1223	}
1224}
1225
1226int
1227imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *roa)
1228{
1229	int pipe_s2r[2];
1230	int pipe_s2r_ctl[2];
1231	int pipe_r2r[2];
1232
1233	getsockpair(pipe_s2r);
1234	getsockpair(pipe_s2r_ctl);
1235	getsockpair(pipe_r2r);
1236
1237	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1238	    NULL, 0) == -1)
1239		return (-1);
1240	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1241	    NULL, 0) == -1)
1242		return (-1);
1243
1244	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1245	    NULL, 0) == -1)
1246		return (-1);
1247	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1248	    NULL, 0) == -1)
1249		return (-1);
1250
1251	if (imsg_compose(roa, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1252	    NULL, 0) == -1)
1253		return (-1);
1254	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1255	    NULL, 0) == -1)
1256		return (-1);
1257
1258	return (0);
1259}
1260
1261void
1262bgpd_rtr_connect(struct rtr_config *r)
1263{
1264	socklen_t len;
1265	int fd;
1266
1267	/* XXX should be non-blocking */
1268	fd = socket(aid2af(r->remote_addr.aid), SOCK_STREAM, 0);
1269	if (fd == -1) {
1270		log_warn("rtr %s", r->descr);
1271		return;
1272	}
1273	if (r->local_addr.aid != AID_UNSPEC) {
1274		if (bind(fd,  addr2sa(&r->local_addr, 0, &len), len) == -1) {
1275			log_warn("rtr %s: bind to %s", r->descr,
1276			    log_addr(&r->local_addr));
1277			close(fd);
1278			return;
1279		}
1280	}
1281
1282	if (connect(fd, addr2sa(&r->remote_addr, r->remote_port, &len), len) ==
1283	    -1) {
1284		log_warn("rtr %s: connect to %s:%u", r->descr,
1285		    log_addr(&r->remote_addr), r->remote_port);
1286		close(fd);
1287		return;
1288	}
1289
1290	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, r->id, 0, fd, NULL, 0);
1291}
1292