bgpd.c revision 1.118
/*	$OpenBSD: bgpd.c,v 1.118 2005/05/23 22:48:53 henning Exp $ */

/*
 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18
19#include <sys/types.h>
20#include <sys/socket.h>
21#include <sys/wait.h>
22#include <netinet/in.h>
23#include <arpa/inet.h>
24#include <err.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <poll.h>
28#include <pwd.h>
29#include <signal.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34
35#include "mrt.h"
36#include "bgpd.h"
37#include "session.h"
38
39void		sighdlr(int);
40__dead void	usage(void);
41int		main(int, char *[]);
42int		check_child(pid_t, const char *);
43int		send_filterset(struct imsgbuf *, struct filter_set_head *,
44		    int, int);
45int		reconfigure(char *, struct bgpd_config *, struct mrt_head *,
46		    struct peer **, struct filter_head *);
47int		dispatch_imsg(struct imsgbuf *, int);
48
/* descriptor returned by kr_init(); polled in the PFD_SOCK_ROUTE slot */
int			 rfd = -1;
/* copy of the active configuration's flags, read by bgpd_redistribute() */
int			 cflags = 0;
/* filter sets of the active configuration used when redistributing */
struct filter_set_head	*connectset;
struct filter_set_head	*staticset;
/* flags set by sighdlr() and acted upon by the main loop */
volatile sig_atomic_t	 mrtdump = 0;
volatile sig_atomic_t	 quit = 0;
volatile sig_atomic_t	 reconfig = 0;
volatile sig_atomic_t	 sigchld = 0;
/* imsg buffers on the pipes to the session engine and to the RDE */
struct imsgbuf		*ibuf_se;
struct imsgbuf		*ibuf_rde;
59
60void
61sighdlr(int sig)
62{
63	switch (sig) {
64	case SIGTERM:
65	case SIGINT:
66		quit = 1;
67		break;
68	case SIGCHLD:
69		sigchld = 1;
70		break;
71	case SIGHUP:
72		reconfig = 1;
73		break;
74	case SIGALRM:
75	case SIGUSR1:
76		mrtdump = 1;
77		break;
78	}
79}
80
81__dead void
82usage(void)
83{
84	extern char *__progname;
85
86	fprintf(stderr, "usage: %s [-dnv] ", __progname);
87	fprintf(stderr, "[-D macro=value] [-f file]\n");
88	exit(1);
89}
90
/* slots of the pollfd array used by the main loop */
#define PFD_PIPE_SESSION	0	/* pipe to the session engine */
#define PFD_PIPE_ROUTE		1	/* pipe to the route decision engine */
#define PFD_SOCK_ROUTE		2	/* descriptor obtained from kr_init() */
#define POLL_MAX		3	/* number of slots */
/* upper bound, in seconds, for a single poll() timeout */
#define MAX_TIMEOUT		3600
96
97int
98main(int argc, char *argv[])
99{
100	struct bgpd_config	 conf;
101	struct peer		*peer_l, *p;
102	struct mrt_head		 mrt_l;
103	struct network_head	 net_l;
104	struct filter_head	*rules_l;
105	struct network		*net;
106	struct filter_rule	*r;
107	struct mrt		*m;
108	struct listen_addr	*la;
109	struct pollfd		 pfd[POLL_MAX];
110	pid_t			 io_pid = 0, rde_pid = 0, pid;
111	char			*conffile;
112	int			 debug = 0;
113	int			 ch, nfds, timeout;
114	int			 pipe_m2s[2];
115	int			 pipe_m2r[2];
116	int			 pipe_s2r[2];
117
118	conffile = CONFFILE;
119	bgpd_process = PROC_MAIN;
120
121	log_init(1);		/* log to stderr until daemonized */
122
123	if ((rules_l = calloc(1, sizeof(struct filter_head))) == NULL)
124		err(1, NULL);
125
126	bzero(&conf, sizeof(conf));
127	LIST_INIT(&mrt_l);
128	TAILQ_INIT(&net_l);
129	TAILQ_INIT(rules_l);
130	peer_l = NULL;
131
132	while ((ch = getopt(argc, argv, "dD:f:nv")) != -1) {
133		switch (ch) {
134		case 'd':
135			debug = 1;
136			break;
137		case 'D':
138			if (cmdline_symset(optarg) < 0)
139				log_warnx("could not parse macro definition %s",
140				    optarg);
141			break;
142		case 'f':
143			conffile = optarg;
144			break;
145		case 'n':
146			conf.opts |= BGPD_OPT_NOACTION;
147			break;
148		case 'v':
149			if (conf.opts & BGPD_OPT_VERBOSE)
150				conf.opts |= BGPD_OPT_VERBOSE2;
151			conf.opts |= BGPD_OPT_VERBOSE;
152			break;
153		default:
154			usage();
155			/* NOTREACHED */
156		}
157	}
158
159	if (parse_config(conffile, &conf, &mrt_l, &peer_l, &net_l, rules_l)) {
160		free(rules_l);
161		exit(1);
162	}
163
164	if (conf.opts & BGPD_OPT_NOACTION) {
165		if (conf.opts & BGPD_OPT_VERBOSE)
166			print_config(&conf, &net_l, peer_l, rules_l, &mrt_l);
167		else
168			fprintf(stderr, "configuration OK\n");
169		exit(0);
170	}
171	cflags = conf.flags;
172	connectset = &conf.connectset;
173	staticset = &conf.staticset;
174
175	if (geteuid())
176		errx(1, "need root privileges");
177
178	if (getpwnam(BGPD_USER) == NULL)
179		errx(1, "unknown user %s", BGPD_USER);
180
181	log_init(debug);
182
183	if (!debug)
184		daemon(1, 0);
185
186	log_info("startup");
187
188	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_m2s) == -1)
189		fatal("socketpair");
190	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_m2r) == -1)
191		fatal("socketpair");
192	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_s2r) == -1)
193		fatal("socketpair");
194	session_socket_blockmode(pipe_m2s[0], BM_NONBLOCK);
195	session_socket_blockmode(pipe_m2s[1], BM_NONBLOCK);
196	session_socket_blockmode(pipe_m2r[0], BM_NONBLOCK);
197	session_socket_blockmode(pipe_m2r[1], BM_NONBLOCK);
198	session_socket_blockmode(pipe_s2r[0], BM_NONBLOCK);
199	session_socket_blockmode(pipe_s2r[1], BM_NONBLOCK);
200
201	prepare_listeners(&conf);
202
203	/* fork children */
204	rde_pid = rde_main(&conf, peer_l, &net_l, rules_l, &mrt_l,
205	    pipe_m2r, pipe_s2r, pipe_m2s);
206	io_pid = session_main(&conf, peer_l, &net_l, rules_l, &mrt_l,
207	    pipe_m2s, pipe_s2r, pipe_m2r);
208
209	setproctitle("parent");
210
211	signal(SIGTERM, sighdlr);
212	signal(SIGINT, sighdlr);
213	signal(SIGCHLD, sighdlr);
214	signal(SIGHUP, sighdlr);
215	signal(SIGALRM, sighdlr);
216	signal(SIGUSR1, sighdlr);
217
218	close(pipe_m2s[1]);
219	close(pipe_m2r[1]);
220	close(pipe_s2r[0]);
221	close(pipe_s2r[1]);
222
223	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
224	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL)
225		fatal(NULL);
226	imsg_init(ibuf_se, pipe_m2s[0]);
227	imsg_init(ibuf_rde, pipe_m2r[0]);
228	mrt_init(ibuf_rde, ibuf_se);
229	if ((rfd = kr_init(!(conf.flags & BGPD_FLAG_NO_FIB_UPDATE))) == -1)
230		quit = 1;
231	if (pftable_clear_all() != 0)
232		quit = 1;
233
234	while ((net = TAILQ_FIRST(&net_l)) != NULL) {
235		TAILQ_REMOVE(&net_l, net, entry);
236		free(net);
237	}
238
239	while ((r = TAILQ_FIRST(rules_l)) != NULL) {
240		TAILQ_REMOVE(rules_l, r, entry);
241		free(r);
242	}
243	TAILQ_FOREACH(la, conf.listen_addrs, entry) {
244		close(la->fd);
245		la->fd = -1;
246	}
247
248	mrt_reconfigure(&mrt_l);
249
250	while (quit == 0) {
251		pfd[PFD_PIPE_SESSION].fd = ibuf_se->fd;
252		pfd[PFD_PIPE_SESSION].events = POLLIN;
253		if (ibuf_se->w.queued)
254			pfd[PFD_PIPE_SESSION].events |= POLLOUT;
255		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
256		pfd[PFD_PIPE_ROUTE].events = POLLIN;
257		if (ibuf_rde->w.queued)
258			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
259		pfd[PFD_SOCK_ROUTE].fd = rfd;
260		pfd[PFD_SOCK_ROUTE].events = POLLIN;
261
262		timeout = mrt_timeout(&mrt_l);
263		if (timeout > MAX_TIMEOUT)
264			timeout = MAX_TIMEOUT;
265
266		if ((nfds = poll(pfd, POLL_MAX, timeout * 1000)) == -1)
267			if (errno != EINTR) {
268				log_warn("poll error");
269				quit = 1;
270			}
271
272		if (nfds > 0 && (pfd[PFD_PIPE_SESSION].revents & POLLOUT))
273			if (msgbuf_write(&ibuf_se->w) < 0) {
274				log_warn("pipe write error (to SE)");
275				quit = 1;
276			}
277
278		if (nfds > 0 && (pfd[PFD_PIPE_ROUTE].revents & POLLOUT))
279			if (msgbuf_write(&ibuf_rde->w) < 0) {
280				log_warn("pipe write error (to RDE)");
281				quit = 1;
282			}
283
284		if (nfds > 0 && pfd[PFD_PIPE_SESSION].revents & POLLIN) {
285			nfds--;
286			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION) == -1)
287				quit = 1;
288		}
289
290		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
291			nfds--;
292			if (dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE) == -1)
293				quit = 1;
294		}
295
296		if (nfds > 0 && pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
297			nfds--;
298			if (kr_dispatch_msg() == -1)
299				quit = 1;
300		}
301
302		if (reconfig) {
303			reconfig = 0;
304			log_info("rereading config");
305			reconfigure(conffile, &conf, &mrt_l, &peer_l, rules_l);
306		}
307
308		if (sigchld) {
309			sigchld = 0;
310			if (check_child(io_pid, "session engine")) {
311				quit = 1;
312				io_pid = 0;
313			}
314			if (check_child(rde_pid, "route decision engine")) {
315				quit = 1;
316				rde_pid = 0;
317			}
318		}
319
320		if (mrtdump == 1) {
321			mrtdump = 0;
322			mrt_handler(&mrt_l);
323		}
324	}
325
326	signal(SIGCHLD, SIG_IGN);
327
328	if (io_pid)
329		kill(io_pid, SIGTERM);
330
331	if (rde_pid)
332		kill(rde_pid, SIGTERM);
333
334	while ((p = peer_l) != NULL) {
335		peer_l = p->next;
336		free(p);
337	}
338	while ((m = LIST_FIRST(&mrt_l)) != NULL) {
339		LIST_REMOVE(m, entry);
340		free(m);
341	}
342	while ((la = TAILQ_FIRST(conf.listen_addrs)) != NULL) {
343		TAILQ_REMOVE(conf.listen_addrs, la, entry);
344		close(la->fd);
345		free(la);
346	}
347
348	free(rules_l);
349	control_cleanup();
350	kr_shutdown();
351	pftable_clear_all();
352	free(conf.listen_addrs);
353
354	do {
355		if ((pid = wait(NULL)) == -1 &&
356		    errno != EINTR && errno != ECHILD)
357			fatal("wait");
358	} while (pid != -1 || (pid == -1 && errno == EINTR));
359
360	msgbuf_clear(&ibuf_se->w);
361	free(ibuf_se);
362	msgbuf_clear(&ibuf_rde->w);
363	free(ibuf_rde);
364
365	log_info("Terminating");
366	return (0);
367}
368
/*
 * Check without blocking whether child `pid' has terminated.
 *
 * Returns 1 and logs a warning naming `pname' when the child exited or
 * was killed by a signal, 0 in every other case (including waitpid()
 * reporting nothing or failing).
 */
int
check_child(pid_t pid, const char *pname)
{
	int	status;

	if (waitpid(pid, &status, WNOHANG) <= 0)
		return (0);

	if (WIFEXITED(status)) {
		log_warnx("Lost child: %s exited", pname);
		return (1);
	}

	if (WIFSIGNALED(status)) {
		log_warnx("Lost child: %s terminated; signal %d",
		    pname, WTERMSIG(status));
		return (1);
	}

	return (0);
}
388
389int
390send_filterset(struct imsgbuf *i, struct filter_set_head *set, int id, int f)
391{
392	struct filter_set	*s;
393
394	for (s = SIMPLEQ_FIRST(set); s != NULL; ) {
395		if (imsg_compose(i, IMSG_FILTER_SET, id, 0, -1, s,
396		    sizeof(struct filter_set)) == -1)
397			return (-1);
398		if (f) {
399			SIMPLEQ_REMOVE_HEAD(set, entry);
400			free(s);
401			s = SIMPLEQ_FIRST(set);
402		} else
403			s = SIMPLEQ_NEXT(s, entry);
404	}
405	return (0);
406}
407
408int
409reconfigure(char *conffile, struct bgpd_config *conf, struct mrt_head *mrt_l,
410    struct peer **peer_l, struct filter_head *rules_l)
411{
412	struct network_head	 net_l;
413	struct network		*n;
414	struct peer		*p;
415	struct filter_rule	*r;
416	struct listen_addr	*la;
417
418	if (parse_config(conffile, conf, mrt_l, peer_l, &net_l, rules_l)) {
419		log_warnx("config file %s has errors, not reloading",
420		    conffile);
421		return (-1);
422	}
423
424	cflags = conf->flags;
425	connectset = &conf->connectset;
426	staticset = &conf->staticset;
427
428	prepare_listeners(conf);
429
430	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
431	    conf, sizeof(struct bgpd_config)) == -1)
432		return (-1);
433	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
434	    conf, sizeof(struct bgpd_config)) == -1)
435		return (-1);
436	for (p = *peer_l; p != NULL; p = p->next) {
437		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
438		    &p->conf, sizeof(struct peer_config)) == -1)
439			return (-1);
440		if (send_filterset(ibuf_se, &p->conf.attrset,
441		    p->conf.id, 0) == -1)
442			return (-1);
443	}
444	while ((n = TAILQ_FIRST(&net_l)) != NULL) {
445		if (imsg_compose(ibuf_rde, IMSG_NETWORK_ADD, 0, 0, -1,
446		    &n->net, sizeof(struct network_config)) == -1)
447			return (-1);
448		if (send_filterset(ibuf_rde, &n->net.attrset, 0, 1) == -1)
449			return (-1);
450		if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1,
451		    NULL, 0) == -1)
452			return (-1);
453		TAILQ_REMOVE(&net_l, n, entry);
454		free(n);
455	}
456	/* redistribute list needs to be reloaded too */
457	if (kr_redist_reload() == -1)
458		return (-1);
459
460	while ((r = TAILQ_FIRST(rules_l)) != NULL) {
461		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
462		    r, sizeof(struct filter_rule)) == -1)
463			return (-1);
464		if (send_filterset(ibuf_rde, &r->set, 0, 1) == -1)
465			return (-1);
466		TAILQ_REMOVE(rules_l, r, entry);
467		free(r);
468	}
469	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
470		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
471		    la, sizeof(struct listen_addr)) == -1)
472			return (-1);
473		la->fd = -1;
474	}
475
476	if (imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0, 0, -1, NULL, 0) == -1 ||
477	    imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0, 0, -1, NULL, 0) == -1)
478		return (-1);
479
480	/* mrt changes can be sent out of bound */
481	mrt_reconfigure(mrt_l);
482	return (0);
483}
484
485int
486dispatch_imsg(struct imsgbuf *ibuf, int idx)
487{
488	struct imsg		 imsg;
489	int			 n;
490	int			 rv;
491
492	if ((n = imsg_read(ibuf)) == -1)
493		return (-1);
494
495	if (n == 0) {	/* connection closed */
496		log_warnx("dispatch_imsg in main: pipe closed");
497		return (-1);
498	}
499
500	rv = 0;
501	for (;;) {
502		if ((n = imsg_get(ibuf, &imsg)) == -1)
503			return (-1);
504
505		if (n == 0)
506			break;
507
508		switch (imsg.hdr.type) {
509		case IMSG_KROUTE_CHANGE:
510			if (idx != PFD_PIPE_ROUTE)
511				log_warnx("route request not from RDE");
512			else if (kr_change(imsg.data))
513				rv = -1;
514			break;
515		case IMSG_KROUTE_DELETE:
516			if (idx != PFD_PIPE_ROUTE)
517				log_warnx("route request not from RDE");
518			else if (kr_delete(imsg.data))
519				rv = -1;
520			break;
521		case IMSG_NEXTHOP_ADD:
522			if (idx != PFD_PIPE_ROUTE)
523				log_warnx("nexthop request not from RDE");
524			else
525				if (imsg.hdr.len != IMSG_HEADER_SIZE +
526				    sizeof(struct bgpd_addr))
527					log_warnx("wrong imsg len");
528				else if (kr_nexthop_add(imsg.data) == -1)
529					rv = -1;
530			break;
531		case IMSG_NEXTHOP_REMOVE:
532			if (idx != PFD_PIPE_ROUTE)
533				log_warnx("nexthop request not from RDE");
534			else
535				if (imsg.hdr.len != IMSG_HEADER_SIZE +
536				    sizeof(struct bgpd_addr))
537					log_warnx("wrong imsg len");
538				else
539					kr_nexthop_delete(imsg.data);
540			break;
541		case IMSG_PFTABLE_ADD:
542			if (idx != PFD_PIPE_ROUTE)
543				log_warnx("pftable request not from RDE");
544			else
545				if (imsg.hdr.len != IMSG_HEADER_SIZE +
546				    sizeof(struct pftable_msg))
547					log_warnx("wrong imsg len");
548				else if (pftable_addr_add(imsg.data) != 0)
549					rv = -1;
550			break;
551		case IMSG_PFTABLE_REMOVE:
552			if (idx != PFD_PIPE_ROUTE)
553				log_warnx("pftable request not from RDE");
554			else
555				if (imsg.hdr.len != IMSG_HEADER_SIZE +
556				    sizeof(struct pftable_msg))
557					log_warnx("wrong imsg len");
558				else if (pftable_addr_remove(imsg.data) != 0)
559					rv = -1;
560			break;
561		case IMSG_PFTABLE_COMMIT:
562			if (idx != PFD_PIPE_ROUTE)
563				log_warnx("pftable request not from RDE");
564			else
565				if (imsg.hdr.len != IMSG_HEADER_SIZE)
566					log_warnx("wrong imsg len");
567				else if (pftable_commit() != 0)
568					rv = -1;
569			break;
570		case IMSG_CTL_RELOAD:
571			if (idx != PFD_PIPE_SESSION)
572				log_warnx("reload request not from SE");
573			else
574				reconfig = 1;
575			break;
576		case IMSG_CTL_FIB_COUPLE:
577			if (idx != PFD_PIPE_SESSION)
578				log_warnx("couple request not from SE");
579			else
580				kr_fib_couple();
581			break;
582		case IMSG_CTL_FIB_DECOUPLE:
583			if (idx != PFD_PIPE_SESSION)
584				log_warnx("decouple request not from SE");
585			else
586				kr_fib_decouple();
587			break;
588		case IMSG_CTL_KROUTE:
589		case IMSG_CTL_KROUTE_ADDR:
590		case IMSG_CTL_SHOW_NEXTHOP:
591		case IMSG_CTL_SHOW_INTERFACE:
592			if (idx != PFD_PIPE_SESSION)
593				log_warnx("kroute request not from SE");
594			else
595				kr_show_route(&imsg);
596			break;
597		case IMSG_IFINFO:
598			if (idx != PFD_PIPE_SESSION)
599				log_warnx("IFINFO request not from SE");
600			else if (imsg.hdr.len != IMSG_HEADER_SIZE + IFNAMSIZ)
601				log_warnx("IFINFO request with wrong len");
602			else
603				kr_ifinfo(imsg.data);
604			break;
605		default:
606			break;
607		}
608		imsg_free(&imsg);
609		if (rv != 0)
610			return (rv);
611	}
612	return (0);
613}
614
615void
616send_nexthop_update(struct kroute_nexthop *msg)
617{
618	char	*gw = NULL;
619
620	if (msg->gateway.af)
621		if (asprintf(&gw, ": via %s",
622		    log_addr(&msg->gateway)) == -1) {
623			log_warn("send_nexthop_update");
624			quit = 1;
625		}
626
627	log_info("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
628	    msg->valid ? "valid" : "invalid",
629	    msg->connected ? ": directly connected" : "",
630	    msg->gateway.af ? gw : "");
631
632	free(gw);
633
634	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
635	    msg, sizeof(struct kroute_nexthop)) == -1)
636		quit = 1;
637}
638
639void
640send_imsg_session(int type, pid_t pid, void *data, u_int16_t datalen)
641{
642	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
643}
644
645int
646bgpd_redistribute(int type, struct kroute *kr)
647{
648	struct network_config	 net;
649	struct filter_set_head	*h;
650
651	if ((cflags & BGPD_FLAG_REDIST_CONNECTED) && (kr->flags & F_CONNECTED))
652		h = connectset;
653	else if ((cflags & BGPD_FLAG_REDIST_STATIC) && (kr->flags & F_STATIC))
654		h = staticset;
655	else
656		return (0);
657
658	bzero(&net, sizeof(net));
659	net.prefix.af = AF_INET;
660	net.prefix.v4.s_addr = kr->prefix.s_addr;
661	net.prefixlen = kr->prefixlen;
662
663	if (imsg_compose(ibuf_rde, type, 0, 0, -1, &net,
664	    sizeof(struct network_config)) == -1)
665		return (-1);
666
667	/* networks that get deleted don't need to send the filter set */
668	if (type == IMSG_NETWORK_REMOVE)
669		return (1);
670
671	if (send_filterset(ibuf_rde, h, 0, 0) == -1)
672		return (-1);
673	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
674		return (-1);
675
676	return (1);
677}
678
679