1/*	$OpenBSD: mrt.c,v 1.117 2024/05/22 08:41:14 claudio Exp $ */
2
3/*
4 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/queue.h>
21
22#include <errno.h>
23#include <fcntl.h>
24#include <limits.h>
25#include <stdlib.h>
26#include <string.h>
27#include <time.h>
28#include <unistd.h>
29
30#include "bgpd.h"
31#include "rde.h"
32#include "session.h"
33
34#include "mrt.h"
35#include "log.h"
36
/* Path attribute and table dump entry encoders. */
static int	mrt_attr_dump(struct ibuf *buf, struct rde_aspath *a,
		    struct rde_community *c, struct bgpd_addr *nexthop,
		    int v2);
static int	mrt_dump_entry_mp(struct mrt *mrt, struct prefix *p,
		    uint16_t snum, struct rde_peer *peer);
static int	mrt_dump_entry(struct mrt *mrt, struct prefix *p,
		    uint16_t snum, struct rde_peer *peer);
static int	mrt_dump_entry_v2(struct mrt *mrt, struct rib_entry *re,
		    uint32_t snum);
static int	mrt_dump_peer(struct ibuf *buf, struct rde_peer *peer);

/* MRT record header builders. */
static int	mrt_dump_hdr_se(struct ibuf **bp, struct peer *peer,
		    uint16_t type, uint16_t subtype, uint32_t len, int swap);
static int	mrt_dump_hdr_rde(struct ibuf **bp, uint16_t type,
		    uint16_t subtype, uint32_t len);

/* Dump file handling. */
static int	mrt_open(struct mrt *mrt, time_t now);
50
/*
 * Slots of the mrt_imsgbuf[] array and the mapping from an MRT dump type
 * to the slot its open/close messages are sent to: the three table dump
 * types map to RDEIDX, every other type maps to SEIDX.
 *
 * The argument is fully parenthesized so that any expression may be
 * passed; note that it may be evaluated up to three times.
 */
#define RDEIDX		0
#define SEIDX		1
#define TYPE2IDX(x)	(((x) == MRT_TABLE_DUMP ||			\
			    (x) == MRT_TABLE_DUMP_MP ||			\
			    (x) == MRT_TABLE_DUMP_V2) ? RDEIDX : SEIDX)
57
58static uint8_t
59mrt_update_msg_guess_aid(uint8_t *pkg, uint16_t pkglen)
60{
61	uint16_t wlen, alen, len, afi;
62	uint8_t type, aid;
63
64	pkg += MSGSIZE_HEADER;
65	pkglen -= MSGSIZE_HEADER;
66
67	if (pkglen < 4)
68		goto bad;
69
70	memcpy(&wlen, pkg, 2);
71	wlen = ntohs(wlen);
72	pkg += 2;
73	pkglen -= 2;
74
75	if (wlen > 0) {
76		/* UPDATE has withdraw routes, therefore IPv4 */
77		return AID_INET;
78	}
79
80	memcpy(&alen, pkg, 2);
81	alen = ntohs(alen);
82	pkg += 2;
83	pkglen -= 2;
84
85	if (alen < pkglen) {
86		/* UPDATE has NLRI prefixes, therefore IPv4 */
87		return AID_INET;
88	}
89
90	if (wlen == 0 && alen == 0) {
91		/* UPDATE is an IPv4 EoR marker */
92		return AID_INET;
93	}
94
95	/* bad attribute length */
96	if (alen > pkglen)
97		goto bad;
98
99	/* try to extract AFI/SAFI from the MP attributes */
100	while (alen > 0) {
101		if (alen < 3)
102			goto bad;
103		type = pkg[1];
104		if (pkg[0] & ATTR_EXTLEN) {
105			if (alen < 4)
106				goto bad;
107			memcpy(&len, pkg + 2, 2);
108			len = ntohs(len);
109			pkg += 4;
110			alen -= 4;
111		} else {
112			len = pkg[2];
113			pkg += 3;
114			alen -= 3;
115		}
116		if (len > alen)
117			goto bad;
118
119		if (type == ATTR_MP_REACH_NLRI ||
120		    type == ATTR_MP_UNREACH_NLRI) {
121			if (alen < 3)
122				goto bad;
123			memcpy(&afi, pkg, 2);
124			afi = ntohs(afi);
125			if (afi2aid(afi, pkg[2], &aid) == -1)
126				goto bad;
127			return aid;
128		}
129
130		pkg += len;
131		alen -= len;
132	}
133
134bad:
135	return AID_UNSPEC;
136}
137
138static uint16_t
139mrt_bgp_msg_subtype(struct mrt *mrt, void *pkg, uint16_t pkglen,
140    struct peer *peer, enum msg_type msgtype, int in)
141{
142	uint16_t subtype = BGP4MP_MESSAGE;
143	uint8_t aid, mask;
144
145	if (peer->capa.neg.as4byte)
146		subtype = BGP4MP_MESSAGE_AS4;
147
148	if (msgtype != UPDATE)
149		return subtype;
150
151	/*
152	 * RFC8050 adjust types for add-path enabled sessions.
153	 * It is necessary to extract the AID from UPDATES to decide
154	 * if the add-path types are needed or not. The ADDPATH
155	 * subtypes only matter for BGP UPDATES.
156	 */
157
158	mask = in ? CAPA_AP_RECV : CAPA_AP_SEND;
159	/* only guess if add-path could be active */
160	if (peer->capa.neg.add_path[0] & mask) {
161		aid = mrt_update_msg_guess_aid(pkg, pkglen);
162		if (aid != AID_UNSPEC &&
163		    (peer->capa.neg.add_path[aid] & mask)) {
164			if (peer->capa.neg.as4byte)
165				subtype = BGP4MP_MESSAGE_AS4_ADDPATH;
166			else
167				subtype = BGP4MP_MESSAGE_ADDPATH;
168		}
169	}
170
171	return subtype;
172}
173
174void
175mrt_dump_bgp_msg(struct mrt *mrt, void *pkg, uint16_t pkglen,
176    struct peer *peer, enum msg_type msgtype)
177{
178	struct ibuf	*buf;
179	int		 in = 0;
180	uint16_t	 subtype = BGP4MP_MESSAGE;
181
182	/* get the direction of the message to swap address and AS fields */
183	if (mrt->type == MRT_ALL_IN || mrt->type == MRT_UPDATE_IN)
184		in = 1;
185
186	subtype = mrt_bgp_msg_subtype(mrt, pkg, pkglen, peer, msgtype, in);
187
188	if (mrt_dump_hdr_se(&buf, peer, MSG_PROTOCOL_BGP4MP_ET, subtype,
189	    pkglen, in) == -1)
190		goto fail;
191
192	if (ibuf_add(buf, pkg, pkglen) == -1)
193		goto fail;
194
195	ibuf_close(&mrt->wbuf, buf);
196	return;
197
198fail:
199	log_warn("%s: ibuf error", __func__);
200	ibuf_free(buf);
201}
202
203void
204mrt_dump_state(struct mrt *mrt, uint16_t old_state, uint16_t new_state,
205    struct peer *peer)
206{
207	struct ibuf	*buf;
208	uint16_t	 subtype = BGP4MP_STATE_CHANGE;
209
210	if (peer->capa.neg.as4byte)
211		subtype = BGP4MP_STATE_CHANGE_AS4;
212
213	if (mrt_dump_hdr_se(&buf, peer, MSG_PROTOCOL_BGP4MP_ET, subtype,
214	    2 * sizeof(short), 0) == -1)
215		goto fail;
216
217	if (ibuf_add_n16(buf, old_state) == -1)
218		goto fail;
219	if (ibuf_add_n16(buf, new_state) == -1)
220		goto fail;
221
222	ibuf_close(&mrt->wbuf, buf);
223	return;
224
225fail:
226	log_warn("%s: ibuf error", __func__);
227	ibuf_free(buf);
228}
229
230static int
231mrt_attr_dump(struct ibuf *buf, struct rde_aspath *a, struct rde_community *c,
232    struct bgpd_addr *nexthop, int v2)
233{
234	struct attr	*oa;
235	u_char		*pdata;
236	uint32_t	 tmp;
237	int		 neednewpath = 0;
238	uint16_t	 plen, afi;
239	uint8_t		 l, safi;
240
241	/* origin */
242	if (attr_writebuf(buf, ATTR_WELL_KNOWN, ATTR_ORIGIN,
243	    &a->origin, 1) == -1)
244		return (-1);
245
246	/* aspath */
247	plen = aspath_length(a->aspath);
248	pdata = aspath_dump(a->aspath);
249
250	if (!v2)
251		pdata = aspath_deflate(pdata, &plen, &neednewpath);
252	if (attr_writebuf(buf, ATTR_WELL_KNOWN, ATTR_ASPATH, pdata,
253	    plen) == -1) {
254		if (!v2)
255			free(pdata);
256		return (-1);
257	}
258	if (!v2)
259		free(pdata);
260
261	if (nexthop && nexthop->aid == AID_INET) {
262		/* nexthop, already network byte order */
263		if (attr_writebuf(buf, ATTR_WELL_KNOWN, ATTR_NEXTHOP,
264		    &nexthop->v4.s_addr, 4) ==	-1)
265			return (-1);
266	}
267
268	/* MED, non transitive */
269	if (a->med != 0) {
270		tmp = htonl(a->med);
271		if (attr_writebuf(buf, ATTR_OPTIONAL, ATTR_MED, &tmp, 4) == -1)
272			return (-1);
273	}
274
275	/* local preference */
276	tmp = htonl(a->lpref);
277	if (attr_writebuf(buf, ATTR_WELL_KNOWN, ATTR_LOCALPREF, &tmp, 4) == -1)
278		return (-1);
279
280	/* communities */
281	if (community_writebuf(c, ATTR_COMMUNITIES, 0, buf) == -1 ||
282	    community_writebuf(c, ATTR_EXT_COMMUNITIES, 0, buf) == -1 ||
283	    community_writebuf(c, ATTR_LARGE_COMMUNITIES, 0, buf) == -1)
284		return (-1);
285
286	/* dump all other path attributes without modification */
287	for (l = 0; l < a->others_len; l++) {
288		if ((oa = a->others[l]) == NULL)
289			break;
290		if (attr_writebuf(buf, oa->flags, oa->type,
291		    oa->data, oa->len) == -1)
292			return (-1);
293	}
294
295	if (nexthop && nexthop->aid != AID_INET) {
296		struct ibuf *nhbuf;
297
298		if ((nhbuf = ibuf_dynamic(0, UCHAR_MAX)) == NULL)
299			return (-1);
300		if (!v2) {
301			if (aid2afi(nexthop->aid, &afi, &safi))
302				goto fail;
303			if (ibuf_add_n16(nhbuf, afi) == -1)
304				goto fail;
305			if (ibuf_add_n8(nhbuf, safi) == -1)
306				goto fail;
307		}
308		switch (nexthop->aid) {
309		case AID_INET6:
310			if (ibuf_add_n8(nhbuf, sizeof(struct in6_addr)) == -1)
311				goto fail;
312			if (ibuf_add(nhbuf, &nexthop->v6,
313			    sizeof(struct in6_addr)) == -1)
314				goto fail;
315			break;
316		case AID_VPN_IPv4:
317			if (ibuf_add_n8(nhbuf, sizeof(uint64_t) +
318			    sizeof(struct in_addr)) == -1)
319				goto fail;
320			if (ibuf_add_n64(nhbuf, 0) == -1) /* set RD to 0 */
321				goto fail;
322			if (ibuf_add(nhbuf, &nexthop->v4,
323			    sizeof(nexthop->v4)) == -1)
324				goto fail;
325			break;
326		case AID_VPN_IPv6:
327			if (ibuf_add_n8(nhbuf, sizeof(uint64_t) +
328			    sizeof(struct in6_addr)) == -1)
329				goto fail;
330			if (ibuf_add_n64(nhbuf, 0) == -1) /* set RD to 0 */
331				goto fail;
332			if (ibuf_add(nhbuf, &nexthop->v6,
333			    sizeof(nexthop->v6)) == -1)
334				goto fail;
335			break;
336		}
337		if (!v2)
338			if (ibuf_add_n8(nhbuf, 0) == -1)
339				goto fail;
340		if (attr_writebuf(buf, ATTR_OPTIONAL, ATTR_MP_REACH_NLRI,
341		    ibuf_data(nhbuf), ibuf_size(nhbuf)) == -1) {
342fail:
343			ibuf_free(nhbuf);
344			return (-1);
345		}
346		ibuf_free(nhbuf);
347	}
348
349	if (neednewpath) {
350		pdata = aspath_prepend(a->aspath, rde_local_as(), 0, &plen);
351		if (plen != 0)
352			if (attr_writebuf(buf, ATTR_OPTIONAL|ATTR_TRANSITIVE,
353			    ATTR_AS4_PATH, pdata, plen) == -1) {
354				free(pdata);
355				return (-1);
356			}
357		free(pdata);
358	}
359
360	return (0);
361}
362
363static int
364mrt_dump_entry_mp(struct mrt *mrt, struct prefix *p, uint16_t snum,
365    struct rde_peer *peer)
366{
367	struct ibuf	*buf, *hbuf = NULL, *h2buf = NULL;
368	struct nexthop	*n;
369	struct bgpd_addr nexthop, *nh;
370	uint16_t	 len;
371	uint8_t		 aid;
372
373	if ((buf = ibuf_dynamic(0, MAX_PKTSIZE)) == NULL) {
374		log_warn("mrt_dump_entry_mp: ibuf_dynamic");
375		return (-1);
376	}
377
378	if (mrt_attr_dump(buf, prefix_aspath(p), prefix_communities(p),
379	    NULL, 0) == -1)
380		goto fail;
381	len = ibuf_size(buf);
382
383	if ((h2buf = ibuf_dynamic(MRT_BGP4MP_IPv4_HEADER_SIZE +
384	    MRT_BGP4MP_IPv4_ENTRY_SIZE, MRT_BGP4MP_IPv6_HEADER_SIZE +
385	    MRT_BGP4MP_IPv6_ENTRY_SIZE + MRT_BGP4MP_MAX_PREFIXLEN)) == NULL)
386		goto fail;
387
388	if (ibuf_add_n16(h2buf, peer->conf.local_short_as) == -1)
389		goto fail;
390	if (ibuf_add_n16(h2buf, peer->short_as) == -1)
391		goto fail;
392	if (ibuf_add_n16(h2buf, /* ifindex */ 0) == -1)
393		goto fail;
394
395	/* XXX is this for peer self? */
396	aid = peer->remote_addr.aid == AID_UNSPEC ? p->pt->aid :
397	    peer->remote_addr.aid;
398	switch (aid) {
399	case AID_INET:
400	case AID_VPN_IPv4:
401		if (ibuf_add_n16(h2buf, AFI_IPv4) == -1)
402			goto fail;
403		if (ibuf_add(h2buf, &peer->local_v4_addr.v4,
404		    sizeof(peer->local_v4_addr.v4)) == -1 ||
405		    ibuf_add(h2buf, &peer->remote_addr.v4,
406		    sizeof(peer->remote_addr.v4)) == -1)
407			goto fail;
408		break;
409	case AID_INET6:
410	case AID_VPN_IPv6:
411		if (ibuf_add_n16(h2buf, AFI_IPv6) == -1)
412			goto fail;
413		if (ibuf_add(h2buf, &peer->local_v6_addr.v6,
414		    sizeof(peer->local_v6_addr.v6)) == -1 ||
415		    ibuf_add(h2buf, &peer->remote_addr.v6,
416		    sizeof(peer->remote_addr.v6)) == -1)
417			goto fail;
418		break;
419	default:
420		log_warnx("king bula found new AF %d in %s", aid, __func__);
421		goto fail;
422	}
423
424	if (ibuf_add_n16(h2buf, 0) == -1)		/* view */
425		goto fail;
426	if (ibuf_add_n16(h2buf, 1) == -1)		/* status */
427		goto fail;
428	/* originated timestamp */
429	if (ibuf_add_n32(h2buf, time(NULL) - (getmonotime() -
430	    p->lastchange)) == -1)
431		goto fail;
432
433	n = prefix_nexthop(p);
434	if (n == NULL) {
435		memset(&nexthop, 0, sizeof(struct bgpd_addr));
436		nexthop.aid = p->pt->aid;
437		nh = &nexthop;
438	} else
439		nh = &n->exit_nexthop;
440
441	switch (p->pt->aid) {
442	case AID_INET:
443		if (ibuf_add_n16(h2buf, AFI_IPv4) == -1)	/* afi */
444			goto fail;
445		if (ibuf_add_n8(h2buf, SAFI_UNICAST) == -1)	/* safi */
446			goto fail;
447		if (ibuf_add_n8(h2buf, 4) == -1)		/* nhlen */
448			goto fail;
449		if (ibuf_add(h2buf, &nh->v4, sizeof(nh->v4)) == -1)
450			goto fail;
451		break;
452	case AID_INET6:
453		if (ibuf_add_n16(h2buf, AFI_IPv6) == -1)	/* afi */
454			goto fail;
455		if (ibuf_add_n8(h2buf, SAFI_UNICAST) == -1)	/* safi */
456			goto fail;
457		if (ibuf_add_n8(h2buf, 16) == -1)		/* nhlen */
458			goto fail;
459		if (ibuf_add(h2buf, &nh->v6, sizeof(nh->v6)) == -1)
460			goto fail;
461		break;
462	case AID_VPN_IPv4:
463		if (ibuf_add_n16(h2buf, AFI_IPv4) == -1)	/* afi */
464			goto fail;
465		if (ibuf_add_n8(h2buf, SAFI_MPLSVPN) == -1)	/* safi */
466			goto fail;
467		if (ibuf_add_n8(h2buf, sizeof(uint64_t) +
468		    sizeof(struct in_addr)) == -1)
469			goto fail;
470		if (ibuf_add_n64(h2buf, 0) == -1)	/* set RD to 0 */
471			goto fail;
472		if (ibuf_add(h2buf, &nh->v4, sizeof(nh->v4)) == -1)
473			goto fail;
474		break;
475	case AID_VPN_IPv6:
476		if (ibuf_add_n16(h2buf, AFI_IPv6) == -1)	/* afi */
477			goto fail;
478		if (ibuf_add_n8(h2buf, SAFI_MPLSVPN) == -1)	/* safi */
479			goto fail;
480		if (ibuf_add_n8(h2buf, sizeof(uint64_t) +
481		    sizeof(struct in6_addr)) == -1)
482			goto fail;
483		if (ibuf_add_n64(h2buf, 0) == -1)	/* set RD to 0 */
484			goto fail;
485		if (ibuf_add(h2buf, &nh->v6, sizeof(nh->v6)) == -1)
486			goto fail;
487		break;
488	case AID_FLOWSPECv4:
489	case AID_FLOWSPECv6:
490		if (p->pt->aid == AID_FLOWSPECv4) {
491			if (ibuf_add_n16(h2buf, AFI_IPv4) == -1) /* afi */
492				goto fail;
493		} else {
494			if (ibuf_add_n16(h2buf, AFI_IPv6) == -1) /* afi */
495				goto fail;
496		}
497		if (ibuf_add_n8(h2buf, SAFI_FLOWSPEC) == -1)	/* safi */
498			goto fail;
499		if (ibuf_add_n8(h2buf, 0) == -1)		/* nhlen */
500			goto fail;
501		break;
502	default:
503		log_warnx("king bula found new AF in %s", __func__);
504		goto fail;
505	}
506
507	if (pt_writebuf(h2buf, p->pt, 0, 0, 0) == -1)
508		goto fail;
509
510	if (ibuf_add_n16(h2buf, len) == -1)
511		goto fail;
512	len += ibuf_size(h2buf);
513
514	if (mrt_dump_hdr_rde(&hbuf, MSG_PROTOCOL_BGP4MP, BGP4MP_ENTRY,
515	    len) == -1)
516		goto fail;
517
518	ibuf_close(&mrt->wbuf, hbuf);
519	ibuf_close(&mrt->wbuf, h2buf);
520	ibuf_close(&mrt->wbuf, buf);
521
522	return (len + MRT_HEADER_SIZE);
523
524fail:
525	log_warn("%s: ibuf error", __func__);
526	ibuf_free(hbuf);
527	ibuf_free(h2buf);
528	ibuf_free(buf);
529	return (-1);
530}
531
532static int
533mrt_dump_entry(struct mrt *mrt, struct prefix *p, uint16_t snum,
534    struct rde_peer *peer)
535{
536	struct ibuf	*buf, *hbuf = NULL;
537	struct nexthop	*nexthop;
538	struct bgpd_addr addr, *nh;
539	size_t		 len;
540	uint16_t	 subtype;
541	uint8_t		 dummy;
542
543	if (p->pt->aid != peer->remote_addr.aid &&
544	    p->pt->aid != AID_INET && p->pt->aid != AID_INET6)
545		/* only able to dump pure IPv4/IPv6 */
546		return (0);
547
548	if ((buf = ibuf_dynamic(0, MAX_PKTSIZE)) == NULL) {
549		log_warn("mrt_dump_entry: ibuf_dynamic");
550		return (-1);
551	}
552
553	nexthop = prefix_nexthop(p);
554	if (nexthop == NULL) {
555		memset(&addr, 0, sizeof(struct bgpd_addr));
556		addr.aid = p->pt->aid;
557		nh = &addr;
558	} else
559		nh = &nexthop->exit_nexthop;
560	if (mrt_attr_dump(buf, prefix_aspath(p), prefix_communities(p),
561	    nh, 0) == -1)
562		goto fail;
563
564	len = ibuf_size(buf);
565	aid2afi(p->pt->aid, &subtype, &dummy);
566	if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP, subtype, len) == -1)
567		goto fail;
568
569	if (ibuf_add_n16(hbuf, 0) == -1)
570		goto fail;
571	if (ibuf_add_n16(hbuf, snum) == -1)
572		goto fail;
573
574	pt_getaddr(p->pt, &addr);
575	switch (p->pt->aid) {
576	case AID_INET:
577		if (ibuf_add(hbuf, &addr.v4, sizeof(addr.v4)) == -1)
578			goto fail;
579		break;
580	case AID_INET6:
581		if (ibuf_add(hbuf, &addr.v6, sizeof(addr.v6)) == -1)
582			goto fail;
583		break;
584	}
585	if (ibuf_add_n8(hbuf, p->pt->prefixlen) == -1)
586		goto fail;
587
588	if (ibuf_add_n8(hbuf, 1) == -1)		/* state */
589		goto fail;
590	/* originated timestamp */
591	if (ibuf_add_n32(hbuf, time(NULL) - (getmonotime() -
592	    p->lastchange)) == -1)
593		goto fail;
594	switch (p->pt->aid) {
595	case AID_INET:
596		if (ibuf_add(hbuf, &peer->remote_addr.v4,
597		    sizeof(peer->remote_addr.v4)) == -1)
598			goto fail;
599		break;
600	case AID_INET6:
601		if (ibuf_add(hbuf, &peer->remote_addr.v6,
602		    sizeof(peer->remote_addr.v6)) == -1)
603			goto fail;
604		break;
605	}
606	if (ibuf_add_n16(hbuf, peer->short_as) == -1)
607		goto fail;
608	if (ibuf_add_n16(hbuf, len) == -1)
609		goto fail;
610
611	ibuf_close(&mrt->wbuf, hbuf);
612	ibuf_close(&mrt->wbuf, buf);
613
614	return (len + MRT_HEADER_SIZE);
615
616fail:
617	log_warn("%s: ibuf error", __func__);
618	ibuf_free(hbuf);
619	ibuf_free(buf);
620	return (-1);
621}
622
623static int
624mrt_dump_entry_v2_rib(struct rib_entry *re, struct ibuf **nb, struct ibuf **apb,
625    uint16_t *np, uint16_t *app)
626{
627	struct bgpd_addr addr;
628	struct ibuf *buf = NULL, **bp;
629	struct ibuf *tbuf = NULL;
630	struct prefix *p;
631	int addpath;
632
633	*np = 0;
634	*app = 0;
635
636	TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
637		struct nexthop		*nexthop;
638		struct bgpd_addr	*nh;
639
640		addpath = peer_has_add_path(prefix_peer(p), re->prefix->aid,
641		    CAPA_AP_RECV);
642
643		if (addpath) {
644			bp = apb;
645			*app += 1;
646		} else {
647			bp = nb;
648			*np += 1;
649		}
650		if ((buf = *bp) == NULL) {
651			if ((buf = ibuf_dynamic(0, UINT_MAX)) == NULL)
652				goto fail;
653			*bp = buf;
654		}
655
656		nexthop = prefix_nexthop(p);
657		if (nexthop == NULL) {
658			memset(&addr, 0, sizeof(struct bgpd_addr));
659			addr.aid = re->prefix->aid;
660			nh = &addr;
661		} else
662			nh = &nexthop->exit_nexthop;
663
664		if (ibuf_add_n16(buf, prefix_peer(p)->mrt_idx) == -1)
665			goto fail;
666		/* originated timestamp */
667		if (ibuf_add_n32(buf, time(NULL) - (getmonotime() -
668		    p->lastchange)) == -1)
669			goto fail;
670
671		/* RFC8050: path-id if add-path is used */
672		if (addpath)
673			if (ibuf_add_n32(buf, p->path_id) == -1)
674				goto fail;
675
676		if ((tbuf = ibuf_dynamic(0, MAX_PKTSIZE)) == NULL)
677			goto fail;
678		if (mrt_attr_dump(tbuf, prefix_aspath(p), prefix_communities(p),
679		    nh, 1) == -1)
680			goto fail;
681		if (ibuf_add_n16(buf, ibuf_size(tbuf)) == -1)
682			goto fail;
683		if (ibuf_add_buf(buf, tbuf) == -1)
684			goto fail;
685		ibuf_free(tbuf);
686		tbuf = NULL;
687	}
688
689	return 0;
690
691fail:
692	ibuf_free(tbuf);
693	return -1;
694}
695
696static int
697mrt_dump_entry_v2(struct mrt *mrt, struct rib_entry *re, uint32_t snum)
698{
699	struct ibuf	*hbuf = NULL, *nbuf = NULL, *apbuf = NULL, *pbuf;
700	size_t		 hlen, len;
701	uint16_t	 subtype, apsubtype, nump, apnump, afi;
702	uint8_t		 safi;
703
704	if ((pbuf = ibuf_dynamic(0, UINT_MAX)) == NULL) {
705		log_warn("%s: ibuf_dynamic", __func__);
706		return -1;
707	}
708
709	switch (re->prefix->aid) {
710	case AID_INET:
711		subtype = MRT_DUMP_V2_RIB_IPV4_UNICAST;
712		apsubtype = MRT_DUMP_V2_RIB_IPV4_UNICAST_ADDPATH;
713		break;
714	case AID_INET6:
715		subtype = MRT_DUMP_V2_RIB_IPV6_UNICAST;
716		apsubtype = MRT_DUMP_V2_RIB_IPV6_UNICAST_ADDPATH;
717		break;
718	default:
719		/*
720		 * XXX The RFC defined the format for this type differently
721		 * and it is prohibitly expensive to implement that format.
722		 * Instead do what gobgp does and encode it like the other
723		 * types.
724		 */
725		subtype = MRT_DUMP_V2_RIB_GENERIC;
726		apsubtype = MRT_DUMP_V2_RIB_GENERIC_ADDPATH;
727		aid2afi(re->prefix->aid, &afi, &safi);
728
729		/* first add 3-bytes AFI/SAFI */
730		if (ibuf_add_n16(pbuf, afi) == -1)
731			goto fail;
732		if (ibuf_add_n8(pbuf, safi) == -1)
733			goto fail;
734		break;
735	}
736
737	if (pt_writebuf(pbuf, re->prefix, 0, 0, 0) == -1)
738		goto fail;
739
740	hlen = sizeof(snum) + sizeof(nump) + ibuf_size(pbuf);
741
742	if (mrt_dump_entry_v2_rib(re, &nbuf, &apbuf, &nump, &apnump))
743		goto fail;
744
745	if (nump > 0) {
746		len = ibuf_size(nbuf) + hlen;
747		if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2, subtype,
748		    len) == -1)
749			goto fail;
750
751		if (ibuf_add_n32(hbuf, snum) == -1)
752			goto fail;
753		if (ibuf_add_buf(hbuf, pbuf) == -1)
754			goto fail;
755		if (ibuf_add_n16(hbuf, nump) == -1)
756			goto fail;
757
758		ibuf_close(&mrt->wbuf, hbuf);
759		ibuf_close(&mrt->wbuf, nbuf);
760		hbuf = NULL;
761		nbuf = NULL;
762	}
763
764	if (apnump > 0) {
765		len = ibuf_size(apbuf) + hlen;
766		if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2, apsubtype,
767		    len) == -1)
768			goto fail;
769
770		if (ibuf_add_n32(hbuf, snum) == -1)
771			goto fail;
772		if (ibuf_add_buf(hbuf, pbuf) == -1)
773			goto fail;
774		if (ibuf_add_n16(hbuf, apnump) == -1)
775			goto fail;
776
777		ibuf_close(&mrt->wbuf, hbuf);
778		ibuf_close(&mrt->wbuf, apbuf);
779		hbuf = NULL;
780		apbuf = NULL;
781	}
782
783	ibuf_free(pbuf);
784	return (0);
785fail:
786	log_warn("%s: ibuf error", __func__);
787	ibuf_free(apbuf);
788	ibuf_free(nbuf);
789	ibuf_free(hbuf);
790	ibuf_free(pbuf);
791	return (-1);
792}
793
794struct cb_arg {
795	struct ibuf	*buf;
796	int		 nump;
797};
798
799static void
800mrt_dump_v2_hdr_peer(struct rde_peer *peer, void *arg)
801{
802	struct cb_arg *a = arg;
803
804	if (a->nump == -1)
805		return;
806	peer->mrt_idx = a->nump;
807	if (mrt_dump_peer(a->buf, peer) == -1) {
808		a->nump = -1;
809		return;
810	}
811	a->nump++;
812}
813
814int
815mrt_dump_v2_hdr(struct mrt *mrt, struct bgpd_config *conf)
816{
817	struct ibuf	*buf, *hbuf = NULL;
818	size_t		 len, off;
819	uint16_t	 nlen, nump;
820	struct cb_arg	 arg;
821
822	if ((buf = ibuf_dynamic(0, UINT_MAX)) == NULL) {
823		log_warn("%s: ibuf_dynamic", __func__);
824		return (-1);
825	}
826
827	if (ibuf_add_n32(buf, conf->bgpid) == -1)
828		goto fail;
829	nlen = strlen(mrt->rib);
830	if (nlen > 0)
831		nlen += 1;
832	if (ibuf_add_n16(buf, nlen) == -1)
833		goto fail;
834	if (ibuf_add(buf, mrt->rib, nlen) == -1)
835		goto fail;
836
837	off = ibuf_size(buf);
838	if (ibuf_add_zero(buf, sizeof(nump)) == -1)
839		goto fail;
840	arg.nump = 0;
841	arg.buf = buf;
842	peer_foreach(mrt_dump_v2_hdr_peer, &arg);
843	if (arg.nump == -1)
844		goto fail;
845
846	if (ibuf_set_n16(buf, off, arg.nump) == -1)
847		goto fail;
848
849	len = ibuf_size(buf);
850	if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2,
851	    MRT_DUMP_V2_PEER_INDEX_TABLE, len) == -1)
852		goto fail;
853
854	ibuf_close(&mrt->wbuf, hbuf);
855	ibuf_close(&mrt->wbuf, buf);
856
857	return (0);
858fail:
859	log_warn("%s: ibuf error", __func__);
860	ibuf_free(hbuf);
861	ibuf_free(buf);
862	return (-1);
863}
864
865static int
866mrt_dump_peer(struct ibuf *buf, struct rde_peer *peer)
867{
868	uint8_t	type = 0;
869
870	if (peer->capa.as4byte)
871		type |= MRT_DUMP_V2_PEER_BIT_A;
872	if (peer->remote_addr.aid == AID_INET6)
873		type |= MRT_DUMP_V2_PEER_BIT_I;
874
875	if (ibuf_add_n8(buf, type) == -1)
876		goto fail;
877	if (ibuf_add_n32(buf, peer->remote_bgpid) == -1)
878		goto fail;
879
880	switch (peer->remote_addr.aid) {
881	case AID_INET:
882		if (ibuf_add(buf, &peer->remote_addr.v4,
883		    sizeof(peer->remote_addr.v4)) == -1)
884			goto fail;
885		break;
886	case AID_INET6:
887		if (ibuf_add(buf, &peer->remote_addr.v6,
888		    sizeof(peer->remote_addr.v6)) == -1)
889			goto fail;
890		break;
891	case AID_UNSPEC: /* XXX special handling for peerself? */
892		if (ibuf_add_n32(buf, 0) == -1)
893			goto fail;
894		break;
895	default:
896		log_warnx("king bula found new AF in %s", __func__);
897		goto fail;
898	}
899
900	if (peer->capa.as4byte) {
901		if (ibuf_add_n32(buf, peer->conf.remote_as) == -1)
902			goto fail;
903	} else {
904		if (ibuf_add_n16(buf, peer->short_as) == -1)
905			goto fail;
906	}
907	return (0);
908fail:
909	log_warn("%s: ibuf error", __func__);
910	return (-1);
911}
912
913void
914mrt_dump_upcall(struct rib_entry *re, void *ptr)
915{
916	struct mrt		*mrtbuf = ptr;
917	struct prefix		*p;
918
919	if (mrtbuf->type == MRT_TABLE_DUMP_V2) {
920		mrt_dump_entry_v2(mrtbuf, re, mrtbuf->seqnum++);
921		return;
922	}
923
924	/*
925	 * dump all prefixes even the inactive ones. That is the way zebra
926	 * dumps the table so we do the same. If only the active route should
927	 * be dumped p should be set to p = pt->active.
928	 */
929	TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
930		if (mrtbuf->type == MRT_TABLE_DUMP)
931			mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
932			    prefix_peer(p));
933		else
934			mrt_dump_entry_mp(mrtbuf, p, mrtbuf->seqnum++,
935			    prefix_peer(p));
936	}
937}
938
939void
940mrt_done(struct mrt *mrtbuf)
941{
942	mrtbuf->state = MRT_STATE_REMOVE;
943}
944
945static int
946mrt_dump_hdr_se(struct ibuf ** bp, struct peer *peer, uint16_t type,
947    uint16_t subtype, uint32_t len, int swap)
948{
949	struct timespec	time;
950
951	if ((*bp = ibuf_dynamic(MRT_ET_HEADER_SIZE, MRT_ET_HEADER_SIZE +
952	    MRT_BGP4MP_AS4_IPv6_HEADER_SIZE + len)) == NULL)
953		return (-1);
954
955	clock_gettime(CLOCK_REALTIME, &time);
956
957	if (ibuf_add_n32(*bp, time.tv_sec) == -1)
958		goto fail;
959	if (ibuf_add_n16(*bp, type) == -1)
960		goto fail;
961	if (ibuf_add_n16(*bp, subtype) == -1)
962		goto fail;
963
964	switch (peer->local.aid) {
965	case AID_INET:
966		if (subtype == BGP4MP_STATE_CHANGE_AS4 ||
967		    subtype == BGP4MP_MESSAGE_AS4 ||
968		    subtype == BGP4MP_MESSAGE_AS4_ADDPATH)
969			len += MRT_BGP4MP_ET_AS4_IPv4_HEADER_SIZE;
970		else
971			len += MRT_BGP4MP_ET_IPv4_HEADER_SIZE;
972		break;
973	case AID_INET6:
974		if (subtype == BGP4MP_STATE_CHANGE_AS4 ||
975		    subtype == BGP4MP_MESSAGE_AS4 ||
976		    subtype == BGP4MP_MESSAGE_AS4_ADDPATH)
977			len += MRT_BGP4MP_ET_AS4_IPv6_HEADER_SIZE;
978		else
979			len += MRT_BGP4MP_ET_IPv6_HEADER_SIZE;
980		break;
981	case 0:
982		goto fail;
983	default:
984		log_warnx("king bula found new AF in %s", __func__);
985		goto fail;
986	}
987
988	if (ibuf_add_n32(*bp, len) == -1)
989		goto fail;
990	/* millisecond field use by the _ET format */
991	if (ibuf_add_n32(*bp, time.tv_nsec / 1000) == -1)
992		goto fail;
993
994	if (subtype == BGP4MP_STATE_CHANGE_AS4 ||
995	    subtype == BGP4MP_MESSAGE_AS4 ||
996	    subtype == BGP4MP_MESSAGE_AS4_ADDPATH) {
997		if (!swap)
998			if (ibuf_add_n32(*bp, peer->conf.local_as) == -1)
999				goto fail;
1000		if (ibuf_add_n32(*bp, peer->conf.remote_as) == -1)
1001			goto fail;
1002		if (swap)
1003			if (ibuf_add_n32(*bp, peer->conf.local_as) == -1)
1004				goto fail;
1005	} else {
1006		if (!swap)
1007			if (ibuf_add_n16(*bp, peer->conf.local_short_as) == -1)
1008				goto fail;
1009		if (ibuf_add_n16(*bp, peer->short_as) == -1)
1010			goto fail;
1011		if (swap)
1012			if (ibuf_add_n16(*bp, peer->conf.local_short_as) == -1)
1013				goto fail;
1014	}
1015
1016	if (ibuf_add_n16(*bp, /* ifindex */ 0) == -1)
1017		goto fail;
1018
1019	switch (peer->local.aid) {
1020	case AID_INET:
1021		if (ibuf_add_n16(*bp, AFI_IPv4) == -1)
1022			goto fail;
1023		if (!swap)
1024			if (ibuf_add(*bp, &peer->local.v4,
1025			    sizeof(peer->local.v4)) == -1)
1026				goto fail;
1027		if (ibuf_add(*bp, &peer->remote.v4,
1028		    sizeof(peer->remote.v4)) == -1)
1029			goto fail;
1030		if (swap)
1031			if (ibuf_add(*bp, &peer->local.v4,
1032			    sizeof(peer->local.v4)) == -1)
1033				goto fail;
1034		break;
1035	case AID_INET6:
1036		if (ibuf_add_n16(*bp, AFI_IPv6) == -1)
1037			goto fail;
1038		if (!swap)
1039			if (ibuf_add(*bp, &peer->local.v6,
1040			    sizeof(peer->local.v6)) == -1)
1041				goto fail;
1042		if (ibuf_add(*bp, &peer->remote.v6,
1043		    sizeof(peer->remote.v6)) == -1)
1044			goto fail;
1045		if (swap)
1046			if (ibuf_add(*bp, &peer->local.v6,
1047			    sizeof(peer->local.v6)) == -1)
1048				goto fail;
1049		break;
1050	}
1051
1052	return (0);
1053
1054fail:
1055	ibuf_free(*bp);
1056	*bp = NULL;
1057	return (-1);
1058}
1059
1060int
1061mrt_dump_hdr_rde(struct ibuf **bp, uint16_t type, uint16_t subtype,
1062    uint32_t len)
1063{
1064	struct timespec	time;
1065
1066	if ((*bp = ibuf_dynamic(MRT_HEADER_SIZE, MRT_HEADER_SIZE +
1067	    MRT_BGP4MP_AS4_IPv6_HEADER_SIZE + MRT_BGP4MP_IPv6_ENTRY_SIZE)) ==
1068	    NULL)
1069		return (-1);
1070
1071	clock_gettime(CLOCK_REALTIME, &time);
1072
1073	if (ibuf_add_n32(*bp, time.tv_sec) == -1)
1074		goto fail;
1075	if (ibuf_add_n16(*bp, type) == -1)
1076		goto fail;
1077	if (ibuf_add_n16(*bp, subtype) == -1)
1078		goto fail;
1079
1080	switch (type) {
1081	case MSG_TABLE_DUMP:
1082		switch (subtype) {
1083		case AFI_IPv4:
1084			len += MRT_DUMP_HEADER_SIZE;
1085			break;
1086		case AFI_IPv6:
1087			len += MRT_DUMP_HEADER_SIZE_V6;
1088			break;
1089		}
1090		if (ibuf_add_n32(*bp, len) == -1)
1091			goto fail;
1092		break;
1093	case MSG_PROTOCOL_BGP4MP:
1094	case MSG_TABLE_DUMP_V2:
1095		if (ibuf_add_n32(*bp, len) == -1)
1096			goto fail;
1097		break;
1098	default:
1099		log_warnx("mrt_dump_hdr_rde: unsupported type");
1100		goto fail;
1101	}
1102	return (0);
1103
1104fail:
1105	ibuf_free(*bp);
1106	*bp = NULL;
1107	return (-1);
1108}
1109
1110void
1111mrt_write(struct mrt *mrt)
1112{
1113	int	r;
1114
1115	if ((r = ibuf_write(&mrt->wbuf)) == -1 && errno != EAGAIN) {
1116		log_warn("mrt dump aborted, mrt_write");
1117		mrt_clean(mrt);
1118		mrt_done(mrt);
1119	}
1120}
1121
1122void
1123mrt_clean(struct mrt *mrt)
1124{
1125	close(mrt->wbuf.fd);
1126	msgbuf_clear(&mrt->wbuf);
1127}
1128
1129static struct imsgbuf	*mrt_imsgbuf[2];
1130
1131void
1132mrt_init(struct imsgbuf *rde, struct imsgbuf *se)
1133{
1134	mrt_imsgbuf[RDEIDX] = rde;
1135	mrt_imsgbuf[SEIDX] = se;
1136}
1137
1138int
1139mrt_open(struct mrt *mrt, time_t now)
1140{
1141	enum imsg_type	type;
1142	int		fd;
1143
1144	if (strftime(MRT2MC(mrt)->file, sizeof(MRT2MC(mrt)->file),
1145	    MRT2MC(mrt)->name, localtime(&now)) == 0) {
1146		log_warnx("mrt_open: strftime conversion failed");
1147		return (-1);
1148	}
1149
1150	fd = open(MRT2MC(mrt)->file,
1151	    O_WRONLY|O_NONBLOCK|O_CREAT|O_TRUNC|O_CLOEXEC, 0644);
1152	if (fd == -1) {
1153		log_warn("mrt_open %s", MRT2MC(mrt)->file);
1154		return (1);
1155	}
1156
1157	if (mrt->state == MRT_STATE_OPEN)
1158		type = IMSG_MRT_OPEN;
1159	else
1160		type = IMSG_MRT_REOPEN;
1161
1162	if (imsg_compose(mrt_imsgbuf[TYPE2IDX(mrt->type)], type, 0, 0, fd,
1163	    mrt, sizeof(struct mrt)) == -1)
1164		log_warn("mrt_open");
1165
1166	return (1);
1167}
1168
1169time_t
1170mrt_timeout(struct mrt_head *mrt)
1171{
1172	struct mrt	*m;
1173	time_t		 now;
1174	time_t		 timeout = -1;
1175
1176	now = time(NULL);
1177	LIST_FOREACH(m, mrt, entry) {
1178		if (m->state == MRT_STATE_RUNNING &&
1179		    MRT2MC(m)->ReopenTimerInterval != 0) {
1180			if (MRT2MC(m)->ReopenTimer <= now) {
1181				mrt_open(m, now);
1182				MRT2MC(m)->ReopenTimer =
1183				    now + MRT2MC(m)->ReopenTimerInterval;
1184			}
1185			if (timeout == -1 ||
1186			    MRT2MC(m)->ReopenTimer - now < timeout)
1187				timeout = MRT2MC(m)->ReopenTimer - now;
1188		}
1189	}
1190	return (timeout);
1191}
1192
1193void
1194mrt_reconfigure(struct mrt_head *mrt)
1195{
1196	struct mrt	*m, *xm;
1197	time_t		 now;
1198
1199	now = time(NULL);
1200	for (m = LIST_FIRST(mrt); m != NULL; m = xm) {
1201		xm = LIST_NEXT(m, entry);
1202		if (m->state == MRT_STATE_OPEN ||
1203		    m->state == MRT_STATE_REOPEN) {
1204			if (mrt_open(m, now) == -1)
1205				continue;
1206			if (MRT2MC(m)->ReopenTimerInterval != 0)
1207				MRT2MC(m)->ReopenTimer =
1208				    now + MRT2MC(m)->ReopenTimerInterval;
1209			m->state = MRT_STATE_RUNNING;
1210		}
1211		if (m->state == MRT_STATE_REMOVE) {
1212			if (imsg_compose(mrt_imsgbuf[TYPE2IDX(m->type)],
1213			    IMSG_MRT_CLOSE, 0, 0, -1, m, sizeof(struct mrt)) ==
1214			    -1)
1215				log_warn("mrt_reconfigure");
1216			LIST_REMOVE(m, entry);
1217			free(m);
1218			continue;
1219		}
1220	}
1221}
1222
1223void
1224mrt_handler(struct mrt_head *mrt)
1225{
1226	struct mrt	*m;
1227	time_t		 now;
1228
1229	now = time(NULL);
1230	LIST_FOREACH(m, mrt, entry) {
1231		if (m->state == MRT_STATE_RUNNING &&
1232		    (MRT2MC(m)->ReopenTimerInterval != 0 ||
1233		     m->type == MRT_TABLE_DUMP ||
1234		     m->type == MRT_TABLE_DUMP_MP ||
1235		     m->type == MRT_TABLE_DUMP_V2)) {
1236			if (mrt_open(m, now) == -1)
1237				continue;
1238			MRT2MC(m)->ReopenTimer =
1239			    now + MRT2MC(m)->ReopenTimerInterval;
1240		}
1241	}
1242}
1243
1244struct mrt *
1245mrt_get(struct mrt_head *c, struct mrt *m)
1246{
1247	struct mrt	*t;
1248
1249	LIST_FOREACH(t, c, entry) {
1250		if (t->type != m->type)
1251			continue;
1252		if (strcmp(t->rib, m->rib))
1253			continue;
1254		if (t->peer_id == m->peer_id &&
1255		    t->group_id == m->group_id)
1256			return (t);
1257	}
1258	return (NULL);
1259}
1260
1261void
1262mrt_mergeconfig(struct mrt_head *xconf, struct mrt_head *nconf)
1263{
1264	struct mrt	*m, *xm;
1265
1266	/* both lists here are actually struct mrt_conifg nodes */
1267	LIST_FOREACH(m, nconf, entry) {
1268		if ((xm = mrt_get(xconf, m)) == NULL) {
1269			/* NEW */
1270			if ((xm = malloc(sizeof(struct mrt_config))) == NULL)
1271				fatal("mrt_mergeconfig");
1272			memcpy(xm, m, sizeof(struct mrt_config));
1273			xm->state = MRT_STATE_OPEN;
1274			LIST_INSERT_HEAD(xconf, xm, entry);
1275		} else {
1276			/* MERGE */
1277			if (strlcpy(MRT2MC(xm)->name, MRT2MC(m)->name,
1278			    sizeof(MRT2MC(xm)->name)) >=
1279			    sizeof(MRT2MC(xm)->name))
1280				fatalx("mrt_mergeconfig: strlcpy");
1281			MRT2MC(xm)->ReopenTimerInterval =
1282			    MRT2MC(m)->ReopenTimerInterval;
1283			xm->state = MRT_STATE_REOPEN;
1284		}
1285	}
1286
1287	LIST_FOREACH(xm, xconf, entry)
1288		if (mrt_get(nconf, xm) == NULL)
1289			/* REMOVE */
1290			xm->state = MRT_STATE_REMOVE;
1291
1292	/* free config */
1293	while ((m = LIST_FIRST(nconf)) != NULL) {
1294		LIST_REMOVE(m, entry);
1295		free(m);
1296	}
1297}
1298