/*	$OpenBSD: rde_peer.c,v 1.37 2024/05/22 08:41:14 claudio Exp $ */

/*
 * Copyright (c) 2019 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include "bgpd.h"
#include "rde.h"

struct peer_tree	 peertable;
struct rde_peer		*peerself;
static long		 imsg_pending;

CTASSERT(sizeof(peerself->recv_eor) * 8 > AID_MAX);
CTASSERT(sizeof(peerself->sent_eor) * 8 > AID_MAX);

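/*
 * Per-peer queue entry used to hold imsgs from the SE until the RDE is
 * ready to process them, see peer_imsg_push() and peer_imsg_pop().
 */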
struct iq {
	SIMPLEQ_ENTRY(iq)	entry;
	struct imsg		imsg;
};

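/*
 * Check if the peer announced the 4-byte AS number capability.
 */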
int
peer_has_as4byte(struct rde_peer *peer)
{
	return (peer->capa.as4byte);
}

/*
 * Check if ADD_PATH is enabled for aid and mode (rx / tx). If aid is
 * AID_UNSPEC then the function returns true if any aid has mode enabled.
 */
int
peer_has_add_path(struct rde_peer *peer, uint8_t aid, int mode)
{
	if (aid >= AID_MAX)
		return 0;
	return (peer->capa.add_path[aid] & mode);
}

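/*
 * Check if the peer is configured to not accept routes containing AS_SET
 * segments.
 */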
int
peer_accept_no_as_set(struct rde_peer *peer)
{
	return (peer->flags & PEERFLAG_NO_AS_SET);
}

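/*
 * Initialize the peer table and create the pseudo-peer "peerself" which
 * represents the local system and is used for locally originated routes.
 */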
void
peer_init(struct filter_head *rules)
{
	struct peer_config pc;

	RB_INIT(&peertable);

	memset(&pc, 0, sizeof(pc));
	snprintf(pc.descr, sizeof(pc.descr), "LOCAL");
	pc.id = PEER_ID_SELF;

	peerself = peer_add(PEER_ID_SELF, &pc, rules);
	peerself->state = PEER_UP;
}

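/*
 * Called on shutdown of the RDE, only warns if the peer table was not
 * emptied beforehand.
 */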
void
peer_shutdown(void)
{
	if (!RB_EMPTY(&peertable))
		log_warnx("%s: free non-free table", __func__);
}

/*
 * Traverse all peers, calling the callback for each peer.
 */
void
peer_foreach(void (*callback)(struct rde_peer *, void *), void *arg)
{
	struct rde_peer *peer, *np;

	RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
		callback(peer, arg);
}
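
/*
 * A minimal usage sketch (hypothetical callback, not part of this file):
 *
 *	static void
 *	peer_count_up(struct rde_peer *peer, void *arg)
 *	{
 *		int	*cnt = arg;
 *
 *		if (peer->state == PEER_UP)
 *			(*cnt)++;
 *	}
 *
 *	int cnt = 0;
 *	peer_foreach(peer_count_up, &cnt);
 */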

/*
 * Lookup a peer by peer_id, return NULL if not found.
 */
struct rde_peer *
peer_get(uint32_t id)
{
	struct rde_peer	needle;

	needle.conf.id = id;
	return RB_FIND(peer_tree, &peertable, &needle);
}

/*
 * Find the next peer that matches the neighbor options in *n.
 * If peerid is set, the lookup resumes after that peer.
 * Returns NULL if no more peers match.
 */
struct rde_peer *
peer_match(struct ctl_neighbor *n, uint32_t peerid)
{
	struct rde_peer		*peer;

	if (peerid != 0) {
		peer = peer_get(peerid);
		if (peer)
			peer = RB_NEXT(peer_tree, &peertable, peer);
	} else
		peer = RB_MIN(peer_tree, &peertable);

	for (; peer != NULL; peer = RB_NEXT(peer_tree, &peertable, peer)) {
		if (rde_match_peer(peer, n))
			return peer;
	}
	return NULL;
}

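/*
 * Insert a new peer into the peer table or, if the peer already exists,
 * just update its configuration. New peers get the output filter rules
 * applied and a unique even path_id_tx assigned.
 */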
struct rde_peer *
peer_add(uint32_t id, struct peer_config *p_conf, struct filter_head *rules)
{
	struct rde_peer		*peer;
	int			 conflict;

	if ((peer = peer_get(id))) {
		memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
		return (peer);
	}

	peer = calloc(1, sizeof(struct rde_peer));
	if (peer == NULL)
		fatal("peer_add");

	memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
	peer->remote_bgpid = 0;
	peer->loc_rib_id = rib_find(peer->conf.rib);
	if (peer->loc_rib_id == RIB_NOTFOUND)
		fatalx("King Bula's new peer met an unknown RIB");
	peer->state = PEER_NONE;
	peer->eval = peer->conf.eval;
	peer->role = peer->conf.role;
	peer->export_type = peer->conf.export_type;
	peer->flags = peer->conf.flags;
	SIMPLEQ_INIT(&peer->imsg_queue);

	peer_apply_out_filter(peer, rules);

	/*
	 * Assign an even random unique transmit path id.
	 * Odd path_id_tx numbers are for peers using add-path recv.
	 */
	do {
		struct rde_peer *p;

		conflict = 0;
		peer->path_id_tx = arc4random() << 1;
		RB_FOREACH(p, peer_tree, &peertable) {
			if (p->path_id_tx == peer->path_id_tx) {
				conflict = 1;
				break;
			}
		}
	} while (conflict);

	if (RB_INSERT(peer_tree, &peertable, peer) != NULL)
		fatalx("rde peer table corrupted");

	return (peer);
}

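/*
 * Rebuild the peer's output filter list from the global rules, skipping
 * rules that can never match this peer. Returns the old list so the
 * caller can free it.
 */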
struct filter_head *
peer_apply_out_filter(struct rde_peer *peer, struct filter_head *rules)
{
	struct filter_head *old;
	struct filter_rule *fr, *new;

	old = peer->out_rules;
	if ((peer->out_rules = malloc(sizeof(*peer->out_rules))) == NULL)
		fatal(NULL);
	TAILQ_INIT(peer->out_rules);

	TAILQ_FOREACH(fr, rules, entry) {
		if (rde_filter_skip_rule(peer, fr))
			continue;

		if ((new = malloc(sizeof(*new))) == NULL)
			fatal(NULL);
		memcpy(new, fr, sizeof(*new));
		filterset_copy(&fr->set, &new->set);

		TAILQ_INSERT_TAIL(peer->out_rules, new, entry);
	}

	return old;
}

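/*
 * RB tree comparison function, peers are ordered by their peer id.
 */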
static inline int
peer_cmp(struct rde_peer *a, struct rde_peer *b)
{
	if (a->conf.id > b->conf.id)
		return 1;
	if (a->conf.id < b->conf.id)
		return -1;
	return 0;
}

RB_GENERATE(peer_tree, rde_peer, entry, peer_cmp);

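/*
 * Check whether an update for rib_entry re needs to be generated for this
 * peer and hand the work off to the matching up_generate function,
 * depending on the peer's add-path send capability and evaluation mode.
 */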
static void
peer_generate_update(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *newpath, struct prefix *oldpath,
    enum eval_mode mode)
{
	uint8_t		 aid;

	aid = re->prefix->aid;

	/* skip ourself */
	if (peer == peerself)
		return;
	if (peer->state != PEER_UP)
		return;
	/* skip peers using a different rib */
	if (peer->loc_rib_id != re->rib_id)
		return;
	/* check if peer actually supports the address family */
	if (peer->capa.mp[aid] == 0)
		return;
	/* skip peers with special export types */
	if (peer->export_type == EXPORT_NONE ||
	    peer->export_type == EXPORT_DEFAULT_ROUTE)
		return;

	/* if reconf skip peers which don't need to reconfigure */
	if (mode == EVAL_RECONF && peer->reconf_out == 0)
		return;

	/* handle peers with add-path */
	if (peer_has_add_path(peer, aid, CAPA_AP_SEND)) {
		if (peer->eval.mode == ADDPATH_EVAL_ALL)
			up_generate_addpath_all(peer, re, newpath, oldpath);
		else
			up_generate_addpath(peer, re);
		return;
	}

	/* skip regular peers if the best path didn't change */
	if (mode == EVAL_ALL && (peer->flags & PEERFLAG_EVALUATE_ALL) == 0)
		return;
	up_generate_updates(peer, re);
}

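/*
 * Generate updates for all peers after a change of rib_entry re.
 */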
void
rde_generate_updates(struct rib_entry *re, struct prefix *newpath,
    struct prefix *oldpath, enum eval_mode mode)
{
	struct rde_peer	*peer;

	RB_FOREACH(peer, peer_tree, &peertable)
		peer_generate_update(peer, re, newpath, oldpath, mode);
}

/*
 * Various RIB walker callbacks.
 */
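
/* Remove a prefix from the Adj-RIB-Out. */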
static void
peer_adjout_clear_upcall(struct prefix *p, void *arg)
{
	prefix_adjout_destroy(p);
}

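/*
 * Mark a prefix in the Adj-RIB-Out as stale. Dead prefixes are left alone,
 * pending withdraws are dropped and pending updates are pulled off the
 * update queue before being marked stale.
 */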
static void
peer_adjout_stale_upcall(struct prefix *p, void *arg)
{
	if (p->flags & PREFIX_FLAG_DEAD) {
		return;
	} else if (p->flags & PREFIX_FLAG_WITHDRAW) {
		/* no need to keep stale withdraws, they miss all attributes */
		prefix_adjout_destroy(p);
		return;
	} else if (p->flags & PREFIX_FLAG_UPDATE) {
		RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
	}
	p->flags |= PREFIX_FLAG_STALE;
}

struct peer_flush {
	struct rde_peer *peer;
	time_t		 staletime;
};

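/*
 * peer_flush() callback: remove the peer's prefixes from the Adj-RIB-In
 * and from all local RIBs unless they are newer than the given staletime.
 */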
static void
peer_flush_upcall(struct rib_entry *re, void *arg)
{
	struct rde_peer *peer = ((struct peer_flush *)arg)->peer;
	struct rde_aspath *asp;
	struct bgpd_addr addr;
	struct prefix *p, *np, *rp;
	time_t staletime = ((struct peer_flush *)arg)->staletime;
	uint32_t i;
	uint8_t prefixlen;

	pt_getaddr(re->prefix, &addr);
	prefixlen = re->prefix->prefixlen;
	TAILQ_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
		if (peer != prefix_peer(p))
			continue;
		if (staletime && p->lastchange > staletime)
			continue;

		for (i = RIB_LOC_START; i < rib_size; i++) {
			struct rib *rib = rib_byid(i);
			if (rib == NULL)
				continue;
			rp = prefix_get(rib, peer, p->path_id,
			    &addr, prefixlen);
			if (rp) {
				asp = prefix_aspath(rp);
				if (asp && asp->pftableid)
					rde_pftable_del(asp->pftableid, rp);

				prefix_destroy(rp);
				rde_update_log("flush", i, peer, NULL,
				    &addr, prefixlen);
			}
		}

		prefix_destroy(p);
		peer->stats.prefix_cnt--;
	}
}

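/*
 * Adj-RIB-Out walker run after a table dump: drop leftover stale prefixes
 * and put all other prefixes that are not already queued onto the update
 * queue.
 */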
static void
rde_up_adjout_force_upcall(struct prefix *p, void *ptr)
{
	if (p->flags & PREFIX_FLAG_STALE) {
		/* remove stale entries */
		prefix_adjout_destroy(p);
	} else if (p->flags & PREFIX_FLAG_DEAD) {
		/* ignore dead prefixes, they will go away soon */
	} else if ((p->flags & PREFIX_FLAG_MASK) == 0) {
		/* put entries on the update queue if not already on a queue */
		p->flags |= PREFIX_FLAG_UPDATE;
		if (RB_INSERT(prefix_tree, &prefix_peer(p)->updates[p->pt->aid],
		    p) != NULL)
			fatalx("%s: RB tree invariant violated", __func__);
	}
}

static void
rde_up_adjout_force_done(void *ptr, uint8_t aid)
{
	struct rde_peer		*peer = ptr;

	/* Adj-RIB-Out ready, unthrottle peer and inject EOR */
	peer->throttled = 0;
	if (peer->capa.grestart.restart)
		prefix_add_eor(peer, aid);
}

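/*
 * RIB dump callback: generate an update for the dumped rib_entry if it
 * has at least one eligible prefix.
 */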
static void
rde_up_dump_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_peer		*peer = ptr;
	struct prefix		*p;

	if ((p = prefix_best(re)) == NULL)
		/* no eligible prefix, not even for 'evaluate all' */
		return;

	peer_generate_update(peer, re, NULL, NULL, 0);
}

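/*
 * The RIB dump for this peer finished, now walk the Adj-RIB-Out to push
 * out all pending updates and get rid of stale prefixes.
 */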
static void
rde_up_dump_done(void *ptr, uint8_t aid)
{
	struct rde_peer		*peer = ptr;

	/* force out all updates of Adj-RIB-Out for this peer */
	if (prefix_dump_new(peer, aid, 0, peer, rde_up_adjout_force_upcall,
	    rde_up_adjout_force_done, NULL) == -1)
		fatal("%s: prefix_dump_new", __func__);
}

/*
 * Session got established, bring the peer up, load the RIBs and do the
 * initial table dump.
 */
void
peer_up(struct rde_peer *peer, struct session_up *sup)
{
	uint8_t	 i;

	if (peer->state == PEER_ERR) {
		/*
		 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
		 * So just do a full reset of the peer here.
		 */
		rib_dump_terminate(peer);
		peer_imsg_flush(peer);
		if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
		    peer_adjout_clear_upcall, NULL, NULL) == -1)
			fatal("%s: prefix_dump_new", __func__);
		peer_flush(peer, AID_UNSPEC, 0);
		peer->stats.prefix_cnt = 0;
		peer->stats.prefix_out_cnt = 0;
		peer->state = PEER_DOWN;
	}
	peer->remote_bgpid = sup->remote_bgpid;
	peer->short_as = sup->short_as;
	peer->remote_addr = sup->remote_addr;
	peer->local_v4_addr = sup->local_v4_addr;
	peer->local_v6_addr = sup->local_v6_addr;
	peer->local_if_scope = sup->if_scope;
	memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));

	/* clear eor markers depending on GR flags */
	if (peer->capa.grestart.restart) {
		peer->sent_eor = 0;
		peer->recv_eor = 0;
	} else {
		/* no EOR expected */
		peer->sent_eor = ~0;
		peer->recv_eor = ~0;
	}
	peer->state = PEER_UP;

	for (i = AID_MIN; i < AID_MAX; i++) {
		if (peer->capa.mp[i])
			peer_dump(peer, i);
	}
}

/*
 * Session dropped and no graceful restart is done. Stop everything for
 * this peer and clean up.
 */
void
peer_down(struct rde_peer *peer, void *bula)
{
	peer->remote_bgpid = 0;
	peer->state = PEER_DOWN;
	/*
	 * stop all pending dumps which may depend on this peer
	 * and flush all pending imsg from the SE.
	 */
	rib_dump_terminate(peer);
	peer_imsg_flush(peer);

	/* flush Adj-RIB-Out */
	if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
	    peer_adjout_clear_upcall, NULL, NULL) == -1)
		fatal("%s: prefix_dump_new", __func__);

	/* flush Adj-RIB-In */
	peer_flush(peer, AID_UNSPEC, 0);
	peer->stats.prefix_cnt = 0;
	peer->stats.prefix_out_cnt = 0;

	/* free filters */
	filterlist_free(peer->out_rules);

	RB_REMOVE(peer_tree, &peertable, peer);
	free(peer);
}

/*
 * Flush all routes older than staletime. If staletime is 0, all routes
 * are flushed.
 */
void
peer_flush(struct rde_peer *peer, uint8_t aid, time_t staletime)
{
	struct peer_flush pf = { peer, staletime };

	/* this dump must run synchronously, too much depends on that right now */
	if (rib_dump_new(RIB_ADJ_IN, aid, 0, &pf, peer_flush_upcall,
	    NULL, NULL) == -1)
		fatal("%s: rib_dump_new", __func__);

	/* every route is gone so reset staletime */
	if (aid == AID_UNSPEC) {
		uint8_t i;
		for (i = AID_MIN; i < AID_MAX; i++)
			peer->staletime[i] = 0;
	} else {
		peer->staletime[aid] = 0;
	}
}


/*
 * During graceful restart mark a peer as stale if the session goes down.
 * For the specified AID the Adj-RIB-Out is marked stale and the staletime
 * is set to the current timestamp for identifying stale routes in Adj-RIB-In.
 */
void
peer_stale(struct rde_peer *peer, uint8_t aid, int flushall)
{
	time_t now;

	/* flush the now even staler routes out */
	if (peer->staletime[aid])
		peer_flush(peer, aid, peer->staletime[aid]);

	peer->staletime[aid] = now = getmonotime();
	peer->state = PEER_DOWN;

	/*
	 * stop all pending dumps which may depend on this peer
	 * and flush all pending imsg from the SE.
	 */
	rib_dump_terminate(peer);
	peer_imsg_flush(peer);

	if (flushall)
		peer_flush(peer, aid, 0);

	/* XXX this is not quite correct */
	/* mark Adj-RIB-Out stale for this peer */
	if (prefix_dump_new(peer, aid, 0, NULL,
	    peer_adjout_stale_upcall, NULL, NULL) == -1)
		fatal("%s: prefix_dump_new", __func__);

	/* make sure new prefixes start on a higher timestamp */
	while (now >= getmonotime())
		sleep(1);
}

/*
 * Load the Adj-RIB-Out of a peer; normally called when a session is
 * established. Once the Adj-RIB-Out is ready, stale routes are removed
 * from it and all routes are put on the update queue so they will be
 * sent out.
 */
void
peer_dump(struct rde_peer *peer, uint8_t aid)
{
	if (peer->capa.enhanced_rr && (peer->sent_eor & (1 << aid)))
		rde_peer_send_rrefresh(peer, aid, ROUTE_REFRESH_BEGIN_RR);

	if (peer->export_type == EXPORT_NONE) {
		/* nothing to send apart from the marker */
		if (peer->capa.grestart.restart)
			prefix_add_eor(peer, aid);
	} else if (peer->export_type == EXPORT_DEFAULT_ROUTE) {
		up_generate_default(peer, aid);
		rde_up_dump_done(peer, aid);
	} else if (aid == AID_FLOWSPECv4 || aid == AID_FLOWSPECv6) {
		prefix_flowspec_dump(aid, peer, rde_up_dump_upcall,
		    rde_up_dump_done);
	} else {
		if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer,
		    rde_up_dump_upcall, rde_up_dump_done, NULL) == -1)
			fatal("%s: rib_dump_new", __func__);
		/* throttle peer until dump is done */
		peer->throttled = 1;
	}
}

/*
 * Start of an enhanced route refresh. Mark all routes as stale.
 * Once the route refresh ends an End of Route Refresh message is sent
 * which calls peer_flush() to remove all stale routes.
 */
void
peer_begin_rrefresh(struct rde_peer *peer, uint8_t aid)
{
	time_t now;

	/* flush the now even staler routes out */
	if (peer->staletime[aid])
		peer_flush(peer, aid, peer->staletime[aid]);

	peer->staletime[aid] = now = getmonotime();

	/* make sure new prefixes start on a higher timestamp */
	while (now >= getmonotime())
		sleep(1);
}

/*
 * move an imsg from src to dst, disconnecting any dynamic memory from src.
 */
static void
imsg_move(struct imsg *dst, struct imsg *src)
{
	*dst = *src;
	memset(src, 0, sizeof(*src));
}

/*
 * push an imsg onto the peer imsg queue.
 */
void
peer_imsg_push(struct rde_peer *peer, struct imsg *imsg)
{
	struct iq *iq;

	if ((iq = calloc(1, sizeof(*iq))) == NULL)
		fatal(NULL);
	imsg_move(&iq->imsg, imsg);
	SIMPLEQ_INSERT_TAIL(&peer->imsg_queue, iq, entry);
	imsg_pending++;
}

/*
 * pop the first imsg from the peer imsg queue and move it into the imsg
 * argument. Returns 1 if an element was returned, else 0.
 */
int
peer_imsg_pop(struct rde_peer *peer, struct imsg *imsg)
{
	struct iq *iq;

	iq = SIMPLEQ_FIRST(&peer->imsg_queue);
	if (iq == NULL)
		return 0;

	imsg_move(imsg, &iq->imsg);

	SIMPLEQ_REMOVE_HEAD(&peer->imsg_queue, entry);
	free(iq);
	imsg_pending--;

	return 1;
}

/*
 * Check if any imsg are pending; returns 0 if none are pending.
 */
int
peer_imsg_pending(void)
{
	return imsg_pending != 0;
}

/*
 * flush all imsg queued for a peer.
 */
void
peer_imsg_flush(struct rde_peer *peer)
{
	struct iq *iq;

	while ((iq = SIMPLEQ_FIRST(&peer->imsg_queue)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&peer->imsg_queue, entry);
		free(iq);
		imsg_pending--;
	}
}
