1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021 Ng Peng Nam Sean
5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30#include "opt_inet.h"
31#include "opt_inet6.h"
32#include "opt_route.h"
33#include <sys/types.h>
34#include <sys/malloc.h>
35#include <sys/rmlock.h>
36#include <sys/socket.h>
37
38#include <net/if.h>
39#include <net/route.h>
40#include <net/route/nhop.h>
41#include <net/route/route_ctl.h>
42#include <net/route/route_var.h>
43#include <netinet6/scope6_var.h>
44#include <netlink/netlink.h>
45#include <netlink/netlink_ctl.h>
46#include <netlink/netlink_route.h>
47#include <netlink/route/route_var.h>
48
/*
 * Debug logging setup for this file. The module name and the maximum
 * debug level must be defined before netlink_debug.h is included.
 * NOTE(review): LOG_INFO below is presumably the default verbosity of
 * this module -- confirm against netlink_debug.h.
 */
#define	DEBUG_MOD_NAME	nl_route
#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
#include <netlink/netlink_debug.h>
_DECLARE_DEBUG(LOG_INFO);
53
54static unsigned char
55get_rtm_type(const struct nhop_object *nh)
56{
57	int nh_flags = nh->nh_flags;
58
59	/* Use the fact that nhg runtime flags are only NHF_MULTIPATH */
60	if (nh_flags & NHF_BLACKHOLE)
61		return (RTN_BLACKHOLE);
62	else if (nh_flags & NHF_REJECT)
63		return (RTN_PROHIBIT);
64	return (RTN_UNICAST);
65}
66
67static uint8_t
68nl_get_rtm_protocol(const struct nhop_object *nh)
69{
70#ifdef ROUTE_MPATH
71	if (NH_IS_NHGRP(nh)) {
72		const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh;
73		uint8_t origin = nhgrp_get_origin(nhg);
74		if (origin != RTPROT_UNSPEC)
75			return (origin);
76		nh = nhg->nhops[0];
77	}
78#endif
79	uint8_t origin = nhop_get_origin(nh);
80	if (origin != RTPROT_UNSPEC)
81		return (origin);
82	/* TODO: remove guesswork once all kernel users fill in origin */
83	int rt_flags = nhop_get_rtflags(nh);
84	if (rt_flags & RTF_PROTO1)
85		return (RTPROT_ZEBRA);
86	if (rt_flags & RTF_STATIC)
87		return (RTPROT_STATIC);
88	return (RTPROT_KERNEL);
89}
90
91static int
92get_rtmsg_type_from_rtsock(int cmd)
93{
94	switch (cmd) {
95	case RTM_ADD:
96	case RTM_CHANGE:
97	case RTM_GET:
98		return NL_RTM_NEWROUTE;
99	case RTM_DELETE:
100		return NL_RTM_DELROUTE;
101	}
102
103	return (0);
104}
105
106/*
107 * fibnum heuristics
108 *
109 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS
110 * msg                rtm_table     RTA_TABLE            result
111 * RTM_GETROUTE/dump          0             -       RT_ALL_FIBS
112 * RTM_GETROUTE/dump          1             -                 1
113 * RTM_GETROUTE/get           0             -                 0
114 *
115 */
116
117static struct nhop_object *
118rc_get_nhop(const struct rib_cmd_info *rc)
119{
120	return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new);
121}
122
123static void
124dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh)
125{
126#ifdef INET6
127	int upper_family;
128#endif
129
130	switch (nhop_get_neigh_family(nh)) {
131	case AF_LINK:
132		/* onlink prefix, skip */
133		break;
134	case AF_INET:
135		nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
136		break;
137#ifdef INET6
138	case AF_INET6:
139		upper_family = nhop_get_upper_family(nh);
140		if (upper_family == AF_INET6) {
141			struct in6_addr gw6 = nh->gw6_sa.sin6_addr;
142			in6_clearscope(&gw6);
143
144			nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6);
145		} else if (upper_family == AF_INET) {
146			/* IPv4 over IPv6 */
147			struct in6_addr gw6 = nh->gw6_sa.sin6_addr;
148			in6_clearscope(&gw6);
149
150			char buf[20];
151			struct rtvia *via = (struct rtvia *)&buf[0];
152			via->rtvia_family = AF_INET6;
153			memcpy(via->rtvia_addr, &gw6, 16);
154			nlattr_add(nw, NL_RTA_VIA, 17, via);
155		}
156		break;
157#endif
158	}
159}
160
161static void
162dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh)
163{
164	int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t);
165	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
166
167	if (nla == NULL)
168		return;
169	nla->nla_type = NL_RTA_METRICS;
170	nla->nla_len = nla_len;
171	nla++;
172	nla->nla_type = NL_RTAX_MTU;
173	nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t);
174	*((uint32_t *)(nla + 1)) = nh->nh_mtu;
175}
176
#ifdef ROUTE_MPATH
/*
 * Writes the attributes describing a nexthop group (multipath route).
 *
 * Emits, in order: NL_RTA_NH_ID (only when a user index is set),
 * NL_RTA_KNH_ID, NL_RTA_RTFLAGS taken from the first member, and a
 * nested NL_RTA_MULTIPATH holding one struct rtnexthop per member
 * followed by that member's own attributes.
 *
 * @rtm is not used by this function.
 */
static void
dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm)
{
	uint32_t uidx = nhgrp_get_uidx(nhg);
	uint32_t num_nhops;
	const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops);
	/* Flags of the first member serve as the group-wide baseline */
	uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh);

	if (uidx != 0)
		nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg));

	nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags);
	int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH);
	if (off == 0)
		return;

	for (int i = 0; i < num_nhops; i++) {
		/* Remember where this entry starts to fix up its length later */
		int nh_off = nlattr_save_offset(nw);
		struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop);
		if (rtnh == NULL)
			return;
		rtnh->rtnh_flags = 0;
		rtnh->rtnh_ifindex = if_getindex(wn[i].nh->nh_ifp);
		rtnh->rtnh_hops = wn[i].weight;
		dump_rc_nhop_gw(nw, wn[i].nh);
		/* Per-member flags are only written when they differ from the baseline */
		uint32_t rtflags = nhop_get_rtflags(wn[i].nh);
		if (rtflags != base_rtflags)
			nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
		if (rtflags & RTF_FIXEDMTU)
			dump_rc_nhop_mtu(nw, wn[i].nh);
		/* Re-derive the header pointer from the saved offset before writing to it */
		rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop);
		/*
		 * nlattr_add() allocates 4-byte aligned storage, no need to align
		 * length here
		 * */
		rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off;
	}
	nlattr_set_len(nw, off);
}
#endif
219
/*
 * Writes the nexthop-related attributes of a route.
 *
 * With ROUTE_MPATH, nexthop groups are delegated to dump_rc_nhg().
 * For a single nexthop the attributes are written in this order:
 * gateway (only for NHF_GATEWAY nexthops), NL_RTA_NH_ID (only when a
 * user index is set), NL_RTA_KNH_ID, NL_RTA_RTFLAGS, MTU metrics (only
 * with RTF_FIXEDMTU), NL_RTA_EXPIRES (only when an expiration is set),
 * NL_RTA_OIF and NL_RTA_WEIGHT (only for a non-default weight).
 *
 * @rtm is only passed through to dump_rc_nhg().
 */
static void
dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm)
{
#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(rnd->rnd_nhop)) {
		dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm);
		return;
	}
#endif
	const struct nhop_object *nh = rnd->rnd_nhop;
	uint32_t rtflags = nhop_get_rtflags(nh);

	/*
	 * IPv4 over IPv6
	 *    ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2),
	 * IPv4 w/ gw
	 *    ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)],
	 * Direct route:
	 *    ('RTA_OIF', 2)
	 */
	if (nh->nh_flags & NHF_GATEWAY)
		dump_rc_nhop_gw(nw, nh);

	uint32_t uidx = nhop_get_uidx(nh);
	if (uidx != 0)
		nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh));
	nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);

	if (rtflags & RTF_FIXEDMTU)
		dump_rc_nhop_mtu(nw, nh);
	/* Expiration is exported relative to the current uptime */
	uint32_t nh_expire = nhop_get_expire(nh);
	if (nh_expire > 0)
		nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime);

	/* In any case, fill outgoing interface */
	nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp));

	if (rnd->rnd_weight != RT_DEFAULT_WEIGHT)
		nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight);
}
261
/*
 * Dumps a single route (prefix + nexthop data) into an rtmsg.
 *
 * A new message is started in @nw using @hdr as the header template,
 * followed by struct rtmsg, NL_RTA_TABLE, NL_RTA_DST and the nexthop
 * attributes written by dump_rc_nhop().
 *
 * Must be called inside the network epoch (asserted below).
 *
 * Returns 0 on success. Returns ENOMEM when the message cannot be
 * started or finished, and EAFNOSUPPORT for a route family that is not
 * handled here; in both error cases the partial message is aborted.
 */

static int
dump_px(uint32_t fibnum, const struct nlmsghdr *hdr,
    const struct rtentry *rt, struct route_nhop_data *rnd,
    struct nl_writer *nw)
{
	struct rtmsg *rtm;
	int error = 0;

	NET_EPOCH_ASSERT();

	if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg)))
		goto enomem;

	int family = rt_get_family(rt);
	/* Saved so the rtmsg header can be located again after adding attributes */
	int rtm_off = nlattr_save_offset(nw);
	rtm = nlmsg_reserve_object(nw, struct rtmsg);
	rtm->rtm_family = family;
	rtm->rtm_dst_len = 0;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = 0;
	/* rtm_table is one byte wide; the full value always goes to NL_RTA_TABLE */
	if (fibnum < 255)
		rtm->rtm_table = (unsigned char)fibnum;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop);
	rtm->rtm_type = get_rtm_type(rnd->rnd_nhop);

	nlattr_add_u32(nw, NL_RTA_TABLE, fibnum);

	int plen = 0;
#if defined(INET) || defined(INET6)
	uint32_t scopeid;
#endif
	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct in_addr addr;
			rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
			nlattr_add(nw, NL_RTA_DST, 4, &addr);
			break;
		}
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct in6_addr addr;
			rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
			nlattr_add(nw, NL_RTA_DST, 16, &addr);
			break;
		}
#endif
	default:
		FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family);
		error = EAFNOSUPPORT;
		goto flush;
	}

	/* Re-derive the header pointer from the saved offset before patching it */
	rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg);
	if (plen > 0)
		rtm->rtm_dst_len = plen;
	dump_rc_nhop(nw, rnd, rtm);

	if (nlmsg_end(nw))
		return (0);
enomem:
	error = ENOMEM;
flush:
	nlmsg_abort(nw);
	return (error);
}
336
337static int
338family_to_group(int family)
339{
340	switch (family) {
341	case AF_INET:
342		return (RTNLGRP_IPV4_ROUTE);
343	case AF_INET6:
344		return (RTNLGRP_IPV6_ROUTE);
345	}
346	return (0);
347}
348
349static void
350report_operation(uint32_t fibnum, struct rib_cmd_info *rc,
351    struct nlpcb *nlp, struct nlmsghdr *hdr)
352{
353	struct nl_writer nw = {};
354	uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt));
355
356	if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
357		struct route_nhop_data rnd = {
358			.rnd_nhop = rc_get_nhop(rc),
359			.rnd_weight = rc->rc_nh_weight,
360		};
361		hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE);
362		hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND);
363		switch (rc->rc_cmd) {
364		case RTM_ADD:
365			hdr->nlmsg_type = NL_RTM_NEWROUTE;
366			hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
367			break;
368		case RTM_CHANGE:
369			hdr->nlmsg_type = NL_RTM_NEWROUTE;
370			hdr->nlmsg_flags |= NLM_F_REPLACE;
371			break;
372		case RTM_DELETE:
373			hdr->nlmsg_type = NL_RTM_DELROUTE;
374			break;
375		}
376		dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw);
377		nlmsg_flush(&nw);
378	}
379
380	rtsock_callback_p->route_f(fibnum, rc);
381}
382
/*
 * Embeds the interface index of @ifp as the scope of an IPv6 link-local
 * address stored in @sa. Does nothing for NULL arguments, for other
 * address families, for non-link-local addresses, or without INET6.
 */
static void
set_scope6(struct sockaddr *sa, struct ifnet *ifp)
{
#ifdef INET6
	struct sockaddr_in6 *sin6;

	if (sa == NULL || ifp == NULL || sa->sa_family != AF_INET6)
		return;

	sin6 = (struct sockaddr_in6 *)sa;
	if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
		return;

	in6_set_unicast_scopeid(&sin6->sin6_addr, if_getindex(ifp));
#endif
}
395
/*
 * Parsed form of a single RTA_MULTIPATH entry (struct rtnexthop plus
 * its nested attributes).
 */
struct rta_mpath_nh {
	struct sockaddr	*gw;		/* from NL_RTA_GATEWAY or NL_RTA_VIA */
	struct ifnet	*ifp;		/* resolved from rtnh_ifindex */
	uint8_t		rtnh_flags;	/* copied from rtnh_flags */
	uint8_t		rtnh_weight;	/* copied from rtnh_hops */
};
402
403#define	_IN(_field)	offsetof(struct rtnexthop, _field)
404#define	_OUT(_field)	offsetof(struct rta_mpath_nh, _field)
405const static struct nlattr_parser nla_p_rtnh[] = {
406	{ .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip },
407	{ .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia },
408};
409const static struct nlfield_parser nlf_p_rtnh[] = {
410	{ .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 },
411	{ .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 },
412	{ .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz },
413};
414#undef _IN
415#undef _OUT
416
417static bool
418post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused)
419{
420	struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs;
421
422	set_scope6(attrs->gw, attrs->ifp);
423	return (true);
424}
425NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh);
426
427struct rta_mpath {
428	int num_nhops;
429	struct rta_mpath_nh nhops[0];
430};
431
432static int
433nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
434{
435	int data_len = nla->nla_len - sizeof(struct nlattr);
436	struct rtnexthop *rtnh;
437
438	int max_nhops = data_len / sizeof(struct rtnexthop);
439
440	struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh));
441	mp->num_nhops = 0;
442
443	for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) {
444		struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++];
445
446		int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser,
447		    npt, mpnh);
448		if (error != 0) {
449			NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed",
450			    mp->num_nhops - 1);
451			return (error);
452		}
453
454		int len = NL_ITEM_ALIGN(rtnh->rtnh_len);
455		data_len -= len;
456		rtnh = (struct rtnexthop *)((char *)rtnh + len);
457	}
458	if (data_len != 0 || mp->num_nhops == 0) {
459		NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr");
460		return (EINVAL);
461	}
462
463	*((struct rta_mpath **)target) = mp;
464	return (0);
465}
466
467
/*
 * Parsed form of an RTM_{NEW,DEL,GET}ROUTE request: the struct rtmsg
 * header fields plus the attributes handled by rtm_parser.
 */
struct nl_parsed_route {
	struct sockaddr		*rta_dst;	/* NL_RTA_DST */
	struct sockaddr		*rta_gw;	/* NL_RTA_GATEWAY or NL_RTA_VIA */
	struct ifnet		*rta_oif;	/* NL_RTA_OIF */
	struct rta_mpath	*rta_multipath;	/* NL_RTA_MULTIPATH */
	uint32_t		rta_table;	/* NL_RTA_TABLE */
	uint32_t		rta_rtflags;	/* NL_RTA_RTFLAGS */
	uint32_t		rta_nh_id;	/* NL_RTA_NH_ID */
	uint32_t		rta_weight;	/* NL_RTA_WEIGHT */
	uint32_t		rtax_mtu;	/* NL_RTAX_MTU nested in NL_RTA_METRICS */
	uint8_t			rtm_family;	/* rtmsg header: rtm_family */
	uint8_t			rtm_dst_len;	/* rtmsg header: rtm_dst_len */
	uint8_t			rtm_protocol;	/* rtmsg header: rtm_protocol */
	uint8_t			rtm_type;	/* rtmsg header: rtm_type */
	uint32_t		rtm_flags;	/* rtmsg header: rtm_flags */
};
484
/*
 * Parser tables for route messages. _IN() is an offset into the wire
 * struct rtmsg, _OUT() an offset into struct nl_parsed_route.
 */
#define	_IN(_field)	offsetof(struct rtmsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_route, _field)
/* Attributes nested inside NL_RTA_METRICS; only the MTU is picked up */
static struct nlattr_parser nla_p_rtmetrics[] = {
	{ .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 },
};
NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics);

/*
 * Top-level route attributes. NL_RTA_GATEWAY and NL_RTA_VIA both store
 * into rta_gw.
 * NOTE(review): NL_RTA_METRICS has no .off, so the nested parser
 * presumably writes relative to the start of struct nl_parsed_route --
 * confirm against nlattr_get_nested().
 */
static const struct nlattr_parser nla_p_rtmsg[] = {
	{ .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip },
	{ .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp },
	{ .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip },
	{ .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested },
	{ .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath },
	{ .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 },
	{ .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 },
	{ .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 },
	{ .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia },
	{ .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 },
};

/* Fixed struct rtmsg header fields copied into struct nl_parsed_route */
static const struct nlfield_parser nlf_p_rtmsg[] = {
	{ .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 },
	{ .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 },
	{ .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 },
	{ .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 },
	{ .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 },
};
#undef _IN
#undef _OUT
514
515static bool
516post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused)
517{
518	struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs;
519
520	set_scope6(attrs->rta_dst, attrs->rta_oif);
521	set_scope6(attrs->rta_gw, attrs->rta_oif);
522	return (true);
523}
524NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg);
525
/*
 * State shared between the routing table dump driver and the per-route
 * callback dump_rtentry().
 */
struct netlink_walkargs {
	struct nl_writer *nw;		/* writer receiving the dumped routes */
	struct route_nhop_data rnd;	/* scratch nexthop data of the current route */
	struct nlmsghdr hdr;		/* header template for every dumped message */
	struct nlpcb *nlp;		/* requesting socket, used for export checks */
	uint32_t fibnum;		/* FIB currently being dumped */
	int family;			/* address family currently being dumped */
	int error;			/* first dump error; once set, later routes are skipped */
	int count;			/* routes visited in the current table */
	int dumped;			/* routes exported from the current table */
	int dumped_tables;		/* number of tables walked so far */
};
538
539static int
540dump_rtentry(struct rtentry *rt, void *_arg)
541{
542	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
543	int error;
544
545	wa->count++;
546	if (wa->error != 0)
547		return (0);
548	if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp)))
549		return (0);
550	wa->dumped++;
551
552	rt_get_rnd(rt, &wa->rnd);
553
554	error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw);
555
556	IF_DEBUG_LEVEL(LOG_DEBUG3) {
557		char rtbuf[INET6_ADDRSTRLEN + 5];
558		FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
559		    "Dump %s, error %d",
560		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error);
561	}
562	wa->error = error;
563
564	return (0);
565}
566
/*
 * Dumps a single routing table (one FIB, one address family).
 *
 * Resets the per-table counters, walks the table with dump_rtentry()
 * and bumps the number of walked tables. Errors are reported through
 * wa->error, which dump_rtentry() sets.
 */
static void
dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
{
	FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump");
	wa->count = 0;
	wa->dumped = 0;

	rib_walk(fibnum, family, false, dump_rtentry, wa);

	wa->dumped_tables++;

	FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
	    wa->count, wa->dumped);
}
581
582static int
583dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family)
584{
585	wa->fibnum = fibnum;
586
587	if (family == AF_UNSPEC) {
588		for (int i = 0; i < AF_MAX; i++) {
589			if (rt_tables_get_rnh(fibnum, i) != 0) {
590				wa->family = i;
591				dump_rtable_one(wa, fibnum, i);
592				if (wa->error != 0)
593					break;
594			}
595		}
596	} else {
597		if (rt_tables_get_rnh(fibnum, family) != 0) {
598			wa->family = family;
599			dump_rtable_one(wa, fibnum, family);
600		}
601	}
602
603	return (wa->error);
604}
605
/*
 * Handles a non-dump RTM_GETROUTE: looks up a single route and replies
 * with one NL_RTM_NEWROUTE message written to npt->nw.
 *
 * With RTM_F_PREFIX the exact prefix (rta_dst/rtm_dst_len) is looked
 * up; otherwise the table's address match callback is used.
 *
 * Returns EINVAL when RTA_DST is missing, EAFNOSUPPORT when there is no
 * table for the fib/family pair, ESRCH when no route is found or the
 * route is not exportable to the requester, and 0 otherwise.
 *
 * NOTE(review): the return value of dump_px() is ignored, so a failed
 * reply is still reported as success -- confirm this is intended.
 */
static int
handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs,
    struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	RIB_RLOCK_TRACKER;
	struct rib_head *rnh;
	const struct rtentry *rt;
	struct route_nhop_data rnd;
	uint32_t fibnum = attrs->rta_table;
	sa_family_t family = attrs->rtm_family;

	if (attrs->rta_dst == NULL) {
		NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied");
			return (EINVAL);
	}

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/* The lookup and the nexthop snapshot happen under the RIB read lock */
	RIB_RLOCK(rnh);

	struct sockaddr *dst = attrs->rta_dst;

	if (attrs->rtm_flags & RTM_F_PREFIX)
		rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd);
	else
		rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head);
	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rt_get_rnd(rt, &rnd);
	/* Reduce the result to a single nexthop, selected with flow id 0 */
	rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0);

	RIB_RUNLOCK(rnh);

	if (!rt_is_exportable(rt, nlp_get_cred(nlp)))
		return (ESRCH);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused;
		FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s",
		    nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)),
		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)));
	}

	hdr->nlmsg_type = NL_RTM_NEWROUTE;
	dump_px(fibnum, hdr, rt, &rnd, npt->nw);

	return (0);
}
659
660static int
661handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family,
662    struct nlmsghdr *hdr, struct nl_writer *nw)
663{
664	struct netlink_walkargs wa = {
665		.nlp = nlp,
666		.nw = nw,
667		.hdr.nlmsg_pid = hdr->nlmsg_pid,
668		.hdr.nlmsg_seq = hdr->nlmsg_seq,
669		.hdr.nlmsg_type = NL_RTM_NEWROUTE,
670		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
671	};
672
673	if (fibnum == RT_TABLE_UNSPEC) {
674		for (int i = 0; i < V_rt_numfibs; i++) {
675			dump_rtable_fib(&wa, fibnum, family);
676			if (wa.error != 0)
677				break;
678		}
679	} else
680		dump_rtable_fib(&wa, fibnum, family);
681
682	if (wa.error == 0 && wa.dumped_tables == 0) {
683		FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family");
684		wa.error = ESRCH;
685		// How do we propagate it?
686	}
687
688	if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) {
689                NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
690                return (ENOMEM);
691        }
692
693	return (wa.error);
694}
695
696static struct nhop_object *
697finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror)
698{
699	/*
700	 * The following MUST be filled:
701	 *  nh_ifp, nh_ifa, nh_gw
702	 */
703	if (nh->gw_sa.sa_family == 0) {
704		/*
705		 * Empty gateway. Can be direct route with RTA_OIF set.
706		 */
707		if (nh->nh_ifp != NULL)
708			nhop_set_direct_gw(nh, nh->nh_ifp);
709		else {
710			NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping");
711			*perror = EINVAL;
712			return (NULL);
713		}
714		/* Both nh_ifp and gateway are set */
715	} else {
716		/* Gateway is set up, we can derive ifp if not set */
717		if (nh->nh_ifp == NULL) {
718			uint32_t fibnum = nhop_get_fibnum(nh);
719			uint32_t flags = 0;
720
721			if (nh->nh_flags & NHF_GATEWAY)
722				flags = RTF_GATEWAY;
723			else if (nh->nh_flags & NHF_HOST)
724				flags = RTF_HOST;
725
726			struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum);
727			if (ifa == NULL) {
728				NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping");
729				*perror = EINVAL;
730				return (NULL);
731			}
732			nhop_set_transmit_ifp(nh, ifa->ifa_ifp);
733		}
734	}
735	/* Both nh_ifp and gateway are set */
736	if (nh->nh_ifa == NULL) {
737		const struct sockaddr *gw_sa = &nh->gw_sa;
738
739		if (gw_sa->sa_family != dst->sa_family) {
740			/*
741			 * Use dst as the target for determining the default
742			 * preferred ifa IF
743			 * 1) the gateway is link-level (e.g. direct route)
744			 * 2) the gateway family is different (e.g. IPv4 over IPv6).
745			 */
746			gw_sa = dst;
747		}
748
749		struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp);
750		if (ifa == NULL) {
751			/* Try link-level ifa. */
752			gw_sa = &nh->gw_sa;
753			ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp);
754			if (ifa == NULL) {
755				NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping");
756				*perror = EINVAL;
757				return (NULL);
758			}
759		}
760		nhop_set_src(nh, ifa);
761	}
762
763	return (nhop_get_nhop(nh, perror));
764}
765
766static int
767get_pxflag(const struct nl_parsed_route *attrs)
768{
769	int pxflag = 0;
770	switch (attrs->rtm_family) {
771	case AF_INET:
772		if (attrs->rtm_dst_len == 32)
773			pxflag = NHF_HOST;
774		else if (attrs->rtm_dst_len == 0)
775			pxflag = NHF_DEFAULT;
776		break;
777	case AF_INET6:
778		if (attrs->rtm_dst_len == 128)
779			pxflag = NHF_HOST;
780		else if (attrs->rtm_dst_len == 0)
781			pxflag = NHF_DEFAULT;
782		break;
783	}
784
785	return (pxflag);
786}
787
788static int
789get_op_flags(int nlm_flags)
790{
791	int op_flags = 0;
792
793	op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0;
794	op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0;
795	op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0;
796	op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0;
797
798	return (op_flags);
799}
800
#ifdef ROUTE_MPATH
/*
 * Builds one member nexthop of a multipath route from a parsed
 * RTA_MULTIPATH entry and the route-wide attributes.
 *
 * A gateway is mandatory for every entry. The finalized nexthop is
 * stored in *@pnh. Returns 0 on success, EINVAL for a missing gateway,
 * ENOMEM when allocation fails, or the error from setting the gateway
 * or from finalizing the nexthop.
 */
static int
create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh,
    struct nl_pstate *npt, struct nhop_object **pnh)
{
	struct nhop_object *nhop;
	int error;

	if (mpnh->gw == NULL)
		return (EINVAL);

	nhop = nhop_alloc(attrs->rta_table, attrs->rtm_family);
	if (nhop == NULL)
		return (ENOMEM);

	error = nl_set_nexthop_gw(nhop, mpnh->gw, mpnh->ifp, npt);
	if (error != 0) {
		nhop_free(nhop);
		return (error);
	}

	if (mpnh->ifp != NULL)
		nhop_set_transmit_ifp(nhop, mpnh->ifp);
	nhop_set_pxtype_flag(nhop, get_pxflag(attrs));
	nhop_set_rtflags(nhop, attrs->rta_rtflags);
	/* Only protocols above RTPROT_STATIC are recorded as the origin */
	if (attrs->rtm_protocol > RTPROT_STATIC)
		nhop_set_origin(nhop, attrs->rtm_protocol);

	*pnh = finalize_nhop(nhop, attrs->rta_dst, &error);

	return (error);
}
#endif
832
833static struct nhop_object *
834create_nexthop_from_attrs(struct nl_parsed_route *attrs,
835    struct nl_pstate *npt, int *perror)
836{
837	struct nhop_object *nh = NULL;
838	int error = 0;
839
840	if (attrs->rta_multipath != NULL) {
841#ifdef ROUTE_MPATH
842		/* Multipath w/o explicit nexthops */
843		int num_nhops = attrs->rta_multipath->num_nhops;
844		struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops);
845
846		for (int i = 0; i < num_nhops; i++) {
847			struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i];
848
849			error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh);
850			if (error != 0) {
851				for (int j = 0; j < i; j++)
852					nhop_free(wn[j].nh);
853				break;
854			}
855			wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1;
856		}
857		if (error == 0) {
858			struct rib_head *rh = nhop_get_rh(wn[0].nh);
859			struct nhgrp_object *nhg;
860
861			nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family,
862			    wn, num_nhops, perror);
863			if (nhg != NULL) {
864				if (attrs->rtm_protocol > RTPROT_STATIC)
865					nhgrp_set_origin(nhg, attrs->rtm_protocol);
866				nhg = nhgrp_get_nhgrp(nhg, perror);
867			}
868			for (int i = 0; i < num_nhops; i++)
869				nhop_free(wn[i].nh);
870			if (nhg != NULL)
871				return ((struct nhop_object *)nhg);
872			error = *perror;
873		}
874#else
875		error = ENOTSUP;
876#endif
877		*perror = error;
878	} else {
879		nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
880		if (nh == NULL) {
881			*perror = ENOMEM;
882			return (NULL);
883		}
884		if (attrs->rta_gw != NULL) {
885			*perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt);
886			if (*perror != 0) {
887				nhop_free(nh);
888				return (NULL);
889			}
890		}
891		if (attrs->rta_oif != NULL)
892			nhop_set_transmit_ifp(nh, attrs->rta_oif);
893		if (attrs->rtax_mtu != 0)
894			nhop_set_mtu(nh, attrs->rtax_mtu, true);
895		if (attrs->rta_rtflags & RTF_BROADCAST)
896			nhop_set_broadcast(nh, true);
897		if (attrs->rtm_protocol > RTPROT_STATIC)
898			nhop_set_origin(nh, attrs->rtm_protocol);
899		nhop_set_pxtype_flag(nh, get_pxflag(attrs));
900		nhop_set_rtflags(nh, attrs->rta_rtflags);
901
902		switch (attrs->rtm_type) {
903		case RTN_UNICAST:
904			break;
905		case RTN_BLACKHOLE:
906			nhop_set_blackhole(nh, RTF_BLACKHOLE);
907			break;
908		case RTN_PROHIBIT:
909		case RTN_UNREACHABLE:
910			nhop_set_blackhole(nh, RTF_REJECT);
911			break;
912		/* TODO: return ENOTSUP for other types if strict option is set */
913		}
914
915		nh = finalize_nhop(nh, attrs->rta_dst, perror);
916	}
917
918	return (nh);
919}
920
/*
 * RTM_NEWROUTE handler: parses the request, obtains the nexthop and
 * adds the route to the RIB.
 *
 * The nexthop is either looked up by the user-supplied index
 * (NL_RTA_NH_ID) or created from the request attributes. On success the
 * change is reported via report_operation().
 *
 * Returns 0 on success, EINVAL for a missing RTA_DST or an out-of-range
 * fib, or the error from parsing, nexthop lookup/creation or the RIB.
 */
static int
rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
    struct nl_pstate *npt)
{
	struct rib_cmd_info rc = {};
	struct nhop_object *nh = NULL;
	int error;

	struct nl_parsed_route attrs = {};
	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
	if (error != 0)
		return (error);

	/* Check if we have enough data */
	if (attrs.rta_dst == NULL) {
		NL_LOG(LOG_DEBUG, "missing RTA_DST");
		return (EINVAL);
	}

	if (attrs.rta_table >= V_rt_numfibs) {
		NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
		return (EINVAL);
	}

	if (attrs.rta_nh_id != 0) {
		/* Referenced uindex */
		int pxflag = get_pxflag(&attrs);
		nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id,
		    pxflag, &error);
		if (error != 0)
			return (error);
	} else {
		nh = create_nexthop_from_attrs(&attrs, npt, &error);
		if (error != 0) {
			NL_LOG(LOG_DEBUG, "Error creating nexthop");
			return (error);
		}
	}

	/* An unset weight on a plain nexthop means the default weight */
	if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0)
		attrs.rta_weight = RT_DEFAULT_WEIGHT;
	struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight };
	int op_flags = get_op_flags(hdr->nlmsg_flags);

	error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len,
	    &rnd, op_flags, &rc);
	if (error == 0)
		report_operation(attrs.rta_table, &rc, nlp, hdr);
	return (error);
}
971
972static int
973path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
974{
975	struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data;
976
977	if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw))
978		return (0);
979
980	if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp))
981		return (0);
982
983	return (1);
984}
985
986static int
987rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
988    struct nl_pstate *npt)
989{
990	struct rib_cmd_info rc;
991	int error;
992
993	struct nl_parsed_route attrs = {};
994	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
995	if (error != 0)
996		return (error);
997
998	if (attrs.rta_dst == NULL) {
999		NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set");
1000		return (ESRCH);
1001	}
1002
1003	if (attrs.rta_table >= V_rt_numfibs) {
1004		NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
1005		return (EINVAL);
1006	}
1007
1008	error = rib_del_route_px(attrs.rta_table, attrs.rta_dst,
1009	    attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc);
1010	if (error == 0)
1011		report_operation(attrs.rta_table, &rc, nlp, hdr);
1012	return (error);
1013}
1014
1015static int
1016rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1017{
1018	int error;
1019
1020	struct nl_parsed_route attrs = {};
1021	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
1022	if (error != 0)
1023		return (error);
1024
1025	if (attrs.rta_table >= V_rt_numfibs) {
1026		NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
1027		return (EINVAL);
1028	}
1029
1030	if (hdr->nlmsg_flags & NLM_F_DUMP)
1031		error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw);
1032	else
1033		error = handle_rtm_getroute(nlp, &attrs, hdr, npt);
1034
1035	return (error);
1036}
1037
/*
 * Announces a RIB change described by @rc to the netlink route
 * multicast group of the route's family.
 *
 * The message type and flags are derived from the rtsock command:
 * additions carry NLM_F_EXCL | NLM_F_CREATE, changes carry
 * NLM_F_REPLACE and deletions carry no flags. Nothing is sent when no
 * group writer can be obtained.
 */
void
rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
{
	struct nl_writer nw = {};
	int family, nlm_flags = 0;

	family = rt_get_family(rc->rc_rt);

	/* XXX: check if there are active listeners first */

	/* TODO: consider passing PID/type/seq */
	switch (rc->rc_cmd) {
	case RTM_ADD:
		nlm_flags = NLM_F_EXCL | NLM_F_CREATE;
		break;
	case RTM_CHANGE:
		nlm_flags = NLM_F_REPLACE;
		break;
	case RTM_DELETE:
		nlm_flags = 0;
		break;
	}
	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char rtbuf[NHOP_PRINT_BUFSIZE] __unused;
		FIB_LOG(LOG_DEBUG2, fibnum, family,
		    "received event %s for %s / nlm_flags=%X",
		    rib_print_cmd(rc->rc_cmd),
		    rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)),
		    nlm_flags);
	}

	/* Header template; the remaining fields stay zero-initialized */
	struct nlmsghdr hdr = {
		.nlmsg_flags = nlm_flags,
		.nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd),
	};

	/* Deletions are described by the old nexthop, everything else by the new one */
	struct route_nhop_data rnd = {
		.rnd_nhop = rc_get_nhop(rc),
		.rnd_weight = rc->rc_nh_weight,
	};

	uint32_t group_id = family_to_group(family);
	if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
		NL_LOG(LOG_DEBUG, "error allocating event buffer");
		return;
	}

	dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw);
	nlmsg_flush(&nw);
}
1088
/*
 * Route message handlers registered by rtnl_routes_init().
 * RTM_GETROUTE sets no privilege and carries RTNL_F_ALLOW_NONVNET_JAIL;
 * the modifying commands set PRIV_NET_ROUTE.
 * NOTE(review): .priv is presumably the privilege the rtnetlink
 * dispatcher checks before calling the handler -- confirm there.
 */
static const struct rtnl_cmd_handler cmd_handlers[] = {
	{
		.cmd = NL_RTM_GETROUTE,
		.name = "RTM_GETROUTE",
		.cb = &rtnl_handle_getroute,
		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
	},
	{
		.cmd = NL_RTM_DELROUTE,
		.name = "RTM_DELROUTE",
		.cb = &rtnl_handle_delroute,
		.priv = PRIV_NET_ROUTE,
	},
	{
		.cmd = NL_RTM_NEWROUTE,
		.name = "RTM_NEWROUTE",
		.cb = &rtnl_handle_newroute,
		.priv = PRIV_NET_ROUTE,
	}
};
1109
/* Every parser declared in this file, verified once at initialization */
static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser};
1111
/*
 * Module initialization for route messages: verifies the parsers
 * declared in this file and registers the RTM_*ROUTE handlers.
 */
void
rtnl_routes_init(void)
{
	NL_VERIFY_PARSERS(all_parsers);
	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
}
1118