1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29#include "opt_inet.h"
30#include "opt_inet6.h"
31#include <sys/types.h>
32#include <sys/eventhandler.h>
33#include <sys/kernel.h>
34#include <sys/jail.h>
35#include <sys/malloc.h>
36#include <sys/socket.h>
37#include <sys/sockio.h>
38#include <sys/syslog.h>
39
40#include <net/if.h>
41#include <net/if_dl.h>
42#include <net/if_media.h>
43#include <net/if_var.h>
44#include <net/if_clone.h>
45#include <net/route.h>
46#include <net/route/nhop.h>
47#include <net/route/route_ctl.h>
48#include <netinet/in_var.h>
49#include <netinet6/in6_var.h>
50#include <netinet6/scope6_var.h> /* scope deembedding */
51#include <netlink/netlink.h>
52#include <netlink/netlink_ctl.h>
53#include <netlink/netlink_route.h>
54#include <netlink/route/route_var.h>
55
56#define	DEBUG_MOD_NAME	nl_iface
57#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
58#include <netlink/netlink_debug.h>
59_DECLARE_DEBUG(LOG_INFO);
60
61struct netlink_walkargs {
62	struct nl_writer *nw;
63	struct nlmsghdr hdr;
64	struct nlpcb *so;
65	struct ucred *cred;
66	uint32_t fibnum;
67	int family;
68	int error;
69	int count;
70	int dumped;
71};
72
73static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;
74
75static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
76
77static struct sx rtnl_cloner_lock;
78SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");
79
80/* These are external hooks for CARP. */
81extern int	(*carp_get_vhid_p)(struct ifaddr *);
82
83/*
84 * RTM_GETLINK request
85 * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
86 *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
87 *
88 * Reply:
89 * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
90{{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
91
92[
93{{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
94{{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
95{{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
96{{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
97{{nla_len=8, nla_type=IFLA_MTU}, 1500},
98{{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
99 {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
100{{nla_len=8, nla_type=IFLA_GROUP}, 0},
101{{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
102{{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
103{{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
104{{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
105{{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
106{{nla_len=5, nla_type=IFLA_CARRIER}, 1},
107{{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
108{{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
109{{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
110{{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
111{{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
112 */
113
/* Link state values exported via IFLA_OPERSTATE and IFLA_CARRIER. */
struct if_state {
	uint8_t	ifla_operstate;	/* one of IF_OPER_* */
	uint8_t	ifla_carrier;	/* 1 when carrier is present, 0 otherwise */
};
118
119static void
120get_operstate_ether(if_t ifp, struct if_state *pstate)
121{
122	struct ifmediareq ifmr = {};
123	int error;
124	error = if_ioctl(ifp, SIOCGIFMEDIA, (void *)&ifmr);
125
126	if (error != 0) {
127		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
128		    if_name(ifp), error);
129		return;
130	}
131
132	switch (IFM_TYPE(ifmr.ifm_active)) {
133	case IFM_ETHER:
134		if (ifmr.ifm_status & IFM_ACTIVE) {
135			pstate->ifla_carrier = 1;
136			if (if_getflags(ifp) & IFF_MONITOR)
137				pstate->ifla_operstate = IF_OPER_DORMANT;
138			else
139				pstate->ifla_operstate = IF_OPER_UP;
140		} else
141			pstate->ifla_operstate = IF_OPER_DOWN;
142	}
143}
144
145static bool
146get_stats(struct nl_writer *nw, if_t ifp)
147{
148	struct rtnl_link_stats64 *stats;
149
150	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
151	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
152	if (nla == NULL)
153		return (false);
154	nla->nla_type = IFLA_STATS64;
155	nla->nla_len = nla_len;
156	stats = (struct rtnl_link_stats64 *)(nla + 1);
157
158	stats->rx_packets = if_getcounter(ifp, IFCOUNTER_IPACKETS);
159	stats->tx_packets = if_getcounter(ifp, IFCOUNTER_OPACKETS);
160	stats->rx_bytes = if_getcounter(ifp, IFCOUNTER_IBYTES);
161	stats->tx_bytes = if_getcounter(ifp, IFCOUNTER_OBYTES);
162	stats->rx_errors = if_getcounter(ifp, IFCOUNTER_IERRORS);
163	stats->tx_errors = if_getcounter(ifp, IFCOUNTER_OERRORS);
164	stats->rx_dropped = if_getcounter(ifp, IFCOUNTER_IQDROPS);
165	stats->tx_dropped = if_getcounter(ifp, IFCOUNTER_OQDROPS);
166	stats->multicast = if_getcounter(ifp, IFCOUNTER_IMCASTS);
167	stats->rx_nohandler = if_getcounter(ifp, IFCOUNTER_NOPROTO);
168
169	return (true);
170}
171
172static void
173get_operstate(if_t ifp, struct if_state *pstate)
174{
175	pstate->ifla_operstate = IF_OPER_UNKNOWN;
176	pstate->ifla_carrier = 0; /* no carrier */
177
178	switch (if_gettype(ifp)) {
179	case IFT_ETHER:
180	case IFT_L2VLAN:
181		get_operstate_ether(ifp, pstate);
182		break;
183	default:
184		/* Map admin state to the operstate */
185		if (if_getflags(ifp) & IFF_UP) {
186			pstate->ifla_operstate = IF_OPER_UP;
187			pstate->ifla_carrier = 1;
188		} else
189			pstate->ifla_operstate = IF_OPER_DOWN;
190		break;
191	}
192}
193
194static void
195get_hwaddr(struct nl_writer *nw, if_t ifp)
196{
197	struct ifreq ifr = {};
198
199	if (if_gethwaddr(ifp, &ifr) == 0) {
200		nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp),
201		    ifr.ifr_addr.sa_data);
202	}
203}
204
205static unsigned
206ifp_flags_to_netlink(const if_t ifp)
207{
208        return (if_getflags(ifp) | if_getdrvflags(ifp));
209}
210
211#define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
212static bool
213dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
214{
215        uint32_t addr_len = 0;
216        const void *addr_data = NULL;
217#ifdef INET6
218        struct in6_addr addr6;
219#endif
220
221        if (sa == NULL)
222                return (true);
223
224        switch (sa->sa_family) {
225#ifdef INET
226        case AF_INET:
227                addr_len = sizeof(struct in_addr);
228                addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
229                break;
230#endif
231#ifdef INET6
232        case AF_INET6:
233                in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
234                addr_len = sizeof(struct in6_addr);
235                addr_data = &addr6;
236                break;
237#endif
238        case AF_LINK:
239                addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
240                addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
241                break;
242	case AF_UNSPEC:
243		/* Ignore empty SAs without warning */
244		return (true);
245        default:
246                NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family);
247                return (true);
248        }
249
250        return (nlattr_add(nw, attr, addr_len, addr_data));
251}
252
253static bool
254dump_iface_caps(struct nl_writer *nw, struct ifnet *ifp)
255{
256	int off = nlattr_add_nested(nw, IFLAF_CAPS);
257	uint32_t active_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
258	uint32_t all_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
259
260	MPASS(sizeof(active_caps) >= 8);
261	MPASS(sizeof(all_caps) >= 8);
262
263	if (off == 0)
264		return (false);
265
266	active_caps[0] = (uint32_t)if_getcapabilities(ifp);
267	all_caps[0] = (uint32_t)if_getcapenable(ifp);
268	active_caps[1] = (uint32_t)if_getcapabilities2(ifp);
269	all_caps[1] = (uint32_t)if_getcapenable2(ifp);
270
271	nlattr_add_u32(nw, NLA_BITSET_SIZE, IFCAP_B_SIZE);
272	nlattr_add(nw, NLA_BITSET_MASK, sizeof(all_caps), all_caps);
273	nlattr_add(nw, NLA_BITSET_VALUE, sizeof(active_caps), active_caps);
274
275	nlattr_set_len(nw, off);
276
277	return (true);
278}
279
280/*
281 * Dumps interface state, properties and metrics.
282 * @nw: message writer
283 * @ifp: target interface
284 * @hdr: template header
285 * @if_flags_mask: changed if_[drv]_flags bitmask
286 *
287 * This function is called without epoch and MAY sleep.
288 */
289static bool
290dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr,
291    int if_flags_mask)
292{
293	struct epoch_tracker et;
294        struct ifinfomsg *ifinfo;
295
296        NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
297
298	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
299		goto enomem;
300
301        ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
302        ifinfo->ifi_family = AF_UNSPEC;
303        ifinfo->__ifi_pad = 0;
304        ifinfo->ifi_type = if_gettype(ifp);
305        ifinfo->ifi_index = if_getindex(ifp);
306        ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
307        ifinfo->ifi_change = if_flags_mask;
308
309	struct if_state ifs = {};
310	get_operstate(ifp, &ifs);
311
312	if (ifs.ifla_operstate == IF_OPER_UP)
313		ifinfo->ifi_flags |= IFF_LOWER_UP;
314
315        nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
316        nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
317        nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
318
319/*
320        nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
321        nlattr_add_u8(nw, IFLA_LINKMODE, val);
322*/
323	if (if_getaddrlen(ifp) != 0) {
324		struct ifaddr *ifa;
325
326		NET_EPOCH_ENTER(et);
327		ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
328		if (ifa != NULL)
329			dump_sa(nw, IFLA_ADDRESS, ifa->ifa_addr);
330		NET_EPOCH_EXIT(et);
331	}
332
333        if ((if_getbroadcastaddr(ifp) != NULL)) {
334		nlattr_add(nw, IFLA_BROADCAST, if_getaddrlen(ifp),
335		    if_getbroadcastaddr(ifp));
336        }
337
338        nlattr_add_u32(nw, IFLA_MTU, if_getmtu(ifp));
339/*
340        nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
341        nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
342        nlattr_add_u32(nw, IFLA_GROUP, 0);
343*/
344
345	if (if_getdescr(ifp) != NULL)
346		nlattr_add_string(nw, IFLA_IFALIAS, if_getdescr(ifp));
347
348	/* Store FreeBSD-specific attributes */
349	int off = nlattr_add_nested(nw, IFLA_FREEBSD);
350	if (off != 0) {
351		get_hwaddr(nw, ifp);
352		dump_iface_caps(nw, ifp);
353
354		nlattr_set_len(nw, off);
355	}
356
357	get_stats(nw, ifp);
358
359	uint32_t val = (if_getflags(ifp) & IFF_PROMISC) != 0;
360        nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
361
362	ifc_dump_ifp_nl(ifp, nw);
363
364        if (nlmsg_end(nw))
365		return (true);
366
367enomem:
368        NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
369        nlmsg_abort(nw);
370        return (false);
371}
372
373static bool
374check_ifmsg(void *hdr, struct nl_pstate *npt)
375{
376	struct ifinfomsg *ifm = hdr;
377
378	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
379	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
380		nlmsg_report_err_msg(npt,
381		    "strict checking: non-zero values in ifinfomsg header");
382		return (false);
383	}
384
385	return (true);
386}
387
388#define	_IN(_field)	offsetof(struct ifinfomsg, _field)
389#define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
390static const struct nlfield_parser nlf_p_if[] = {
391	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
392	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
393	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
394	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
395};
396
397static const struct nlattr_parser nla_p_linfo[] = {
398	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
399	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
400};
401NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
402
403static const struct nlattr_parser nla_p_if[] = {
404	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
405	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
406	{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
407	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
408	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
409	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
410	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
411};
412#undef _IN
413#undef _OUT
414NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
415
416static bool
417match_iface(if_t ifp, void *_arg)
418{
419	struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg;
420
421	if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp))
422		return (false);
423	if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp))
424		return (false);
425	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
426		return (false);
427	/* TODO: add group match */
428
429	return (true);
430}
431
432static int
433dump_cb(if_t ifp, void *_arg)
434{
435	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
436	if (!dump_iface(wa->nw, ifp, &wa->hdr, 0))
437		return (ENOMEM);
438	return (0);
439}
440
441/*
442 * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
443 *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
444 *   [
445 *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
446 *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
447 *   ]
448 */
449static int
450rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
451{
452	struct epoch_tracker et;
453        if_t ifp;
454	int error = 0;
455
456	struct nl_parsed_link attrs = {};
457	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
458	if (error != 0)
459		return (error);
460
461	struct netlink_walkargs wa = {
462		.so = nlp,
463		.nw = npt->nw,
464		.hdr.nlmsg_pid = hdr->nlmsg_pid,
465		.hdr.nlmsg_seq = hdr->nlmsg_seq,
466		.hdr.nlmsg_flags = hdr->nlmsg_flags,
467		.hdr.nlmsg_type = NL_RTM_NEWLINK,
468	};
469
470	/* Fast track for an interface w/ explicit name or index match */
471	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
472		if (attrs.ifi_index != 0) {
473			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
474			    attrs.ifi_index);
475			NET_EPOCH_ENTER(et);
476			ifp = ifnet_byindex_ref(attrs.ifi_index);
477			NET_EPOCH_EXIT(et);
478		} else {
479			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
480			    attrs.ifla_ifname);
481			ifp = ifunit_ref(attrs.ifla_ifname);
482		}
483
484		if (ifp != NULL) {
485			if (match_iface(ifp, &attrs)) {
486				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
487					error = ENOMEM;
488			} else
489				error = ENODEV;
490			if_rele(ifp);
491		} else
492			error = ENODEV;
493		return (error);
494	}
495
496	/* Always treat non-direct-match as a multipart message */
497	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
498
499	/*
500	 * Fetching some link properties require performing ioctl's that may be blocking.
501	 * Address it by saving referenced pointers of the matching links,
502	 * exiting from epoch and going through the list one-by-one.
503	 */
504
505	NL_LOG(LOG_DEBUG2, "Start dump");
506	if_foreach_sleep(match_iface, &attrs, dump_cb, &wa);
507	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
508
509	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
510                NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
511                return (ENOMEM);
512        }
513
514	return (error);
515}
516
517/*
518 * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
519 * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
520 *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
521 *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
522 *   [
523 *    {nla_len=16, nla_type=IFLA_LINKINFO},
524 *     [
525 *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
526 *     ]
527 *    ]
528 */
529
530static int
531rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
532{
533	struct epoch_tracker et;
534        if_t ifp;
535	int error;
536
537	struct nl_parsed_link attrs = {};
538	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
539	if (error != 0)
540		return (error);
541
542	NET_EPOCH_ENTER(et);
543	ifp = ifnet_byindex_ref(attrs.ifi_index);
544	NET_EPOCH_EXIT(et);
545	if (ifp == NULL) {
546		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
547		return (ENOENT);
548	}
549	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
550
551	sx_xlock(&ifnet_detach_sxlock);
552	error = if_clone_destroy(if_name(ifp));
553	sx_xunlock(&ifnet_detach_sxlock);
554
555	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
556
557	if_rele(ifp);
558	return (error);
559}
560
561/*
562 * New link:
563 * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
564 *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
565 *    [
566 *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
567 *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
568 *     {{nla_len=24, nla_type=IFLA_LINKINFO},
569 *      [
570 *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
571 *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
572 *
573 * Update link:
574 * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
575 * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
576 * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
577 *
578 *
579 * Check command availability:
580 * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
581 *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
582 */
583
584
585static int
586create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
587    struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
588{
589	if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) {
590		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
591		return (EINVAL);
592	}
593	if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) {
594		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
595		return (EINVAL);
596	}
597
598	struct ifc_data_nl ifd = {
599		.flags = IFC_F_CREATE,
600		.lattrs = lattrs,
601		.bm = bm,
602		.npt = npt,
603	};
604	if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0)
605		nl_store_ifp_cookie(npt, ifd.ifp);
606
607	return (ifd.error);
608}
609
610static int
611modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
612    struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
613{
614	if_t ifp = NULL;
615	struct epoch_tracker et;
616
617	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
618		/*
619		 * Applications like ip(8) verify RTM_NEWLINK command
620		 * existence by calling it with empty arguments. Always
621		 * return "innocent" error in that case.
622		 */
623		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
624		return (EPERM);
625	}
626
627	if (lattrs->ifi_index != 0) {
628		NET_EPOCH_ENTER(et);
629		ifp = ifnet_byindex_ref(lattrs->ifi_index);
630		NET_EPOCH_EXIT(et);
631		if (ifp == NULL) {
632			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
633			    lattrs->ifi_index);
634			return (ENOENT);
635		}
636	}
637
638	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
639		ifp = ifunit_ref(lattrs->ifla_ifname);
640		if (ifp == NULL) {
641			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
642			    lattrs->ifla_ifname);
643			return (ENOENT);
644		}
645	}
646
647	MPASS(ifp != NULL);
648
649	/*
650	 * Modification request can address either
651	 * 1) cloned interface, in which case we call the cloner-specific
652	 *  modification routine
653	 * or
654	 * 2) non-cloned (e.g. "physical") interface, in which case we call
655	 *  generic modification routine
656	 */
657	struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt };
658	if (!ifc_modify_ifp_nl(ifp, &ifd))
659		ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt);
660
661	if_rele(ifp);
662
663	return (ifd.error);
664}
665
666
667static int
668rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
669{
670	struct nlattr_bmask bm;
671	int error;
672
673	struct nl_parsed_link attrs = {};
674	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
675	if (error != 0)
676		return (error);
677	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
678
679	if (hdr->nlmsg_flags & NLM_F_CREATE)
680		return (create_link(hdr, &attrs, &bm, nlp, npt));
681	else
682		return (modify_link(hdr, &attrs, &bm, nlp, npt));
683}
684
/*
 * For an IPv6 link-local address, sets the unicast scope id of the address
 * to @ifindex. Any other (or NULL) sockaddr is left untouched.
 */
static void
set_scope6(struct sockaddr *sa, uint32_t ifindex)
{
#ifdef INET6
	struct sockaddr_in6 *sin6;

	if (sa == NULL || sa->sa_family != AF_INET6)
		return;

	sin6 = (struct sockaddr_in6 *)sa;
	if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
		in6_set_unicast_scopeid(&sin6->sin6_addr, ifindex);
#endif
}
697
698static bool
699check_sa_family(const struct sockaddr *sa, int family, const char *attr_name,
700    struct nl_pstate *npt)
701{
702	if (sa == NULL || sa->sa_family == family)
703		return (true);
704
705	nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d",
706	    attr_name, family, sa->sa_family);
707	return (false);
708}
709
/* Parsed representation of an address request (ifaddrmsg + attributes). */
struct nl_parsed_ifa {
	/* Fixed ifaddrmsg header fields */
	uint8_t			ifa_family;
	uint8_t			ifa_prefixlen;
	uint8_t			ifa_scope;
	uint32_t		ifa_index;
	uint32_t		ifa_flags;	/* header field or IFA_FLAGS attribute */
	/* Nested IFA_FREEBSD attributes */
	uint32_t		ifaf_vhid;	/* IFAF_VHID */
	uint32_t		ifaf_flags;	/* IFAF_FLAGS */
	/* Top-level attributes */
	struct sockaddr		*ifa_address;	/* IFA_ADDRESS */
	struct sockaddr		*ifa_local;	/* IFA_LOCAL */
	struct sockaddr		*ifa_broadcast;	/* IFA_BROADCAST */
	struct ifa_cacheinfo	*ifa_cacheinfo;	/* IFA_CACHEINFO */
	/* NOTE(review): not filled by the visible parsers; set elsewhere. */
	struct sockaddr		*f_ifa_addr;
	struct sockaddr		*f_ifa_dst;
};
725
726static int
727nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt,
728    const void *arg __unused, void *target)
729{
730	if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) {
731		NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo",
732		    nla->nla_type, NLA_DATA_LEN(nla));
733		return (EINVAL);
734	}
735	*((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla);
736	return (0);
737}
738
739#define	_IN(_field)	offsetof(struct ifaddrmsg, _field)
740#define	_OUT(_field)	offsetof(struct nl_parsed_ifa, _field)
741static const struct nlfield_parser nlf_p_ifa[] = {
742	{ .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 },
743	{ .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 },
744	{ .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 },
745	{ .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 },
746	{ .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 },
747};
748
749static const struct nlattr_parser nla_p_ifa_fbsd[] = {
750	{ .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 },
751	{ .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 },
752};
753NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd);
754
755static const struct nlattr_parser nla_p_ifa[] = {
756	{ .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip },
757	{ .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip },
758	{ .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip },
759	{ .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo },
760	{ .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 },
761	{ .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested },
762};
763#undef _IN
764#undef _OUT
765
766static bool
767post_p_ifa(void *_attrs, struct nl_pstate *npt)
768{
769	struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs;
770
771	if (!check_sa_family(attrs->ifa_address, attrs->ifa_family, "IFA_ADDRESS", npt))
772		return (false);
773	if (!check_sa_family(attrs->ifa_local, attrs->ifa_family, "IFA_LOCAL", npt))
774		return (false);
775	if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt))
776		return (false);
777
778	set_scope6(attrs->ifa_address, attrs->ifa_index);
779	set_scope6(attrs->ifa_local, attrs->ifa_index);
780
781	return (true);
782}
783
784NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa);
785
786
787/*
788
789{ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
790 [
791        {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
792        {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
793        {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
794        {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
795        {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
796---
797
798{{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
799 {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
800   [
801    {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
802   {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
803   {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
804*/
805
806static uint8_t
807ifa_get_scope(const struct ifaddr *ifa)
808{
809        const struct sockaddr *sa;
810        uint8_t addr_scope = RT_SCOPE_UNIVERSE;
811
812        sa = ifa->ifa_addr;
813        switch (sa->sa_family) {
814#ifdef INET
815        case AF_INET:
816                {
817                        struct in_addr addr;
818                        addr = ((const struct sockaddr_in *)sa)->sin_addr;
819                        if (IN_LOOPBACK(addr.s_addr))
820                                addr_scope = RT_SCOPE_HOST;
821                        else if (IN_LINKLOCAL(addr.s_addr))
822                                addr_scope = RT_SCOPE_LINK;
823                        break;
824                }
825#endif
826#ifdef INET6
827        case AF_INET6:
828                {
829                        const struct in6_addr *addr;
830                        addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
831                        if (IN6_IS_ADDR_LOOPBACK(addr))
832                                addr_scope = RT_SCOPE_HOST;
833                        else if (IN6_IS_ADDR_LINKLOCAL(addr))
834                                addr_scope = RT_SCOPE_LINK;
835                        break;
836                }
837#endif
838        }
839
840        return (addr_scope);
841}
842
#ifdef INET6
/*
 * Returns the number of bits set in an IPv6 netmask, i.e. its prefix
 * length when the mask is contiguous.
 */
static uint8_t
inet6_get_plen(const struct in6_addr *addr)
{
	uint32_t bits = 0;
	int i;

	for (i = 0; i < 4; i++)
		bits += bitcount32(addr->s6_addr32[i]);

	return (bits);
}
#endif
852
853static uint8_t
854get_sa_plen(const struct sockaddr *sa)
855{
856#ifdef INET
857        const struct in_addr *paddr;
858#endif
859#ifdef INET6
860        const struct in6_addr *paddr6;
861#endif
862
863        switch (sa->sa_family) {
864#ifdef INET
865        case AF_INET:
866                paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
867                return bitcount32(paddr->s_addr);
868#endif
869#ifdef INET6
870        case AF_INET6:
871                paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
872                return inet6_get_plen(paddr6);
873#endif
874        }
875
876        return (0);
877}
878
879#ifdef INET6
880static uint32_t
881in6_flags_to_nl(uint32_t flags)
882{
883	uint32_t nl_flags = 0;
884
885	if (flags & IN6_IFF_TEMPORARY)
886		nl_flags |= IFA_F_TEMPORARY;
887	if (flags & IN6_IFF_NODAD)
888		nl_flags |= IFA_F_NODAD;
889	if (flags & IN6_IFF_DEPRECATED)
890		nl_flags |= IFA_F_DEPRECATED;
891	if (flags & IN6_IFF_TENTATIVE)
892		nl_flags |= IFA_F_TENTATIVE;
893	if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0)
894		flags |= IFA_F_PERMANENT;
895	if (flags & IN6_IFF_DUPLICATED)
896		flags |= IFA_F_DADFAILED;
897	return (nl_flags);
898}
899
900static uint32_t
901nl_flags_to_in6(uint32_t flags)
902{
903	uint32_t in6_flags = 0;
904
905	if (flags & IFA_F_TEMPORARY)
906		in6_flags |= IN6_IFF_TEMPORARY;
907	if (flags & IFA_F_NODAD)
908		in6_flags |= IN6_IFF_NODAD;
909	if (flags & IFA_F_DEPRECATED)
910		in6_flags |= IN6_IFF_DEPRECATED;
911	if (flags & IFA_F_TENTATIVE)
912		in6_flags |= IN6_IFF_TENTATIVE;
913	if (flags & IFA_F_DADFAILED)
914		in6_flags |= IN6_IFF_DUPLICATED;
915
916	return (in6_flags);
917}
918
919static void
920export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia)
921{
922	struct ifa_cacheinfo ci = {
923		.cstamp = ia->ia6_createtime * 1000,
924		.tstamp = ia->ia6_updatetime * 1000,
925		.ifa_prefered = ia->ia6_lifetime.ia6t_pltime,
926		.ifa_valid = ia->ia6_lifetime.ia6t_vltime,
927	};
928
929	nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci);
930}
931#endif
932
/*
 * Appends IFA_CACHEINFO for address families that provide it; currently
 * only IPv6 addresses do, everything else is a no-op.
 */
static void
export_cache_info(struct nl_writer *nw, struct ifaddr *ifa)
{
#ifdef INET6
	if (ifa->ifa_addr->sa_family == AF_INET6)
		export_cache_info6(nw, (struct in6_ifaddr *)ifa);
#endif
}
944
945/*
946 * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
947           ('IFA_LOCAL', '12.0.0.1'),
948           ('IFA_LABEL', 'eth10'),
949           ('IFA_FLAGS', 128),
950           ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
951 */
952static bool
953dump_iface_addr(struct nl_writer *nw, if_t ifp, struct ifaddr *ifa,
954    const struct nlmsghdr *hdr)
955{
956        struct ifaddrmsg *ifamsg;
957        struct sockaddr *sa = ifa->ifa_addr;
958        struct sockaddr *sa_dst = ifa->ifa_dstaddr;
959
960        NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
961            ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
962
963	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
964		goto enomem;
965
966        ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
967        ifamsg->ifa_family = sa->sa_family;
968        ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
969        ifamsg->ifa_flags = 0; // ifa_flags is useless
970        ifamsg->ifa_scope = ifa_get_scope(ifa);
971        ifamsg->ifa_index = if_getindex(ifp);
972
973	if ((if_getflags(ifp) & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) {
974		/* P2P interface may have IPv6 LL with no dst address */
975		dump_sa(nw, IFA_ADDRESS, sa_dst);
976		dump_sa(nw, IFA_LOCAL, sa);
977	} else {
978		dump_sa(nw, IFA_ADDRESS, sa);
979#ifdef INET
980		/*
981		 * In most cases, IFA_ADDRESS == IFA_LOCAL
982		 * Skip IFA_LOCAL for anything except INET
983		 */
984		if (sa->sa_family == AF_INET)
985			dump_sa(nw, IFA_LOCAL, sa);
986#endif
987	}
988	if (if_getflags(ifp) & IFF_BROADCAST)
989		dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr);
990
991        nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
992
993        uint32_t nl_ifa_flags = 0;
994#ifdef INET6
995	if (sa->sa_family == AF_INET6) {
996		struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
997		nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags);
998	}
999#endif
1000        nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags);
1001
1002	export_cache_info(nw, ifa);
1003
1004	/* Store FreeBSD-specific attributes */
1005	int off = nlattr_add_nested(nw, IFA_FREEBSD);
1006	if (off != 0) {
1007		if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) {
1008			uint32_t vhid  = (uint32_t)(*carp_get_vhid_p)(ifa);
1009			nlattr_add_u32(nw, IFAF_VHID, vhid);
1010		}
1011#ifdef INET6
1012		if (sa->sa_family == AF_INET6) {
1013			uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags;
1014
1015			nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags);
1016		}
1017#endif
1018
1019		nlattr_set_len(nw, off);
1020	}
1021
1022	if (nlmsg_end(nw))
1023		return (true);
1024enomem:
1025        NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
1026            rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
1027        nlmsg_abort(nw);
1028        return (false);
1029}
1030
1031static int
1032dump_iface_addrs(struct netlink_walkargs *wa, if_t ifp)
1033{
1034        struct ifaddr *ifa;
1035	struct ifa_iter it;
1036	int error = 0;
1037
1038	for (ifa = ifa_iter_start(ifp, &it); ifa != NULL; ifa = ifa_iter_next(&it)) {
1039		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
1040			continue;
1041		if (ifa->ifa_addr->sa_family == AF_LINK)
1042			continue;
1043		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
1044			continue;
1045		wa->count++;
1046		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr)) {
1047			error = ENOMEM;
1048			break;
1049		}
1050		wa->dumped++;
1051	}
1052	ifa_iter_finish(&it);
1053
1054	return (error);
1055}
1056
1057static int
1058rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1059{
1060        if_t ifp;
1061	int error = 0;
1062
1063	struct nl_parsed_ifa attrs = {};
1064	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1065	if (error != 0)
1066		return (error);
1067
1068	struct netlink_walkargs wa = {
1069		.so = nlp,
1070		.nw = npt->nw,
1071		.cred = nlp_get_cred(nlp),
1072		.family = attrs.ifa_family,
1073		.hdr.nlmsg_pid = hdr->nlmsg_pid,
1074		.hdr.nlmsg_seq = hdr->nlmsg_seq,
1075		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
1076		.hdr.nlmsg_type = NL_RTM_NEWADDR,
1077	};
1078
1079	NL_LOG(LOG_DEBUG2, "Start dump");
1080
1081	if (attrs.ifa_index != 0) {
1082		ifp = ifnet_byindex(attrs.ifa_index);
1083		if (ifp == NULL)
1084			error = ENOENT;
1085		else
1086			error = dump_iface_addrs(&wa, ifp);
1087	} else {
1088		struct if_iter it;
1089
1090		for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) {
1091			error = dump_iface_addrs(&wa, ifp);
1092			if (error != 0)
1093				break;
1094		}
1095		if_iter_finish(&it);
1096	}
1097
1098	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
1099
1100	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
1101                NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
1102                return (ENOMEM);
1103        }
1104
1105	return (error);
1106}
1107
1108#ifdef INET
1109static int
1110handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1111    if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1112{
1113	int plen = attrs->ifa_prefixlen;
1114	int if_flags = if_getflags(ifp);
1115	struct sockaddr_in *addr, *dst;
1116
1117	if (plen > 32) {
1118		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1119		return (EINVAL);
1120	};
1121
1122	if (if_flags & IFF_POINTOPOINT) {
1123		/*
1124		 * Only P2P IFAs are allowed by the implementation.
1125		 */
1126		if (attrs->ifa_address == NULL || attrs->ifa_local == NULL) {
1127			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1128			return (EINVAL);
1129		}
1130		addr = (struct sockaddr_in *)attrs->ifa_local;
1131		dst = (struct sockaddr_in *)attrs->ifa_address;
1132	} else {
1133		/*
1134		 * Map the Netlink attributes to FreeBSD ifa layout.
1135		 * If only IFA_ADDRESS or IFA_LOCAL is set OR
1136		 * both are set to the same value => ifa is not p2p
1137		 * and the attribute value contains interface address.
1138		 *
1139		 * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and
1140		 * different), IFA_LOCAL contains an interface address and
1141		 * IFA_ADDRESS contains peer address.
1142		 */
1143		addr = (struct sockaddr_in *)attrs->ifa_local;
1144		if (addr == NULL)
1145			addr = (struct sockaddr_in *)attrs->ifa_address;
1146
1147		if (addr == NULL) {
1148			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1149			return (EINVAL);
1150		}
1151
1152		/* Generate broadcast address if not set */
1153		if ((if_flags & IFF_BROADCAST) && attrs->ifa_broadcast == NULL) {
1154			uint32_t s_baddr;
1155			struct sockaddr_in *sin_brd;
1156
1157			if (plen == 31)
1158				s_baddr = INADDR_BROADCAST; /* RFC 3021 */
1159			else {
1160				uint32_t s_mask;
1161
1162				s_mask = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
1163				s_baddr = addr->sin_addr.s_addr | ~s_mask;
1164			}
1165
1166			sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd));
1167			if (sin_brd == NULL)
1168				return (ENOMEM);
1169			sin_brd->sin_family = AF_INET;
1170			sin_brd->sin_len = sizeof(*sin_brd);
1171			sin_brd->sin_addr.s_addr = s_baddr;
1172			attrs->ifa_broadcast = (struct sockaddr *)sin_brd;
1173		}
1174		dst = (struct sockaddr_in *)attrs->ifa_broadcast;
1175	}
1176
1177	struct sockaddr_in mask = {
1178		.sin_len = sizeof(struct sockaddr_in),
1179		.sin_family = AF_INET,
1180		.sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0),
1181	};
1182	struct in_aliasreq req = {
1183		.ifra_addr = *addr,
1184		.ifra_mask = mask,
1185		.ifra_vhid = attrs->ifaf_vhid,
1186	};
1187	if (dst != NULL)
1188		req.ifra_dstaddr = *dst;
1189
1190	return (in_control_ioctl(SIOCAIFADDR, &req, ifp, nlp_get_cred(nlp)));
1191}
1192
1193static int
1194handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1195    if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1196{
1197	struct sockaddr_in *addr = (struct sockaddr_in *)attrs->ifa_local;
1198
1199	if (addr == NULL)
1200		addr = (struct sockaddr_in *)attrs->ifa_address;
1201
1202	if (addr == NULL) {
1203		nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL");
1204		return (EINVAL);
1205	}
1206
1207	struct ifreq req = { .ifr_addr = *(struct sockaddr *)addr };
1208
1209	return (in_control_ioctl(SIOCDIFADDR, &req, ifp, nlp_get_cred(nlp)));
1210}
1211#endif
1212
1213#ifdef INET6
1214static int
1215handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1216    if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1217{
1218	struct sockaddr_in6 *addr, *dst;
1219
1220	if (attrs->ifa_prefixlen > 128) {
1221		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1222		return (EINVAL);
1223	}
1224
1225	/*
1226	 * In IPv6 implementation, adding non-P2P address to the P2P interface
1227	 * is allowed.
1228	 */
1229	addr = (struct sockaddr_in6 *)(attrs->ifa_local);
1230	dst = (struct sockaddr_in6 *)(attrs->ifa_address);
1231
1232	if (addr == NULL) {
1233		addr = dst;
1234		dst = NULL;
1235	} else if (dst != NULL) {
1236		if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, &dst->sin6_addr)) {
1237			/*
1238			 * Sometimes Netlink users fills in both attributes
1239			 * with the same address. It still means "non-p2p".
1240			 */
1241			dst = NULL;
1242		}
1243	}
1244
1245	if (addr == NULL) {
1246		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1247		return (EINVAL);
1248	}
1249
1250	uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags;
1251
1252	uint32_t pltime = 0, vltime = 0;
1253	if (attrs->ifa_cacheinfo != 0) {
1254		pltime = attrs->ifa_cacheinfo->ifa_prefered;
1255		vltime = attrs->ifa_cacheinfo->ifa_valid;
1256	}
1257
1258	struct sockaddr_in6 mask = {
1259		.sin6_len = sizeof(struct sockaddr_in6),
1260		.sin6_family = AF_INET6,
1261	};
1262	ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen);
1263
1264	struct in6_aliasreq req = {
1265		.ifra_addr = *addr,
1266		.ifra_prefixmask = mask,
1267		.ifra_flags = flags,
1268		.ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime },
1269		.ifra_vhid = attrs->ifaf_vhid,
1270	};
1271	if (dst != NULL)
1272		req.ifra_dstaddr = *dst;
1273
1274	return (in6_control_ioctl(SIOCAIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1275}
1276
1277static int
1278handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1279    if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1280{
1281	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)attrs->ifa_local;
1282
1283	if (addr == NULL)
1284		addr = (struct sockaddr_in6 *)(attrs->ifa_address);
1285
1286	if (addr == NULL) {
1287		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1288		return (EINVAL);
1289	}
1290
1291	struct in6_ifreq req = { .ifr_addr = *addr };
1292
1293	return (in6_control_ioctl(SIOCDIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1294}
1295#endif
1296
1297
1298static int
1299rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1300{
1301	struct epoch_tracker et;
1302	int error;
1303
1304	struct nl_parsed_ifa attrs = {};
1305	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1306	if (error != 0)
1307		return (error);
1308
1309	NET_EPOCH_ENTER(et);
1310	if_t ifp = ifnet_byindex_ref(attrs.ifa_index);
1311	NET_EPOCH_EXIT(et);
1312
1313	if (ifp == NULL) {
1314		nlmsg_report_err_msg(npt, "Unable to find interface with index %u",
1315		    attrs.ifa_index);
1316		return (ENOENT);
1317	}
1318	int if_flags = if_getflags(ifp);
1319
1320#if defined(INET) || defined(INET6)
1321	bool new = hdr->nlmsg_type == NL_RTM_NEWADDR;
1322#endif
1323
1324	/*
1325	 * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL.
1326	 * The current ioctl-based KPI always does an implicit create-or-replace.
1327	 * It is not possible to specify fine-grained options.
1328	 */
1329
1330	switch (attrs.ifa_family) {
1331#ifdef INET
1332	case AF_INET:
1333		if (new)
1334			error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt);
1335		else
1336			error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt);
1337		break;
1338#endif
1339#ifdef INET6
1340	case AF_INET6:
1341		if (new)
1342			error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt);
1343		else
1344			error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt);
1345		break;
1346#endif
1347	default:
1348		error = EAFNOSUPPORT;
1349	}
1350
1351	if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP))
1352		if_up(ifp);
1353
1354	if_rele(ifp);
1355
1356	return (error);
1357}
1358
1359
1360static void
1361rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
1362{
1363	struct nlmsghdr hdr = {};
1364	struct nl_writer nw = {};
1365	uint32_t group = 0;
1366
1367	switch (ifa->ifa_addr->sa_family) {
1368#ifdef INET
1369	case AF_INET:
1370		group = RTNLGRP_IPV4_IFADDR;
1371		break;
1372#endif
1373#ifdef INET6
1374	case AF_INET6:
1375		group = RTNLGRP_IPV6_IFADDR;
1376		break;
1377#endif
1378	default:
1379		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
1380		    ifa->ifa_addr->sa_family);
1381		return;
1382	}
1383
1384	if (!nl_has_listeners(NETLINK_ROUTE, group))
1385		return;
1386
1387	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
1388		NL_LOG(LOG_DEBUG, "error allocating group writer");
1389		return;
1390	}
1391
1392	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
1393
1394	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
1395	nlmsg_flush(&nw);
1396}
1397
1398static void
1399rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask)
1400{
1401	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
1402	struct nl_writer nw = {};
1403
1404	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
1405		return;
1406
1407	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
1408		NL_LOG(LOG_DEBUG, "error allocating mbuf");
1409		return;
1410	}
1411	dump_iface(&nw, ifp, &hdr, if_flags_mask);
1412        nlmsg_flush(&nw);
1413}
1414
1415static void
1416rtnl_handle_ifattach(void *arg, if_t ifp)
1417{
1418	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1419	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1420}
1421
1422static void
1423rtnl_handle_ifdetach(void *arg, if_t ifp)
1424{
1425	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1426	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
1427}
1428
1429static void
1430rtnl_handle_iflink(void *arg, if_t ifp)
1431{
1432	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1433	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1434}
1435
1436void
1437rtnl_handle_ifnet_event(if_t ifp, int if_flags_mask)
1438{
1439	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1440	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
1441}
1442
1443static const struct rtnl_cmd_handler cmd_handlers[] = {
1444	{
1445		.cmd = NL_RTM_GETLINK,
1446		.name = "RTM_GETLINK",
1447		.cb = &rtnl_handle_getlink,
1448		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
1449	},
1450	{
1451		.cmd = NL_RTM_DELLINK,
1452		.name = "RTM_DELLINK",
1453		.cb = &rtnl_handle_dellink,
1454		.priv = PRIV_NET_IFDESTROY,
1455		.flags = RTNL_F_NOEPOCH,
1456	},
1457	{
1458		.cmd = NL_RTM_NEWLINK,
1459		.name = "RTM_NEWLINK",
1460		.cb = &rtnl_handle_newlink,
1461		.priv = PRIV_NET_IFCREATE,
1462		.flags = RTNL_F_NOEPOCH,
1463	},
1464	{
1465		.cmd = NL_RTM_GETADDR,
1466		.name = "RTM_GETADDR",
1467		.cb = &rtnl_handle_getaddr,
1468		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
1469	},
1470	{
1471		.cmd = NL_RTM_NEWADDR,
1472		.name = "RTM_NEWADDR",
1473		.cb = &rtnl_handle_addr,
1474		.priv = PRIV_NET_ADDIFADDR,
1475		.flags = RTNL_F_NOEPOCH,
1476	},
1477	{
1478		.cmd = NL_RTM_DELADDR,
1479		.name = "RTM_DELADDR",
1480		.cb = &rtnl_handle_addr,
1481		.priv = PRIV_NET_DELIFADDR,
1482		.flags = RTNL_F_NOEPOCH,
1483	},
1484};
1485
1486static const struct nlhdr_parser *all_parsers[] = {
1487	&ifmsg_parser, &ifa_parser, &ifa_fbsd_parser,
1488};
1489
1490void
1491rtnl_iface_add_cloner(struct nl_cloner *cloner)
1492{
1493	sx_xlock(&rtnl_cloner_lock);
1494	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
1495	sx_xunlock(&rtnl_cloner_lock);
1496}
1497
1498void
1499rtnl_iface_del_cloner(struct nl_cloner *cloner)
1500{
1501	sx_xlock(&rtnl_cloner_lock);
1502	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
1503	sx_xunlock(&rtnl_cloner_lock);
1504}
1505
1506void
1507rtnl_ifaces_init(void)
1508{
1509	ifattach_event = EVENTHANDLER_REGISTER(
1510	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
1511	    EVENTHANDLER_PRI_ANY);
1512	ifdetach_event = EVENTHANDLER_REGISTER(
1513	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
1514	    EVENTHANDLER_PRI_ANY);
1515	ifaddr_event = EVENTHANDLER_REGISTER(
1516	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
1517	    EVENTHANDLER_PRI_ANY);
1518	iflink_event = EVENTHANDLER_REGISTER(
1519	    ifnet_link_event, rtnl_handle_iflink, NULL,
1520	    EVENTHANDLER_PRI_ANY);
1521	NL_VERIFY_PARSERS(all_parsers);
1522	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1523}
1524
1525void
1526rtnl_ifaces_destroy(void)
1527{
1528	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
1529	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
1530	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
1531	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
1532}
1533