1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2022 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include "opt_inet.h"
29#include "opt_inet6.h"
30
31#include <sys/types.h>
32#include <sys/ck.h>
33#include <sys/lock.h>
34#include <sys/socket.h>
35#include <sys/vnode.h>
36
37#include <net/if.h>
38#include <net/if_dl.h>
39#include <net/route.h>
40#include <net/route/nhop.h>
41#include <net/route/route_ctl.h>
42#include <netlink/netlink.h>
43#include <netlink/netlink_ctl.h>
44#include <netlink/netlink_linux.h>
45#include <netlink/netlink_var.h>
46#include <netlink/netlink_route.h>
47
48#include <compat/linux/linux.h>
49#include <compat/linux/linux_common.h>
50#include <compat/linux/linux_util.h>
51
52#define	DEBUG_MOD_NAME	nl_linux
53#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
54#include <netlink/netlink_debug.h>
55_DECLARE_DEBUG(LOG_INFO);
56
/*
 * Report whether the payload length of the attribute, as computed by
 * NL_RTA_DATA_LEN(), is exactly sz bytes.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	const bool size_matches = (NL_RTA_DATA_LEN(rta) == sz);

	return (size_matches);
}
62
/*
 * Report whether the attribute payload has the size of a uint32_t.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	const int expected_len = sizeof(uint32_t);

	return (valid_rta_size(rta, expected_len));
}
68
/*
 * Read the attribute payload as a uint32_t.
 * The payload size is not checked here; callers validate it beforehand
 * where needed (see valid_rta_u32()).
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *payload = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*payload);
}
74
75static int
76rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
77{
78	struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
79	sa_family_t f;
80
81	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
82		return (EBADMSG);
83	if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN)
84		return (EPFNOSUPPORT);
85
86	ndm->ndm_family = f;
87
88	return (0);
89}
90
91static int
92rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
93{
94	struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
95	sa_family_t f;
96
97	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) +
98	    offsetof(struct ifaddrmsg, ifa_family) + sizeof(ifam->ifa_family))
99		return (EBADMSG);
100	if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN)
101		return (EPFNOSUPPORT);
102
103	ifam->ifa_family = f;
104
105	return (0);
106}
107
108/*
109 * XXX: in case of error state of hdr is inconsistent.
110 */
111static int
112rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
113{
114	/* Tweak address families and default fib only */
115	struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
116	struct nlattr *nla, *nla_head;
117	int attrs_len;
118	sa_family_t f;
119
120	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg))
121		return (EBADMSG);
122	if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN)
123		return (EPFNOSUPPORT);
124	rtm->rtm_family = f;
125
126	if (rtm->rtm_table == 254)
127		rtm->rtm_table = 0;
128
129	attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
130	attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
131	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
132
133	NLA_FOREACH(nla, nla_head, attrs_len) {
134		RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
135		    nla->nla_type, nla->nla_len, attrs_len);
136		struct rtattr *rta = (struct rtattr *)nla;
137		if (rta->rta_len < sizeof(struct rtattr)) {
138			break;
139		}
140		switch (rta->rta_type) {
141		case NL_RTA_TABLE:
142			if (!valid_rta_u32(rta))
143				return (EBADMSG);
144			rtm->rtm_table = 0;
145			uint32_t fibnum = _rta_get_uint32(rta);
146			RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
147			if (fibnum == 254) {
148				*((uint32_t *)NL_RTA_DATA(rta)) = 0;
149			}
150			break;
151		}
152	}
153
154	return (0);
155}
156
157static int
158rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
159{
160
161	switch (hdr->nlmsg_type) {
162	case NL_RTM_GETROUTE:
163	case NL_RTM_NEWROUTE:
164	case NL_RTM_DELROUTE:
165		return (rtnl_route_from_linux(hdr, npt));
166	case NL_RTM_GETNEIGH:
167		return (rtnl_neigh_from_linux(hdr, npt));
168	case NL_RTM_GETADDR:
169		return (rtnl_ifaddr_from_linux(hdr, npt));
170	/* Silence warning for the messages where no translation is required */
171	case NL_RTM_NEWLINK:
172	case NL_RTM_DELLINK:
173	case NL_RTM_GETLINK:
174		break;
175	default:
176		RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
177		    hdr->nlmsg_type);
178		/* XXXGL: maybe return error? */
179	}
180
181	return (0);
182}
183
184static int
185nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr,
186    struct nl_pstate *npt)
187{
188	switch (netlink_family) {
189	case NETLINK_ROUTE:
190		return (rtnl_from_linux(*hdr, npt));
191	}
192
193	return (0);
194}
195
196
197/************************************************************
198 * Kernel -> Linux
199 ************************************************************/
200
201static bool
202handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
203{
204	char *out_hdr;
205	out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
206
207	if (out_hdr != NULL) {
208		memcpy(out_hdr, hdr, hdr->nlmsg_len);
209		nw->num_messages++;
210		return (true);
211	}
212	return (false);
213}
214
215static bool
216nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
217{
218	return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
219	    hdr->nlmsg_flags, 0));
220}
221
222static void *
223_nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
224{
225	void *next_hdr = nlmsg_reserve_data(nw, sz, void);
226	memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
227
228	return (next_hdr);
229}
230#define	nlmsg_copy_next_header(_hdr, _ns, _t)	\
231	((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
232
233static bool
234nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
235{
236	struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
237	if (nla != NULL) {
238		memcpy(nla, nla_orig, nla_orig->nla_len);
239		return (true);
240	}
241	return (false);
242}
243
244/*
245 * Translate a FreeBSD interface name to a Linux interface name.
246 */
247static bool
248nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw)
249{
250	char ifname[LINUX_IFNAMSIZ];
251
252	if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
253	    sizeof(ifname)) <= 0)
254		return (false);
255	return (nlattr_add_string(nw, IFLA_IFNAME, ifname));
256}
257
258#define	LINUX_NLA_UNHANDLED	-1
259/*
260 * Translate a FreeBSD attribute to a Linux attribute.
261 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed
262 * and the caller must take care of it, otherwise the result is returned.
263 */
264static int
265nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
266    struct nl_writer *nw)
267{
268
269	switch (hdr->nlmsg_type) {
270	case NL_RTM_NEWLINK:
271	case NL_RTM_DELLINK:
272	case NL_RTM_GETLINK:
273		switch (nla->nla_type) {
274		case IFLA_IFNAME:
275			return (nlmsg_translate_ifname_nla(nla, nw));
276		default:
277			break;
278		}
279	default:
280		break;
281	}
282	return (LINUX_NLA_UNHANDLED);
283}
284
285static bool
286nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
287{
288	struct nlattr *nla;
289	int ret;
290
291	int hdrlen = NETLINK_ALIGN(raw_hdrlen);
292	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
293	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
294
295	NLA_FOREACH(nla, nla_head, attrs_len) {
296		RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
297		if (nla->nla_len < sizeof(struct nlattr)) {
298			return (false);
299		}
300		ret = nlmsg_translate_all_nla(hdr, nla, nw);
301		if (ret == LINUX_NLA_UNHANDLED)
302			ret = nlmsg_copy_nla(nla, nw);
303		if (!ret)
304			return (false);
305	}
306	return (true);
307}
308#undef LINUX_NLA_UNHANDLED
309
310static unsigned int
311rtnl_if_flags_to_linux(unsigned int if_flags)
312{
313	unsigned int result = 0;
314
315	for (int i = 0; i < 31; i++) {
316		unsigned int flag = 1 << i;
317		if (!(flag & if_flags))
318			continue;
319		switch (flag) {
320		case IFF_UP:
321		case IFF_BROADCAST:
322		case IFF_DEBUG:
323		case IFF_LOOPBACK:
324		case IFF_POINTOPOINT:
325		case IFF_DRV_RUNNING:
326		case IFF_NOARP:
327		case IFF_PROMISC:
328		case IFF_ALLMULTI:
329			result |= flag;
330			break;
331		case IFF_NEEDSEPOCH:
332		case IFF_DRV_OACTIVE:
333		case IFF_SIMPLEX:
334		case IFF_LINK0:
335		case IFF_LINK1:
336		case IFF_LINK2:
337		case IFF_CANTCONFIG:
338		case IFF_PPROMISC:
339		case IFF_MONITOR:
340		case IFF_STATICARP:
341		case IFF_STICKYARP:
342		case IFF_DYING:
343		case IFF_RENAMING:
344			/* No Linux analogue */
345			break;
346		case IFF_MULTICAST:
347			result |= 1 << 12;
348		}
349	}
350	return (result);
351}
352
353static bool
354rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
355    struct nl_writer *nw)
356{
357	if (!nlmsg_copy_header(hdr, nw))
358		return (false);
359
360	struct ifinfomsg *ifinfo;
361	ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
362
363	ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
364	/* Convert interface type */
365	switch (ifinfo->ifi_type) {
366	case IFT_ETHER:
367		ifinfo->ifi_type = LINUX_ARPHRD_ETHER;
368		break;
369	}
370	ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
371
372	/* Copy attributes unchanged */
373	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
374		return (false);
375
376	/* make ip(8) happy */
377	if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
378		return (false);
379
380	if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
381		return (false);
382
383	nlmsg_end(nw);
384	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
385	return (true);
386}
387
388static bool
389rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
390    struct nl_writer *nw)
391{
392	if (!nlmsg_copy_header(hdr, nw))
393		return (false);
394
395	struct ifaddrmsg *ifamsg;
396	ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
397
398	ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
399	/* XXX: fake ifa_flags? */
400
401	/* Copy attributes unchanged */
402	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
403		return (false);
404
405	nlmsg_end(nw);
406	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
407	return (true);
408}
409
410static bool
411rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
412    struct nl_writer *nw)
413{
414	if (!nlmsg_copy_header(hdr, nw))
415		return (false);
416
417	struct ndmsg *ndm;
418	ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
419
420	ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
421
422	/* Copy attributes unchanged */
423	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
424		return (false);
425
426	nlmsg_end(nw);
427	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
428	return (true);
429}
430
431static bool
432rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
433    struct nl_writer *nw)
434{
435	if (!nlmsg_copy_header(hdr, nw))
436		return (false);
437
438	struct rtmsg *rtm;
439	rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
440	rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
441
442	struct nlattr *nla;
443
444	int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
445	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
446	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
447
448	NLA_FOREACH(nla, nla_head, attrs_len) {
449		struct rtattr *rta = (struct rtattr *)nla;
450		//RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
451		if (rta->rta_len < sizeof(struct rtattr)) {
452			break;
453		}
454
455		switch (rta->rta_type) {
456		case NL_RTA_TABLE:
457			{
458				uint32_t fibnum;
459				fibnum = _rta_get_uint32(rta);
460				if (fibnum == 0)
461					fibnum = 254;
462				RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
463				if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
464					return (false);
465			}
466			break;
467		default:
468			if (!nlmsg_copy_nla(nla, nw))
469				return (false);
470			break;
471		}
472	}
473
474	nlmsg_end(nw);
475	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
476	return (true);
477}
478
479static bool
480rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
481{
482	RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
483
484	switch (hdr->nlmsg_type) {
485	case NL_RTM_NEWLINK:
486	case NL_RTM_DELLINK:
487	case NL_RTM_GETLINK:
488		return (rtnl_newlink_to_linux(hdr, nlp, nw));
489	case NL_RTM_NEWADDR:
490	case NL_RTM_DELADDR:
491		return (rtnl_newaddr_to_linux(hdr, nlp, nw));
492	case NL_RTM_NEWROUTE:
493	case NL_RTM_DELROUTE:
494		return (rtnl_newroute_to_linux(hdr, nlp, nw));
495	case NL_RTM_NEWNEIGH:
496	case NL_RTM_DELNEIGH:
497	case NL_RTM_GETNEIGH:
498		return (rtnl_newneigh_to_linux(hdr, nlp, nw));
499	default:
500		RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
501		    hdr->nlmsg_type);
502		return (handle_default_out(hdr, nw));
503	}
504}
505
506static bool
507nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
508{
509	if (!nlmsg_copy_header(hdr, nw))
510		return (false);
511
512	struct nlmsgerr *nlerr;
513	nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
514	nlerr->error = bsd_to_linux_errno(nlerr->error);
515
516	int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
517	if (hdr->nlmsg_len == copied_len) {
518		nlmsg_end(nw);
519		return (true);
520	}
521
522	/*
523	 * CAP_ACK was not set. Original request needs to be translated.
524	 * XXX: implement translation of the original message
525	 */
526	RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
527	    nlerr->msg.nlmsg_type);
528	char *dst_payload, *src_payload;
529	int copy_len = hdr->nlmsg_len - copied_len;
530	dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
531
532	src_payload = (char *)hdr + copied_len;
533
534	memcpy(dst_payload, src_payload, copy_len);
535	nlmsg_end(nw);
536
537	return (true);
538}
539
540static bool
541nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
542{
543	if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
544		switch (hdr->nlmsg_type) {
545		case NLMSG_ERROR:
546			return (nlmsg_error_to_linux(hdr, nlp, nw));
547		case NLMSG_NOOP:
548		case NLMSG_DONE:
549		case NLMSG_OVERRUN:
550			return (handle_default_out(hdr, nw));
551		default:
552			RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
553			    hdr->nlmsg_type);
554			return (handle_default_out(hdr, nw));
555		}
556	}
557
558	switch (nlp->nl_proto) {
559	case NETLINK_ROUTE:
560		return (rtnl_to_linux(hdr, nlp, nw));
561	default:
562		return (handle_default_out(hdr, nw));
563	}
564}
565
566static bool
567nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp)
568{
569	struct nl_buf *nb, *orig;
570	u_int offset, msglen, orig_messages;
571
572	RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__,
573	    nw->buf->datalen, nw->num_messages);
574
575	orig = nw->buf;
576	nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT);
577	if (__predict_false(nb == NULL))
578		return (false);
579	nw->buf = nb;
580	orig_messages = nw->num_messages;
581	nw->num_messages = 0;
582
583	/* Assume correct headers. Buffer IS mutable */
584	for (offset = 0;
585	    offset + sizeof(struct nlmsghdr) <= orig->datalen;
586	    offset += msglen) {
587		struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
588
589		msglen = NLMSG_ALIGN(hdr->nlmsg_len);
590		if (!nlmsg_to_linux(hdr, nlp, nw)) {
591			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
592			    hdr->nlmsg_type);
593			nl_buf_free(nb);
594			nw->buf = orig;
595			nw->num_messages = orig_messages;
596			return (false);
597		}
598	}
599
600	MPASS(nw->num_messages == orig_messages);
601	MPASS(nw->buf == nb);
602	nl_buf_free(orig);
603	RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset);
604
605	return (true);
606}
607
608static struct linux_netlink_provider linux_netlink_v1 = {
609	.msgs_to_linux = nlmsgs_to_linux,
610	.msg_from_linux = nlmsg_from_linux,
611};
612
613void
614linux_netlink_register(void)
615{
616	linux_netlink_p = &linux_netlink_v1;
617}
618
619void
620linux_netlink_deregister(void)
621{
622	linux_netlink_p = NULL;
623}
624