1/*-
2 * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include "opt_inet6.h"
27
28#include <sys/param.h>
29#include <sys/conf.h>
30#include <sys/ctype.h>
31#include <sys/file.h>
32#include <sys/filedesc.h>
33#include <sys/jail.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/poll.h>
37#include <sys/proc.h>
38#include <sys/signalvar.h>
39#include <sys/socket.h>
40#include <sys/socketvar.h>
41
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/if_dl.h>
45#include <net/if_types.h>
46#include <netlink/netlink.h>
47
48#include <sys/un.h>
49#include <netinet/in.h>
50
51#include <compat/linux/linux.h>
52#include <compat/linux/linux_common.h>
53#include <compat/linux/linux_mib.h>
54#include <compat/linux/linux_util.h>
55
56_Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
57_Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
58    "Linux struct sockaddr size");
59_Static_assert(offsetof(struct sockaddr, sa_data) ==
60    offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
61
62static bool use_real_ifnames = false;
63SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
64    &use_real_ifnames, 0,
65    "Use FreeBSD interface names instead of generating ethN aliases");
66
67static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
68	LINUX_SIGHUP,	/* SIGHUP */
69	LINUX_SIGINT,	/* SIGINT */
70	LINUX_SIGQUIT,	/* SIGQUIT */
71	LINUX_SIGILL,	/* SIGILL */
72	LINUX_SIGTRAP,	/* SIGTRAP */
73	LINUX_SIGABRT,	/* SIGABRT */
74	0,		/* SIGEMT */
75	LINUX_SIGFPE,	/* SIGFPE */
76	LINUX_SIGKILL,	/* SIGKILL */
77	LINUX_SIGBUS,	/* SIGBUS */
78	LINUX_SIGSEGV,	/* SIGSEGV */
79	LINUX_SIGSYS,	/* SIGSYS */
80	LINUX_SIGPIPE,	/* SIGPIPE */
81	LINUX_SIGALRM,	/* SIGALRM */
82	LINUX_SIGTERM,	/* SIGTERM */
83	LINUX_SIGURG,	/* SIGURG */
84	LINUX_SIGSTOP,	/* SIGSTOP */
85	LINUX_SIGTSTP,	/* SIGTSTP */
86	LINUX_SIGCONT,	/* SIGCONT */
87	LINUX_SIGCHLD,	/* SIGCHLD */
88	LINUX_SIGTTIN,	/* SIGTTIN */
89	LINUX_SIGTTOU,	/* SIGTTOU */
90	LINUX_SIGIO,	/* SIGIO */
91	LINUX_SIGXCPU,	/* SIGXCPU */
92	LINUX_SIGXFSZ,	/* SIGXFSZ */
93	LINUX_SIGVTALRM,/* SIGVTALRM */
94	LINUX_SIGPROF,	/* SIGPROF */
95	LINUX_SIGWINCH,	/* SIGWINCH */
96	0,		/* SIGINFO */
97	LINUX_SIGUSR1,	/* SIGUSR1 */
98	LINUX_SIGUSR2	/* SIGUSR2 */
99};
100
101#define	LINUX_SIGPWREMU	(SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)
102
103static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
104	SIGHUP,		/* LINUX_SIGHUP */
105	SIGINT,		/* LINUX_SIGINT */
106	SIGQUIT,	/* LINUX_SIGQUIT */
107	SIGILL,		/* LINUX_SIGILL */
108	SIGTRAP,	/* LINUX_SIGTRAP */
109	SIGABRT,	/* LINUX_SIGABRT */
110	SIGBUS,		/* LINUX_SIGBUS */
111	SIGFPE,		/* LINUX_SIGFPE */
112	SIGKILL,	/* LINUX_SIGKILL */
113	SIGUSR1,	/* LINUX_SIGUSR1 */
114	SIGSEGV,	/* LINUX_SIGSEGV */
115	SIGUSR2,	/* LINUX_SIGUSR2 */
116	SIGPIPE,	/* LINUX_SIGPIPE */
117	SIGALRM,	/* LINUX_SIGALRM */
118	SIGTERM,	/* LINUX_SIGTERM */
119	SIGBUS,		/* LINUX_SIGSTKFLT */
120	SIGCHLD,	/* LINUX_SIGCHLD */
121	SIGCONT,	/* LINUX_SIGCONT */
122	SIGSTOP,	/* LINUX_SIGSTOP */
123	SIGTSTP,	/* LINUX_SIGTSTP */
124	SIGTTIN,	/* LINUX_SIGTTIN */
125	SIGTTOU,	/* LINUX_SIGTTOU */
126	SIGURG,		/* LINUX_SIGURG */
127	SIGXCPU,	/* LINUX_SIGXCPU */
128	SIGXFSZ,	/* LINUX_SIGXFSZ */
129	SIGVTALRM,	/* LINUX_SIGVTALARM */
130	SIGPROF,	/* LINUX_SIGPROF */
131	SIGWINCH,	/* LINUX_SIGWINCH */
132	SIGIO,		/* LINUX_SIGIO */
133	/*
134	 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
135	 * to the first unused FreeBSD signal number. Since Linux supports
136	 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
137	 */
138	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
139	SIGSYS		/* LINUX_SIGSYS */
140};
141
142static struct cdev *dev_shm_cdev;
143static struct cdevsw dev_shm_cdevsw = {
144     .d_version = D_VERSION,
145     .d_name    = "dev_shm",
146};
147
148/*
149 * Map Linux RT signals to the FreeBSD RT signals.
150 */
151static inline int
152linux_to_bsd_rt_signal(int sig)
153{
154
155	return (SIGRTMIN + sig - LINUX_SIGRTMIN);
156}
157
158static inline int
159bsd_to_linux_rt_signal(int sig)
160{
161
162	return (sig - SIGRTMIN + LINUX_SIGRTMIN);
163}
164
165int
166linux_to_bsd_signal(int sig)
167{
168
169	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
170
171	if (sig < LINUX_SIGRTMIN)
172		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
173
174	return (linux_to_bsd_rt_signal(sig));
175}
176
177int
178bsd_to_linux_signal(int sig)
179{
180
181	if (sig <= LINUX_SIGTBLSZ)
182		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
183	if (sig == LINUX_SIGPWREMU)
184		return (LINUX_SIGPWR);
185
186	return (bsd_to_linux_rt_signal(sig));
187}
188
189int
190linux_to_bsd_sigaltstack(int lsa)
191{
192	int bsa = 0;
193
194	if (lsa & LINUX_SS_DISABLE)
195		bsa |= SS_DISABLE;
196	/*
197	 * Linux ignores SS_ONSTACK flag for ss
198	 * parameter while FreeBSD prohibits it.
199	 */
200	return (bsa);
201}
202
203int
204bsd_to_linux_sigaltstack(int bsa)
205{
206	int lsa = 0;
207
208	if (bsa & SS_DISABLE)
209		lsa |= LINUX_SS_DISABLE;
210	if (bsa & SS_ONSTACK)
211		lsa |= LINUX_SS_ONSTACK;
212	return (lsa);
213}
214
215void
216linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
217{
218	int b, l;
219
220	SIGEMPTYSET(*bss);
221	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
222		if (LINUX_SIGISMEMBER(*lss, l)) {
223			b = linux_to_bsd_signal(l);
224			if (b)
225				SIGADDSET(*bss, b);
226		}
227	}
228}
229
230void
231bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
232{
233	int b, l;
234
235	LINUX_SIGEMPTYSET(*lss);
236	for (b = 1; b <= SIGRTMAX; b++) {
237		if (SIGISMEMBER(*bss, b)) {
238			l = bsd_to_linux_signal(b);
239			if (l)
240				LINUX_SIGADDSET(*lss, l);
241		}
242	}
243}
244
245/*
246 * Translate a FreeBSD interface name to a Linux interface name
247 * by interface name, and return the number of bytes copied to lxname.
248 */
249int
250ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
251{
252	struct epoch_tracker et;
253	struct ifnet *ifp;
254	int ret;
255
256	CURVNET_ASSERT_SET();
257
258	ret = 0;
259	NET_EPOCH_ENTER(et);
260	ifp = ifunit(bsdname);
261	if (ifp != NULL)
262		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
263	NET_EPOCH_EXIT(et);
264	return (ret);
265}
266
267/*
268 * Translate a FreeBSD interface name to a Linux interface name
269 * by interface index, and return the number of bytes copied to lxname.
270 */
271int
272ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
273{
274	struct epoch_tracker et;
275	struct ifnet *ifp;
276	int ret;
277
278	ret = 0;
279	CURVNET_SET(TD_TO_VNET(curthread));
280	NET_EPOCH_ENTER(et);
281	ifp = ifnet_byindex(idx);
282	if (ifp != NULL)
283		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
284	NET_EPOCH_EXIT(et);
285	CURVNET_RESTORE();
286	return (ret);
287}
288
289/*
290 * Translate a FreeBSD interface name to a Linux interface name,
291 * and return the number of bytes copied to lxname, 0 if interface
292 * not found, -1 on error.
293 */
294struct ifname_bsd_to_linux_ifp_cb_s {
295	struct ifnet	*ifp;
296	int		ethno;
297	char		*lxname;
298	size_t		len;
299};
300
301static int
302ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
303{
304	struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
305
306	if (ifp == cbs->ifp)
307		return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
308	if (IFP_IS_ETH(ifp))
309		cbs->ethno++;
310	return (0);
311}
312
313int
314ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
315{
316	struct ifname_bsd_to_linux_ifp_cb_s arg = {
317		.ifp = ifp,
318		.ethno = 0,
319		.lxname = lxname,
320		.len = len,
321	};
322
323	NET_EPOCH_ASSERT();
324
325	/*
326	 * Linux loopback interface name is lo (not lo0),
327	 * we translate lo to lo0, loX to loX.
328	 */
329	if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
330		return (strlcpy(lxname, "lo", len));
331
332	/* Short-circuit non ethernet interfaces. */
333	if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
334		return (strlcpy(lxname, if_name(ifp), len));
335
336 	/* Determine the (relative) unit number for ethernet interfaces. */
337	return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
338}
339
/*
 * Translate a Linux interface name to a FreeBSD interface name,
 * and return the associated ifnet structure.
 * bsdname and lxname need to be at least IFNAMSIZ bytes long, but
 * can point to the same buffer.
 */
346struct ifname_linux_to_ifp_cb_s {
347	bool		is_lo;
348	bool		is_eth;
349	int		ethno;
350	int		unit;
351	const char	*lxname;
352	if_t		ifp;
353};
354
355static int
356ifname_linux_to_ifp_cb(if_t ifp, void *arg)
357{
358	struct ifname_linux_to_ifp_cb_s *cbs = arg;
359
360	NET_EPOCH_ASSERT();
361
362	/*
363	 * Allow Linux programs to use FreeBSD names. Don't presume
364	 * we never have an interface named "eth", so don't make
365	 * the test optional based on is_eth.
366	 */
367	if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
368		goto out;
369	if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
370		goto out;
371	if (cbs->is_lo && IFP_IS_LOOP(ifp))
372		goto out;
373	if (IFP_IS_ETH(ifp))
374		cbs->ethno++;
375	return (0);
376
377out:
378	cbs->ifp = ifp;
379	return (1);
380}
381
382struct ifnet *
383ifname_linux_to_ifp(struct thread *td, const char *lxname)
384{
385	struct ifname_linux_to_ifp_cb_s arg = {
386		.ethno = 0,
387		.lxname = lxname,
388		.ifp = NULL,
389	};
390	int len;
391	char *ep;
392
393	NET_EPOCH_ASSERT();
394
395	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
396		if (!isalpha(lxname[len]) || lxname[len] == '\0')
397			break;
398	if (len == 0 || len == LINUX_IFNAMSIZ)
399		return (NULL);
400	/*
401	 * Linux loopback interface name is lo (not lo0),
402	 * we translate lo to lo0, loX to loX.
403	 */
404	arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
405	arg.unit = (int)strtoul(lxname + len, &ep, 10);
406	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
407	    arg.is_lo == 0)
408		return (NULL);
409	arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
410
411	if_foreach(ifname_linux_to_ifp_cb, &arg);
412	return (arg.ifp);
413}
414
415int
416ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
417{
418	struct epoch_tracker et;
419	struct ifnet *ifp;
420
421	CURVNET_SET(TD_TO_VNET(td));
422	NET_EPOCH_ENTER(et);
423	ifp = ifname_linux_to_ifp(td, lxname);
424	if (ifp != NULL && bsdname != NULL)
425		strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
426	NET_EPOCH_EXIT(et);
427	CURVNET_RESTORE();
428	return (ifp != NULL ? 0 : EINVAL);
429}
430
/*
 * Return the Linux interface flags of ifp, computed from the union of its
 * generic and driver flags.  Must be called inside the network epoch.
 */
unsigned short
linux_ifflags(struct ifnet *ifp)
{
	unsigned short native;

	NET_EPOCH_ASSERT();

	native = if_getflags(ifp);
	native |= if_getdrvflags(ifp);
	return (bsd_to_linux_ifflags(native));
}
441
442unsigned short
443bsd_to_linux_ifflags(int fl)
444{
445	unsigned short flags = 0;
446
447	if (fl & IFF_UP)
448		flags |= LINUX_IFF_UP;
449	if (fl & IFF_BROADCAST)
450		flags |= LINUX_IFF_BROADCAST;
451	if (fl & IFF_DEBUG)
452		flags |= LINUX_IFF_DEBUG;
453	if (fl & IFF_LOOPBACK)
454		flags |= LINUX_IFF_LOOPBACK;
455	if (fl & IFF_POINTOPOINT)
456		flags |= LINUX_IFF_POINTOPOINT;
457	if (fl & IFF_DRV_RUNNING)
458		flags |= LINUX_IFF_RUNNING;
459	if (fl & IFF_NOARP)
460		flags |= LINUX_IFF_NOARP;
461	if (fl & IFF_PROMISC)
462		flags |= LINUX_IFF_PROMISC;
463	if (fl & IFF_ALLMULTI)
464		flags |= LINUX_IFF_ALLMULTI;
465	if (fl & IFF_MULTICAST)
466		flags |= LINUX_IFF_MULTICAST;
467	return (flags);
468}
469
470static u_int
471linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
472{
473	struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
474	struct l_sockaddr *lsa = arg;
475
476	if (count > 0)
477		return (0);
478	if (sdl->sdl_type != IFT_ETHER)
479		return (0);
480	bzero(lsa, sizeof(*lsa));
481	lsa->sa_family = LINUX_ARPHRD_ETHER;
482	bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
483	return (1);
484}
485
486int
487linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
488{
489
490	NET_EPOCH_ASSERT();
491
492	if (IFP_IS_LOOP(ifp)) {
493		bzero(lsa, sizeof(*lsa));
494		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
495		return (0);
496	}
497	if (!IFP_IS_ETH(ifp))
498		return (ENOENT);
499	if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
500		return (0);
501	return (ENOENT);
502}
503
504sa_family_t
505linux_to_bsd_domain(sa_family_t domain)
506{
507
508	switch (domain) {
509	case LINUX_AF_UNSPEC:
510		return (AF_UNSPEC);
511	case LINUX_AF_UNIX:
512		return (AF_LOCAL);
513	case LINUX_AF_INET:
514		return (AF_INET);
515	case LINUX_AF_INET6:
516		return (AF_INET6);
517	case LINUX_AF_AX25:
518		return (AF_CCITT);
519	case LINUX_AF_IPX:
520		return (AF_IPX);
521	case LINUX_AF_APPLETALK:
522		return (AF_APPLETALK);
523	case LINUX_AF_NETLINK:
524		return (AF_NETLINK);
525	}
526	return (AF_UNKNOWN);
527}
528
529sa_family_t
530bsd_to_linux_domain(sa_family_t domain)
531{
532
533	switch (domain) {
534	case AF_UNSPEC:
535		return (LINUX_AF_UNSPEC);
536	case AF_LOCAL:
537		return (LINUX_AF_UNIX);
538	case AF_INET:
539		return (LINUX_AF_INET);
540	case AF_INET6:
541		return (LINUX_AF_INET6);
542	case AF_CCITT:
543		return (LINUX_AF_AX25);
544	case AF_IPX:
545		return (LINUX_AF_IPX);
546	case AF_APPLETALK:
547		return (LINUX_AF_APPLETALK);
548	case AF_NETLINK:
549		return (LINUX_AF_NETLINK);
550	}
551	return (AF_UNKNOWN);
552}
553
554/*
555 * Based on the fact that:
556 * 1. Native and Linux storage of struct sockaddr
557 * and struct sockaddr_in6 are equal.
558 * 2. On Linux sa_family is the first member of all struct sockaddr.
559 */
560int
561bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
562    socklen_t len)
563{
564	struct l_sockaddr *kosa;
565	sa_family_t bdom;
566
567	*lsa = NULL;
568	if (len < 2 || len > UCHAR_MAX)
569		return (EINVAL);
570	bdom = bsd_to_linux_domain(sa->sa_family);
571	if (bdom == AF_UNKNOWN)
572		return (EAFNOSUPPORT);
573
574	kosa = malloc(len, M_LINUX, M_WAITOK);
575	bcopy(sa, kosa, len);
576	kosa->sa_family = bdom;
577	*lsa = kosa;
578	return (0);
579}
580
581int
582linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
583    socklen_t *len)
584{
585	struct sockaddr *sa;
586	struct l_sockaddr *kosa;
587#ifdef INET6
588	struct sockaddr_in6 *sin6;
589	bool  oldv6size;
590#endif
591	char *name;
592	int salen, bdom, error, hdrlen, namelen;
593
594	if (*len < 2 || *len > UCHAR_MAX)
595		return (EINVAL);
596
597	salen = *len;
598
599#ifdef INET6
600	oldv6size = false;
601	/*
602	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
603	 * if it's a v4-mapped address, so reserve the proper space
604	 * for it.
605	 */
606	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
607		salen += sizeof(uint32_t);
608		oldv6size = true;
609	}
610#endif
611
612	kosa = malloc(salen, M_SONAME, M_WAITOK);
613
614	if ((error = copyin(osa, kosa, *len)))
615		goto out;
616
617	bdom = linux_to_bsd_domain(kosa->sa_family);
618	if (bdom == AF_UNKNOWN) {
619		error = EAFNOSUPPORT;
620		goto out;
621	}
622
623#ifdef INET6
624	/*
625	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
626	 * which lacks the scope id compared with RFC2553 one. If we detect
627	 * the situation, reject the address and write a message to system log.
628	 *
629	 * Still accept addresses for which the scope id is not used.
630	 */
631	if (oldv6size) {
632		if (bdom == AF_INET6) {
633			sin6 = (struct sockaddr_in6 *)kosa;
634			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
635			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
636			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
637			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
638			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
639			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
640				sin6->sin6_scope_id = 0;
641			} else {
642				linux_msg(curthread,
643				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
644				error = EINVAL;
645				goto out;
646			}
647		} else
648			salen -= sizeof(uint32_t);
649	}
650#endif
651	if (bdom == AF_INET) {
652		if (salen < sizeof(struct sockaddr_in)) {
653			error = EINVAL;
654			goto out;
655		}
656		salen = sizeof(struct sockaddr_in);
657	}
658
659	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
660		hdrlen = offsetof(struct sockaddr_un, sun_path);
661		name = ((struct sockaddr_un *)kosa)->sun_path;
662		if (*name == '\0') {
663			/*
664			 * Linux abstract namespace starts with a NULL byte.
665			 * XXX We do not support abstract namespace yet.
666			 */
667			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
668		} else
669			namelen = strnlen(name, salen - hdrlen);
670		salen = hdrlen + namelen;
671		if (salen > sizeof(struct sockaddr_un)) {
672			error = ENAMETOOLONG;
673			goto out;
674		}
675	}
676
677	if (bdom == AF_NETLINK) {
678		if (salen < sizeof(struct sockaddr_nl)) {
679			error = EINVAL;
680			goto out;
681		}
682		salen = sizeof(struct sockaddr_nl);
683	}
684
685	sa = (struct sockaddr *)kosa;
686	sa->sa_family = bdom;
687	sa->sa_len = salen;
688
689	*sap = sa;
690	*len = salen;
691	return (0);
692
693out:
694	free(kosa, M_SONAME);
695	return (error);
696}
697
698void
699linux_dev_shm_create(void)
700{
701	int error;
702
703	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
704	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
705	if (error != 0) {
706		printf("%s: failed to create device node, error %d\n",
707		    __func__, error);
708	}
709}
710
711void
712linux_dev_shm_destroy(void)
713{
714
715	destroy_dev(dev_shm_cdev);
716}
717
718int
719bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
720    size_t mapcnt, int no_value)
721{
722	int bsd_mask, bsd_value, linux_mask, linux_value;
723	int linux_ret;
724	size_t i;
725	bool applied;
726
727	applied = false;
728	linux_ret = 0;
729	for (i = 0; i < mapcnt; ++i) {
730		bsd_mask = bitmap[i].bsd_mask;
731		bsd_value = bitmap[i].bsd_value;
732		if (bsd_mask == 0)
733			bsd_mask = bsd_value;
734
735		linux_mask = bitmap[i].linux_mask;
736		linux_value = bitmap[i].linux_value;
737		if (linux_mask == 0)
738			linux_mask = linux_value;
739
740		/*
741		 * If a mask larger than just the value is set, we explicitly
742		 * want to make sure that only this bit we mapped within that
743		 * mask is set.
744		 */
745		if ((value & bsd_mask) == bsd_value) {
746			linux_ret = (linux_ret & ~linux_mask) | linux_value;
747			applied = true;
748		}
749	}
750
751	if (!applied)
752		return (no_value);
753	return (linux_ret);
754}
755
756int
757linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
758    size_t mapcnt, int no_value)
759{
760	int bsd_mask, bsd_value, linux_mask, linux_value;
761	int bsd_ret;
762	size_t i;
763	bool applied;
764
765	applied = false;
766	bsd_ret = 0;
767	for (i = 0; i < mapcnt; ++i) {
768		bsd_mask = bitmap[i].bsd_mask;
769		bsd_value = bitmap[i].bsd_value;
770		if (bsd_mask == 0)
771			bsd_mask = bsd_value;
772
773		linux_mask = bitmap[i].linux_mask;
774		linux_value = bitmap[i].linux_value;
775		if (linux_mask == 0)
776			linux_mask = linux_value;
777
778		/*
779		 * If a mask larger than just the value is set, we explicitly
780		 * want to make sure that only this bit we mapped within that
781		 * mask is set.
782		 */
783		if ((value & linux_mask) == linux_value) {
784			bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
785			applied = true;
786		}
787	}
788
789	if (!applied)
790		return (no_value);
791	return (bsd_ret);
792}
793
794void
795linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
796    short *bev)
797{
798	struct file *fp;
799	int error;
800	short bits = 0;
801
802	if (lev & LINUX_POLLIN)
803		bits |= POLLIN;
804	if (lev & LINUX_POLLPRI)
805		bits |=	POLLPRI;
806	if (lev & LINUX_POLLOUT)
807		bits |= POLLOUT;
808	if (lev & LINUX_POLLERR)
809		bits |= POLLERR;
810	if (lev & LINUX_POLLHUP)
811		bits |= POLLHUP;
812	if (lev & LINUX_POLLNVAL)
813		bits |= POLLNVAL;
814	if (lev & LINUX_POLLRDNORM)
815		bits |= POLLRDNORM;
816	if (lev & LINUX_POLLRDBAND)
817		bits |= POLLRDBAND;
818	if (lev & LINUX_POLLWRBAND)
819		bits |= POLLWRBAND;
820	if (lev & LINUX_POLLWRNORM)
821		bits |= POLLWRNORM;
822
823	if (lev & LINUX_POLLRDHUP) {
824		/*
825		 * It seems that the Linux silencly ignores POLLRDHUP
826		 * on non-socket file descriptors unlike FreeBSD, where
827		 * events bits is more strictly checked (POLLSTANDARD).
828		 */
829		error = fget_unlocked(td, fd, &cap_no_rights, &fp);
830		if (error == 0) {
831			/*
832			 * XXX. On FreeBSD POLLRDHUP applies only to
833			 * stream sockets.
834			 */
835			if (fp->f_type == DTYPE_SOCKET)
836				bits |= POLLRDHUP;
837			fdrop(fp, td);
838		}
839	}
840
841	if (lev & LINUX_POLLMSG)
842		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
843	if (lev & LINUX_POLLREMOVE)
844		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
845
846	*bev = bits;
847}
848
849void
850bsd_to_linux_poll_events(short bev, short *lev)
851{
852	short bits = 0;
853
854	if (bev & POLLIN)
855		bits |= LINUX_POLLIN;
856	if (bev & POLLPRI)
857		bits |=	LINUX_POLLPRI;
858	if (bev & (POLLOUT | POLLWRNORM))
859		/*
860		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
861		 * but not on Linux
862		 */
863		bits |= LINUX_POLLOUT;
864	if (bev & POLLERR)
865		bits |= LINUX_POLLERR;
866	if (bev & POLLHUP)
867		bits |= LINUX_POLLHUP;
868	if (bev & POLLNVAL)
869		bits |= LINUX_POLLNVAL;
870	if (bev & POLLRDNORM)
871		bits |= LINUX_POLLRDNORM;
872	if (bev & POLLRDBAND)
873		bits |= LINUX_POLLRDBAND;
874	if (bev & POLLWRBAND)
875		bits |= LINUX_POLLWRBAND;
876	if (bev & POLLRDHUP)
877		bits |= LINUX_POLLRDHUP;
878
879	*lev = bits;
880}
881
882bool
883linux_use_real_ifname(const struct ifnet *ifp)
884{
885
886	return (use_real_ifnames);
887}
888