1/*-
2 * Copyright (c) 2015 Dmitry Chagin
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <opt_inet6.h>
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/ctype.h>
36#include <sys/jail.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/signalvar.h>
40#include <sys/socket.h>
41#include <sys/socketvar.h>
42
43#include <net/if.h>
44#include <net/if_var.h>
45#include <net/if_dl.h>
46#include <net/if_types.h>
47
48#include <sys/un.h>
49#include <netinet/in.h>
50
51#include <compat/linux/linux.h>
52#include <compat/linux/linux_common.h>
53#include <compat/linux/linux_util.h>
54
/*
 * Global futex bookkeeping.  Both objects are only defined here; none of
 * the functions visible in this file read or write them.
 */
struct futex_list futex_list;
struct mtx futex_mtx;			/* protects the futex list */

/*
 * ifname_linux_to_bsd() uses LINUX_IFNAMSIZ and IFNAMSIZ interchangeably
 * as buffer bounds, so refuse to build if the two ever differ.
 */
CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ);
59
/*
 * Native-to-Linux translation table for the first LINUX_SIGTBLSZ signals.
 * bsd_to_linux_signal() indexes it with _SIG_IDX(native signal); the
 * trailing comment on each entry names the native signal that slot
 * belongs to.  A zero entry means the native signal has no Linux
 * counterpart; bsd_to_linux_sigset() skips signals that translate to 0.
 */
static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP,	/* SIGHUP */
	LINUX_SIGINT,	/* SIGINT */
	LINUX_SIGQUIT,	/* SIGQUIT */
	LINUX_SIGILL,	/* SIGILL */
	LINUX_SIGTRAP,	/* SIGTRAP */
	LINUX_SIGABRT,	/* SIGABRT */
	0,		/* SIGEMT */
	LINUX_SIGFPE,	/* SIGFPE */
	LINUX_SIGKILL,	/* SIGKILL */
	LINUX_SIGBUS,	/* SIGBUS */
	LINUX_SIGSEGV,	/* SIGSEGV */
	LINUX_SIGSYS,	/* SIGSYS */
	LINUX_SIGPIPE,	/* SIGPIPE */
	LINUX_SIGALRM,	/* SIGALRM */
	LINUX_SIGTERM,	/* SIGTERM */
	LINUX_SIGURG,	/* SIGURG */
	LINUX_SIGSTOP,	/* SIGSTOP */
	LINUX_SIGTSTP,	/* SIGTSTP */
	LINUX_SIGCONT,	/* SIGCONT */
	LINUX_SIGCHLD,	/* SIGCHLD */
	LINUX_SIGTTIN,	/* SIGTTIN */
	LINUX_SIGTTOU,	/* SIGTTOU */
	LINUX_SIGIO,	/* SIGIO */
	LINUX_SIGXCPU,	/* SIGXCPU */
	LINUX_SIGXFSZ,	/* SIGXFSZ */
	LINUX_SIGVTALRM,/* SIGVTALRM */
	LINUX_SIGPROF,	/* SIGPROF */
	LINUX_SIGWINCH,	/* SIGWINCH */
	0,		/* SIGINFO */
	LINUX_SIGUSR1,	/* SIGUSR1 */
	LINUX_SIGUSR2	/* SIGUSR2 */
};
93
/*
 * Linux-to-native translation table for Linux signals below
 * LINUX_SIGRTMIN.  linux_to_bsd_signal() indexes it with
 * _SIG_IDX(Linux signal); the trailing comment on each entry names the
 * Linux signal that slot belongs to.  Note that both LINUX_SIGBUS and
 * LINUX_SIGSTKFLT translate to the native SIGBUS.
 */
static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
	SIGHUP,		/* LINUX_SIGHUP */
	SIGINT,		/* LINUX_SIGINT */
	SIGQUIT,	/* LINUX_SIGQUIT */
	SIGILL,		/* LINUX_SIGILL */
	SIGTRAP,	/* LINUX_SIGTRAP */
	SIGABRT,	/* LINUX_SIGABRT */
	SIGBUS,		/* LINUX_SIGBUS */
	SIGFPE,		/* LINUX_SIGFPE */
	SIGKILL,	/* LINUX_SIGKILL */
	SIGUSR1,	/* LINUX_SIGUSR1 */
	SIGSEGV,	/* LINUX_SIGSEGV */
	SIGUSR2,	/* LINUX_SIGUSR2 */
	SIGPIPE,	/* LINUX_SIGPIPE */
	SIGALRM,	/* LINUX_SIGALRM */
	SIGTERM,	/* LINUX_SIGTERM */
	SIGBUS,		/* LINUX_SIGSTKFLT */
	SIGCHLD,	/* LINUX_SIGCHLD */
	SIGCONT,	/* LINUX_SIGCONT */
	SIGSTOP,	/* LINUX_SIGSTOP */
	SIGTSTP,	/* LINUX_SIGTSTP */
	SIGTTIN,	/* LINUX_SIGTTIN */
	SIGTTOU,	/* LINUX_SIGTTOU */
	SIGURG,		/* LINUX_SIGURG */
	SIGXCPU,	/* LINUX_SIGXCPU */
	SIGXFSZ,	/* LINUX_SIGXFSZ */
	SIGVTALRM,	/* LINUX_SIGVTALRM */
	SIGPROF,	/* LINUX_SIGPROF */
	SIGWINCH,	/* LINUX_SIGWINCH */
	SIGIO,		/* LINUX_SIGIO */
	/*
	 * FreeBSD does not have a SIGPWR signal, so map the Linux SIGPWR
	 * signal to the first unused FreeBSD signal number. Since Linux
	 * supports signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
	 */
	SIGRTMIN,	/* LINUX_SIGPWR */
	SIGSYS		/* LINUX_SIGSYS */
};
132
/*
 * Device node set up by linux_dev_shm_create() and torn down by
 * linux_dev_shm_destroy().  The switch table only carries a version and
 * a name; no device methods are supplied.
 */
static struct cdev *dev_shm_cdev;
static struct cdevsw dev_shm_cdevsw = {
     .d_version = D_VERSION,
     .d_name    = "dev_shm",
};
138
139/*
140 * Map Linux RT signals to the FreeBSD RT signals.
141 */
142static inline int
143linux_to_bsd_rt_signal(int sig)
144{
145
146	return (SIGRTMIN + 1 + sig - LINUX_SIGRTMIN);
147}
148
149static inline int
150bsd_to_linux_rt_signal(int sig)
151{
152
153	return (sig - SIGRTMIN - 1 + LINUX_SIGRTMIN);
154}
155
156int
157linux_to_bsd_signal(int sig)
158{
159
160	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
161
162	if (sig < LINUX_SIGRTMIN)
163		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
164
165	return (linux_to_bsd_rt_signal(sig));
166}
167
168int
169bsd_to_linux_signal(int sig)
170{
171
172	if (sig <= LINUX_SIGTBLSZ)
173		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
174	if (sig == SIGRTMIN)
175		return (LINUX_SIGPWR);
176
177	return (bsd_to_linux_rt_signal(sig));
178}
179
180int
181linux_to_bsd_sigaltstack(int lsa)
182{
183	int bsa = 0;
184
185	if (lsa & LINUX_SS_DISABLE)
186		bsa |= SS_DISABLE;
187	/*
188	 * Linux ignores SS_ONSTACK flag for ss
189	 * parameter while FreeBSD prohibits it.
190	 */
191	return (bsa);
192}
193
194int
195bsd_to_linux_sigaltstack(int bsa)
196{
197	int lsa = 0;
198
199	if (bsa & SS_DISABLE)
200		lsa |= LINUX_SS_DISABLE;
201	if (bsa & SS_ONSTACK)
202		lsa |= LINUX_SS_ONSTACK;
203	return (lsa);
204}
205
206void
207linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
208{
209	int b, l;
210
211	SIGEMPTYSET(*bss);
212	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
213		if (LINUX_SIGISMEMBER(*lss, l)) {
214			b = linux_to_bsd_signal(l);
215			if (b)
216				SIGADDSET(*bss, b);
217		}
218	}
219}
220
221void
222bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
223{
224	int b, l;
225
226	LINUX_SIGEMPTYSET(*lss);
227	for (b = 1; b <= SIGRTMAX; b++) {
228		if (SIGISMEMBER(*bss, b)) {
229			l = bsd_to_linux_signal(b);
230			if (l)
231				LINUX_SIGADDSET(*lss, l);
232		}
233	}
234}
235
236/*
237 * Translate a Linux interface name to a FreeBSD interface name,
238 * and return the associated ifnet structure
239 * bsdname and lxname need to be least IFNAMSIZ bytes long, but
240 * can point to the same buffer.
241 */
242struct ifnet *
243ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
244{
245	struct ifnet *ifp;
246	int len, unit;
247	char *ep;
248	int index;
249	bool is_eth, is_lo;
250
251	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
252		if (!isalpha(lxname[len]) || lxname[len] == '\0')
253			break;
254	if (len == 0 || len == LINUX_IFNAMSIZ)
255		return (NULL);
256	/* Linux loopback interface name is lo (not lo0) */
257	is_lo = (len == 2 && strncmp(lxname, "lo", len) == 0);
258	unit = (int)strtoul(lxname + len, &ep, 10);
259	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
260	    is_lo == 0)
261		return (NULL);
262	index = 0;
263	is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
264
265	CURVNET_SET(TD_TO_VNET(td));
266	IFNET_RLOCK();
267	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
268		/*
269		 * Allow Linux programs to use FreeBSD names. Don't presume
270		 * we never have an interface named "eth", so don't make
271		 * the test optional based on is_eth.
272		 */
273		if (strncmp(ifp->if_xname, lxname, LINUX_IFNAMSIZ) == 0)
274			break;
275		if (is_eth && IFP_IS_ETH(ifp) && unit == index++)
276			break;
277		if (is_lo && IFP_IS_LOOP(ifp))
278			break;
279	}
280	IFNET_RUNLOCK();
281	CURVNET_RESTORE();
282	if (ifp != NULL && bsdname != NULL)
283		strlcpy(bsdname, ifp->if_xname, IFNAMSIZ);
284	return (ifp);
285}
286
287void
288linux_ifflags(struct ifnet *ifp, short *flags)
289{
290	unsigned short fl;
291
292	fl = (ifp->if_flags | ifp->if_drv_flags) & 0xffff;
293	*flags = 0;
294	if (fl & IFF_UP)
295		*flags |= LINUX_IFF_UP;
296	if (fl & IFF_BROADCAST)
297		*flags |= LINUX_IFF_BROADCAST;
298	if (fl & IFF_DEBUG)
299		*flags |= LINUX_IFF_DEBUG;
300	if (fl & IFF_LOOPBACK)
301		*flags |= LINUX_IFF_LOOPBACK;
302	if (fl & IFF_POINTOPOINT)
303		*flags |= LINUX_IFF_POINTOPOINT;
304	if (fl & IFF_DRV_RUNNING)
305		*flags |= LINUX_IFF_RUNNING;
306	if (fl & IFF_NOARP)
307		*flags |= LINUX_IFF_NOARP;
308	if (fl & IFF_PROMISC)
309		*flags |= LINUX_IFF_PROMISC;
310	if (fl & IFF_ALLMULTI)
311		*flags |= LINUX_IFF_ALLMULTI;
312	if (fl & IFF_MULTICAST)
313		*flags |= LINUX_IFF_MULTICAST;
314}
315
316int
317linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
318{
319	struct ifaddr *ifa;
320	struct sockaddr_dl *sdl;
321
322	if (IFP_IS_LOOP(ifp)) {
323		bzero(lsa, sizeof(*lsa));
324		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
325		return (0);
326	}
327
328	if (!IFP_IS_ETH(ifp))
329		return (ENOENT);
330
331	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
332		sdl = (struct sockaddr_dl*)ifa->ifa_addr;
333		if (sdl != NULL && (sdl->sdl_family == AF_LINK) &&
334		    (sdl->sdl_type == IFT_ETHER)) {
335			bzero(lsa, sizeof(*lsa));
336			lsa->sa_family = LINUX_ARPHRD_ETHER;
337			bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
338			return (0);
339		}
340	}
341
342	return (ENOENT);
343}
344
345int
346linux_to_bsd_domain(int domain)
347{
348
349	switch (domain) {
350	case LINUX_AF_UNSPEC:
351		return (AF_UNSPEC);
352	case LINUX_AF_UNIX:
353		return (AF_LOCAL);
354	case LINUX_AF_INET:
355		return (AF_INET);
356	case LINUX_AF_INET6:
357		return (AF_INET6);
358	case LINUX_AF_AX25:
359		return (AF_CCITT);
360	case LINUX_AF_IPX:
361		return (AF_IPX);
362	case LINUX_AF_APPLETALK:
363		return (AF_APPLETALK);
364	}
365	return (-1);
366}
367
368int
369bsd_to_linux_domain(int domain)
370{
371
372	switch (domain) {
373	case AF_UNSPEC:
374		return (LINUX_AF_UNSPEC);
375	case AF_LOCAL:
376		return (LINUX_AF_UNIX);
377	case AF_INET:
378		return (LINUX_AF_INET);
379	case AF_INET6:
380		return (LINUX_AF_INET6);
381	case AF_CCITT:
382		return (LINUX_AF_AX25);
383	case AF_IPX:
384		return (LINUX_AF_IPX);
385	case AF_APPLETALK:
386		return (LINUX_AF_APPLETALK);
387	}
388	return (-1);
389}
390
391/*
392 * Based on the fact that:
393 * 1. Native and Linux storage of struct sockaddr
394 * and struct sockaddr_in6 are equal.
395 * 2. On Linux sa_family is the first member of all struct sockaddr.
396 */
397int
398bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
399    socklen_t len)
400{
401	struct l_sockaddr *kosa;
402	int error, bdom;
403
404	*lsa = NULL;
405	if (len < 2 || len > UCHAR_MAX)
406		return (EINVAL);
407
408	kosa = malloc(len, M_SONAME, M_WAITOK);
409	bcopy(sa, kosa, len);
410
411	bdom = bsd_to_linux_domain(sa->sa_family);
412	if (bdom == -1) {
413		error = EAFNOSUPPORT;
414		goto out;
415	}
416
417	kosa->sa_family = bdom;
418	*lsa = kosa;
419	return (0);
420
421out:
422	free(kosa, M_SONAME);
423	return (error);
424}
425
/*
 * Copy a Linux socket address in from user space and convert it, in
 * place, to a native struct sockaddr.
 *
 * osa - user-space address of the Linux sockaddr (read with copyin()).
 * sap - on success receives the converted address, held in a buffer
 *       allocated from M_SONAME; not written on failure.
 * len - in: length of the Linux address; out, on success only: the
 *       native length, which is also stored in sa_len.
 *
 * Returns 0 on success.  Otherwise returns EINVAL (length outside
 * [2, UCHAR_MAX], AF_INET address shorter than struct sockaddr_in, or a
 * rejected pre-RFC2553 sockaddr_in6), EAFNOSUPPORT (family unknown to
 * linux_to_bsd_domain()), ENAMETOOLONG (AF_LOCAL name does not fit
 * struct sockaddr_un) or the error reported by copyin().  The buffer is
 * freed on every failure path.
 */
int
linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
    socklen_t *len)
{
	struct sockaddr *sa;
	struct l_sockaddr *kosa;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	bool  oldv6size;
#endif
	char *name;
	int salen, bdom, error, hdrlen, namelen;

	if (*len < 2 || *len > UCHAR_MAX)
		return (EINVAL);

	salen = *len;

#ifdef INET6
	oldv6size = false;
	/*
	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
	 * if it's a v4-mapped address, so reserve the proper space
	 * for it.
	 */
	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
		salen += sizeof(uint32_t);
		oldv6size = true;
	}
#endif

	/*
	 * salen may be one uint32_t larger than *len at this point; only
	 * the *len bytes supplied by the caller are copied in.
	 */
	kosa = malloc(salen, M_SONAME, M_WAITOK);

	if ((error = copyin(osa, kosa, *len)))
		goto out;

	bdom = linux_to_bsd_domain(kosa->sa_family);
	if (bdom == -1) {
		error = EAFNOSUPPORT;
		goto out;
	}

#ifdef INET6
	/*
	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
	 * which lacks the scope id compared with RFC2553 one. If we detect
	 * the situation, reject the address and write a message to system log.
	 *
	 * Still accept addresses for which the scope id is not used.
	 */
	if (oldv6size) {
		if (bdom == AF_INET6) {
			sin6 = (struct sockaddr_in6 *)kosa;
			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
				/* Fill in the field the old layout lacks. */
				sin6->sin6_scope_id = 0;
			} else {
				linux_msg(curthread,
				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
				error = EINVAL;
				goto out;
			}
		} else
			/* Not IPv6 after all: undo the size adjustment. */
			salen -= sizeof(uint32_t);
	}
#endif
	/* AF_INET: require a full sockaddr_in and report exactly that size. */
	if (bdom == AF_INET) {
		if (salen < sizeof(struct sockaddr_in)) {
			error = EINVAL;
			goto out;
		}
		salen = sizeof(struct sockaddr_in);
	}

	/*
	 * AF_LOCAL addresses larger than the native sockaddr_un are trimmed
	 * to the header plus the actual name length, and rejected if they
	 * still do not fit.
	 */
	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
		hdrlen = offsetof(struct sockaddr_un, sun_path);
		name = ((struct sockaddr_un *)kosa)->sun_path;
		if (*name == '\0') {
			/*
			 * Linux abstract namespace starts with a NUL byte.
			 * XXX We do not support abstract namespace yet.
			 */
			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
		} else
			namelen = strnlen(name, salen - hdrlen);
		salen = hdrlen + namelen;
		if (salen > sizeof(struct sockaddr_un)) {
			error = ENAMETOOLONG;
			goto out;
		}
	}

	/* Reinterpret the buffer as a native sockaddr and fill its header. */
	sa = (struct sockaddr *)kosa;
	sa->sa_family = bdom;
	sa->sa_len = salen;

	*sap = sa;
	*len = salen;
	return (0);

out:
	free(kosa, M_SONAME);
	return (error);
}
534
535void
536linux_dev_shm_create(void)
537{
538	int error;
539
540	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
541	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
542	if (error != 0) {
543		printf("%s: failed to create device node, error %d\n",
544		    __func__, error);
545	}
546}
547
548void
549linux_dev_shm_destroy(void)
550{
551
552	destroy_dev(dev_shm_cdev);
553}
554