linux_socket.c revision 283433
140939Sdes/*-
240939Sdes * Copyright (c) 1995 S��ren Schmidt
340939Sdes * All rights reserved.
440939Sdes *
540939Sdes * Redistribution and use in source and binary forms, with or without
640939Sdes * modification, are permitted provided that the following conditions
740939Sdes * are met:
840939Sdes * 1. Redistributions of source code must retain the above copyright
940939Sdes *    notice, this list of conditions and the following disclaimer
1040939Sdes *    in this position and unchanged.
1140939Sdes * 2. Redistributions in binary form must reproduce the above copyright
1240939Sdes *    notice, this list of conditions and the following disclaimer in the
1340939Sdes *    documentation and/or other materials provided with the distribution.
1440939Sdes * 3. The name of the author may not be used to endorse or promote products
1540939Sdes *    derived from this software without specific prior written permission
1640939Sdes *
1740939Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1840939Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1940939Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2040939Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2140939Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2240939Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2340939Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2440939Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2540939Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2640939Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2740939Sdes */
2840939Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 283433 2015-05-24 16:26:55Z dchagin $");
3184203Sdillon
3241862Sdes/* XXX we use functions that might not exist. */
3340939Sdes#include "opt_compat.h"
3455557Sdes#include "opt_inet6.h"
3562981Sdes
3640939Sdes#include <sys/param.h>
3740939Sdes#include <sys/proc.h>
3840939Sdes#include <sys/systm.h>
3940939Sdes#include <sys/sysproto.h>
4060924Sdes#include <sys/capsicum.h>
4141862Sdes#include <sys/fcntl.h>
4241862Sdes#include <sys/file.h>
4340939Sdes#include <sys/limits.h>
4440939Sdes#include <sys/lock.h>
4540939Sdes#include <sys/malloc.h>
4640939Sdes#include <sys/mutex.h>
4740939Sdes#include <sys/mbuf.h>
4840939Sdes#include <sys/socket.h>
4940975Sdes#include <sys/socketvar.h>
5040939Sdes#include <sys/syscallsubr.h>
5140939Sdes#include <sys/uio.h>
5240939Sdes#include <sys/syslog.h>
5340939Sdes#include <sys/un.h>
5440939Sdes
5540939Sdes#include <net/if.h>
5690267Sdes#include <net/vnet.h>
5790267Sdes#include <netinet/in.h>
5890267Sdes#include <netinet/in_systm.h>
5990267Sdes#include <netinet/ip.h>
6090267Sdes#include <netinet/tcp.h>
6140939Sdes#ifdef INET6
6240939Sdes#include <netinet/ip6.h>
6362981Sdes#include <netinet6/ip6_var.h>
6475891Sarchie#endif
6540939Sdes
6662981Sdes#ifdef COMPAT_LINUX32
6740939Sdes#include <machine/../linux32/linux.h>
6840939Sdes#include <machine/../linux32/linux32_proto.h>
6940939Sdes#else
7040939Sdes#include <machine/../linux/linux.h>
7140939Sdes#include <machine/../linux/linux_proto.h>
7260924Sdes#endif
7340975Sdes#include <compat/linux/linux_socket.h>
7440939Sdes#include <compat/linux/linux_util.h>
7590267Sdes
7690267Sdesstatic int linux_to_bsd_domain(int);
7790267Sdes
7840939Sdes/*
7940939Sdes * Reads a linux sockaddr and does any necessary translation.
8040939Sdes * Linux sockaddrs don't have a length field, only a family.
8140939Sdes * Copy the osockaddr structure pointed to by osa to kernel, adjust
8240939Sdes * family and convert to sockaddr.
8340939Sdes */
8440939Sdesstatic int
8540939Sdeslinux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int salen)
8690267Sdes{
8790267Sdes	struct sockaddr *sa;
8890267Sdes	struct osockaddr *kosa;
8940939Sdes#ifdef INET6
9040939Sdes	struct sockaddr_in6 *sin6;
9140939Sdes	int oldv6size;
9240939Sdes#endif
9340939Sdes	char *name;
9440939Sdes	int bdom, error, hdrlen, namelen;
9540939Sdes
9640939Sdes	if (salen < 2 || salen > UCHAR_MAX || !osa)
9790267Sdes		return (EINVAL);
9890267Sdes
9990267Sdes#ifdef INET6
10090267Sdes	oldv6size = 0;
10190267Sdes	/*
10290267Sdes	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
10390267Sdes	 * if it's a v4-mapped address, so reserve the proper space
10490267Sdes	 * for it.
10590267Sdes	 */
10690267Sdes	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
10790267Sdes		salen += sizeof(uint32_t);
10890267Sdes		oldv6size = 1;
10990267Sdes	}
11090267Sdes#endif
11190267Sdes
11290267Sdes	kosa = malloc(salen, M_SONAME, M_WAITOK);
11390267Sdes
11490267Sdes	if ((error = copyin(osa, kosa, salen)))
11590267Sdes		goto out;
11690267Sdes
11790267Sdes	bdom = linux_to_bsd_domain(kosa->sa_family);
11890267Sdes	if (bdom == -1) {
11990267Sdes		error = EAFNOSUPPORT;
12090267Sdes		goto out;
12190267Sdes	}
12290267Sdes
12390267Sdes#ifdef INET6
12490267Sdes	/*
12590267Sdes	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
12690267Sdes	 * which lacks the scope id compared with RFC2553 one. If we detect
12790267Sdes	 * the situation, reject the address and write a message to system log.
12890267Sdes	 *
12990267Sdes	 * Still accept addresses for which the scope id is not used.
13090267Sdes	 */
13190267Sdes	if (oldv6size) {
13290267Sdes		if (bdom == AF_INET6) {
13390267Sdes			sin6 = (struct sockaddr_in6 *)kosa;
13490267Sdes			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
13590267Sdes			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
13690267Sdes			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
13790267Sdes			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
13890267Sdes			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
13990267Sdes			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
14090267Sdes				sin6->sin6_scope_id = 0;
14190267Sdes			} else {
14290267Sdes				log(LOG_DEBUG,
14390267Sdes				    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
14490267Sdes				error = EINVAL;
14590267Sdes				goto out;
14690267Sdes			}
14790267Sdes		} else
14840939Sdes			salen -= sizeof(uint32_t);
14940939Sdes	}
15040939Sdes#endif
15141862Sdes	if (bdom == AF_INET) {
15241862Sdes		if (salen < sizeof(struct sockaddr_in)) {
15341862Sdes			error = EINVAL;
15460924Sdes			goto out;
15575891Sarchie		}
15641862Sdes		salen = sizeof(struct sockaddr_in);
15790267Sdes	}
15890267Sdes
15990267Sdes	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
16090267Sdes		hdrlen = offsetof(struct sockaddr_un, sun_path);
16190267Sdes		name = ((struct sockaddr_un *)kosa)->sun_path;
16290267Sdes		if (*name == '\0') {
16341862Sdes			/*
16441862Sdes		 	 * Linux abstract namespace starts with a NULL byte.
16541862Sdes			 * XXX We do not support abstract namespace yet.
16640939Sdes			 */
16740939Sdes			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
16840939Sdes		} else
16968551Sdes			namelen = strnlen(name, salen - hdrlen);
17068551Sdes		salen = hdrlen + namelen;
17168551Sdes		if (salen > sizeof(struct sockaddr_un)) {
17275891Sarchie			error = ENAMETOOLONG;
17368551Sdes			goto out;
17490267Sdes		}
17568551Sdes	}
17690267Sdes
17790267Sdes	sa = (struct sockaddr *)kosa;
17890267Sdes	sa->sa_family = bdom;
17990267Sdes	sa->sa_len = salen;
18090267Sdes
18190267Sdes	*sap = sa;
18290267Sdes	return (0);
18368551Sdes
18468551Sdesout:
18568551Sdes	free(kosa, M_SONAME);
18668551Sdes	return (error);
18768551Sdes}
18868551Sdes
18975891Sarchiestatic int
19068551Sdeslinux_to_bsd_domain(int domain)
19190267Sdes{
19290267Sdes
19390267Sdes	switch (domain) {
19490267Sdes	case LINUX_AF_UNSPEC:
19590267Sdes		return (AF_UNSPEC);
19668551Sdes	case LINUX_AF_UNIX:
19768551Sdes		return (AF_LOCAL);
19868551Sdes	case LINUX_AF_INET:
19940939Sdes		return (AF_INET);
20040939Sdes	case LINUX_AF_INET6:
20140939Sdes		return (AF_INET6);
20275891Sarchie	case LINUX_AF_AX25:
20340939Sdes		return (AF_CCITT);
20490267Sdes	case LINUX_AF_IPX:
20590267Sdes		return (AF_IPX);
20690267Sdes	case LINUX_AF_APPLETALK:
20740939Sdes		return (AF_APPLETALK);
20890267Sdes	}
20941862Sdes	return (-1);
21090267Sdes}
21190267Sdes
21240939Sdesstatic int
21390267Sdesbsd_to_linux_domain(int domain)
21490267Sdes{
21590267Sdes
21690267Sdes	switch (domain) {
21790267Sdes	case AF_UNSPEC:
21890267Sdes		return (LINUX_AF_UNSPEC);
21990267Sdes	case AF_LOCAL:
22090267Sdes		return (LINUX_AF_UNIX);
22190267Sdes	case AF_INET:
22290267Sdes		return (LINUX_AF_INET);
22390267Sdes	case AF_INET6:
22490267Sdes		return (LINUX_AF_INET6);
22590267Sdes	case AF_CCITT:
22690267Sdes		return (LINUX_AF_AX25);
22790267Sdes	case AF_IPX:
22890267Sdes		return (LINUX_AF_IPX);
22990267Sdes	case AF_APPLETALK:
23062981Sdes		return (LINUX_AF_APPLETALK);
23190267Sdes	}
23290267Sdes	return (-1);
23390267Sdes}
23490267Sdes
23590267Sdesstatic int
23690267Sdeslinux_to_bsd_sockopt_level(int level)
23790267Sdes{
23890267Sdes
23990267Sdes	switch (level) {
24090267Sdes	case LINUX_SOL_SOCKET:
24190267Sdes		return (SOL_SOCKET);
24240939Sdes	}
24390267Sdes	return (level);
24440939Sdes}
24541989Sdes
24641989Sdesstatic int
24755557Sdesbsd_to_linux_sockopt_level(int level)
24855557Sdes{
24955557Sdes
25055557Sdes	switch (level) {
25155557Sdes	case SOL_SOCKET:
25255557Sdes		return (LINUX_SOL_SOCKET);
25355557Sdes	}
25455557Sdes	return (level);
25590267Sdes}
25690267Sdes
25790267Sdesstatic int
25890267Sdeslinux_to_bsd_ip_sockopt(int opt)
25990267Sdes{
26090267Sdes
26190267Sdes	switch (opt) {
26290267Sdes	case LINUX_IP_TOS:
26390267Sdes		return (IP_TOS);
26490267Sdes	case LINUX_IP_TTL:
26590267Sdes		return (IP_TTL);
26655557Sdes	case LINUX_IP_OPTIONS:
26755557Sdes		return (IP_OPTIONS);
26890267Sdes	case LINUX_IP_MULTICAST_IF:
26990267Sdes		return (IP_MULTICAST_IF);
27055557Sdes	case LINUX_IP_MULTICAST_TTL:
27155557Sdes		return (IP_MULTICAST_TTL);
27290267Sdes	case LINUX_IP_MULTICAST_LOOP:
27390267Sdes		return (IP_MULTICAST_LOOP);
27490267Sdes	case LINUX_IP_ADD_MEMBERSHIP:
27555557Sdes		return (IP_ADD_MEMBERSHIP);
27690267Sdes	case LINUX_IP_DROP_MEMBERSHIP:
27790267Sdes		return (IP_DROP_MEMBERSHIP);
27890267Sdes	case LINUX_IP_HDRINCL:
27990267Sdes		return (IP_HDRINCL);
28090267Sdes	}
28190267Sdes	return (-1);
28290267Sdes}
28390267Sdes
28490267Sdesstatic int
28590267Sdeslinux_to_bsd_so_sockopt(int opt)
28690267Sdes{
28790267Sdes
28890267Sdes	switch (opt) {
28990267Sdes	case LINUX_SO_DEBUG:
29090267Sdes		return (SO_DEBUG);
29190267Sdes	case LINUX_SO_REUSEADDR:
29290267Sdes		return (SO_REUSEADDR);
29390267Sdes	case LINUX_SO_TYPE:
29490267Sdes		return (SO_TYPE);
29590267Sdes	case LINUX_SO_ERROR:
29690267Sdes		return (SO_ERROR);
29790267Sdes	case LINUX_SO_DONTROUTE:
29890267Sdes		return (SO_DONTROUTE);
29990267Sdes	case LINUX_SO_BROADCAST:
30090267Sdes		return (SO_BROADCAST);
30190267Sdes	case LINUX_SO_SNDBUF:
30290267Sdes		return (SO_SNDBUF);
30390267Sdes	case LINUX_SO_RCVBUF:
30490267Sdes		return (SO_RCVBUF);
30590267Sdes	case LINUX_SO_KEEPALIVE:
30690267Sdes		return (SO_KEEPALIVE);
30790267Sdes	case LINUX_SO_OOBINLINE:
30890267Sdes		return (SO_OOBINLINE);
30990267Sdes	case LINUX_SO_LINGER:
31090267Sdes		return (SO_LINGER);
31190267Sdes	case LINUX_SO_PEERCRED:
31290267Sdes		return (LOCAL_PEERCRED);
31390267Sdes	case LINUX_SO_RCVLOWAT:
31490267Sdes		return (SO_RCVLOWAT);
31590267Sdes	case LINUX_SO_SNDLOWAT:
31690267Sdes		return (SO_SNDLOWAT);
31790267Sdes	case LINUX_SO_RCVTIMEO:
31890267Sdes		return (SO_RCVTIMEO);
31990267Sdes	case LINUX_SO_SNDTIMEO:
32090267Sdes		return (SO_SNDTIMEO);
32190267Sdes	case LINUX_SO_TIMESTAMP:
32290267Sdes		return (SO_TIMESTAMP);
32390267Sdes	case LINUX_SO_ACCEPTCONN:
32490267Sdes		return (SO_ACCEPTCONN);
32590267Sdes	}
32655557Sdes	return (-1);
32755557Sdes}
32855557Sdes
32962981Sdesstatic int
33062981Sdeslinux_to_bsd_tcp_sockopt(int opt)
33162981Sdes{
33262981Sdes
33362981Sdes	switch (opt) {
33475891Sarchie	case LINUX_TCP_NODELAY:
33562981Sdes		return (TCP_NODELAY);
33690267Sdes	case LINUX_TCP_MAXSEG:
33790267Sdes		return (TCP_MAXSEG);
33862981Sdes	case LINUX_TCP_KEEPIDLE:
33990267Sdes		return (TCP_KEEPIDLE);
34090267Sdes	case LINUX_TCP_KEEPINTVL:
34190267Sdes		return (TCP_KEEPINTVL);
34290267Sdes	case LINUX_TCP_KEEPCNT:
34390267Sdes		return (TCP_KEEPCNT);
34490267Sdes	case LINUX_TCP_MD5SIG:
34590267Sdes		return (TCP_MD5SIG);
34690267Sdes	}
34790267Sdes	return (-1);
34890267Sdes}
34990267Sdes
35062981Sdesstatic int
35162981Sdeslinux_to_bsd_msg_flags(int flags)
35262981Sdes{
35341989Sdes	int ret_flags = 0;
35441989Sdes
35541989Sdes	if (flags & LINUX_MSG_OOB)
35641989Sdes		ret_flags |= MSG_OOB;
35790267Sdes	if (flags & LINUX_MSG_PEEK)
35841989Sdes		ret_flags |= MSG_PEEK;
35990267Sdes	if (flags & LINUX_MSG_DONTROUTE)
36041989Sdes		ret_flags |= MSG_DONTROUTE;
36190267Sdes	if (flags & LINUX_MSG_CTRUNC)
36241989Sdes		ret_flags |= MSG_CTRUNC;
36390267Sdes	if (flags & LINUX_MSG_TRUNC)
36490267Sdes		ret_flags |= MSG_TRUNC;
36590267Sdes	if (flags & LINUX_MSG_DONTWAIT)
36690267Sdes		ret_flags |= MSG_DONTWAIT;
36790267Sdes	if (flags & LINUX_MSG_EOR)
36890267Sdes		ret_flags |= MSG_EOR;
36990267Sdes	if (flags & LINUX_MSG_WAITALL)
37041989Sdes		ret_flags |= MSG_WAITALL;
37141989Sdes	if (flags & LINUX_MSG_NOSIGNAL)
37241989Sdes		ret_flags |= MSG_NOSIGNAL;
37390267Sdes#if 0 /* not handled */
37490267Sdes	if (flags & LINUX_MSG_PROXY)
37590267Sdes		;
37690267Sdes	if (flags & LINUX_MSG_FIN)
37790267Sdes		;
37890267Sdes	if (flags & LINUX_MSG_SYN)
37990267Sdes		;
38090267Sdes	if (flags & LINUX_MSG_CONFIRM)
38190267Sdes		;
38290267Sdes	if (flags & LINUX_MSG_RST)
38341989Sdes		;
38490267Sdes	if (flags & LINUX_MSG_ERRQUEUE)
38590267Sdes		;
38690267Sdes#endif
38741989Sdes	return ret_flags;
38890267Sdes}
38990267Sdes
/*
 * If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
 * native syscall will fault.  Thus, we don't really need to check the
 * return values for these functions.
 */
395
396static int
397bsd_to_linux_sockaddr(struct sockaddr *arg)
398{
399	struct sockaddr sa;
400	size_t sa_len = sizeof(struct sockaddr);
401	int error;
402
403	if ((error = copyin(arg, &sa, sa_len)))
404		return (error);
405
406	*(u_short *)&sa = sa.sa_family;
407
408	error = copyout(&sa, arg, sa_len);
409
410	return (error);
411}
412
413static int
414linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
415{
416	struct sockaddr sa;
417	size_t sa_len = sizeof(struct sockaddr);
418	int error;
419
420	if ((error = copyin(arg, &sa, sa_len)))
421		return (error);
422
423	sa.sa_family = *(sa_family_t *)&sa;
424	sa.sa_len = len;
425
426	error = copyout(&sa, arg, sa_len);
427
428	return (error);
429}
430
431static int
432linux_sa_put(struct osockaddr *osa)
433{
434	struct osockaddr sa;
435	int error, bdom;
436
437	/*
438	 * Only read/write the osockaddr family part, the rest is
439	 * not changed.
440	 */
441	error = copyin(osa, &sa, sizeof(sa.sa_family));
442	if (error)
443		return (error);
444
445	bdom = bsd_to_linux_domain(sa.sa_family);
446	if (bdom == -1)
447		return (EINVAL);
448
449	sa.sa_family = bdom;
450	error = copyout(&sa, osa, sizeof(sa.sa_family));
451	if (error)
452		return (error);
453
454	return (0);
455}
456
457static int
458linux_to_bsd_cmsg_type(int cmsg_type)
459{
460
461	switch (cmsg_type) {
462	case LINUX_SCM_RIGHTS:
463		return (SCM_RIGHTS);
464	case LINUX_SCM_CREDENTIALS:
465		return (SCM_CREDS);
466	}
467	return (-1);
468}
469
470static int
471bsd_to_linux_cmsg_type(int cmsg_type)
472{
473
474	switch (cmsg_type) {
475	case SCM_RIGHTS:
476		return (LINUX_SCM_RIGHTS);
477	case SCM_CREDS:
478		return (LINUX_SCM_CREDENTIALS);
479	}
480	return (-1);
481}
482
483static int
484linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
485{
486	if (lhdr->msg_controllen > INT_MAX)
487		return (ENOBUFS);
488
489	bhdr->msg_name		= PTRIN(lhdr->msg_name);
490	bhdr->msg_namelen	= lhdr->msg_namelen;
491	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
492	bhdr->msg_iovlen	= lhdr->msg_iovlen;
493	bhdr->msg_control	= PTRIN(lhdr->msg_control);
494
495	/*
496	 * msg_controllen is skipped since BSD and LINUX control messages
497	 * are potentially different sizes (e.g. the cred structure used
498	 * by SCM_CREDS is different between the two operating system).
499	 *
500	 * The caller can set it (if necessary) after converting all the
501	 * control messages.
502	 */
503
504	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
505	return (0);
506}
507
508static int
509bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
510{
511	lhdr->msg_name		= PTROUT(bhdr->msg_name);
512	lhdr->msg_namelen	= bhdr->msg_namelen;
513	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
514	lhdr->msg_iovlen	= bhdr->msg_iovlen;
515	lhdr->msg_control	= PTROUT(bhdr->msg_control);
516
517	/*
518	 * msg_controllen is skipped since BSD and LINUX control messages
519	 * are potentially different sizes (e.g. the cred structure used
520	 * by SCM_CREDS is different between the two operating system).
521	 *
522	 * The caller can set it (if necessary) after converting all the
523	 * control messages.
524	 */
525
526	/* msg_flags skipped */
527	return (0);
528}
529
530static int
531linux_set_socket_flags(struct thread *td, int s, int flags)
532{
533	int error;
534
535	if (flags & LINUX_SOCK_NONBLOCK) {
536		error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK);
537		if (error)
538			return (error);
539	}
540	if (flags & LINUX_SOCK_CLOEXEC) {
541		error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC);
542		if (error)
543			return (error);
544	}
545	return (0);
546}
547
548static int
549linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
550    struct mbuf *control, enum uio_seg segflg)
551{
552	struct sockaddr *to;
553	int error;
554
555	if (mp->msg_name != NULL) {
556		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
557		if (error)
558			return (error);
559		mp->msg_name = to;
560	} else
561		to = NULL;
562
563	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
564	    segflg);
565
566	if (to)
567		free(to, M_SONAME);
568	return (error);
569}
570
571/* Return 0 if IP_HDRINCL is set for the given socket. */
572static int
573linux_check_hdrincl(struct thread *td, int s)
574{
575	int error, optval;
576	socklen_t size_val;
577
578	size_val = sizeof(optval);
579	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
580	    &optval, UIO_SYSSPACE, &size_val);
581	if (error)
582		return (error);
583
584	return (optval == 0);
585}
586
587/*
588 * Updated sendto() when IP_HDRINCL is set:
589 * tweak endian-dependent fields in the IP packet.
590 */
591static int
592linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
593{
594/*
595 * linux_ip_copysize defines how many bytes we should copy
596 * from the beginning of the IP packet before we customize it for BSD.
597 * It should include all the fields we modify (ip_len and ip_off).
598 */
599#define linux_ip_copysize	8
600
601	struct ip *packet;
602	struct msghdr msg;
603	struct iovec aiov[1];
604	int error;
605
606	/* Check that the packet isn't too big or too small. */
607	if (linux_args->len < linux_ip_copysize ||
608	    linux_args->len > IP_MAXPACKET)
609		return (EINVAL);
610
611	packet = (struct ip *)malloc(linux_args->len, M_LINUX, M_WAITOK);
612
613	/* Make kernel copy of the packet to be sent */
614	if ((error = copyin(PTRIN(linux_args->msg), packet,
615	    linux_args->len)))
616		goto goout;
617
618	/* Convert fields from Linux to BSD raw IP socket format */
619	packet->ip_len = linux_args->len;
620	packet->ip_off = ntohs(packet->ip_off);
621
622	/* Prepare the msghdr and iovec structures describing the new packet */
623	msg.msg_name = PTRIN(linux_args->to);
624	msg.msg_namelen = linux_args->tolen;
625	msg.msg_iov = aiov;
626	msg.msg_iovlen = 1;
627	msg.msg_control = NULL;
628	msg.msg_flags = 0;
629	aiov[0].iov_base = (char *)packet;
630	aiov[0].iov_len = linux_args->len;
631	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
632	    NULL, UIO_SYSSPACE);
633goout:
634	free(packet, M_LINUX);
635	return (error);
636}
637
638int
639linux_socket(struct thread *td, struct linux_socket_args *args)
640{
641	struct socket_args /* {
642		int domain;
643		int type;
644		int protocol;
645	} */ bsd_args;
646	int retval_socket, socket_flags;
647
648	bsd_args.protocol = args->protocol;
649	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
650	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
651		return (EINVAL);
652	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
653	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
654		return (EINVAL);
655	bsd_args.domain = linux_to_bsd_domain(args->domain);
656	if (bsd_args.domain == -1)
657		return (EAFNOSUPPORT);
658
659	retval_socket = sys_socket(td, &bsd_args);
660	if (retval_socket)
661		return (retval_socket);
662
663	retval_socket = linux_set_socket_flags(td, td->td_retval[0],
664	    socket_flags);
665	if (retval_socket) {
666		(void)kern_close(td, td->td_retval[0]);
667		goto out;
668	}
669
670	if (bsd_args.type == SOCK_RAW
671	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
672	    && bsd_args.domain == PF_INET) {
673		/* It's a raw IP socket: set the IP_HDRINCL option. */
674		int hdrincl;
675
676		hdrincl = 1;
677		/* We ignore any error returned by kern_setsockopt() */
678		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
679		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
680	}
681#ifdef INET6
682	/*
683	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default
684	 * and some apps depend on this. So, set V6ONLY to 0 for Linux apps.
685	 * For simplicity we do this unconditionally of the net.inet6.ip6.v6only
686	 * sysctl value.
687	 */
688	if (bsd_args.domain == PF_INET6) {
689		int v6only;
690
691		v6only = 0;
692		/* We ignore any error returned by setsockopt() */
693		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
694		    &v6only, UIO_SYSSPACE, sizeof(v6only));
695	}
696#endif
697
698out:
699	return (retval_socket);
700}
701
702int
703linux_bind(struct thread *td, struct linux_bind_args *args)
704{
705	struct sockaddr *sa;
706	int error;
707
708	error = linux_getsockaddr(&sa, PTRIN(args->name),
709	    args->namelen);
710	if (error)
711		return (error);
712
713	error = kern_bindat(td, AT_FDCWD, args->s, sa);
714	free(sa, M_SONAME);
715	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
716	   	return (EINVAL);
717	return (error);
718}
719
720int
721linux_connect(struct thread *td, struct linux_connect_args *args)
722{
723	cap_rights_t rights;
724	struct socket *so;
725	struct sockaddr *sa;
726	u_int fflag;
727	int error;
728
729	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
730	    args->namelen);
731	if (error)
732		return (error);
733
734	error = kern_connectat(td, AT_FDCWD, args->s, sa);
735	free(sa, M_SONAME);
736	if (error != EISCONN)
737		return (error);
738
739	/*
740	 * Linux doesn't return EISCONN the first time it occurs,
741	 * when on a non-blocking socket. Instead it returns the
742	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
743	 *
744	 * XXXRW: Instead of using fgetsock(), check that it is a
745	 * socket and use the file descriptor reference instead of
746	 * creating a new one.
747	 */
748	error = fgetsock(td, args->s, cap_rights_init(&rights, CAP_CONNECT),
749	    &so, &fflag);
750	if (error == 0) {
751		error = EISCONN;
752		if (fflag & FNONBLOCK) {
753			SOCK_LOCK(so);
754			if (so->so_emuldata == 0)
755				error = so->so_error;
756			so->so_emuldata = (void *)1;
757			SOCK_UNLOCK(so);
758		}
759		fputsock(so);
760	}
761	return (error);
762}
763
764int
765linux_listen(struct thread *td, struct linux_listen_args *args)
766{
767	struct listen_args /* {
768		int s;
769		int backlog;
770	} */ bsd_args;
771
772	bsd_args.s = args->s;
773	bsd_args.backlog = args->backlog;
774	return (sys_listen(td, &bsd_args));
775}
776
777static int
778linux_accept_common(struct thread *td, int s, l_uintptr_t addr,
779    l_uintptr_t namelen, int flags)
780{
781	struct accept_args /* {
782		int	s;
783		struct sockaddr * __restrict name;
784		socklen_t * __restrict anamelen;
785	} */ bsd_args;
786	int error;
787
788	if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
789		return (EINVAL);
790
791	bsd_args.s = s;
792	/* XXX: */
793	bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr);
794	bsd_args.anamelen = PTRIN(namelen);/* XXX */
795	error = sys_accept(td, &bsd_args);
796	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
797	if (error) {
798		if (error == EFAULT && namelen != sizeof(struct sockaddr_in))
799			return (EINVAL);
800		return (error);
801	}
802
803	/*
804	 * linux appears not to copy flags from the parent socket to the
805	 * accepted one, so we must clear the flags in the new descriptor
806	 * and apply the requested flags.
807	 */
808	error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0);
809	if (error)
810		goto out;
811	error = linux_set_socket_flags(td, td->td_retval[0], flags);
812	if (error)
813		goto out;
814	if (addr)
815		error = linux_sa_put(PTRIN(addr));
816
817out:
818	if (error) {
819		(void)kern_close(td, td->td_retval[0]);
820		td->td_retval[0] = 0;
821	}
822	return (error);
823}
824
825int
826linux_accept(struct thread *td, struct linux_accept_args *args)
827{
828
829	return (linux_accept_common(td, args->s, args->addr,
830	    args->namelen, 0));
831}
832
833int
834linux_accept4(struct thread *td, struct linux_accept4_args *args)
835{
836
837	return (linux_accept_common(td, args->s, args->addr,
838	    args->namelen, args->flags));
839}
840
841int
842linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
843{
844	struct getsockname_args /* {
845		int	fdes;
846		struct sockaddr * __restrict asa;
847		socklen_t * __restrict alen;
848	} */ bsd_args;
849	int error;
850
851	bsd_args.fdes = args->s;
852	/* XXX: */
853	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
854	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
855	error = sys_getsockname(td, &bsd_args);
856	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
857	if (error)
858		return (error);
859	error = linux_sa_put(PTRIN(args->addr));
860	if (error)
861		return (error);
862	return (0);
863}
864
865int
866linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
867{
868	struct getpeername_args /* {
869		int fdes;
870		caddr_t asa;
871		int *alen;
872	} */ bsd_args;
873	int error;
874
875	bsd_args.fdes = args->s;
876	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
877	bsd_args.alen = (socklen_t *)PTRIN(args->namelen);
878	error = sys_getpeername(td, &bsd_args);
879	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
880	if (error)
881		return (error);
882	error = linux_sa_put(PTRIN(args->addr));
883	if (error)
884		return (error);
885	return (0);
886}
887
888int
889linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
890{
891	struct socketpair_args /* {
892		int domain;
893		int type;
894		int protocol;
895		int *rsv;
896	} */ bsd_args;
897	int error, socket_flags;
898	int sv[2];
899
900	bsd_args.domain = linux_to_bsd_domain(args->domain);
901	if (bsd_args.domain != PF_LOCAL)
902		return (EAFNOSUPPORT);
903
904	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
905	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
906		return (EINVAL);
907	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
908	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
909		return (EINVAL);
910
911	if (args->protocol != 0 && args->protocol != PF_UNIX)
912
913		/*
914		 * Use of PF_UNIX as protocol argument is not right,
915		 * but Linux does it.
916		 * Do not map PF_UNIX as its Linux value is identical
917		 * to FreeBSD one.
918		 */
919		return (EPROTONOSUPPORT);
920	else
921		bsd_args.protocol = 0;
922	bsd_args.rsv = (int *)PTRIN(args->rsv);
923	error = kern_socketpair(td, bsd_args.domain, bsd_args.type,
924	    bsd_args.protocol, sv);
925	if (error)
926		return (error);
927	error = linux_set_socket_flags(td, sv[0], socket_flags);
928	if (error)
929		goto out;
930	error = linux_set_socket_flags(td, sv[1], socket_flags);
931	if (error)
932		goto out;
933
934	error = copyout(sv, bsd_args.rsv, 2 * sizeof(int));
935
936out:
937	if (error) {
938		(void)kern_close(td, sv[0]);
939		(void)kern_close(td, sv[1]);
940	}
941	return (error);
942}
943
944#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
945struct linux_send_args {
946	int s;
947	l_uintptr_t msg;
948	int len;
949	int flags;
950};
951
952static int
953linux_send(struct thread *td, struct linux_send_args *args)
954{
955	struct sendto_args /* {
956		int s;
957		caddr_t buf;
958		int len;
959		int flags;
960		caddr_t to;
961		int tolen;
962	} */ bsd_args;
963
964	bsd_args.s = args->s;
965	bsd_args.buf = (caddr_t)PTRIN(args->msg);
966	bsd_args.len = args->len;
967	bsd_args.flags = args->flags;
968	bsd_args.to = NULL;
969	bsd_args.tolen = 0;
970	return sys_sendto(td, &bsd_args);
971}
972
973struct linux_recv_args {
974	int s;
975	l_uintptr_t msg;
976	int len;
977	int flags;
978};
979
980static int
981linux_recv(struct thread *td, struct linux_recv_args *args)
982{
983	struct recvfrom_args /* {
984		int s;
985		caddr_t buf;
986		int len;
987		int flags;
988		struct sockaddr *from;
989		socklen_t fromlenaddr;
990	} */ bsd_args;
991
992	bsd_args.s = args->s;
993	bsd_args.buf = (caddr_t)PTRIN(args->msg);
994	bsd_args.len = args->len;
995	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
996	bsd_args.from = NULL;
997	bsd_args.fromlenaddr = 0;
998	return (sys_recvfrom(td, &bsd_args));
999}
1000#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1001
1002int
1003linux_sendto(struct thread *td, struct linux_sendto_args *args)
1004{
1005	struct msghdr msg;
1006	struct iovec aiov;
1007	int error;
1008
1009	if (linux_check_hdrincl(td, args->s) == 0)
1010		/* IP_HDRINCL set, tweak the packet before sending */
1011		return (linux_sendto_hdrincl(td, args));
1012
1013	msg.msg_name = PTRIN(args->to);
1014	msg.msg_namelen = args->tolen;
1015	msg.msg_iov = &aiov;
1016	msg.msg_iovlen = 1;
1017	msg.msg_control = NULL;
1018	msg.msg_flags = 0;
1019	aiov.iov_base = PTRIN(args->msg);
1020	aiov.iov_len = args->len;
1021	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
1022	    UIO_USERSPACE);
1023	return (error);
1024}
1025
1026int
1027linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
1028{
1029	struct msghdr msg;
1030	struct iovec aiov;
1031	int error;
1032
1033	if (PTRIN(args->fromlen) != NULL) {
1034		error = copyin(PTRIN(args->fromlen), &msg.msg_namelen,
1035		    sizeof(msg.msg_namelen));
1036		if (error != 0)
1037			return (error);
1038
1039		error = linux_to_bsd_sockaddr((struct sockaddr *)PTRIN(args->from),
1040		    msg.msg_namelen);
1041		if (error != 0)
1042			return (error);
1043	} else
1044		msg.msg_namelen = 0;
1045
1046	msg.msg_name = (struct sockaddr * __restrict)PTRIN(args->from);
1047	msg.msg_iov = &aiov;
1048	msg.msg_iovlen = 1;
1049	aiov.iov_base = PTRIN(args->buf);
1050	aiov.iov_len = args->len;
1051	msg.msg_control = 0;
1052	msg.msg_flags = linux_to_bsd_msg_flags(args->flags);
1053
1054	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, NULL);
1055	if (error != 0)
1056		return (error);
1057
1058	if (PTRIN(args->from) != NULL) {
1059		error = bsd_to_linux_sockaddr((struct sockaddr *)
1060		    PTRIN(args->from));
1061		if (error != 0)
1062			return (error);
1063
1064		error = linux_sa_put((struct osockaddr *)
1065		    PTRIN(args->from));
1066	}
1067
1068	if (PTRIN(args->fromlen) != NULL)
1069		error = copyout(&msg.msg_namelen, PTRIN(args->fromlen),
1070		    sizeof(msg.msg_namelen));
1071
1072	return (error);
1073}
1074
1075int
1076linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1077{
1078	struct cmsghdr *cmsg;
1079	struct cmsgcred cmcred;
1080	struct mbuf *control;
1081	struct msghdr msg;
1082	struct l_cmsghdr linux_cmsg;
1083	struct l_cmsghdr *ptr_cmsg;
1084	struct l_msghdr linux_msg;
1085	struct iovec *iov;
1086	socklen_t datalen;
1087	struct sockaddr *sa;
1088	sa_family_t sa_family;
1089	void *data;
1090	int error;
1091
1092	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1093	if (error)
1094		return (error);
1095
1096	/*
1097	 * Some Linux applications (ping) define a non-NULL control data
1098	 * pointer, but a msg_controllen of 0, which is not allowed in the
1099	 * FreeBSD system call interface.  NULL the msg_control pointer in
1100	 * order to handle this case.  This should be checked, but allows the
1101	 * Linux ping to work.
1102	 */
1103	if (PTRIN(linux_msg.msg_control) != NULL && linux_msg.msg_controllen == 0)
1104		linux_msg.msg_control = PTROUT(NULL);
1105
1106	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1107	if (error)
1108		return (error);
1109
1110#ifdef COMPAT_LINUX32
1111	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1112	    &iov, EMSGSIZE);
1113#else
1114	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1115#endif
1116	if (error)
1117		return (error);
1118
1119	control = NULL;
1120	cmsg = NULL;
1121
1122	if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) {
1123		error = kern_getsockname(td, args->s, &sa, &datalen);
1124		if (error)
1125			goto bad;
1126		sa_family = sa->sa_family;
1127		free(sa, M_SONAME);
1128
1129		error = ENOBUFS;
1130		cmsg = malloc(CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO);
1131		control = m_get(M_WAITOK, MT_CONTROL);
1132		if (control == NULL)
1133			goto bad;
1134
1135		do {
1136			error = copyin(ptr_cmsg, &linux_cmsg,
1137			    sizeof(struct l_cmsghdr));
1138			if (error)
1139				goto bad;
1140
1141			error = EINVAL;
1142			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1143				goto bad;
1144
1145			/*
1146			 * Now we support only SCM_RIGHTS and SCM_CRED,
1147			 * so return EINVAL in any other cmsg_type
1148			 */
1149			cmsg->cmsg_type =
1150			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type);
1151			cmsg->cmsg_level =
1152			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1153			if (cmsg->cmsg_type == -1
1154			    || cmsg->cmsg_level != SOL_SOCKET)
1155				goto bad;
1156
1157			/*
1158			 * Some applications (e.g. pulseaudio) attempt to
1159			 * send ancillary data even if the underlying protocol
1160			 * doesn't support it which is not allowed in the
1161			 * FreeBSD system call interface.
1162			 */
1163			if (sa_family != AF_UNIX)
1164				continue;
1165
1166			data = LINUX_CMSG_DATA(ptr_cmsg);
1167			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1168
1169			switch (cmsg->cmsg_type)
1170			{
1171			case SCM_RIGHTS:
1172				break;
1173
1174			case SCM_CREDS:
1175				data = &cmcred;
1176				datalen = sizeof(cmcred);
1177
1178				/*
1179				 * The lower levels will fill in the structure
1180				 */
1181				bzero(data, datalen);
1182				break;
1183			}
1184
1185			cmsg->cmsg_len = CMSG_LEN(datalen);
1186
1187			error = ENOBUFS;
1188			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t)cmsg))
1189				goto bad;
1190			if (!m_append(control, datalen, (c_caddr_t)data))
1191				goto bad;
1192		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&linux_msg, ptr_cmsg)));
1193
1194		if (m_length(control, NULL) == 0) {
1195			m_freem(control);
1196			control = NULL;
1197		}
1198	}
1199
1200	msg.msg_iov = iov;
1201	msg.msg_flags = 0;
1202	error = linux_sendit(td, args->s, &msg, args->flags, control,
1203	    UIO_USERSPACE);
1204
1205bad:
1206	free(iov, M_IOV);
1207	if (cmsg)
1208		free(cmsg, M_LINUX);
1209	return (error);
1210}
1211
1212int
1213linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1214{
1215	struct cmsghdr *cm;
1216	struct cmsgcred *cmcred;
1217	struct msghdr msg;
1218	struct l_cmsghdr *linux_cmsg = NULL;
1219	struct l_ucred linux_ucred;
1220	socklen_t datalen, outlen;
1221	struct l_msghdr linux_msg;
1222	struct iovec *iov, *uiov;
1223	struct mbuf *control = NULL;
1224	struct mbuf **controlp;
1225	caddr_t outbuf;
1226	void *data;
1227	int error, i, fd, fds, *fdp;
1228
1229	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1230	if (error)
1231		return (error);
1232
1233	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1234	if (error)
1235		return (error);
1236
1237#ifdef COMPAT_LINUX32
1238	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1239	    &iov, EMSGSIZE);
1240#else
1241	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1242#endif
1243	if (error)
1244		return (error);
1245
1246	if (msg.msg_name) {
1247		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1248		    msg.msg_namelen);
1249		if (error)
1250			goto bad;
1251	}
1252
1253	uiov = msg.msg_iov;
1254	msg.msg_iov = iov;
1255	controlp = (msg.msg_control != NULL) ? &control : NULL;
1256	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1257	msg.msg_iov = uiov;
1258	if (error)
1259		goto bad;
1260
1261	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1262	if (error)
1263		goto bad;
1264
1265	if (linux_msg.msg_name) {
1266		error = bsd_to_linux_sockaddr((struct sockaddr *)
1267		    PTRIN(linux_msg.msg_name));
1268		if (error)
1269			goto bad;
1270	}
1271	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1272		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1273		if (error)
1274			goto bad;
1275	}
1276
1277	outbuf = PTRIN(linux_msg.msg_control);
1278	outlen = 0;
1279
1280	if (control) {
1281		linux_cmsg = malloc(L_CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO);
1282
1283		msg.msg_control = mtod(control, struct cmsghdr *);
1284		msg.msg_controllen = control->m_len;
1285
1286		cm = CMSG_FIRSTHDR(&msg);
1287
1288		while (cm != NULL) {
1289			linux_cmsg->cmsg_type =
1290			    bsd_to_linux_cmsg_type(cm->cmsg_type);
1291			linux_cmsg->cmsg_level =
1292			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1293			if (linux_cmsg->cmsg_type == -1
1294			    || cm->cmsg_level != SOL_SOCKET)
1295			{
1296				error = EINVAL;
1297				goto bad;
1298			}
1299
1300			data = CMSG_DATA(cm);
1301			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1302
1303			switch (cm->cmsg_type)
1304			{
1305			case SCM_RIGHTS:
1306				if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
1307					fds = datalen / sizeof(int);
1308					fdp = data;
1309					for (i = 0; i < fds; i++) {
1310						fd = *fdp++;
1311						(void)kern_fcntl(td, fd,
1312						    F_SETFD, FD_CLOEXEC);
1313					}
1314				}
1315				break;
1316
1317			case SCM_CREDS:
1318				/*
1319				 * Currently LOCAL_CREDS is never in
1320				 * effect for Linux so no need to worry
1321				 * about sockcred
1322				 */
1323				if (datalen != sizeof(*cmcred)) {
1324					error = EMSGSIZE;
1325					goto bad;
1326				}
1327				cmcred = (struct cmsgcred *)data;
1328				bzero(&linux_ucred, sizeof(linux_ucred));
1329				linux_ucred.pid = cmcred->cmcred_pid;
1330				linux_ucred.uid = cmcred->cmcred_uid;
1331				linux_ucred.gid = cmcred->cmcred_gid;
1332				data = &linux_ucred;
1333				datalen = sizeof(linux_ucred);
1334				break;
1335			}
1336
1337			if (outlen + LINUX_CMSG_LEN(datalen) >
1338			    linux_msg.msg_controllen) {
1339				if (outlen == 0) {
1340					error = EMSGSIZE;
1341					goto bad;
1342				} else {
1343					linux_msg.msg_flags |=
1344					    LINUX_MSG_CTRUNC;
1345					goto out;
1346				}
1347			}
1348
1349			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1350
1351			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1352			if (error)
1353				goto bad;
1354			outbuf += L_CMSG_HDRSZ;
1355
1356			error = copyout(data, outbuf, datalen);
1357			if (error)
1358				goto bad;
1359
1360			outbuf += LINUX_CMSG_ALIGN(datalen);
1361			outlen += LINUX_CMSG_LEN(datalen);
1362
1363			cm = CMSG_NXTHDR(&msg, cm);
1364		}
1365	}
1366
1367out:
1368	linux_msg.msg_controllen = outlen;
1369	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1370
1371bad:
1372	free(iov, M_IOV);
1373	m_freem(control);
1374	free(linux_cmsg, M_LINUX);
1375
1376	return (error);
1377}
1378
1379int
1380linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1381{
1382	struct shutdown_args /* {
1383		int s;
1384		int how;
1385	} */ bsd_args;
1386
1387	bsd_args.s = args->s;
1388	bsd_args.how = args->how;
1389	return (sys_shutdown(td, &bsd_args));
1390}
1391
1392int
1393linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1394{
1395	struct setsockopt_args /* {
1396		int s;
1397		int level;
1398		int name;
1399		caddr_t val;
1400		int valsize;
1401	} */ bsd_args;
1402	l_timeval linux_tv;
1403	struct timeval tv;
1404	int error, name;
1405
1406	bsd_args.s = args->s;
1407	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1408	switch (bsd_args.level) {
1409	case SOL_SOCKET:
1410		name = linux_to_bsd_so_sockopt(args->optname);
1411		switch (name) {
1412		case SO_RCVTIMEO:
1413			/* FALLTHROUGH */
1414		case SO_SNDTIMEO:
1415			error = copyin(PTRIN(args->optval), &linux_tv,
1416			    sizeof(linux_tv));
1417			if (error)
1418				return (error);
1419			tv.tv_sec = linux_tv.tv_sec;
1420			tv.tv_usec = linux_tv.tv_usec;
1421			return (kern_setsockopt(td, args->s, bsd_args.level,
1422			    name, &tv, UIO_SYSSPACE, sizeof(tv)));
1423			/* NOTREACHED */
1424			break;
1425		default:
1426			break;
1427		}
1428		break;
1429	case IPPROTO_IP:
1430		name = linux_to_bsd_ip_sockopt(args->optname);
1431		break;
1432	case IPPROTO_TCP:
1433		name = linux_to_bsd_tcp_sockopt(args->optname);
1434		break;
1435	default:
1436		name = -1;
1437		break;
1438	}
1439	if (name == -1)
1440		return (ENOPROTOOPT);
1441
1442	bsd_args.name = name;
1443	bsd_args.val = PTRIN(args->optval);
1444	bsd_args.valsize = args->optlen;
1445
1446	if (name == IPV6_NEXTHOP) {
1447		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1448			bsd_args.valsize);
1449		error = sys_setsockopt(td, &bsd_args);
1450		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1451	} else
1452		error = sys_setsockopt(td, &bsd_args);
1453
1454	return (error);
1455}
1456
1457int
1458linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1459{
1460	struct getsockopt_args /* {
1461		int s;
1462		int level;
1463		int name;
1464		caddr_t val;
1465		int *avalsize;
1466	} */ bsd_args;
1467	l_timeval linux_tv;
1468	struct timeval tv;
1469	socklen_t tv_len, xulen;
1470	struct xucred xu;
1471	struct l_ucred lxu;
1472	int error, name;
1473
1474	bsd_args.s = args->s;
1475	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1476	switch (bsd_args.level) {
1477	case SOL_SOCKET:
1478		name = linux_to_bsd_so_sockopt(args->optname);
1479		switch (name) {
1480		case SO_RCVTIMEO:
1481			/* FALLTHROUGH */
1482		case SO_SNDTIMEO:
1483			tv_len = sizeof(tv);
1484			error = kern_getsockopt(td, args->s, bsd_args.level,
1485			    name, &tv, UIO_SYSSPACE, &tv_len);
1486			if (error)
1487				return (error);
1488			linux_tv.tv_sec = tv.tv_sec;
1489			linux_tv.tv_usec = tv.tv_usec;
1490			return (copyout(&linux_tv, PTRIN(args->optval),
1491			    sizeof(linux_tv)));
1492			/* NOTREACHED */
1493			break;
1494		case LOCAL_PEERCRED:
1495			if (args->optlen != sizeof(lxu))
1496				return (EINVAL);
1497			xulen = sizeof(xu);
1498			error = kern_getsockopt(td, args->s, bsd_args.level,
1499			    name, &xu, UIO_SYSSPACE, &xulen);
1500			if (error)
1501				return (error);
1502			/*
1503			 * XXX Use 0 for pid as the FreeBSD does not cache peer pid.
1504			 */
1505			lxu.pid = 0;
1506			lxu.uid = xu.cr_uid;
1507			lxu.gid = xu.cr_gid;
1508			return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu)));
1509			/* NOTREACHED */
1510			break;
1511		default:
1512			break;
1513		}
1514		break;
1515	case IPPROTO_IP:
1516		name = linux_to_bsd_ip_sockopt(args->optname);
1517		break;
1518	case IPPROTO_TCP:
1519		name = linux_to_bsd_tcp_sockopt(args->optname);
1520		break;
1521	default:
1522		name = -1;
1523		break;
1524	}
1525	if (name == -1)
1526		return (EINVAL);
1527
1528	bsd_args.name = name;
1529	bsd_args.val = PTRIN(args->optval);
1530	bsd_args.avalsize = PTRIN(args->optlen);
1531
1532	if (name == IPV6_NEXTHOP) {
1533		error = sys_getsockopt(td, &bsd_args);
1534		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1535	} else
1536		error = sys_getsockopt(td, &bsd_args);
1537
1538	return (error);
1539}
1540
1541#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1542
1543/* Argument list sizes for linux_socketcall */
1544
1545#define LINUX_AL(x) ((x) * sizeof(l_ulong))
1546
1547static const unsigned char lxs_args[] = {
1548	LINUX_AL(0) /* unused*/,	LINUX_AL(3) /* socket */,
1549	LINUX_AL(3) /* bind */,		LINUX_AL(3) /* connect */,
1550	LINUX_AL(2) /* listen */,	LINUX_AL(3) /* accept */,
1551	LINUX_AL(3) /* getsockname */,	LINUX_AL(3) /* getpeername */,
1552	LINUX_AL(4) /* socketpair */,	LINUX_AL(4) /* send */,
1553	LINUX_AL(4) /* recv */,		LINUX_AL(6) /* sendto */,
1554	LINUX_AL(6) /* recvfrom */,	LINUX_AL(2) /* shutdown */,
1555	LINUX_AL(5) /* setsockopt */,	LINUX_AL(5) /* getsockopt */,
1556	LINUX_AL(3) /* sendmsg */,	LINUX_AL(3) /* recvmsg */,
1557	LINUX_AL(4) /* accept4 */
1558};
1559
1560#define	LINUX_AL_SIZE	sizeof(lxs_args) / sizeof(lxs_args[0]) - 1
1561
1562int
1563linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1564{
1565	l_ulong a[6];
1566	void *arg;
1567	int error;
1568
1569	if (args->what < LINUX_SOCKET || args->what > LINUX_AL_SIZE)
1570		return (EINVAL);
1571	error = copyin(PTRIN(args->args), a, lxs_args[args->what]);
1572	if (error)
1573		return (error);
1574
1575	arg = a;
1576	switch (args->what) {
1577	case LINUX_SOCKET:
1578		return (linux_socket(td, arg));
1579	case LINUX_BIND:
1580		return (linux_bind(td, arg));
1581	case LINUX_CONNECT:
1582		return (linux_connect(td, arg));
1583	case LINUX_LISTEN:
1584		return (linux_listen(td, arg));
1585	case LINUX_ACCEPT:
1586		return (linux_accept(td, arg));
1587	case LINUX_GETSOCKNAME:
1588		return (linux_getsockname(td, arg));
1589	case LINUX_GETPEERNAME:
1590		return (linux_getpeername(td, arg));
1591	case LINUX_SOCKETPAIR:
1592		return (linux_socketpair(td, arg));
1593	case LINUX_SEND:
1594		return (linux_send(td, arg));
1595	case LINUX_RECV:
1596		return (linux_recv(td, arg));
1597	case LINUX_SENDTO:
1598		return (linux_sendto(td, arg));
1599	case LINUX_RECVFROM:
1600		return (linux_recvfrom(td, arg));
1601	case LINUX_SHUTDOWN:
1602		return (linux_shutdown(td, arg));
1603	case LINUX_SETSOCKOPT:
1604		return (linux_setsockopt(td, arg));
1605	case LINUX_GETSOCKOPT:
1606		return (linux_getsockopt(td, arg));
1607	case LINUX_SENDMSG:
1608		return (linux_sendmsg(td, arg));
1609	case LINUX_RECVMSG:
1610		return (linux_recvmsg(td, arg));
1611	case LINUX_ACCEPT4:
1612		return (linux_accept4(td, arg));
1613	}
1614
1615	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1616	return (ENOSYS);
1617}
1618#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1619