linux_socket.c revision 243882
1/*-
2 * Copyright (c) 1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 243882 2012-12-05 08:04:20Z glebius $");
31
32/* XXX we use functions that might not exist. */
33#include "opt_compat.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/proc.h>
38#include <sys/systm.h>
39#include <sys/sysproto.h>
40#include <sys/capability.h>
41#include <sys/fcntl.h>
42#include <sys/file.h>
43#include <sys/limits.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mutex.h>
47#include <sys/mbuf.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/syscallsubr.h>
51#include <sys/uio.h>
52#include <sys/syslog.h>
53#include <sys/un.h>
54
55#include <net/if.h>
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#ifdef INET6
60#include <netinet/ip6.h>
61#include <netinet6/ip6_var.h>
62#include <netinet6/in6_var.h>
63#endif
64
65#ifdef COMPAT_LINUX32
66#include <machine/../linux32/linux.h>
67#include <machine/../linux32/linux32_proto.h>
68#else
69#include <machine/../linux/linux.h>
70#include <machine/../linux/linux_proto.h>
71#endif
72#include <compat/linux/linux_socket.h>
73#include <compat/linux/linux_util.h>
74
75static int linux_to_bsd_domain(int);
76
77/*
78 * Reads a linux sockaddr and does any necessary translation.
79 * Linux sockaddrs don't have a length field, only a family.
80 * Copy the osockaddr structure pointed to by osa to kernel, adjust
81 * family and convert to sockaddr.
82 */
83static int
84linux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int salen)
85{
86	struct sockaddr *sa;
87	struct osockaddr *kosa;
88#ifdef INET6
89	struct sockaddr_in6 *sin6;
90	int oldv6size;
91#endif
92	char *name;
93	int bdom, error, hdrlen, namelen;
94
95	if (salen < 2 || salen > UCHAR_MAX || !osa)
96		return (EINVAL);
97
98#ifdef INET6
99	oldv6size = 0;
100	/*
101	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
102	 * if it's a v4-mapped address, so reserve the proper space
103	 * for it.
104	 */
105	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
106		salen += sizeof(uint32_t);
107		oldv6size = 1;
108	}
109#endif
110
111	kosa = malloc(salen, M_SONAME, M_WAITOK);
112
113	if ((error = copyin(osa, kosa, salen)))
114		goto out;
115
116	bdom = linux_to_bsd_domain(kosa->sa_family);
117	if (bdom == -1) {
118		error = EAFNOSUPPORT;
119		goto out;
120	}
121
122#ifdef INET6
123	/*
124	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
125	 * which lacks the scope id compared with RFC2553 one. If we detect
126	 * the situation, reject the address and write a message to system log.
127	 *
128	 * Still accept addresses for which the scope id is not used.
129	 */
130	if (oldv6size) {
131		if (bdom == AF_INET6) {
132			sin6 = (struct sockaddr_in6 *)kosa;
133			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
134			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
135			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
136			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
137			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
138			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
139				sin6->sin6_scope_id = 0;
140			} else {
141				log(LOG_DEBUG,
142				    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
143				error = EINVAL;
144				goto out;
145			}
146		} else
147			salen -= sizeof(uint32_t);
148	}
149#endif
150	if (bdom == AF_INET) {
151		if (salen < sizeof(struct sockaddr_in)) {
152			error = EINVAL;
153			goto out;
154		}
155		salen = sizeof(struct sockaddr_in);
156	}
157
158	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
159		hdrlen = offsetof(struct sockaddr_un, sun_path);
160		name = ((struct sockaddr_un *)kosa)->sun_path;
161		if (*name == '\0') {
162			/*
163		 	 * Linux abstract namespace starts with a NULL byte.
164			 * XXX We do not support abstract namespace yet.
165			 */
166			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
167		} else
168			namelen = strnlen(name, salen - hdrlen);
169		salen = hdrlen + namelen;
170		if (salen > sizeof(struct sockaddr_un)) {
171			error = ENAMETOOLONG;
172			goto out;
173		}
174	}
175
176	sa = (struct sockaddr *)kosa;
177	sa->sa_family = bdom;
178	sa->sa_len = salen;
179
180	*sap = sa;
181	return (0);
182
183out:
184	free(kosa, M_SONAME);
185	return (error);
186}
187
188static int
189linux_to_bsd_domain(int domain)
190{
191
192	switch (domain) {
193	case LINUX_AF_UNSPEC:
194		return (AF_UNSPEC);
195	case LINUX_AF_UNIX:
196		return (AF_LOCAL);
197	case LINUX_AF_INET:
198		return (AF_INET);
199	case LINUX_AF_INET6:
200		return (AF_INET6);
201	case LINUX_AF_AX25:
202		return (AF_CCITT);
203	case LINUX_AF_IPX:
204		return (AF_IPX);
205	case LINUX_AF_APPLETALK:
206		return (AF_APPLETALK);
207	}
208	return (-1);
209}
210
211static int
212bsd_to_linux_domain(int domain)
213{
214
215	switch (domain) {
216	case AF_UNSPEC:
217		return (LINUX_AF_UNSPEC);
218	case AF_LOCAL:
219		return (LINUX_AF_UNIX);
220	case AF_INET:
221		return (LINUX_AF_INET);
222	case AF_INET6:
223		return (LINUX_AF_INET6);
224	case AF_CCITT:
225		return (LINUX_AF_AX25);
226	case AF_IPX:
227		return (LINUX_AF_IPX);
228	case AF_APPLETALK:
229		return (LINUX_AF_APPLETALK);
230	}
231	return (-1);
232}
233
234static int
235linux_to_bsd_sockopt_level(int level)
236{
237
238	switch (level) {
239	case LINUX_SOL_SOCKET:
240		return (SOL_SOCKET);
241	}
242	return (level);
243}
244
245static int
246bsd_to_linux_sockopt_level(int level)
247{
248
249	switch (level) {
250	case SOL_SOCKET:
251		return (LINUX_SOL_SOCKET);
252	}
253	return (level);
254}
255
256static int
257linux_to_bsd_ip_sockopt(int opt)
258{
259
260	switch (opt) {
261	case LINUX_IP_TOS:
262		return (IP_TOS);
263	case LINUX_IP_TTL:
264		return (IP_TTL);
265	case LINUX_IP_OPTIONS:
266		return (IP_OPTIONS);
267	case LINUX_IP_MULTICAST_IF:
268		return (IP_MULTICAST_IF);
269	case LINUX_IP_MULTICAST_TTL:
270		return (IP_MULTICAST_TTL);
271	case LINUX_IP_MULTICAST_LOOP:
272		return (IP_MULTICAST_LOOP);
273	case LINUX_IP_ADD_MEMBERSHIP:
274		return (IP_ADD_MEMBERSHIP);
275	case LINUX_IP_DROP_MEMBERSHIP:
276		return (IP_DROP_MEMBERSHIP);
277	case LINUX_IP_HDRINCL:
278		return (IP_HDRINCL);
279	}
280	return (-1);
281}
282
283static int
284linux_to_bsd_so_sockopt(int opt)
285{
286
287	switch (opt) {
288	case LINUX_SO_DEBUG:
289		return (SO_DEBUG);
290	case LINUX_SO_REUSEADDR:
291		return (SO_REUSEADDR);
292	case LINUX_SO_TYPE:
293		return (SO_TYPE);
294	case LINUX_SO_ERROR:
295		return (SO_ERROR);
296	case LINUX_SO_DONTROUTE:
297		return (SO_DONTROUTE);
298	case LINUX_SO_BROADCAST:
299		return (SO_BROADCAST);
300	case LINUX_SO_SNDBUF:
301		return (SO_SNDBUF);
302	case LINUX_SO_RCVBUF:
303		return (SO_RCVBUF);
304	case LINUX_SO_KEEPALIVE:
305		return (SO_KEEPALIVE);
306	case LINUX_SO_OOBINLINE:
307		return (SO_OOBINLINE);
308	case LINUX_SO_LINGER:
309		return (SO_LINGER);
310	case LINUX_SO_PEERCRED:
311		return (LOCAL_PEERCRED);
312	case LINUX_SO_RCVLOWAT:
313		return (SO_RCVLOWAT);
314	case LINUX_SO_SNDLOWAT:
315		return (SO_SNDLOWAT);
316	case LINUX_SO_RCVTIMEO:
317		return (SO_RCVTIMEO);
318	case LINUX_SO_SNDTIMEO:
319		return (SO_SNDTIMEO);
320	case LINUX_SO_TIMESTAMP:
321		return (SO_TIMESTAMP);
322	case LINUX_SO_ACCEPTCONN:
323		return (SO_ACCEPTCONN);
324	}
325	return (-1);
326}
327
328static int
329linux_to_bsd_msg_flags(int flags)
330{
331	int ret_flags = 0;
332
333	if (flags & LINUX_MSG_OOB)
334		ret_flags |= MSG_OOB;
335	if (flags & LINUX_MSG_PEEK)
336		ret_flags |= MSG_PEEK;
337	if (flags & LINUX_MSG_DONTROUTE)
338		ret_flags |= MSG_DONTROUTE;
339	if (flags & LINUX_MSG_CTRUNC)
340		ret_flags |= MSG_CTRUNC;
341	if (flags & LINUX_MSG_TRUNC)
342		ret_flags |= MSG_TRUNC;
343	if (flags & LINUX_MSG_DONTWAIT)
344		ret_flags |= MSG_DONTWAIT;
345	if (flags & LINUX_MSG_EOR)
346		ret_flags |= MSG_EOR;
347	if (flags & LINUX_MSG_WAITALL)
348		ret_flags |= MSG_WAITALL;
349	if (flags & LINUX_MSG_NOSIGNAL)
350		ret_flags |= MSG_NOSIGNAL;
351#if 0 /* not handled */
352	if (flags & LINUX_MSG_PROXY)
353		;
354	if (flags & LINUX_MSG_FIN)
355		;
356	if (flags & LINUX_MSG_SYN)
357		;
358	if (flags & LINUX_MSG_CONFIRM)
359		;
360	if (flags & LINUX_MSG_RST)
361		;
362	if (flags & LINUX_MSG_ERRQUEUE)
363		;
364#endif
365	return ret_flags;
366}
367
368/*
369* If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
370* native syscall will fault.  Thus, we don't really need to check the
371* return values for these functions.
372*/
373
374static int
375bsd_to_linux_sockaddr(struct sockaddr *arg)
376{
377	struct sockaddr sa;
378	size_t sa_len = sizeof(struct sockaddr);
379	int error;
380
381	if ((error = copyin(arg, &sa, sa_len)))
382		return (error);
383
384	*(u_short *)&sa = sa.sa_family;
385
386	error = copyout(&sa, arg, sa_len);
387
388	return (error);
389}
390
391static int
392linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
393{
394	struct sockaddr sa;
395	size_t sa_len = sizeof(struct sockaddr);
396	int error;
397
398	if ((error = copyin(arg, &sa, sa_len)))
399		return (error);
400
401	sa.sa_family = *(sa_family_t *)&sa;
402	sa.sa_len = len;
403
404	error = copyout(&sa, arg, sa_len);
405
406	return (error);
407}
408
409
410static int
411linux_sa_put(struct osockaddr *osa)
412{
413	struct osockaddr sa;
414	int error, bdom;
415
416	/*
417	 * Only read/write the osockaddr family part, the rest is
418	 * not changed.
419	 */
420	error = copyin(osa, &sa, sizeof(sa.sa_family));
421	if (error)
422		return (error);
423
424	bdom = bsd_to_linux_domain(sa.sa_family);
425	if (bdom == -1)
426		return (EINVAL);
427
428	sa.sa_family = bdom;
429	error = copyout(&sa, osa, sizeof(sa.sa_family));
430	if (error)
431		return (error);
432
433	return (0);
434}
435
436static int
437linux_to_bsd_cmsg_type(int cmsg_type)
438{
439
440	switch (cmsg_type) {
441	case LINUX_SCM_RIGHTS:
442		return (SCM_RIGHTS);
443	case LINUX_SCM_CREDENTIALS:
444		return (SCM_CREDS);
445	}
446	return (-1);
447}
448
449static int
450bsd_to_linux_cmsg_type(int cmsg_type)
451{
452
453	switch (cmsg_type) {
454	case SCM_RIGHTS:
455		return (LINUX_SCM_RIGHTS);
456	case SCM_CREDS:
457		return (LINUX_SCM_CREDENTIALS);
458	}
459	return (-1);
460}
461
462static int
463linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
464{
465	if (lhdr->msg_controllen > INT_MAX)
466		return (ENOBUFS);
467
468	bhdr->msg_name		= PTRIN(lhdr->msg_name);
469	bhdr->msg_namelen	= lhdr->msg_namelen;
470	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
471	bhdr->msg_iovlen	= lhdr->msg_iovlen;
472	bhdr->msg_control	= PTRIN(lhdr->msg_control);
473
474	/*
475	 * msg_controllen is skipped since BSD and LINUX control messages
476	 * are potentially different sizes (e.g. the cred structure used
477	 * by SCM_CREDS is different between the two operating system).
478	 *
479	 * The caller can set it (if necessary) after converting all the
480	 * control messages.
481	 */
482
483	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
484	return (0);
485}
486
487static int
488bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
489{
490	lhdr->msg_name		= PTROUT(bhdr->msg_name);
491	lhdr->msg_namelen	= bhdr->msg_namelen;
492	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
493	lhdr->msg_iovlen	= bhdr->msg_iovlen;
494	lhdr->msg_control	= PTROUT(bhdr->msg_control);
495
496	/*
497	 * msg_controllen is skipped since BSD and LINUX control messages
498	 * are potentially different sizes (e.g. the cred structure used
499	 * by SCM_CREDS is different between the two operating system).
500	 *
501	 * The caller can set it (if necessary) after converting all the
502	 * control messages.
503	 */
504
505	/* msg_flags skipped */
506	return (0);
507}
508
509static int
510linux_set_socket_flags(struct thread *td, int s, int flags)
511{
512	int error;
513
514	if (flags & LINUX_SOCK_NONBLOCK) {
515		error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK);
516		if (error)
517			return (error);
518	}
519	if (flags & LINUX_SOCK_CLOEXEC) {
520		error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC);
521		if (error)
522			return (error);
523	}
524	return (0);
525}
526
527static int
528linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
529    struct mbuf *control, enum uio_seg segflg)
530{
531	struct sockaddr *to;
532	int error;
533
534	if (mp->msg_name != NULL) {
535		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
536		if (error)
537			return (error);
538		mp->msg_name = to;
539	} else
540		to = NULL;
541
542	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
543	    segflg);
544
545	if (to)
546		free(to, M_SONAME);
547	return (error);
548}
549
550/* Return 0 if IP_HDRINCL is set for the given socket. */
551static int
552linux_check_hdrincl(struct thread *td, int s)
553{
554	int error, optval, size_val;
555
556	size_val = sizeof(optval);
557	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
558	    &optval, UIO_SYSSPACE, &size_val);
559	if (error)
560		return (error);
561
562	return (optval == 0);
563}
564
565struct linux_sendto_args {
566	int s;
567	l_uintptr_t msg;
568	int len;
569	int flags;
570	l_uintptr_t to;
571	int tolen;
572};
573
574/*
575 * Updated sendto() when IP_HDRINCL is set:
576 * tweak endian-dependent fields in the IP packet.
577 */
578static int
579linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
580{
581/*
582 * linux_ip_copysize defines how many bytes we should copy
583 * from the beginning of the IP packet before we customize it for BSD.
584 * It should include all the fields we modify (ip_len and ip_off).
585 */
586#define linux_ip_copysize	8
587
588	struct ip *packet;
589	struct msghdr msg;
590	struct iovec aiov[1];
591	int error;
592
593	/* Check that the packet isn't too big or too small. */
594	if (linux_args->len < linux_ip_copysize ||
595	    linux_args->len > IP_MAXPACKET)
596		return (EINVAL);
597
598	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
599
600	/* Make kernel copy of the packet to be sent */
601	if ((error = copyin(PTRIN(linux_args->msg), packet,
602	    linux_args->len)))
603		goto goout;
604
605	/* Convert fields from Linux to BSD raw IP socket format */
606	packet->ip_len = linux_args->len;
607	packet->ip_off = ntohs(packet->ip_off);
608
609	/* Prepare the msghdr and iovec structures describing the new packet */
610	msg.msg_name = PTRIN(linux_args->to);
611	msg.msg_namelen = linux_args->tolen;
612	msg.msg_iov = aiov;
613	msg.msg_iovlen = 1;
614	msg.msg_control = NULL;
615	msg.msg_flags = 0;
616	aiov[0].iov_base = (char *)packet;
617	aiov[0].iov_len = linux_args->len;
618	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
619	    NULL, UIO_SYSSPACE);
620goout:
621	free(packet, M_TEMP);
622	return (error);
623}
624
625struct linux_socket_args {
626	int domain;
627	int type;
628	int protocol;
629};
630
631static int
632linux_socket(struct thread *td, struct linux_socket_args *args)
633{
634	struct socket_args /* {
635		int domain;
636		int type;
637		int protocol;
638	} */ bsd_args;
639	int retval_socket, socket_flags;
640
641	bsd_args.protocol = args->protocol;
642	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
643	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
644		return (EINVAL);
645	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
646	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
647		return (EINVAL);
648	bsd_args.domain = linux_to_bsd_domain(args->domain);
649	if (bsd_args.domain == -1)
650		return (EAFNOSUPPORT);
651
652	retval_socket = sys_socket(td, &bsd_args);
653	if (retval_socket)
654		return (retval_socket);
655
656	retval_socket = linux_set_socket_flags(td, td->td_retval[0],
657	    socket_flags);
658	if (retval_socket) {
659		(void)kern_close(td, td->td_retval[0]);
660		goto out;
661	}
662
663	if (bsd_args.type == SOCK_RAW
664	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
665	    && bsd_args.domain == PF_INET) {
666		/* It's a raw IP socket: set the IP_HDRINCL option. */
667		int hdrincl;
668
669		hdrincl = 1;
670		/* We ignore any error returned by kern_setsockopt() */
671		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
672		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
673	}
674#ifdef INET6
675	/*
676	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default
677	 * and some apps depend on this. So, set V6ONLY to 0 for Linux apps.
678	 * For simplicity we do this unconditionally of the net.inet6.ip6.v6only
679	 * sysctl value.
680	 */
681	if (bsd_args.domain == PF_INET6) {
682		int v6only;
683
684		v6only = 0;
685		/* We ignore any error returned by setsockopt() */
686		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
687		    &v6only, UIO_SYSSPACE, sizeof(v6only));
688	}
689#endif
690
691out:
692	return (retval_socket);
693}
694
695struct linux_bind_args {
696	int s;
697	l_uintptr_t name;
698	int namelen;
699};
700
701static int
702linux_bind(struct thread *td, struct linux_bind_args *args)
703{
704	struct sockaddr *sa;
705	int error;
706
707	error = linux_getsockaddr(&sa, PTRIN(args->name),
708	    args->namelen);
709	if (error)
710		return (error);
711
712	error = kern_bind(td, args->s, sa);
713	free(sa, M_SONAME);
714	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
715	   	return (EINVAL);
716	return (error);
717}
718
719struct linux_connect_args {
720	int s;
721	l_uintptr_t name;
722	int namelen;
723};
724int linux_connect(struct thread *, struct linux_connect_args *);
725
726int
727linux_connect(struct thread *td, struct linux_connect_args *args)
728{
729	struct socket *so;
730	struct sockaddr *sa;
731	u_int fflag;
732	int error;
733
734	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
735	    args->namelen);
736	if (error)
737		return (error);
738
739	error = kern_connect(td, args->s, sa);
740	free(sa, M_SONAME);
741	if (error != EISCONN)
742		return (error);
743
744	/*
745	 * Linux doesn't return EISCONN the first time it occurs,
746	 * when on a non-blocking socket. Instead it returns the
747	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
748	 *
749	 * XXXRW: Instead of using fgetsock(), check that it is a
750	 * socket and use the file descriptor reference instead of
751	 * creating a new one.
752	 */
753	error = fgetsock(td, args->s, CAP_CONNECT, &so, &fflag);
754	if (error == 0) {
755		error = EISCONN;
756		if (fflag & FNONBLOCK) {
757			SOCK_LOCK(so);
758			if (so->so_emuldata == 0)
759				error = so->so_error;
760			so->so_emuldata = (void *)1;
761			SOCK_UNLOCK(so);
762		}
763		fputsock(so);
764	}
765	return (error);
766}
767
768struct linux_listen_args {
769	int s;
770	int backlog;
771};
772
773static int
774linux_listen(struct thread *td, struct linux_listen_args *args)
775{
776	struct listen_args /* {
777		int s;
778		int backlog;
779	} */ bsd_args;
780
781	bsd_args.s = args->s;
782	bsd_args.backlog = args->backlog;
783	return (sys_listen(td, &bsd_args));
784}
785
786static int
787linux_accept_common(struct thread *td, int s, l_uintptr_t addr,
788    l_uintptr_t namelen, int flags)
789{
790	struct accept_args /* {
791		int	s;
792		struct sockaddr * __restrict name;
793		socklen_t * __restrict anamelen;
794	} */ bsd_args;
795	int error;
796
797	if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
798		return (EINVAL);
799
800	bsd_args.s = s;
801	/* XXX: */
802	bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr);
803	bsd_args.anamelen = PTRIN(namelen);/* XXX */
804	error = sys_accept(td, &bsd_args);
805	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
806	if (error) {
807		if (error == EFAULT && namelen != sizeof(struct sockaddr_in))
808			return (EINVAL);
809		return (error);
810	}
811
812	/*
813	 * linux appears not to copy flags from the parent socket to the
814	 * accepted one, so we must clear the flags in the new descriptor
815	 * and apply the requested flags.
816	 */
817	error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0);
818	if (error)
819		goto out;
820	error = linux_set_socket_flags(td, td->td_retval[0], flags);
821	if (error)
822		goto out;
823	if (addr)
824		error = linux_sa_put(PTRIN(addr));
825
826out:
827	if (error) {
828		(void)kern_close(td, td->td_retval[0]);
829		td->td_retval[0] = 0;
830	}
831	return (error);
832}
833
834struct linux_accept_args {
835	int s;
836	l_uintptr_t addr;
837	l_uintptr_t namelen;
838};
839
840static int
841linux_accept(struct thread *td, struct linux_accept_args *args)
842{
843
844	return (linux_accept_common(td, args->s, args->addr,
845	    args->namelen, 0));
846}
847
848struct linux_accept4_args {
849	int s;
850	l_uintptr_t addr;
851	l_uintptr_t namelen;
852	int flags;
853};
854
855static int
856linux_accept4(struct thread *td, struct linux_accept4_args *args)
857{
858
859	return (linux_accept_common(td, args->s, args->addr,
860	    args->namelen, args->flags));
861}
862
863struct linux_getsockname_args {
864	int s;
865	l_uintptr_t addr;
866	l_uintptr_t namelen;
867};
868
869static int
870linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
871{
872	struct getsockname_args /* {
873		int	fdes;
874		struct sockaddr * __restrict asa;
875		socklen_t * __restrict alen;
876	} */ bsd_args;
877	int error;
878
879	bsd_args.fdes = args->s;
880	/* XXX: */
881	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
882	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
883	error = sys_getsockname(td, &bsd_args);
884	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
885	if (error)
886		return (error);
887	error = linux_sa_put(PTRIN(args->addr));
888	if (error)
889		return (error);
890	return (0);
891}
892
893struct linux_getpeername_args {
894	int s;
895	l_uintptr_t addr;
896	l_uintptr_t namelen;
897};
898
899static int
900linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
901{
902	struct getpeername_args /* {
903		int fdes;
904		caddr_t asa;
905		int *alen;
906	} */ bsd_args;
907	int error;
908
909	bsd_args.fdes = args->s;
910	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
911	bsd_args.alen = (int *)PTRIN(args->namelen);
912	error = sys_getpeername(td, &bsd_args);
913	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
914	if (error)
915		return (error);
916	error = linux_sa_put(PTRIN(args->addr));
917	if (error)
918		return (error);
919	return (0);
920}
921
922struct linux_socketpair_args {
923	int domain;
924	int type;
925	int protocol;
926	l_uintptr_t rsv;
927};
928
929static int
930linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
931{
932	struct socketpair_args /* {
933		int domain;
934		int type;
935		int protocol;
936		int *rsv;
937	} */ bsd_args;
938	int error, socket_flags;
939	int sv[2];
940
941	bsd_args.domain = linux_to_bsd_domain(args->domain);
942	if (bsd_args.domain != PF_LOCAL)
943		return (EAFNOSUPPORT);
944
945	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
946	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
947		return (EINVAL);
948	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
949	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
950		return (EINVAL);
951
952	if (args->protocol != 0 && args->protocol != PF_UNIX)
953
954		/*
955		 * Use of PF_UNIX as protocol argument is not right,
956		 * but Linux does it.
957		 * Do not map PF_UNIX as its Linux value is identical
958		 * to FreeBSD one.
959		 */
960		return (EPROTONOSUPPORT);
961	else
962		bsd_args.protocol = 0;
963	bsd_args.rsv = (int *)PTRIN(args->rsv);
964	error = kern_socketpair(td, bsd_args.domain, bsd_args.type,
965	    bsd_args.protocol, sv);
966	if (error)
967		return (error);
968	error = linux_set_socket_flags(td, sv[0], socket_flags);
969	if (error)
970		goto out;
971	error = linux_set_socket_flags(td, sv[1], socket_flags);
972	if (error)
973		goto out;
974
975	error = copyout(sv, bsd_args.rsv, 2 * sizeof(int));
976
977out:
978	if (error) {
979		(void)kern_close(td, sv[0]);
980		(void)kern_close(td, sv[1]);
981	}
982	return (error);
983}
984
985struct linux_send_args {
986	int s;
987	l_uintptr_t msg;
988	int len;
989	int flags;
990};
991
992static int
993linux_send(struct thread *td, struct linux_send_args *args)
994{
995	struct sendto_args /* {
996		int s;
997		caddr_t buf;
998		int len;
999		int flags;
1000		caddr_t to;
1001		int tolen;
1002	} */ bsd_args;
1003
1004	bsd_args.s = args->s;
1005	bsd_args.buf = (caddr_t)PTRIN(args->msg);
1006	bsd_args.len = args->len;
1007	bsd_args.flags = args->flags;
1008	bsd_args.to = NULL;
1009	bsd_args.tolen = 0;
1010	return sys_sendto(td, &bsd_args);
1011}
1012
1013struct linux_recv_args {
1014	int s;
1015	l_uintptr_t msg;
1016	int len;
1017	int flags;
1018};
1019
1020static int
1021linux_recv(struct thread *td, struct linux_recv_args *args)
1022{
1023	struct recvfrom_args /* {
1024		int s;
1025		caddr_t buf;
1026		int len;
1027		int flags;
1028		struct sockaddr *from;
1029		socklen_t fromlenaddr;
1030	} */ bsd_args;
1031
1032	bsd_args.s = args->s;
1033	bsd_args.buf = (caddr_t)PTRIN(args->msg);
1034	bsd_args.len = args->len;
1035	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1036	bsd_args.from = NULL;
1037	bsd_args.fromlenaddr = 0;
1038	return (sys_recvfrom(td, &bsd_args));
1039}
1040
1041static int
1042linux_sendto(struct thread *td, struct linux_sendto_args *args)
1043{
1044	struct msghdr msg;
1045	struct iovec aiov;
1046	int error;
1047
1048	if (linux_check_hdrincl(td, args->s) == 0)
1049		/* IP_HDRINCL set, tweak the packet before sending */
1050		return (linux_sendto_hdrincl(td, args));
1051
1052	msg.msg_name = PTRIN(args->to);
1053	msg.msg_namelen = args->tolen;
1054	msg.msg_iov = &aiov;
1055	msg.msg_iovlen = 1;
1056	msg.msg_control = NULL;
1057	msg.msg_flags = 0;
1058	aiov.iov_base = PTRIN(args->msg);
1059	aiov.iov_len = args->len;
1060	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
1061	    UIO_USERSPACE);
1062	return (error);
1063}
1064
1065struct linux_recvfrom_args {
1066	int s;
1067	l_uintptr_t buf;
1068	int len;
1069	int flags;
1070	l_uintptr_t from;
1071	l_uintptr_t fromlen;
1072};
1073
1074static int
1075linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
1076{
1077	struct recvfrom_args /* {
1078		int	s;
1079		caddr_t	buf;
1080		size_t	len;
1081		int	flags;
1082		struct sockaddr * __restrict from;
1083		socklen_t * __restrict fromlenaddr;
1084	} */ bsd_args;
1085	size_t len;
1086	int error;
1087
1088	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
1089		return (error);
1090
1091	bsd_args.s = args->s;
1092	bsd_args.buf = PTRIN(args->buf);
1093	bsd_args.len = args->len;
1094	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1095	/* XXX: */
1096	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
1097	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
1098
1099	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
1100	error = sys_recvfrom(td, &bsd_args);
1101	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
1102
1103	if (error)
1104		return (error);
1105	if (args->from) {
1106		error = linux_sa_put((struct osockaddr *)
1107		    PTRIN(args->from));
1108		if (error)
1109			return (error);
1110	}
1111	return (0);
1112}
1113
1114struct linux_sendmsg_args {
1115	int s;
1116	l_uintptr_t msg;
1117	int flags;
1118};
1119
1120static int
1121linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1122{
1123	struct cmsghdr *cmsg;
1124	struct cmsgcred cmcred;
1125	struct mbuf *control;
1126	struct msghdr msg;
1127	struct l_cmsghdr linux_cmsg;
1128	struct l_cmsghdr *ptr_cmsg;
1129	struct l_msghdr linux_msg;
1130	struct iovec *iov;
1131	socklen_t datalen;
1132	struct sockaddr *sa;
1133	sa_family_t sa_family;
1134	void *data;
1135	int error;
1136
1137	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1138	if (error)
1139		return (error);
1140
1141	/*
1142	 * Some Linux applications (ping) define a non-NULL control data
1143	 * pointer, but a msg_controllen of 0, which is not allowed in the
1144	 * FreeBSD system call interface.  NULL the msg_control pointer in
1145	 * order to handle this case.  This should be checked, but allows the
1146	 * Linux ping to work.
1147	 */
1148	if (PTRIN(linux_msg.msg_control) != NULL && linux_msg.msg_controllen == 0)
1149		linux_msg.msg_control = PTROUT(NULL);
1150
1151	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1152	if (error)
1153		return (error);
1154
1155#ifdef COMPAT_LINUX32
1156	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1157	    &iov, EMSGSIZE);
1158#else
1159	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1160#endif
1161	if (error)
1162		return (error);
1163
1164	control = NULL;
1165	cmsg = NULL;
1166
1167	if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) {
1168		error = kern_getsockname(td, args->s, &sa, &datalen);
1169		if (error)
1170			goto bad;
1171		sa_family = sa->sa_family;
1172		free(sa, M_SONAME);
1173
1174		error = ENOBUFS;
1175		cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1176		control = m_get(M_WAITOK, MT_CONTROL);
1177		if (control == NULL)
1178			goto bad;
1179
1180		do {
1181			error = copyin(ptr_cmsg, &linux_cmsg,
1182			    sizeof(struct l_cmsghdr));
1183			if (error)
1184				goto bad;
1185
1186			error = EINVAL;
1187			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1188				goto bad;
1189
1190			/*
1191			 * Now we support only SCM_RIGHTS and SCM_CRED,
1192			 * so return EINVAL in any other cmsg_type
1193			 */
1194			cmsg->cmsg_type =
1195			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type);
1196			cmsg->cmsg_level =
1197			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1198			if (cmsg->cmsg_type == -1
1199			    || cmsg->cmsg_level != SOL_SOCKET)
1200				goto bad;
1201
1202			/*
1203			 * Some applications (e.g. pulseaudio) attempt to
1204			 * send ancillary data even if the underlying protocol
1205			 * doesn't support it which is not allowed in the
1206			 * FreeBSD system call interface.
1207			 */
1208			if (sa_family != AF_UNIX)
1209				continue;
1210
1211			data = LINUX_CMSG_DATA(ptr_cmsg);
1212			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1213
1214			switch (cmsg->cmsg_type)
1215			{
1216			case SCM_RIGHTS:
1217				break;
1218
1219			case SCM_CREDS:
1220				data = &cmcred;
1221				datalen = sizeof(cmcred);
1222
1223				/*
1224				 * The lower levels will fill in the structure
1225				 */
1226				bzero(data, datalen);
1227				break;
1228			}
1229
1230			cmsg->cmsg_len = CMSG_LEN(datalen);
1231
1232			error = ENOBUFS;
1233			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t)cmsg))
1234				goto bad;
1235			if (!m_append(control, datalen, (c_caddr_t)data))
1236				goto bad;
1237		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&linux_msg, ptr_cmsg)));
1238
1239		if (m_length(control, NULL) == 0) {
1240			m_freem(control);
1241			control = NULL;
1242		}
1243	}
1244
1245	msg.msg_iov = iov;
1246	msg.msg_flags = 0;
1247	error = linux_sendit(td, args->s, &msg, args->flags, control,
1248	    UIO_USERSPACE);
1249
1250bad:
1251	free(iov, M_IOV);
1252	if (cmsg)
1253		free(cmsg, M_TEMP);
1254	return (error);
1255}
1256
1257struct linux_recvmsg_args {
1258	int s;
1259	l_uintptr_t msg;
1260	int flags;
1261};
1262
1263static int
1264linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1265{
1266	struct cmsghdr *cm;
1267	struct cmsgcred *cmcred;
1268	struct msghdr msg;
1269	struct l_cmsghdr *linux_cmsg = NULL;
1270	struct l_ucred linux_ucred;
1271	socklen_t datalen, outlen;
1272	struct l_msghdr linux_msg;
1273	struct iovec *iov, *uiov;
1274	struct mbuf *control = NULL;
1275	struct mbuf **controlp;
1276	caddr_t outbuf;
1277	void *data;
1278	int error, i, fd, fds, *fdp;
1279
1280	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1281	if (error)
1282		return (error);
1283
1284	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1285	if (error)
1286		return (error);
1287
1288#ifdef COMPAT_LINUX32
1289	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1290	    &iov, EMSGSIZE);
1291#else
1292	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1293#endif
1294	if (error)
1295		return (error);
1296
1297	if (msg.msg_name) {
1298		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1299		    msg.msg_namelen);
1300		if (error)
1301			goto bad;
1302	}
1303
1304	uiov = msg.msg_iov;
1305	msg.msg_iov = iov;
1306	controlp = (msg.msg_control != NULL) ? &control : NULL;
1307	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1308	msg.msg_iov = uiov;
1309	if (error)
1310		goto bad;
1311
1312	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1313	if (error)
1314		goto bad;
1315
1316	if (linux_msg.msg_name) {
1317		error = bsd_to_linux_sockaddr((struct sockaddr *)
1318		    PTRIN(linux_msg.msg_name));
1319		if (error)
1320			goto bad;
1321	}
1322	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1323		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1324		if (error)
1325			goto bad;
1326	}
1327
1328	outbuf = PTRIN(linux_msg.msg_control);
1329	outlen = 0;
1330
1331	if (control) {
1332		linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1333
1334		msg.msg_control = mtod(control, struct cmsghdr *);
1335		msg.msg_controllen = control->m_len;
1336
1337		cm = CMSG_FIRSTHDR(&msg);
1338
1339		while (cm != NULL) {
1340			linux_cmsg->cmsg_type =
1341			    bsd_to_linux_cmsg_type(cm->cmsg_type);
1342			linux_cmsg->cmsg_level =
1343			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1344			if (linux_cmsg->cmsg_type == -1
1345			    || cm->cmsg_level != SOL_SOCKET)
1346			{
1347				error = EINVAL;
1348				goto bad;
1349			}
1350
1351			data = CMSG_DATA(cm);
1352			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1353
1354			switch (cm->cmsg_type)
1355			{
1356			case SCM_RIGHTS:
1357				if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
1358					fds = datalen / sizeof(int);
1359					fdp = data;
1360					for (i = 0; i < fds; i++) {
1361						fd = *fdp++;
1362						(void)kern_fcntl(td, fd,
1363						    F_SETFD, FD_CLOEXEC);
1364					}
1365				}
1366				break;
1367
1368			case SCM_CREDS:
1369				/*
1370				 * Currently LOCAL_CREDS is never in
1371				 * effect for Linux so no need to worry
1372				 * about sockcred
1373				 */
1374				if (datalen != sizeof(*cmcred)) {
1375					error = EMSGSIZE;
1376					goto bad;
1377				}
1378				cmcred = (struct cmsgcred *)data;
1379				bzero(&linux_ucred, sizeof(linux_ucred));
1380				linux_ucred.pid = cmcred->cmcred_pid;
1381				linux_ucred.uid = cmcred->cmcred_uid;
1382				linux_ucred.gid = cmcred->cmcred_gid;
1383				data = &linux_ucred;
1384				datalen = sizeof(linux_ucred);
1385				break;
1386			}
1387
1388			if (outlen + LINUX_CMSG_LEN(datalen) >
1389			    linux_msg.msg_controllen) {
1390				if (outlen == 0) {
1391					error = EMSGSIZE;
1392					goto bad;
1393				} else {
1394					linux_msg.msg_flags |=
1395					    LINUX_MSG_CTRUNC;
1396					goto out;
1397				}
1398			}
1399
1400			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1401
1402			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1403			if (error)
1404				goto bad;
1405			outbuf += L_CMSG_HDRSZ;
1406
1407			error = copyout(data, outbuf, datalen);
1408			if (error)
1409				goto bad;
1410
1411			outbuf += LINUX_CMSG_ALIGN(datalen);
1412			outlen += LINUX_CMSG_LEN(datalen);
1413
1414			cm = CMSG_NXTHDR(&msg, cm);
1415		}
1416	}
1417
1418out:
1419	linux_msg.msg_controllen = outlen;
1420	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1421
1422bad:
1423	free(iov, M_IOV);
1424	if (control != NULL)
1425		m_freem(control);
1426	if (linux_cmsg != NULL)
1427		free(linux_cmsg, M_TEMP);
1428
1429	return (error);
1430}
1431
/* Argument block handed to linux_shutdown(). */
struct linux_shutdown_args {
	int	s;	/* socket descriptor */
	int	how;	/* passed unchanged to sys_shutdown() */
};
1436
1437static int
1438linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1439{
1440	struct shutdown_args /* {
1441		int s;
1442		int how;
1443	} */ bsd_args;
1444
1445	bsd_args.s = args->s;
1446	bsd_args.how = args->how;
1447	return (sys_shutdown(td, &bsd_args));
1448}
1449
1450struct linux_setsockopt_args {
1451	int s;
1452	int level;
1453	int optname;
1454	l_uintptr_t optval;
1455	int optlen;
1456};
1457
1458static int
1459linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1460{
1461	struct setsockopt_args /* {
1462		int s;
1463		int level;
1464		int name;
1465		caddr_t val;
1466		int valsize;
1467	} */ bsd_args;
1468	l_timeval linux_tv;
1469	struct timeval tv;
1470	int error, name;
1471
1472	bsd_args.s = args->s;
1473	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1474	switch (bsd_args.level) {
1475	case SOL_SOCKET:
1476		name = linux_to_bsd_so_sockopt(args->optname);
1477		switch (name) {
1478		case SO_RCVTIMEO:
1479			/* FALLTHROUGH */
1480		case SO_SNDTIMEO:
1481			error = copyin(PTRIN(args->optval), &linux_tv,
1482			    sizeof(linux_tv));
1483			if (error)
1484				return (error);
1485			tv.tv_sec = linux_tv.tv_sec;
1486			tv.tv_usec = linux_tv.tv_usec;
1487			return (kern_setsockopt(td, args->s, bsd_args.level,
1488			    name, &tv, UIO_SYSSPACE, sizeof(tv)));
1489			/* NOTREACHED */
1490			break;
1491		default:
1492			break;
1493		}
1494		break;
1495	case IPPROTO_IP:
1496		name = linux_to_bsd_ip_sockopt(args->optname);
1497		break;
1498	case IPPROTO_TCP:
1499		/* Linux TCP option values match BSD's */
1500		name = args->optname;
1501		break;
1502	default:
1503		name = -1;
1504		break;
1505	}
1506	if (name == -1)
1507		return (ENOPROTOOPT);
1508
1509	bsd_args.name = name;
1510	bsd_args.val = PTRIN(args->optval);
1511	bsd_args.valsize = args->optlen;
1512
1513	if (name == IPV6_NEXTHOP) {
1514		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1515			bsd_args.valsize);
1516		error = sys_setsockopt(td, &bsd_args);
1517		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1518	} else
1519		error = sys_setsockopt(td, &bsd_args);
1520
1521	return (error);
1522}
1523
1524struct linux_getsockopt_args {
1525	int s;
1526	int level;
1527	int optname;
1528	l_uintptr_t optval;
1529	l_uintptr_t optlen;
1530};
1531
1532static int
1533linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1534{
1535	struct getsockopt_args /* {
1536		int s;
1537		int level;
1538		int name;
1539		caddr_t val;
1540		int *avalsize;
1541	} */ bsd_args;
1542	l_timeval linux_tv;
1543	struct timeval tv;
1544	socklen_t tv_len, xulen;
1545	struct xucred xu;
1546	struct l_ucred lxu;
1547	int error, name;
1548
1549	bsd_args.s = args->s;
1550	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1551	switch (bsd_args.level) {
1552	case SOL_SOCKET:
1553		name = linux_to_bsd_so_sockopt(args->optname);
1554		switch (name) {
1555		case SO_RCVTIMEO:
1556			/* FALLTHROUGH */
1557		case SO_SNDTIMEO:
1558			tv_len = sizeof(tv);
1559			error = kern_getsockopt(td, args->s, bsd_args.level,
1560			    name, &tv, UIO_SYSSPACE, &tv_len);
1561			if (error)
1562				return (error);
1563			linux_tv.tv_sec = tv.tv_sec;
1564			linux_tv.tv_usec = tv.tv_usec;
1565			return (copyout(&linux_tv, PTRIN(args->optval),
1566			    sizeof(linux_tv)));
1567			/* NOTREACHED */
1568			break;
1569		case LOCAL_PEERCRED:
1570			if (args->optlen != sizeof(lxu))
1571				return (EINVAL);
1572			xulen = sizeof(xu);
1573			error = kern_getsockopt(td, args->s, bsd_args.level,
1574			    name, &xu, UIO_SYSSPACE, &xulen);
1575			if (error)
1576				return (error);
1577			/*
1578			 * XXX Use 0 for pid as the FreeBSD does not cache peer pid.
1579			 */
1580			lxu.pid = 0;
1581			lxu.uid = xu.cr_uid;
1582			lxu.gid = xu.cr_gid;
1583			return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu)));
1584			/* NOTREACHED */
1585			break;
1586		default:
1587			break;
1588		}
1589		break;
1590	case IPPROTO_IP:
1591		name = linux_to_bsd_ip_sockopt(args->optname);
1592		break;
1593	case IPPROTO_TCP:
1594		/* Linux TCP option values match BSD's */
1595		name = args->optname;
1596		break;
1597	default:
1598		name = -1;
1599		break;
1600	}
1601	if (name == -1)
1602		return (EINVAL);
1603
1604	bsd_args.name = name;
1605	bsd_args.val = PTRIN(args->optval);
1606	bsd_args.avalsize = PTRIN(args->optlen);
1607
1608	if (name == IPV6_NEXTHOP) {
1609		error = sys_getsockopt(td, &bsd_args);
1610		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1611	} else
1612		error = sys_getsockopt(td, &bsd_args);
1613
1614	return (error);
1615}
1616
1617/* Argument list sizes for linux_socketcall */
1618
1619#define LINUX_AL(x) ((x) * sizeof(l_ulong))
1620
1621static const unsigned char lxs_args[] = {
1622	LINUX_AL(0) /* unused*/,	LINUX_AL(3) /* socket */,
1623	LINUX_AL(3) /* bind */,		LINUX_AL(3) /* connect */,
1624	LINUX_AL(2) /* listen */,	LINUX_AL(3) /* accept */,
1625	LINUX_AL(3) /* getsockname */,	LINUX_AL(3) /* getpeername */,
1626	LINUX_AL(4) /* socketpair */,	LINUX_AL(4) /* send */,
1627	LINUX_AL(4) /* recv */,		LINUX_AL(6) /* sendto */,
1628	LINUX_AL(6) /* recvfrom */,	LINUX_AL(2) /* shutdown */,
1629	LINUX_AL(5) /* setsockopt */,	LINUX_AL(5) /* getsockopt */,
1630	LINUX_AL(3) /* sendmsg */,	LINUX_AL(3) /* recvmsg */,
1631	LINUX_AL(4) /* accept4 */
1632};
1633
1634#define	LINUX_AL_SIZE	sizeof(lxs_args) / sizeof(lxs_args[0]) - 1
1635
1636int
1637linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1638{
1639	l_ulong a[6];
1640	void *arg;
1641	int error;
1642
1643	if (args->what < LINUX_SOCKET || args->what > LINUX_AL_SIZE)
1644		return (EINVAL);
1645	error = copyin(PTRIN(args->args), a, lxs_args[args->what]);
1646	if (error)
1647		return (error);
1648
1649	arg = a;
1650	switch (args->what) {
1651	case LINUX_SOCKET:
1652		return (linux_socket(td, arg));
1653	case LINUX_BIND:
1654		return (linux_bind(td, arg));
1655	case LINUX_CONNECT:
1656		return (linux_connect(td, arg));
1657	case LINUX_LISTEN:
1658		return (linux_listen(td, arg));
1659	case LINUX_ACCEPT:
1660		return (linux_accept(td, arg));
1661	case LINUX_GETSOCKNAME:
1662		return (linux_getsockname(td, arg));
1663	case LINUX_GETPEERNAME:
1664		return (linux_getpeername(td, arg));
1665	case LINUX_SOCKETPAIR:
1666		return (linux_socketpair(td, arg));
1667	case LINUX_SEND:
1668		return (linux_send(td, arg));
1669	case LINUX_RECV:
1670		return (linux_recv(td, arg));
1671	case LINUX_SENDTO:
1672		return (linux_sendto(td, arg));
1673	case LINUX_RECVFROM:
1674		return (linux_recvfrom(td, arg));
1675	case LINUX_SHUTDOWN:
1676		return (linux_shutdown(td, arg));
1677	case LINUX_SETSOCKOPT:
1678		return (linux_setsockopt(td, arg));
1679	case LINUX_GETSOCKOPT:
1680		return (linux_getsockopt(td, arg));
1681	case LINUX_SENDMSG:
1682		return (linux_sendmsg(td, arg));
1683	case LINUX_RECVMSG:
1684		return (linux_recvmsg(td, arg));
1685	case LINUX_ACCEPT4:
1686		return (linux_accept4(td, arg));
1687	}
1688
1689	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1690	return (ENOSYS);
1691}
1692