linux_socket.c revision 226071
1/*-
2 * Copyright (c) 1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 226071 2011-10-06 20:28:08Z jkim $");
31
32/* XXX we use functions that might not exist. */
33#include "opt_compat.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/proc.h>
38#include <sys/systm.h>
39#include <sys/sysproto.h>
40#include <sys/capability.h>
41#include <sys/fcntl.h>
42#include <sys/file.h>
43#include <sys/limits.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mutex.h>
47#include <sys/mbuf.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/syscallsubr.h>
51#include <sys/uio.h>
52#include <sys/syslog.h>
53#include <sys/un.h>
54
55#include <net/if.h>
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#ifdef INET6
60#include <netinet/ip6.h>
61#include <netinet6/ip6_var.h>
62#include <netinet6/in6_var.h>
63#endif
64
65#ifdef COMPAT_LINUX32
66#include <machine/../linux32/linux.h>
67#include <machine/../linux32/linux32_proto.h>
68#else
69#include <machine/../linux/linux.h>
70#include <machine/../linux/linux_proto.h>
71#endif
72#include <compat/linux/linux_socket.h>
73#include <compat/linux/linux_util.h>
74
/* Forward declaration: linux_getsockaddr() calls this before its definition. */
static int linux_to_bsd_domain(int);
76
77/*
78 * Reads a linux sockaddr and does any necessary translation.
79 * Linux sockaddrs don't have a length field, only a family.
80 * Copy the osockaddr structure pointed to by osa to kernel, adjust
81 * family and convert to sockaddr.
82 */
83static int
84linux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int osalen)
85{
86	int error=0, bdom;
87	struct sockaddr *sa;
88	struct osockaddr *kosa;
89#ifdef INET6
90	int oldv6size;
91	struct sockaddr_in6 *sin6;
92#endif
93	int alloclen, hdrlen, namelen;
94
95	if (osalen < 2 || osalen > UCHAR_MAX || !osa)
96		return (EINVAL);
97
98	alloclen = osalen;
99#ifdef INET6
100	oldv6size = 0;
101	/*
102	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
103	 * if it's a v4-mapped address, so reserve the proper space
104	 * for it.
105	 */
106	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
107		alloclen = sizeof (struct sockaddr_in6);
108		oldv6size = 1;
109	}
110#endif
111
112	kosa = malloc(alloclen, M_SONAME, M_WAITOK);
113
114	if ((error = copyin(osa, kosa, osalen)))
115		goto out;
116
117	bdom = linux_to_bsd_domain(kosa->sa_family);
118	if (bdom == -1) {
119		error = EAFNOSUPPORT;
120		goto out;
121	}
122
123#ifdef INET6
124	/*
125	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
126	 * which lacks the scope id compared with RFC2553 one. If we detect
127	 * the situation, reject the address and write a message to system log.
128	 *
129	 * Still accept addresses for which the scope id is not used.
130	 */
131	if (oldv6size && bdom == AF_INET6) {
132		sin6 = (struct sockaddr_in6 *)kosa;
133		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
134		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
135		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
136		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
137		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
138		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
139			sin6->sin6_scope_id = 0;
140		} else {
141			log(LOG_DEBUG,
142			    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
143			error = EINVAL;
144			goto out;
145		}
146	} else
147#endif
148	if (bdom == AF_INET) {
149		alloclen = sizeof(struct sockaddr_in);
150		if (osalen < alloclen) {
151			error = EINVAL;
152			goto out;
153		}
154	}
155
156	if (bdom == AF_LOCAL && osalen > sizeof(struct sockaddr_un)) {
157		hdrlen = offsetof(struct sockaddr_un, sun_path);
158		namelen = strnlen(((struct sockaddr_un *)kosa)->sun_path,
159		    osalen - hdrlen);
160		if (hdrlen + namelen > sizeof(struct sockaddr_un)) {
161			error = ENAMETOOLONG;
162			goto out;
163		}
164		alloclen = sizeof(struct sockaddr_un);
165	}
166
167	sa = (struct sockaddr *) kosa;
168	sa->sa_family = bdom;
169	sa->sa_len = alloclen;
170
171	*sap = sa;
172	return (0);
173
174out:
175	free(kosa, M_SONAME);
176	return (error);
177}
178
179static int
180linux_to_bsd_domain(int domain)
181{
182
183	switch (domain) {
184	case LINUX_AF_UNSPEC:
185		return (AF_UNSPEC);
186	case LINUX_AF_UNIX:
187		return (AF_LOCAL);
188	case LINUX_AF_INET:
189		return (AF_INET);
190	case LINUX_AF_INET6:
191		return (AF_INET6);
192	case LINUX_AF_AX25:
193		return (AF_CCITT);
194	case LINUX_AF_IPX:
195		return (AF_IPX);
196	case LINUX_AF_APPLETALK:
197		return (AF_APPLETALK);
198	}
199	return (-1);
200}
201
202static int
203bsd_to_linux_domain(int domain)
204{
205
206	switch (domain) {
207	case AF_UNSPEC:
208		return (LINUX_AF_UNSPEC);
209	case AF_LOCAL:
210		return (LINUX_AF_UNIX);
211	case AF_INET:
212		return (LINUX_AF_INET);
213	case AF_INET6:
214		return (LINUX_AF_INET6);
215	case AF_CCITT:
216		return (LINUX_AF_AX25);
217	case AF_IPX:
218		return (LINUX_AF_IPX);
219	case AF_APPLETALK:
220		return (LINUX_AF_APPLETALK);
221	}
222	return (-1);
223}
224
225static int
226linux_to_bsd_sockopt_level(int level)
227{
228
229	switch (level) {
230	case LINUX_SOL_SOCKET:
231		return (SOL_SOCKET);
232	}
233	return (level);
234}
235
236static int
237bsd_to_linux_sockopt_level(int level)
238{
239
240	switch (level) {
241	case SOL_SOCKET:
242		return (LINUX_SOL_SOCKET);
243	}
244	return (level);
245}
246
247static int
248linux_to_bsd_ip_sockopt(int opt)
249{
250
251	switch (opt) {
252	case LINUX_IP_TOS:
253		return (IP_TOS);
254	case LINUX_IP_TTL:
255		return (IP_TTL);
256	case LINUX_IP_OPTIONS:
257		return (IP_OPTIONS);
258	case LINUX_IP_MULTICAST_IF:
259		return (IP_MULTICAST_IF);
260	case LINUX_IP_MULTICAST_TTL:
261		return (IP_MULTICAST_TTL);
262	case LINUX_IP_MULTICAST_LOOP:
263		return (IP_MULTICAST_LOOP);
264	case LINUX_IP_ADD_MEMBERSHIP:
265		return (IP_ADD_MEMBERSHIP);
266	case LINUX_IP_DROP_MEMBERSHIP:
267		return (IP_DROP_MEMBERSHIP);
268	case LINUX_IP_HDRINCL:
269		return (IP_HDRINCL);
270	}
271	return (-1);
272}
273
274static int
275linux_to_bsd_so_sockopt(int opt)
276{
277
278	switch (opt) {
279	case LINUX_SO_DEBUG:
280		return (SO_DEBUG);
281	case LINUX_SO_REUSEADDR:
282		return (SO_REUSEADDR);
283	case LINUX_SO_TYPE:
284		return (SO_TYPE);
285	case LINUX_SO_ERROR:
286		return (SO_ERROR);
287	case LINUX_SO_DONTROUTE:
288		return (SO_DONTROUTE);
289	case LINUX_SO_BROADCAST:
290		return (SO_BROADCAST);
291	case LINUX_SO_SNDBUF:
292		return (SO_SNDBUF);
293	case LINUX_SO_RCVBUF:
294		return (SO_RCVBUF);
295	case LINUX_SO_KEEPALIVE:
296		return (SO_KEEPALIVE);
297	case LINUX_SO_OOBINLINE:
298		return (SO_OOBINLINE);
299	case LINUX_SO_LINGER:
300		return (SO_LINGER);
301	case LINUX_SO_PEERCRED:
302		return (LOCAL_PEERCRED);
303	case LINUX_SO_RCVLOWAT:
304		return (SO_RCVLOWAT);
305	case LINUX_SO_SNDLOWAT:
306		return (SO_SNDLOWAT);
307	case LINUX_SO_RCVTIMEO:
308		return (SO_RCVTIMEO);
309	case LINUX_SO_SNDTIMEO:
310		return (SO_SNDTIMEO);
311	case LINUX_SO_TIMESTAMP:
312		return (SO_TIMESTAMP);
313	case LINUX_SO_ACCEPTCONN:
314		return (SO_ACCEPTCONN);
315	}
316	return (-1);
317}
318
319static int
320linux_to_bsd_msg_flags(int flags)
321{
322	int ret_flags = 0;
323
324	if (flags & LINUX_MSG_OOB)
325		ret_flags |= MSG_OOB;
326	if (flags & LINUX_MSG_PEEK)
327		ret_flags |= MSG_PEEK;
328	if (flags & LINUX_MSG_DONTROUTE)
329		ret_flags |= MSG_DONTROUTE;
330	if (flags & LINUX_MSG_CTRUNC)
331		ret_flags |= MSG_CTRUNC;
332	if (flags & LINUX_MSG_TRUNC)
333		ret_flags |= MSG_TRUNC;
334	if (flags & LINUX_MSG_DONTWAIT)
335		ret_flags |= MSG_DONTWAIT;
336	if (flags & LINUX_MSG_EOR)
337		ret_flags |= MSG_EOR;
338	if (flags & LINUX_MSG_WAITALL)
339		ret_flags |= MSG_WAITALL;
340	if (flags & LINUX_MSG_NOSIGNAL)
341		ret_flags |= MSG_NOSIGNAL;
342#if 0 /* not handled */
343	if (flags & LINUX_MSG_PROXY)
344		;
345	if (flags & LINUX_MSG_FIN)
346		;
347	if (flags & LINUX_MSG_SYN)
348		;
349	if (flags & LINUX_MSG_CONFIRM)
350		;
351	if (flags & LINUX_MSG_RST)
352		;
353	if (flags & LINUX_MSG_ERRQUEUE)
354		;
355#endif
356	return ret_flags;
357}
358
359/*
360* If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
361* native syscall will fault.  Thus, we don't really need to check the
362* return values for these functions.
363*/
364
365static int
366bsd_to_linux_sockaddr(struct sockaddr *arg)
367{
368	struct sockaddr sa;
369	size_t sa_len = sizeof(struct sockaddr);
370	int error;
371
372	if ((error = copyin(arg, &sa, sa_len)))
373		return (error);
374
375	*(u_short *)&sa = sa.sa_family;
376
377	error = copyout(&sa, arg, sa_len);
378
379	return (error);
380}
381
382static int
383linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
384{
385	struct sockaddr sa;
386	size_t sa_len = sizeof(struct sockaddr);
387	int error;
388
389	if ((error = copyin(arg, &sa, sa_len)))
390		return (error);
391
392	sa.sa_family = *(sa_family_t *)&sa;
393	sa.sa_len = len;
394
395	error = copyout(&sa, arg, sa_len);
396
397	return (error);
398}
399
400
401static int
402linux_sa_put(struct osockaddr *osa)
403{
404	struct osockaddr sa;
405	int error, bdom;
406
407	/*
408	 * Only read/write the osockaddr family part, the rest is
409	 * not changed.
410	 */
411	error = copyin(osa, &sa, sizeof(sa.sa_family));
412	if (error)
413		return (error);
414
415	bdom = bsd_to_linux_domain(sa.sa_family);
416	if (bdom == -1)
417		return (EINVAL);
418
419	sa.sa_family = bdom;
420	error = copyout(&sa, osa, sizeof(sa.sa_family));
421	if (error)
422		return (error);
423
424	return (0);
425}
426
427static int
428linux_to_bsd_cmsg_type(int cmsg_type)
429{
430
431	switch (cmsg_type) {
432	case LINUX_SCM_RIGHTS:
433		return (SCM_RIGHTS);
434	case LINUX_SCM_CREDENTIALS:
435		return (SCM_CREDS);
436	}
437	return (-1);
438}
439
440static int
441bsd_to_linux_cmsg_type(int cmsg_type)
442{
443
444	switch (cmsg_type) {
445	case SCM_RIGHTS:
446		return (LINUX_SCM_RIGHTS);
447	case SCM_CREDS:
448		return (LINUX_SCM_CREDENTIALS);
449	}
450	return (-1);
451}
452
453static int
454linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
455{
456	if (lhdr->msg_controllen > INT_MAX)
457		return (ENOBUFS);
458
459	bhdr->msg_name		= PTRIN(lhdr->msg_name);
460	bhdr->msg_namelen	= lhdr->msg_namelen;
461	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
462	bhdr->msg_iovlen	= lhdr->msg_iovlen;
463	bhdr->msg_control	= PTRIN(lhdr->msg_control);
464
465	/*
466	 * msg_controllen is skipped since BSD and LINUX control messages
467	 * are potentially different sizes (e.g. the cred structure used
468	 * by SCM_CREDS is different between the two operating system).
469	 *
470	 * The caller can set it (if necessary) after converting all the
471	 * control messages.
472	 */
473
474	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
475	return (0);
476}
477
478static int
479bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
480{
481	lhdr->msg_name		= PTROUT(bhdr->msg_name);
482	lhdr->msg_namelen	= bhdr->msg_namelen;
483	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
484	lhdr->msg_iovlen	= bhdr->msg_iovlen;
485	lhdr->msg_control	= PTROUT(bhdr->msg_control);
486
487	/*
488	 * msg_controllen is skipped since BSD and LINUX control messages
489	 * are potentially different sizes (e.g. the cred structure used
490	 * by SCM_CREDS is different between the two operating system).
491	 *
492	 * The caller can set it (if necessary) after converting all the
493	 * control messages.
494	 */
495
496	/* msg_flags skipped */
497	return (0);
498}
499
500static int
501linux_set_socket_flags(struct thread *td, int s, int flags)
502{
503	int error;
504
505	if (flags & LINUX_SOCK_NONBLOCK) {
506		error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK);
507		if (error)
508			return (error);
509	}
510	if (flags & LINUX_SOCK_CLOEXEC) {
511		error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC);
512		if (error)
513			return (error);
514	}
515	return (0);
516}
517
518static int
519linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
520    struct mbuf *control, enum uio_seg segflg)
521{
522	struct sockaddr *to;
523	int error;
524
525	if (mp->msg_name != NULL) {
526		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
527		if (error)
528			return (error);
529		mp->msg_name = to;
530	} else
531		to = NULL;
532
533	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
534	    segflg);
535
536	if (to)
537		free(to, M_SONAME);
538	return (error);
539}
540
541/* Return 0 if IP_HDRINCL is set for the given socket. */
542static int
543linux_check_hdrincl(struct thread *td, int s)
544{
545	int error, optval, size_val;
546
547	size_val = sizeof(optval);
548	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
549	    &optval, UIO_SYSSPACE, &size_val);
550	if (error)
551		return (error);
552
553	return (optval == 0);
554}
555
/* Arguments of the Linux sendto() call as consumed by linux_sendto(). */
struct linux_sendto_args {
	int s;			/* socket descriptor */
	l_uintptr_t msg;	/* user address of the data to send */
	int len;		/* number of bytes at msg */
	int flags;		/* Linux MSG_* flags */
	l_uintptr_t to;		/* user address of the destination osockaddr */
	int tolen;		/* size of the address at to */
};
564
565/*
566 * Updated sendto() when IP_HDRINCL is set:
567 * tweak endian-dependent fields in the IP packet.
568 */
569static int
570linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
571{
572/*
573 * linux_ip_copysize defines how many bytes we should copy
574 * from the beginning of the IP packet before we customize it for BSD.
575 * It should include all the fields we modify (ip_len and ip_off).
576 */
577#define linux_ip_copysize	8
578
579	struct ip *packet;
580	struct msghdr msg;
581	struct iovec aiov[1];
582	int error;
583
584	/* Check that the packet isn't too big or too small. */
585	if (linux_args->len < linux_ip_copysize ||
586	    linux_args->len > IP_MAXPACKET)
587		return (EINVAL);
588
589	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
590
591	/* Make kernel copy of the packet to be sent */
592	if ((error = copyin(PTRIN(linux_args->msg), packet,
593	    linux_args->len)))
594		goto goout;
595
596	/* Convert fields from Linux to BSD raw IP socket format */
597	packet->ip_len = linux_args->len;
598	packet->ip_off = ntohs(packet->ip_off);
599
600	/* Prepare the msghdr and iovec structures describing the new packet */
601	msg.msg_name = PTRIN(linux_args->to);
602	msg.msg_namelen = linux_args->tolen;
603	msg.msg_iov = aiov;
604	msg.msg_iovlen = 1;
605	msg.msg_control = NULL;
606	msg.msg_flags = 0;
607	aiov[0].iov_base = (char *)packet;
608	aiov[0].iov_len = linux_args->len;
609	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
610	    NULL, UIO_SYSSPACE);
611goout:
612	free(packet, M_TEMP);
613	return (error);
614}
615
/* Arguments of the Linux socket() call as consumed by linux_socket(). */
struct linux_socket_args {
	int domain;	/* Linux address family */
	int type;	/* socket type, optionally OR'ed with LINUX_SOCK_* flags */
	int protocol;	/* protocol number, passed through unchanged */
};
621
622static int
623linux_socket(struct thread *td, struct linux_socket_args *args)
624{
625	struct socket_args /* {
626		int domain;
627		int type;
628		int protocol;
629	} */ bsd_args;
630	int retval_socket, socket_flags;
631
632	bsd_args.protocol = args->protocol;
633	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
634	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
635		return (EINVAL);
636	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
637	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
638		return (EINVAL);
639	bsd_args.domain = linux_to_bsd_domain(args->domain);
640	if (bsd_args.domain == -1)
641		return (EAFNOSUPPORT);
642
643	retval_socket = sys_socket(td, &bsd_args);
644	if (retval_socket)
645		return (retval_socket);
646
647	retval_socket = linux_set_socket_flags(td, td->td_retval[0],
648	    socket_flags);
649	if (retval_socket) {
650		(void)kern_close(td, td->td_retval[0]);
651		goto out;
652	}
653
654	if (bsd_args.type == SOCK_RAW
655	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
656	    && bsd_args.domain == PF_INET) {
657		/* It's a raw IP socket: set the IP_HDRINCL option. */
658		int hdrincl;
659
660		hdrincl = 1;
661		/* We ignore any error returned by kern_setsockopt() */
662		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
663		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
664	}
665#ifdef INET6
666	/*
667	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default
668	 * and some apps depend on this. So, set V6ONLY to 0 for Linux apps.
669	 * For simplicity we do this unconditionally of the net.inet6.ip6.v6only
670	 * sysctl value.
671	 */
672	if (bsd_args.domain == PF_INET6) {
673		int v6only;
674
675		v6only = 0;
676		/* We ignore any error returned by setsockopt() */
677		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
678		    &v6only, UIO_SYSSPACE, sizeof(v6only));
679	}
680#endif
681
682out:
683	return (retval_socket);
684}
685
/* Arguments of the Linux bind() call as consumed by linux_bind(). */
struct linux_bind_args {
	int s;			/* socket descriptor */
	l_uintptr_t name;	/* user address of the osockaddr to bind to */
	int namelen;		/* size of the address at name */
};
691
692static int
693linux_bind(struct thread *td, struct linux_bind_args *args)
694{
695	struct sockaddr *sa;
696	int error;
697
698	error = linux_getsockaddr(&sa, PTRIN(args->name),
699	    args->namelen);
700	if (error)
701		return (error);
702
703	error = kern_bind(td, args->s, sa);
704	free(sa, M_SONAME);
705	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
706	   	return (EINVAL);
707	return (error);
708}
709
/* Arguments of the Linux connect() call as consumed by linux_connect(). */
struct linux_connect_args {
	int s;			/* socket descriptor */
	l_uintptr_t name;	/* user address of the peer osockaddr */
	int namelen;		/* size of the address at name */
};
/* linux_connect() has external linkage, hence the explicit prototype. */
int linux_connect(struct thread *, struct linux_connect_args *);
716
717int
718linux_connect(struct thread *td, struct linux_connect_args *args)
719{
720	struct socket *so;
721	struct sockaddr *sa;
722	u_int fflag;
723	int error;
724
725	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
726	    args->namelen);
727	if (error)
728		return (error);
729
730	error = kern_connect(td, args->s, sa);
731	free(sa, M_SONAME);
732	if (error != EISCONN)
733		return (error);
734
735	/*
736	 * Linux doesn't return EISCONN the first time it occurs,
737	 * when on a non-blocking socket. Instead it returns the
738	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
739	 *
740	 * XXXRW: Instead of using fgetsock(), check that it is a
741	 * socket and use the file descriptor reference instead of
742	 * creating a new one.
743	 */
744	error = fgetsock(td, args->s, CAP_CONNECT, &so, &fflag);
745	if (error == 0) {
746		error = EISCONN;
747		if (fflag & FNONBLOCK) {
748			SOCK_LOCK(so);
749			if (so->so_emuldata == 0)
750				error = so->so_error;
751			so->so_emuldata = (void *)1;
752			SOCK_UNLOCK(so);
753		}
754		fputsock(so);
755	}
756	return (error);
757}
758
/* Arguments of the Linux listen() call as consumed by linux_listen(). */
struct linux_listen_args {
	int s;		/* socket descriptor */
	int backlog;	/* backlog value, passed through unchanged */
};
763
764static int
765linux_listen(struct thread *td, struct linux_listen_args *args)
766{
767	struct listen_args /* {
768		int s;
769		int backlog;
770	} */ bsd_args;
771
772	bsd_args.s = args->s;
773	bsd_args.backlog = args->backlog;
774	return (sys_listen(td, &bsd_args));
775}
776
777static int
778linux_accept_common(struct thread *td, int s, l_uintptr_t addr,
779    l_uintptr_t namelen, int flags)
780{
781	struct accept_args /* {
782		int	s;
783		struct sockaddr * __restrict name;
784		socklen_t * __restrict anamelen;
785	} */ bsd_args;
786	int error;
787
788	if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
789		return (EINVAL);
790
791	bsd_args.s = s;
792	/* XXX: */
793	bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr);
794	bsd_args.anamelen = PTRIN(namelen);/* XXX */
795	error = sys_accept(td, &bsd_args);
796	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
797	if (error) {
798		if (error == EFAULT && namelen != sizeof(struct sockaddr_in))
799			return (EINVAL);
800		return (error);
801	}
802
803	/*
804	 * linux appears not to copy flags from the parent socket to the
805	 * accepted one, so we must clear the flags in the new descriptor
806	 * and apply the requested flags.
807	 */
808	error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0);
809	if (error)
810		goto out;
811	error = linux_set_socket_flags(td, td->td_retval[0], flags);
812	if (error)
813		goto out;
814	if (addr)
815		error = linux_sa_put(PTRIN(addr));
816
817out:
818	if (error) {
819		(void)kern_close(td, td->td_retval[0]);
820		td->td_retval[0] = 0;
821	}
822	return (error);
823}
824
/* Arguments of the Linux accept() call as consumed by linux_accept(). */
struct linux_accept_args {
	int s;			/* listening socket descriptor */
	l_uintptr_t addr;	/* user address receiving the peer address */
	l_uintptr_t namelen;	/* user address of the address length */
};
830
831static int
832linux_accept(struct thread *td, struct linux_accept_args *args)
833{
834
835	return (linux_accept_common(td, args->s, args->addr,
836	    args->namelen, 0));
837}
838
/* Arguments of the Linux accept4() call as consumed by linux_accept4(). */
struct linux_accept4_args {
	int s;			/* listening socket descriptor */
	l_uintptr_t addr;	/* user address receiving the peer address */
	l_uintptr_t namelen;	/* user address of the address length */
	int flags;		/* LINUX_SOCK_* flags for the new socket */
};
845
846static int
847linux_accept4(struct thread *td, struct linux_accept4_args *args)
848{
849
850	return (linux_accept_common(td, args->s, args->addr,
851	    args->namelen, args->flags));
852}
853
/* Arguments of the Linux getsockname() call, see linux_getsockname(). */
struct linux_getsockname_args {
	int s;			/* socket descriptor */
	l_uintptr_t addr;	/* user address receiving the local address */
	l_uintptr_t namelen;	/* user address of the address length */
};
859
860static int
861linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
862{
863	struct getsockname_args /* {
864		int	fdes;
865		struct sockaddr * __restrict asa;
866		socklen_t * __restrict alen;
867	} */ bsd_args;
868	int error;
869
870	bsd_args.fdes = args->s;
871	/* XXX: */
872	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
873	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
874	error = sys_getsockname(td, &bsd_args);
875	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
876	if (error)
877		return (error);
878	error = linux_sa_put(PTRIN(args->addr));
879	if (error)
880		return (error);
881	return (0);
882}
883
/* Arguments of the Linux getpeername() call, see linux_getpeername(). */
struct linux_getpeername_args {
	int s;			/* socket descriptor */
	l_uintptr_t addr;	/* user address receiving the peer address */
	l_uintptr_t namelen;	/* user address of the address length */
};
889
890static int
891linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
892{
893	struct getpeername_args /* {
894		int fdes;
895		caddr_t asa;
896		int *alen;
897	} */ bsd_args;
898	int error;
899
900	bsd_args.fdes = args->s;
901	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
902	bsd_args.alen = (int *)PTRIN(args->namelen);
903	error = sys_getpeername(td, &bsd_args);
904	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
905	if (error)
906		return (error);
907	error = linux_sa_put(PTRIN(args->addr));
908	if (error)
909		return (error);
910	return (0);
911}
912
/* Arguments of the Linux socketpair() call, see linux_socketpair(). */
struct linux_socketpair_args {
	int domain;		/* Linux address family */
	int type;		/* socket type, optionally with LINUX_SOCK_* flags */
	int protocol;		/* 0 or PF_UNIX */
	l_uintptr_t rsv;	/* user address of the int[2] result array */
};
919
920static int
921linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
922{
923	struct socketpair_args /* {
924		int domain;
925		int type;
926		int protocol;
927		int *rsv;
928	} */ bsd_args;
929	int error, socket_flags;
930	int sv[2];
931
932	bsd_args.domain = linux_to_bsd_domain(args->domain);
933	if (bsd_args.domain != PF_LOCAL)
934		return (EAFNOSUPPORT);
935
936	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
937	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
938		return (EINVAL);
939	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
940	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
941		return (EINVAL);
942
943	if (args->protocol != 0 && args->protocol != PF_UNIX)
944
945		/*
946		 * Use of PF_UNIX as protocol argument is not right,
947		 * but Linux does it.
948		 * Do not map PF_UNIX as its Linux value is identical
949		 * to FreeBSD one.
950		 */
951		return (EPROTONOSUPPORT);
952	else
953		bsd_args.protocol = 0;
954	bsd_args.rsv = (int *)PTRIN(args->rsv);
955	error = kern_socketpair(td, bsd_args.domain, bsd_args.type,
956	    bsd_args.protocol, sv);
957	if (error)
958		return (error);
959	error = linux_set_socket_flags(td, sv[0], socket_flags);
960	if (error)
961		goto out;
962	error = linux_set_socket_flags(td, sv[1], socket_flags);
963	if (error)
964		goto out;
965
966	error = copyout(sv, bsd_args.rsv, 2 * sizeof(int));
967
968out:
969	if (error) {
970		(void)kern_close(td, sv[0]);
971		(void)kern_close(td, sv[1]);
972	}
973	return (error);
974}
975
/* Arguments of the Linux send() call as consumed by linux_send(). */
struct linux_send_args {
	int s;			/* socket descriptor */
	l_uintptr_t msg;	/* user address of the data to send */
	int len;		/* number of bytes at msg */
	int flags;		/* Linux MSG_* flags */
};
982
983static int
984linux_send(struct thread *td, struct linux_send_args *args)
985{
986	struct sendto_args /* {
987		int s;
988		caddr_t buf;
989		int len;
990		int flags;
991		caddr_t to;
992		int tolen;
993	} */ bsd_args;
994
995	bsd_args.s = args->s;
996	bsd_args.buf = (caddr_t)PTRIN(args->msg);
997	bsd_args.len = args->len;
998	bsd_args.flags = args->flags;
999	bsd_args.to = NULL;
1000	bsd_args.tolen = 0;
1001	return sys_sendto(td, &bsd_args);
1002}
1003
/* Arguments of the Linux recv() call as consumed by linux_recv(). */
struct linux_recv_args {
	int s;			/* socket descriptor */
	l_uintptr_t msg;	/* user address of the receive buffer */
	int len;		/* size of the buffer at msg */
	int flags;		/* Linux MSG_* flags */
};
1010
1011static int
1012linux_recv(struct thread *td, struct linux_recv_args *args)
1013{
1014	struct recvfrom_args /* {
1015		int s;
1016		caddr_t buf;
1017		int len;
1018		int flags;
1019		struct sockaddr *from;
1020		socklen_t fromlenaddr;
1021	} */ bsd_args;
1022
1023	bsd_args.s = args->s;
1024	bsd_args.buf = (caddr_t)PTRIN(args->msg);
1025	bsd_args.len = args->len;
1026	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1027	bsd_args.from = NULL;
1028	bsd_args.fromlenaddr = 0;
1029	return (sys_recvfrom(td, &bsd_args));
1030}
1031
1032static int
1033linux_sendto(struct thread *td, struct linux_sendto_args *args)
1034{
1035	struct msghdr msg;
1036	struct iovec aiov;
1037	int error;
1038
1039	if (linux_check_hdrincl(td, args->s) == 0)
1040		/* IP_HDRINCL set, tweak the packet before sending */
1041		return (linux_sendto_hdrincl(td, args));
1042
1043	msg.msg_name = PTRIN(args->to);
1044	msg.msg_namelen = args->tolen;
1045	msg.msg_iov = &aiov;
1046	msg.msg_iovlen = 1;
1047	msg.msg_control = NULL;
1048	msg.msg_flags = 0;
1049	aiov.iov_base = PTRIN(args->msg);
1050	aiov.iov_len = args->len;
1051	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
1052	    UIO_USERSPACE);
1053	return (error);
1054}
1055
/* Arguments of the Linux recvfrom() call, see linux_recvfrom(). */
struct linux_recvfrom_args {
	int s;			/* socket descriptor */
	l_uintptr_t buf;	/* user address of the receive buffer */
	int len;		/* size of the buffer at buf */
	int flags;		/* Linux MSG_* flags */
	l_uintptr_t from;	/* user address receiving the source address */
	l_uintptr_t fromlen;	/* user address of the address length */
};
1064
1065static int
1066linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
1067{
1068	struct recvfrom_args /* {
1069		int	s;
1070		caddr_t	buf;
1071		size_t	len;
1072		int	flags;
1073		struct sockaddr * __restrict from;
1074		socklen_t * __restrict fromlenaddr;
1075	} */ bsd_args;
1076	size_t len;
1077	int error;
1078
1079	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
1080		return (error);
1081
1082	bsd_args.s = args->s;
1083	bsd_args.buf = PTRIN(args->buf);
1084	bsd_args.len = args->len;
1085	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1086	/* XXX: */
1087	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
1088	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
1089
1090	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
1091	error = sys_recvfrom(td, &bsd_args);
1092	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
1093
1094	if (error)
1095		return (error);
1096	if (args->from) {
1097		error = linux_sa_put((struct osockaddr *)
1098		    PTRIN(args->from));
1099		if (error)
1100			return (error);
1101	}
1102	return (0);
1103}
1104
/* Arguments of the Linux sendmsg() call as consumed by linux_sendmsg(). */
struct linux_sendmsg_args {
	int s;			/* socket descriptor */
	l_uintptr_t msg;	/* user address of the struct l_msghdr */
	int flags;		/* Linux MSG_* flags */
};
1110
1111static int
1112linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1113{
1114	struct cmsghdr *cmsg;
1115	struct cmsgcred cmcred;
1116	struct mbuf *control;
1117	struct msghdr msg;
1118	struct l_cmsghdr linux_cmsg;
1119	struct l_cmsghdr *ptr_cmsg;
1120	struct l_msghdr linux_msg;
1121	struct iovec *iov;
1122	socklen_t datalen;
1123	struct sockaddr *sa;
1124	sa_family_t sa_family;
1125	void *data;
1126	int error;
1127
1128	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1129	if (error)
1130		return (error);
1131
1132	/*
1133	 * Some Linux applications (ping) define a non-NULL control data
1134	 * pointer, but a msg_controllen of 0, which is not allowed in the
1135	 * FreeBSD system call interface.  NULL the msg_control pointer in
1136	 * order to handle this case.  This should be checked, but allows the
1137	 * Linux ping to work.
1138	 */
1139	if (PTRIN(linux_msg.msg_control) != NULL && linux_msg.msg_controllen == 0)
1140		linux_msg.msg_control = PTROUT(NULL);
1141
1142	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1143	if (error)
1144		return (error);
1145
1146#ifdef COMPAT_LINUX32
1147	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1148	    &iov, EMSGSIZE);
1149#else
1150	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1151#endif
1152	if (error)
1153		return (error);
1154
1155	control = NULL;
1156	cmsg = NULL;
1157
1158	if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) {
1159		error = kern_getsockname(td, args->s, &sa, &datalen);
1160		if (error)
1161			goto bad;
1162		sa_family = sa->sa_family;
1163		free(sa, M_SONAME);
1164
1165		error = ENOBUFS;
1166		cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1167		control = m_get(M_WAIT, MT_CONTROL);
1168		if (control == NULL)
1169			goto bad;
1170
1171		do {
1172			error = copyin(ptr_cmsg, &linux_cmsg,
1173			    sizeof(struct l_cmsghdr));
1174			if (error)
1175				goto bad;
1176
1177			error = EINVAL;
1178			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1179				goto bad;
1180
1181			/*
1182			 * Now we support only SCM_RIGHTS and SCM_CRED,
1183			 * so return EINVAL in any other cmsg_type
1184			 */
1185			cmsg->cmsg_type =
1186			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type);
1187			cmsg->cmsg_level =
1188			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1189			if (cmsg->cmsg_type == -1
1190			    || cmsg->cmsg_level != SOL_SOCKET)
1191				goto bad;
1192
1193			/*
1194			 * Some applications (e.g. pulseaudio) attempt to
1195			 * send ancillary data even if the underlying protocol
1196			 * doesn't support it which is not allowed in the
1197			 * FreeBSD system call interface.
1198			 */
1199			if (sa_family != AF_UNIX)
1200				continue;
1201
1202			data = LINUX_CMSG_DATA(ptr_cmsg);
1203			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1204
1205			switch (cmsg->cmsg_type)
1206			{
1207			case SCM_RIGHTS:
1208				break;
1209
1210			case SCM_CREDS:
1211				data = &cmcred;
1212				datalen = sizeof(cmcred);
1213
1214				/*
1215				 * The lower levels will fill in the structure
1216				 */
1217				bzero(data, datalen);
1218				break;
1219			}
1220
1221			cmsg->cmsg_len = CMSG_LEN(datalen);
1222
1223			error = ENOBUFS;
1224			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t) cmsg))
1225				goto bad;
1226			if (!m_append(control, datalen, (c_caddr_t) data))
1227				goto bad;
1228		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&linux_msg, ptr_cmsg)));
1229
1230		if (m_length(control, NULL) == 0) {
1231			m_freem(control);
1232			control = NULL;
1233		}
1234	}
1235
1236	msg.msg_iov = iov;
1237	msg.msg_flags = 0;
1238	error = linux_sendit(td, args->s, &msg, args->flags, control,
1239	    UIO_USERSPACE);
1240
1241bad:
1242	free(iov, M_IOV);
1243	if (cmsg)
1244		free(cmsg, M_TEMP);
1245	return (error);
1246}
1247
1248struct linux_recvmsg_args {
1249	int s;
1250	l_uintptr_t msg;
1251	int flags;
1252};
1253
1254static int
1255linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1256{
1257	struct cmsghdr *cm;
1258	struct cmsgcred *cmcred;
1259	struct msghdr msg;
1260	struct l_cmsghdr *linux_cmsg = NULL;
1261	struct l_ucred linux_ucred;
1262	socklen_t datalen, outlen;
1263	struct l_msghdr linux_msg;
1264	struct iovec *iov, *uiov;
1265	struct mbuf *control = NULL;
1266	struct mbuf **controlp;
1267	caddr_t outbuf;
1268	void *data;
1269	int error, i, fd, fds, *fdp;
1270
1271	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1272	if (error)
1273		return (error);
1274
1275	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1276	if (error)
1277		return (error);
1278
1279#ifdef COMPAT_LINUX32
1280	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1281	    &iov, EMSGSIZE);
1282#else
1283	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1284#endif
1285	if (error)
1286		return (error);
1287
1288	if (msg.msg_name) {
1289		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1290		    msg.msg_namelen);
1291		if (error)
1292			goto bad;
1293	}
1294
1295	uiov = msg.msg_iov;
1296	msg.msg_iov = iov;
1297	controlp = (msg.msg_control != NULL) ? &control : NULL;
1298	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1299	msg.msg_iov = uiov;
1300	if (error)
1301		goto bad;
1302
1303	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1304	if (error)
1305		goto bad;
1306
1307	if (linux_msg.msg_name) {
1308		error = bsd_to_linux_sockaddr((struct sockaddr *)
1309		    PTRIN(linux_msg.msg_name));
1310		if (error)
1311			goto bad;
1312	}
1313	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1314		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1315		if (error)
1316			goto bad;
1317	}
1318
1319	outbuf = PTRIN(linux_msg.msg_control);
1320	outlen = 0;
1321
1322	if (control) {
1323		linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1324
1325		msg.msg_control = mtod(control, struct cmsghdr *);
1326		msg.msg_controllen = control->m_len;
1327
1328		cm = CMSG_FIRSTHDR(&msg);
1329
1330		while (cm != NULL) {
1331			linux_cmsg->cmsg_type =
1332			    bsd_to_linux_cmsg_type(cm->cmsg_type);
1333			linux_cmsg->cmsg_level =
1334			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1335			if (linux_cmsg->cmsg_type == -1
1336			    || cm->cmsg_level != SOL_SOCKET)
1337			{
1338				error = EINVAL;
1339				goto bad;
1340			}
1341
1342			data = CMSG_DATA(cm);
1343			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1344
1345			switch (cm->cmsg_type)
1346			{
1347			case SCM_RIGHTS:
1348				if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
1349					fds = datalen / sizeof(int);
1350					fdp = data;
1351					for (i = 0; i < fds; i++) {
1352						fd = *fdp++;
1353						(void)kern_fcntl(td, fd,
1354						    F_SETFD, FD_CLOEXEC);
1355					}
1356				}
1357				break;
1358
1359			case SCM_CREDS:
1360				/*
1361				 * Currently LOCAL_CREDS is never in
1362				 * effect for Linux so no need to worry
1363				 * about sockcred
1364				 */
1365				if (datalen != sizeof (*cmcred)) {
1366					error = EMSGSIZE;
1367					goto bad;
1368				}
1369				cmcred = (struct cmsgcred *)data;
1370				bzero(&linux_ucred, sizeof(linux_ucred));
1371				linux_ucred.pid = cmcred->cmcred_pid;
1372				linux_ucred.uid = cmcred->cmcred_uid;
1373				linux_ucred.gid = cmcred->cmcred_gid;
1374				data = &linux_ucred;
1375				datalen = sizeof(linux_ucred);
1376				break;
1377			}
1378
1379			if (outlen + LINUX_CMSG_LEN(datalen) >
1380			    linux_msg.msg_controllen) {
1381				if (outlen == 0) {
1382					error = EMSGSIZE;
1383					goto bad;
1384				} else {
1385					linux_msg.msg_flags |=
1386					    LINUX_MSG_CTRUNC;
1387					goto out;
1388				}
1389			}
1390
1391			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1392
1393			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1394			if (error)
1395				goto bad;
1396			outbuf += L_CMSG_HDRSZ;
1397
1398			error = copyout(data, outbuf, datalen);
1399			if (error)
1400				goto bad;
1401
1402			outbuf += LINUX_CMSG_ALIGN(datalen);
1403			outlen += LINUX_CMSG_LEN(datalen);
1404
1405			cm = CMSG_NXTHDR(&msg, cm);
1406		}
1407	}
1408
1409out:
1410	linux_msg.msg_controllen = outlen;
1411	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1412
1413bad:
1414	free(iov, M_IOV);
1415	if (control != NULL)
1416		m_freem(control);
1417	if (linux_cmsg != NULL)
1418		free(linux_cmsg, M_TEMP);
1419
1420	return (error);
1421}
1422
/*
 * Arguments of the Linux shutdown() call as consumed by linux_shutdown().
 */
struct linux_shutdown_args {
	int	s;	/* socket descriptor */
	int	how;	/* passed unchanged to sys_shutdown() */
};
1427
1428static int
1429linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1430{
1431	struct shutdown_args /* {
1432		int s;
1433		int how;
1434	} */ bsd_args;
1435
1436	bsd_args.s = args->s;
1437	bsd_args.how = args->how;
1438	return (sys_shutdown(td, &bsd_args));
1439}
1440
1441struct linux_setsockopt_args {
1442	int s;
1443	int level;
1444	int optname;
1445	l_uintptr_t optval;
1446	int optlen;
1447};
1448
1449static int
1450linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1451{
1452	struct setsockopt_args /* {
1453		int s;
1454		int level;
1455		int name;
1456		caddr_t val;
1457		int valsize;
1458	} */ bsd_args;
1459	l_timeval linux_tv;
1460	struct timeval tv;
1461	int error, name;
1462
1463	bsd_args.s = args->s;
1464	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1465	switch (bsd_args.level) {
1466	case SOL_SOCKET:
1467		name = linux_to_bsd_so_sockopt(args->optname);
1468		switch (name) {
1469		case SO_RCVTIMEO:
1470			/* FALLTHROUGH */
1471		case SO_SNDTIMEO:
1472			error = copyin(PTRIN(args->optval), &linux_tv,
1473			    sizeof(linux_tv));
1474			if (error)
1475				return (error);
1476			tv.tv_sec = linux_tv.tv_sec;
1477			tv.tv_usec = linux_tv.tv_usec;
1478			return (kern_setsockopt(td, args->s, bsd_args.level,
1479			    name, &tv, UIO_SYSSPACE, sizeof(tv)));
1480			/* NOTREACHED */
1481			break;
1482		default:
1483			break;
1484		}
1485		break;
1486	case IPPROTO_IP:
1487		name = linux_to_bsd_ip_sockopt(args->optname);
1488		break;
1489	case IPPROTO_TCP:
1490		/* Linux TCP option values match BSD's */
1491		name = args->optname;
1492		break;
1493	default:
1494		name = -1;
1495		break;
1496	}
1497	if (name == -1)
1498		return (ENOPROTOOPT);
1499
1500	bsd_args.name = name;
1501	bsd_args.val = PTRIN(args->optval);
1502	bsd_args.valsize = args->optlen;
1503
1504	if (name == IPV6_NEXTHOP) {
1505		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1506			bsd_args.valsize);
1507		error = sys_setsockopt(td, &bsd_args);
1508		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1509	} else
1510		error = sys_setsockopt(td, &bsd_args);
1511
1512	return (error);
1513}
1514
1515struct linux_getsockopt_args {
1516	int s;
1517	int level;
1518	int optname;
1519	l_uintptr_t optval;
1520	l_uintptr_t optlen;
1521};
1522
1523static int
1524linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1525{
1526	struct getsockopt_args /* {
1527		int s;
1528		int level;
1529		int name;
1530		caddr_t val;
1531		int *avalsize;
1532	} */ bsd_args;
1533	l_timeval linux_tv;
1534	struct timeval tv;
1535	socklen_t tv_len, xulen;
1536	struct xucred xu;
1537	struct l_ucred lxu;
1538	int error, name;
1539
1540	bsd_args.s = args->s;
1541	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1542	switch (bsd_args.level) {
1543	case SOL_SOCKET:
1544		name = linux_to_bsd_so_sockopt(args->optname);
1545		switch (name) {
1546		case SO_RCVTIMEO:
1547			/* FALLTHROUGH */
1548		case SO_SNDTIMEO:
1549			tv_len = sizeof(tv);
1550			error = kern_getsockopt(td, args->s, bsd_args.level,
1551			    name, &tv, UIO_SYSSPACE, &tv_len);
1552			if (error)
1553				return (error);
1554			linux_tv.tv_sec = tv.tv_sec;
1555			linux_tv.tv_usec = tv.tv_usec;
1556			return (copyout(&linux_tv, PTRIN(args->optval),
1557			    sizeof(linux_tv)));
1558			/* NOTREACHED */
1559			break;
1560		case LOCAL_PEERCRED:
1561			if (args->optlen != sizeof(lxu))
1562				return (EINVAL);
1563			xulen = sizeof(xu);
1564			error = kern_getsockopt(td, args->s, bsd_args.level,
1565			    name, &xu, UIO_SYSSPACE, &xulen);
1566			if (error)
1567				return (error);
1568			/*
1569			 * XXX Use 0 for pid as the FreeBSD does not cache peer pid.
1570			 */
1571			lxu.pid = 0;
1572			lxu.uid = xu.cr_uid;
1573			lxu.gid = xu.cr_gid;
1574			return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu)));
1575			/* NOTREACHED */
1576			break;
1577		default:
1578			break;
1579		}
1580		break;
1581	case IPPROTO_IP:
1582		name = linux_to_bsd_ip_sockopt(args->optname);
1583		break;
1584	case IPPROTO_TCP:
1585		/* Linux TCP option values match BSD's */
1586		name = args->optname;
1587		break;
1588	default:
1589		name = -1;
1590		break;
1591	}
1592	if (name == -1)
1593		return (EINVAL);
1594
1595	bsd_args.name = name;
1596	bsd_args.val = PTRIN(args->optval);
1597	bsd_args.avalsize = PTRIN(args->optlen);
1598
1599	if (name == IPV6_NEXTHOP) {
1600		error = sys_getsockopt(td, &bsd_args);
1601		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1602	} else
1603		error = sys_getsockopt(td, &bsd_args);
1604
1605	return (error);
1606}
1607
1608/* Argument list sizes for linux_socketcall */
1609
1610#define LINUX_AL(x) ((x) * sizeof(l_ulong))
1611
1612static const unsigned char lxs_args[] = {
1613	LINUX_AL(0) /* unused*/,	LINUX_AL(3) /* socket */,
1614	LINUX_AL(3) /* bind */,		LINUX_AL(3) /* connect */,
1615	LINUX_AL(2) /* listen */,	LINUX_AL(3) /* accept */,
1616	LINUX_AL(3) /* getsockname */,	LINUX_AL(3) /* getpeername */,
1617	LINUX_AL(4) /* socketpair */,	LINUX_AL(4) /* send */,
1618	LINUX_AL(4) /* recv */,		LINUX_AL(6) /* sendto */,
1619	LINUX_AL(6) /* recvfrom */,	LINUX_AL(2) /* shutdown */,
1620	LINUX_AL(5) /* setsockopt */,	LINUX_AL(5) /* getsockopt */,
1621	LINUX_AL(3) /* sendmsg */,	LINUX_AL(3) /* recvmsg */,
1622	LINUX_AL(4) /* accept4 */
1623};
1624
1625#define	LINUX_AL_SIZE	sizeof(lxs_args) / sizeof(lxs_args[0]) - 1
1626
1627int
1628linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1629{
1630	l_ulong a[6];
1631	void *arg;
1632	int error;
1633
1634	if (args->what < LINUX_SOCKET || args->what > LINUX_AL_SIZE)
1635		return (EINVAL);
1636	error = copyin(PTRIN(args->args), a, lxs_args[args->what]);
1637	if (error)
1638		return (error);
1639
1640	arg = a;
1641	switch (args->what) {
1642	case LINUX_SOCKET:
1643		return (linux_socket(td, arg));
1644	case LINUX_BIND:
1645		return (linux_bind(td, arg));
1646	case LINUX_CONNECT:
1647		return (linux_connect(td, arg));
1648	case LINUX_LISTEN:
1649		return (linux_listen(td, arg));
1650	case LINUX_ACCEPT:
1651		return (linux_accept(td, arg));
1652	case LINUX_GETSOCKNAME:
1653		return (linux_getsockname(td, arg));
1654	case LINUX_GETPEERNAME:
1655		return (linux_getpeername(td, arg));
1656	case LINUX_SOCKETPAIR:
1657		return (linux_socketpair(td, arg));
1658	case LINUX_SEND:
1659		return (linux_send(td, arg));
1660	case LINUX_RECV:
1661		return (linux_recv(td, arg));
1662	case LINUX_SENDTO:
1663		return (linux_sendto(td, arg));
1664	case LINUX_RECVFROM:
1665		return (linux_recvfrom(td, arg));
1666	case LINUX_SHUTDOWN:
1667		return (linux_shutdown(td, arg));
1668	case LINUX_SETSOCKOPT:
1669		return (linux_setsockopt(td, arg));
1670	case LINUX_GETSOCKOPT:
1671		return (linux_getsockopt(td, arg));
1672	case LINUX_SENDMSG:
1673		return (linux_sendmsg(td, arg));
1674	case LINUX_RECVMSG:
1675		return (linux_recvmsg(td, arg));
1676	case LINUX_ACCEPT4:
1677		return (linux_accept4(td, arg));
1678	}
1679
1680	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1681	return (ENOSYS);
1682}
1683