linux_socket.c revision 226068
1/*-
2 * Copyright (c) 1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 226068 2011-10-06 19:59:14Z jkim $");
31
32/* XXX we use functions that might not exist. */
33#include "opt_compat.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/proc.h>
38#include <sys/systm.h>
39#include <sys/sysproto.h>
40#include <sys/capability.h>
41#include <sys/fcntl.h>
42#include <sys/file.h>
43#include <sys/limits.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mutex.h>
47#include <sys/mbuf.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/syscallsubr.h>
51#include <sys/uio.h>
52#include <sys/syslog.h>
53#include <sys/un.h>
54
55#include <net/if.h>
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#ifdef INET6
60#include <netinet/ip6.h>
61#include <netinet6/ip6_var.h>
62#include <netinet6/in6_var.h>
63#endif
64
65#ifdef COMPAT_LINUX32
66#include <machine/../linux32/linux.h>
67#include <machine/../linux32/linux32_proto.h>
68#else
69#include <machine/../linux/linux.h>
70#include <machine/../linux/linux_proto.h>
71#endif
72#include <compat/linux/linux_socket.h>
73#include <compat/linux/linux_util.h>
74
/* Forward declarations for helpers that are used before they are defined. */
static int do_sa_get(struct sockaddr **sap, const struct osockaddr *osa,
    int *osalen, struct malloc_type *mtype);
static int linux_to_bsd_domain(int domain);
78
79/*
80 * Reads a linux sockaddr and does any necessary translation.
81 * Linux sockaddrs don't have a length field, only a family.
82 */
83static int
84linux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
85{
86	int osalen = len;
87
88	return (do_sa_get(sap, osa, &osalen, M_SONAME));
89}
90
91/*
92 * Copy the osockaddr structure pointed to by osa to kernel, adjust
93 * family and convert to sockaddr.
94 */
95static int
96do_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
97    struct malloc_type *mtype)
98{
99	int error=0, bdom;
100	struct sockaddr *sa;
101	struct osockaddr *kosa;
102#ifdef INET6
103	int oldv6size;
104	struct sockaddr_in6 *sin6;
105#endif
106	int alloclen, hdrlen, namelen;
107
108	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
109		return (EINVAL);
110
111	alloclen = *osalen;
112#ifdef INET6
113	oldv6size = 0;
114	/*
115	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
116	 * if it's a v4-mapped address, so reserve the proper space
117	 * for it.
118	 */
119	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
120		alloclen = sizeof (struct sockaddr_in6);
121		oldv6size = 1;
122	}
123#endif
124
125	kosa = malloc(alloclen, mtype, M_WAITOK);
126
127	if ((error = copyin(osa, kosa, *osalen)))
128		goto out;
129
130	bdom = linux_to_bsd_domain(kosa->sa_family);
131	if (bdom == -1) {
132		error = EAFNOSUPPORT;
133		goto out;
134	}
135
136#ifdef INET6
137	/*
138	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
139	 * which lacks the scope id compared with RFC2553 one. If we detect
140	 * the situation, reject the address and write a message to system log.
141	 *
142	 * Still accept addresses for which the scope id is not used.
143	 */
144	if (oldv6size && bdom == AF_INET6) {
145		sin6 = (struct sockaddr_in6 *)kosa;
146		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
147		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
148		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
149		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
150		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
151		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
152			sin6->sin6_scope_id = 0;
153		} else {
154			log(LOG_DEBUG,
155			    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
156			error = EINVAL;
157			goto out;
158		}
159	} else
160#endif
161	if (bdom == AF_INET) {
162		alloclen = sizeof(struct sockaddr_in);
163		if (*osalen < alloclen) {
164			error = EINVAL;
165			goto out;
166		}
167	}
168
169	if (bdom == AF_LOCAL && *osalen > sizeof(struct sockaddr_un)) {
170		hdrlen = offsetof(struct sockaddr_un, sun_path);
171		namelen = strnlen(((struct sockaddr_un *)kosa)->sun_path,
172		    *osalen - hdrlen);
173		if (hdrlen + namelen > sizeof(struct sockaddr_un)) {
174			error = EINVAL;
175			goto out;
176		}
177		alloclen = sizeof(struct sockaddr_un);
178	}
179
180	sa = (struct sockaddr *) kosa;
181	sa->sa_family = bdom;
182	sa->sa_len = alloclen;
183
184	*sap = sa;
185	*osalen = alloclen;
186	return (0);
187
188out:
189	free(kosa, mtype);
190	return (error);
191}
192
193static int
194linux_to_bsd_domain(int domain)
195{
196
197	switch (domain) {
198	case LINUX_AF_UNSPEC:
199		return (AF_UNSPEC);
200	case LINUX_AF_UNIX:
201		return (AF_LOCAL);
202	case LINUX_AF_INET:
203		return (AF_INET);
204	case LINUX_AF_INET6:
205		return (AF_INET6);
206	case LINUX_AF_AX25:
207		return (AF_CCITT);
208	case LINUX_AF_IPX:
209		return (AF_IPX);
210	case LINUX_AF_APPLETALK:
211		return (AF_APPLETALK);
212	}
213	return (-1);
214}
215
216static int
217bsd_to_linux_domain(int domain)
218{
219
220	switch (domain) {
221	case AF_UNSPEC:
222		return (LINUX_AF_UNSPEC);
223	case AF_LOCAL:
224		return (LINUX_AF_UNIX);
225	case AF_INET:
226		return (LINUX_AF_INET);
227	case AF_INET6:
228		return (LINUX_AF_INET6);
229	case AF_CCITT:
230		return (LINUX_AF_AX25);
231	case AF_IPX:
232		return (LINUX_AF_IPX);
233	case AF_APPLETALK:
234		return (LINUX_AF_APPLETALK);
235	}
236	return (-1);
237}
238
239static int
240linux_to_bsd_sockopt_level(int level)
241{
242
243	switch (level) {
244	case LINUX_SOL_SOCKET:
245		return (SOL_SOCKET);
246	}
247	return (level);
248}
249
250static int
251bsd_to_linux_sockopt_level(int level)
252{
253
254	switch (level) {
255	case SOL_SOCKET:
256		return (LINUX_SOL_SOCKET);
257	}
258	return (level);
259}
260
261static int
262linux_to_bsd_ip_sockopt(int opt)
263{
264
265	switch (opt) {
266	case LINUX_IP_TOS:
267		return (IP_TOS);
268	case LINUX_IP_TTL:
269		return (IP_TTL);
270	case LINUX_IP_OPTIONS:
271		return (IP_OPTIONS);
272	case LINUX_IP_MULTICAST_IF:
273		return (IP_MULTICAST_IF);
274	case LINUX_IP_MULTICAST_TTL:
275		return (IP_MULTICAST_TTL);
276	case LINUX_IP_MULTICAST_LOOP:
277		return (IP_MULTICAST_LOOP);
278	case LINUX_IP_ADD_MEMBERSHIP:
279		return (IP_ADD_MEMBERSHIP);
280	case LINUX_IP_DROP_MEMBERSHIP:
281		return (IP_DROP_MEMBERSHIP);
282	case LINUX_IP_HDRINCL:
283		return (IP_HDRINCL);
284	}
285	return (-1);
286}
287
288static int
289linux_to_bsd_so_sockopt(int opt)
290{
291
292	switch (opt) {
293	case LINUX_SO_DEBUG:
294		return (SO_DEBUG);
295	case LINUX_SO_REUSEADDR:
296		return (SO_REUSEADDR);
297	case LINUX_SO_TYPE:
298		return (SO_TYPE);
299	case LINUX_SO_ERROR:
300		return (SO_ERROR);
301	case LINUX_SO_DONTROUTE:
302		return (SO_DONTROUTE);
303	case LINUX_SO_BROADCAST:
304		return (SO_BROADCAST);
305	case LINUX_SO_SNDBUF:
306		return (SO_SNDBUF);
307	case LINUX_SO_RCVBUF:
308		return (SO_RCVBUF);
309	case LINUX_SO_KEEPALIVE:
310		return (SO_KEEPALIVE);
311	case LINUX_SO_OOBINLINE:
312		return (SO_OOBINLINE);
313	case LINUX_SO_LINGER:
314		return (SO_LINGER);
315	case LINUX_SO_PEERCRED:
316		return (LOCAL_PEERCRED);
317	case LINUX_SO_RCVLOWAT:
318		return (SO_RCVLOWAT);
319	case LINUX_SO_SNDLOWAT:
320		return (SO_SNDLOWAT);
321	case LINUX_SO_RCVTIMEO:
322		return (SO_RCVTIMEO);
323	case LINUX_SO_SNDTIMEO:
324		return (SO_SNDTIMEO);
325	case LINUX_SO_TIMESTAMP:
326		return (SO_TIMESTAMP);
327	case LINUX_SO_ACCEPTCONN:
328		return (SO_ACCEPTCONN);
329	}
330	return (-1);
331}
332
333static int
334linux_to_bsd_msg_flags(int flags)
335{
336	int ret_flags = 0;
337
338	if (flags & LINUX_MSG_OOB)
339		ret_flags |= MSG_OOB;
340	if (flags & LINUX_MSG_PEEK)
341		ret_flags |= MSG_PEEK;
342	if (flags & LINUX_MSG_DONTROUTE)
343		ret_flags |= MSG_DONTROUTE;
344	if (flags & LINUX_MSG_CTRUNC)
345		ret_flags |= MSG_CTRUNC;
346	if (flags & LINUX_MSG_TRUNC)
347		ret_flags |= MSG_TRUNC;
348	if (flags & LINUX_MSG_DONTWAIT)
349		ret_flags |= MSG_DONTWAIT;
350	if (flags & LINUX_MSG_EOR)
351		ret_flags |= MSG_EOR;
352	if (flags & LINUX_MSG_WAITALL)
353		ret_flags |= MSG_WAITALL;
354	if (flags & LINUX_MSG_NOSIGNAL)
355		ret_flags |= MSG_NOSIGNAL;
356#if 0 /* not handled */
357	if (flags & LINUX_MSG_PROXY)
358		;
359	if (flags & LINUX_MSG_FIN)
360		;
361	if (flags & LINUX_MSG_SYN)
362		;
363	if (flags & LINUX_MSG_CONFIRM)
364		;
365	if (flags & LINUX_MSG_RST)
366		;
367	if (flags & LINUX_MSG_ERRQUEUE)
368		;
369#endif
370	return ret_flags;
371}
372
373/*
374* If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
375* native syscall will fault.  Thus, we don't really need to check the
376* return values for these functions.
377*/
378
379static int
380bsd_to_linux_sockaddr(struct sockaddr *arg)
381{
382	struct sockaddr sa;
383	size_t sa_len = sizeof(struct sockaddr);
384	int error;
385
386	if ((error = copyin(arg, &sa, sa_len)))
387		return (error);
388
389	*(u_short *)&sa = sa.sa_family;
390
391	error = copyout(&sa, arg, sa_len);
392
393	return (error);
394}
395
396static int
397linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
398{
399	struct sockaddr sa;
400	size_t sa_len = sizeof(struct sockaddr);
401	int error;
402
403	if ((error = copyin(arg, &sa, sa_len)))
404		return (error);
405
406	sa.sa_family = *(sa_family_t *)&sa;
407	sa.sa_len = len;
408
409	error = copyout(&sa, arg, sa_len);
410
411	return (error);
412}
413
414
415static int
416linux_sa_put(struct osockaddr *osa)
417{
418	struct osockaddr sa;
419	int error, bdom;
420
421	/*
422	 * Only read/write the osockaddr family part, the rest is
423	 * not changed.
424	 */
425	error = copyin(osa, &sa, sizeof(sa.sa_family));
426	if (error)
427		return (error);
428
429	bdom = bsd_to_linux_domain(sa.sa_family);
430	if (bdom == -1)
431		return (EINVAL);
432
433	sa.sa_family = bdom;
434	error = copyout(&sa, osa, sizeof(sa.sa_family));
435	if (error)
436		return (error);
437
438	return (0);
439}
440
441static int
442linux_to_bsd_cmsg_type(int cmsg_type)
443{
444
445	switch (cmsg_type) {
446	case LINUX_SCM_RIGHTS:
447		return (SCM_RIGHTS);
448	case LINUX_SCM_CREDENTIALS:
449		return (SCM_CREDS);
450	}
451	return (-1);
452}
453
454static int
455bsd_to_linux_cmsg_type(int cmsg_type)
456{
457
458	switch (cmsg_type) {
459	case SCM_RIGHTS:
460		return (LINUX_SCM_RIGHTS);
461	case SCM_CREDS:
462		return (LINUX_SCM_CREDENTIALS);
463	}
464	return (-1);
465}
466
467static int
468linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
469{
470	if (lhdr->msg_controllen > INT_MAX)
471		return (ENOBUFS);
472
473	bhdr->msg_name		= PTRIN(lhdr->msg_name);
474	bhdr->msg_namelen	= lhdr->msg_namelen;
475	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
476	bhdr->msg_iovlen	= lhdr->msg_iovlen;
477	bhdr->msg_control	= PTRIN(lhdr->msg_control);
478
479	/*
480	 * msg_controllen is skipped since BSD and LINUX control messages
481	 * are potentially different sizes (e.g. the cred structure used
482	 * by SCM_CREDS is different between the two operating system).
483	 *
484	 * The caller can set it (if necessary) after converting all the
485	 * control messages.
486	 */
487
488	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
489	return (0);
490}
491
492static int
493bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
494{
495	lhdr->msg_name		= PTROUT(bhdr->msg_name);
496	lhdr->msg_namelen	= bhdr->msg_namelen;
497	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
498	lhdr->msg_iovlen	= bhdr->msg_iovlen;
499	lhdr->msg_control	= PTROUT(bhdr->msg_control);
500
501	/*
502	 * msg_controllen is skipped since BSD and LINUX control messages
503	 * are potentially different sizes (e.g. the cred structure used
504	 * by SCM_CREDS is different between the two operating system).
505	 *
506	 * The caller can set it (if necessary) after converting all the
507	 * control messages.
508	 */
509
510	/* msg_flags skipped */
511	return (0);
512}
513
514static int
515linux_set_socket_flags(struct thread *td, int s, int flags)
516{
517	int error;
518
519	if (flags & LINUX_SOCK_NONBLOCK) {
520		error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK);
521		if (error)
522			return (error);
523	}
524	if (flags & LINUX_SOCK_CLOEXEC) {
525		error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC);
526		if (error)
527			return (error);
528	}
529	return (0);
530}
531
532static int
533linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
534    struct mbuf *control, enum uio_seg segflg)
535{
536	struct sockaddr *to;
537	int error;
538
539	if (mp->msg_name != NULL) {
540		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
541		if (error)
542			return (error);
543		mp->msg_name = to;
544	} else
545		to = NULL;
546
547	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
548	    segflg);
549
550	if (to)
551		free(to, M_SONAME);
552	return (error);
553}
554
555/* Return 0 if IP_HDRINCL is set for the given socket. */
556static int
557linux_check_hdrincl(struct thread *td, int s)
558{
559	int error, optval, size_val;
560
561	size_val = sizeof(optval);
562	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
563	    &optval, UIO_SYSSPACE, &size_val);
564	if (error)
565		return (error);
566
567	return (optval == 0);
568}
569
570struct linux_sendto_args {
571	int s;
572	l_uintptr_t msg;
573	int len;
574	int flags;
575	l_uintptr_t to;
576	int tolen;
577};
578
579/*
580 * Updated sendto() when IP_HDRINCL is set:
581 * tweak endian-dependent fields in the IP packet.
582 */
583static int
584linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
585{
586/*
587 * linux_ip_copysize defines how many bytes we should copy
588 * from the beginning of the IP packet before we customize it for BSD.
589 * It should include all the fields we modify (ip_len and ip_off).
590 */
591#define linux_ip_copysize	8
592
593	struct ip *packet;
594	struct msghdr msg;
595	struct iovec aiov[1];
596	int error;
597
598	/* Check that the packet isn't too big or too small. */
599	if (linux_args->len < linux_ip_copysize ||
600	    linux_args->len > IP_MAXPACKET)
601		return (EINVAL);
602
603	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
604
605	/* Make kernel copy of the packet to be sent */
606	if ((error = copyin(PTRIN(linux_args->msg), packet,
607	    linux_args->len)))
608		goto goout;
609
610	/* Convert fields from Linux to BSD raw IP socket format */
611	packet->ip_len = linux_args->len;
612	packet->ip_off = ntohs(packet->ip_off);
613
614	/* Prepare the msghdr and iovec structures describing the new packet */
615	msg.msg_name = PTRIN(linux_args->to);
616	msg.msg_namelen = linux_args->tolen;
617	msg.msg_iov = aiov;
618	msg.msg_iovlen = 1;
619	msg.msg_control = NULL;
620	msg.msg_flags = 0;
621	aiov[0].iov_base = (char *)packet;
622	aiov[0].iov_len = linux_args->len;
623	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
624	    NULL, UIO_SYSSPACE);
625goout:
626	free(packet, M_TEMP);
627	return (error);
628}
629
630struct linux_socket_args {
631	int domain;
632	int type;
633	int protocol;
634};
635
636static int
637linux_socket(struct thread *td, struct linux_socket_args *args)
638{
639	struct socket_args /* {
640		int domain;
641		int type;
642		int protocol;
643	} */ bsd_args;
644	int retval_socket, socket_flags;
645
646	bsd_args.protocol = args->protocol;
647	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
648	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
649		return (EINVAL);
650	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
651	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
652		return (EINVAL);
653	bsd_args.domain = linux_to_bsd_domain(args->domain);
654	if (bsd_args.domain == -1)
655		return (EAFNOSUPPORT);
656
657	retval_socket = sys_socket(td, &bsd_args);
658	if (retval_socket)
659		return (retval_socket);
660
661	retval_socket = linux_set_socket_flags(td, td->td_retval[0],
662	    socket_flags);
663	if (retval_socket) {
664		(void)kern_close(td, td->td_retval[0]);
665		goto out;
666	}
667
668	if (bsd_args.type == SOCK_RAW
669	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
670	    && bsd_args.domain == PF_INET) {
671		/* It's a raw IP socket: set the IP_HDRINCL option. */
672		int hdrincl;
673
674		hdrincl = 1;
675		/* We ignore any error returned by kern_setsockopt() */
676		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
677		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
678	}
679#ifdef INET6
680	/*
681	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default
682	 * and some apps depend on this. So, set V6ONLY to 0 for Linux apps.
683	 * For simplicity we do this unconditionally of the net.inet6.ip6.v6only
684	 * sysctl value.
685	 */
686	if (bsd_args.domain == PF_INET6) {
687		int v6only;
688
689		v6only = 0;
690		/* We ignore any error returned by setsockopt() */
691		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
692		    &v6only, UIO_SYSSPACE, sizeof(v6only));
693	}
694#endif
695
696out:
697	return (retval_socket);
698}
699
700struct linux_bind_args {
701	int s;
702	l_uintptr_t name;
703	int namelen;
704};
705
706static int
707linux_bind(struct thread *td, struct linux_bind_args *args)
708{
709	struct sockaddr *sa;
710	int error;
711
712	error = linux_getsockaddr(&sa, PTRIN(args->name),
713	    args->namelen);
714	if (error)
715		return (error);
716
717	error = kern_bind(td, args->s, sa);
718	free(sa, M_SONAME);
719	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
720	   	return (EINVAL);
721	return (error);
722}
723
724struct linux_connect_args {
725	int s;
726	l_uintptr_t name;
727	int namelen;
728};
729int linux_connect(struct thread *, struct linux_connect_args *);
730
731int
732linux_connect(struct thread *td, struct linux_connect_args *args)
733{
734	struct socket *so;
735	struct sockaddr *sa;
736	u_int fflag;
737	int error;
738
739	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
740	    args->namelen);
741	if (error)
742		return (error);
743
744	error = kern_connect(td, args->s, sa);
745	free(sa, M_SONAME);
746	if (error != EISCONN)
747		return (error);
748
749	/*
750	 * Linux doesn't return EISCONN the first time it occurs,
751	 * when on a non-blocking socket. Instead it returns the
752	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
753	 *
754	 * XXXRW: Instead of using fgetsock(), check that it is a
755	 * socket and use the file descriptor reference instead of
756	 * creating a new one.
757	 */
758	error = fgetsock(td, args->s, CAP_CONNECT, &so, &fflag);
759	if (error == 0) {
760		error = EISCONN;
761		if (fflag & FNONBLOCK) {
762			SOCK_LOCK(so);
763			if (so->so_emuldata == 0)
764				error = so->so_error;
765			so->so_emuldata = (void *)1;
766			SOCK_UNLOCK(so);
767		}
768		fputsock(so);
769	}
770	return (error);
771}
772
773struct linux_listen_args {
774	int s;
775	int backlog;
776};
777
778static int
779linux_listen(struct thread *td, struct linux_listen_args *args)
780{
781	struct listen_args /* {
782		int s;
783		int backlog;
784	} */ bsd_args;
785
786	bsd_args.s = args->s;
787	bsd_args.backlog = args->backlog;
788	return (sys_listen(td, &bsd_args));
789}
790
791static int
792linux_accept_common(struct thread *td, int s, l_uintptr_t addr,
793    l_uintptr_t namelen, int flags)
794{
795	struct accept_args /* {
796		int	s;
797		struct sockaddr * __restrict name;
798		socklen_t * __restrict anamelen;
799	} */ bsd_args;
800	int error;
801
802	if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
803		return (EINVAL);
804
805	bsd_args.s = s;
806	/* XXX: */
807	bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr);
808	bsd_args.anamelen = PTRIN(namelen);/* XXX */
809	error = sys_accept(td, &bsd_args);
810	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
811	if (error) {
812		if (error == EFAULT && namelen != sizeof(struct sockaddr_in))
813			return (EINVAL);
814		return (error);
815	}
816
817	/*
818	 * linux appears not to copy flags from the parent socket to the
819	 * accepted one, so we must clear the flags in the new descriptor
820	 * and apply the requested flags.
821	 */
822	error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0);
823	if (error)
824		goto out;
825	error = linux_set_socket_flags(td, td->td_retval[0], flags);
826	if (error)
827		goto out;
828	if (addr)
829		error = linux_sa_put(PTRIN(addr));
830
831out:
832	if (error) {
833		(void)kern_close(td, td->td_retval[0]);
834		td->td_retval[0] = 0;
835	}
836	return (error);
837}
838
839struct linux_accept_args {
840	int s;
841	l_uintptr_t addr;
842	l_uintptr_t namelen;
843};
844
845static int
846linux_accept(struct thread *td, struct linux_accept_args *args)
847{
848
849	return (linux_accept_common(td, args->s, args->addr,
850	    args->namelen, 0));
851}
852
853struct linux_accept4_args {
854	int s;
855	l_uintptr_t addr;
856	l_uintptr_t namelen;
857	int flags;
858};
859
860static int
861linux_accept4(struct thread *td, struct linux_accept4_args *args)
862{
863
864	return (linux_accept_common(td, args->s, args->addr,
865	    args->namelen, args->flags));
866}
867
868struct linux_getsockname_args {
869	int s;
870	l_uintptr_t addr;
871	l_uintptr_t namelen;
872};
873
874static int
875linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
876{
877	struct getsockname_args /* {
878		int	fdes;
879		struct sockaddr * __restrict asa;
880		socklen_t * __restrict alen;
881	} */ bsd_args;
882	int error;
883
884	bsd_args.fdes = args->s;
885	/* XXX: */
886	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
887	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
888	error = sys_getsockname(td, &bsd_args);
889	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
890	if (error)
891		return (error);
892	error = linux_sa_put(PTRIN(args->addr));
893	if (error)
894		return (error);
895	return (0);
896}
897
898struct linux_getpeername_args {
899	int s;
900	l_uintptr_t addr;
901	l_uintptr_t namelen;
902};
903
904static int
905linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
906{
907	struct getpeername_args /* {
908		int fdes;
909		caddr_t asa;
910		int *alen;
911	} */ bsd_args;
912	int error;
913
914	bsd_args.fdes = args->s;
915	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
916	bsd_args.alen = (int *)PTRIN(args->namelen);
917	error = sys_getpeername(td, &bsd_args);
918	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
919	if (error)
920		return (error);
921	error = linux_sa_put(PTRIN(args->addr));
922	if (error)
923		return (error);
924	return (0);
925}
926
927struct linux_socketpair_args {
928	int domain;
929	int type;
930	int protocol;
931	l_uintptr_t rsv;
932};
933
934static int
935linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
936{
937	struct socketpair_args /* {
938		int domain;
939		int type;
940		int protocol;
941		int *rsv;
942	} */ bsd_args;
943	int error, socket_flags;
944	int sv[2];
945
946	bsd_args.domain = linux_to_bsd_domain(args->domain);
947	if (bsd_args.domain != PF_LOCAL)
948		return (EAFNOSUPPORT);
949
950	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
951	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
952		return (EINVAL);
953	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
954	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
955		return (EINVAL);
956
957	if (args->protocol != 0 && args->protocol != PF_UNIX)
958
959		/*
960		 * Use of PF_UNIX as protocol argument is not right,
961		 * but Linux does it.
962		 * Do not map PF_UNIX as its Linux value is identical
963		 * to FreeBSD one.
964		 */
965		return (EPROTONOSUPPORT);
966	else
967		bsd_args.protocol = 0;
968	bsd_args.rsv = (int *)PTRIN(args->rsv);
969	error = kern_socketpair(td, bsd_args.domain, bsd_args.type,
970	    bsd_args.protocol, sv);
971	if (error)
972		return (error);
973	error = linux_set_socket_flags(td, sv[0], socket_flags);
974	if (error)
975		goto out;
976	error = linux_set_socket_flags(td, sv[1], socket_flags);
977	if (error)
978		goto out;
979
980	error = copyout(sv, bsd_args.rsv, 2 * sizeof(int));
981
982out:
983	if (error) {
984		(void)kern_close(td, sv[0]);
985		(void)kern_close(td, sv[1]);
986	}
987	return (error);
988}
989
990struct linux_send_args {
991	int s;
992	l_uintptr_t msg;
993	int len;
994	int flags;
995};
996
997static int
998linux_send(struct thread *td, struct linux_send_args *args)
999{
1000	struct sendto_args /* {
1001		int s;
1002		caddr_t buf;
1003		int len;
1004		int flags;
1005		caddr_t to;
1006		int tolen;
1007	} */ bsd_args;
1008
1009	bsd_args.s = args->s;
1010	bsd_args.buf = (caddr_t)PTRIN(args->msg);
1011	bsd_args.len = args->len;
1012	bsd_args.flags = args->flags;
1013	bsd_args.to = NULL;
1014	bsd_args.tolen = 0;
1015	return sys_sendto(td, &bsd_args);
1016}
1017
1018struct linux_recv_args {
1019	int s;
1020	l_uintptr_t msg;
1021	int len;
1022	int flags;
1023};
1024
1025static int
1026linux_recv(struct thread *td, struct linux_recv_args *args)
1027{
1028	struct recvfrom_args /* {
1029		int s;
1030		caddr_t buf;
1031		int len;
1032		int flags;
1033		struct sockaddr *from;
1034		socklen_t fromlenaddr;
1035	} */ bsd_args;
1036
1037	bsd_args.s = args->s;
1038	bsd_args.buf = (caddr_t)PTRIN(args->msg);
1039	bsd_args.len = args->len;
1040	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1041	bsd_args.from = NULL;
1042	bsd_args.fromlenaddr = 0;
1043	return (sys_recvfrom(td, &bsd_args));
1044}
1045
1046static int
1047linux_sendto(struct thread *td, struct linux_sendto_args *args)
1048{
1049	struct msghdr msg;
1050	struct iovec aiov;
1051	int error;
1052
1053	if (linux_check_hdrincl(td, args->s) == 0)
1054		/* IP_HDRINCL set, tweak the packet before sending */
1055		return (linux_sendto_hdrincl(td, args));
1056
1057	msg.msg_name = PTRIN(args->to);
1058	msg.msg_namelen = args->tolen;
1059	msg.msg_iov = &aiov;
1060	msg.msg_iovlen = 1;
1061	msg.msg_control = NULL;
1062	msg.msg_flags = 0;
1063	aiov.iov_base = PTRIN(args->msg);
1064	aiov.iov_len = args->len;
1065	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
1066	    UIO_USERSPACE);
1067	return (error);
1068}
1069
1070struct linux_recvfrom_args {
1071	int s;
1072	l_uintptr_t buf;
1073	int len;
1074	int flags;
1075	l_uintptr_t from;
1076	l_uintptr_t fromlen;
1077};
1078
1079static int
1080linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
1081{
1082	struct recvfrom_args /* {
1083		int	s;
1084		caddr_t	buf;
1085		size_t	len;
1086		int	flags;
1087		struct sockaddr * __restrict from;
1088		socklen_t * __restrict fromlenaddr;
1089	} */ bsd_args;
1090	size_t len;
1091	int error;
1092
1093	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
1094		return (error);
1095
1096	bsd_args.s = args->s;
1097	bsd_args.buf = PTRIN(args->buf);
1098	bsd_args.len = args->len;
1099	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1100	/* XXX: */
1101	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
1102	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
1103
1104	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
1105	error = sys_recvfrom(td, &bsd_args);
1106	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
1107
1108	if (error)
1109		return (error);
1110	if (args->from) {
1111		error = linux_sa_put((struct osockaddr *)
1112		    PTRIN(args->from));
1113		if (error)
1114			return (error);
1115	}
1116	return (0);
1117}
1118
1119struct linux_sendmsg_args {
1120	int s;
1121	l_uintptr_t msg;
1122	int flags;
1123};
1124
1125static int
1126linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1127{
1128	struct cmsghdr *cmsg;
1129	struct cmsgcred cmcred;
1130	struct mbuf *control;
1131	struct msghdr msg;
1132	struct l_cmsghdr linux_cmsg;
1133	struct l_cmsghdr *ptr_cmsg;
1134	struct l_msghdr linux_msg;
1135	struct iovec *iov;
1136	socklen_t datalen;
1137	struct sockaddr *sa;
1138	sa_family_t sa_family;
1139	void *data;
1140	int error;
1141
1142	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1143	if (error)
1144		return (error);
1145
1146	/*
1147	 * Some Linux applications (ping) define a non-NULL control data
1148	 * pointer, but a msg_controllen of 0, which is not allowed in the
1149	 * FreeBSD system call interface.  NULL the msg_control pointer in
1150	 * order to handle this case.  This should be checked, but allows the
1151	 * Linux ping to work.
1152	 */
1153	if (PTRIN(linux_msg.msg_control) != NULL && linux_msg.msg_controllen == 0)
1154		linux_msg.msg_control = PTROUT(NULL);
1155
1156	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1157	if (error)
1158		return (error);
1159
1160#ifdef COMPAT_LINUX32
1161	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1162	    &iov, EMSGSIZE);
1163#else
1164	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1165#endif
1166	if (error)
1167		return (error);
1168
1169	control = NULL;
1170	cmsg = NULL;
1171
1172	if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) {
1173		error = kern_getsockname(td, args->s, &sa, &datalen);
1174		if (error)
1175			goto bad;
1176		sa_family = sa->sa_family;
1177		free(sa, M_SONAME);
1178
1179		error = ENOBUFS;
1180		cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1181		control = m_get(M_WAIT, MT_CONTROL);
1182		if (control == NULL)
1183			goto bad;
1184
1185		do {
1186			error = copyin(ptr_cmsg, &linux_cmsg,
1187			    sizeof(struct l_cmsghdr));
1188			if (error)
1189				goto bad;
1190
1191			error = EINVAL;
1192			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1193				goto bad;
1194
1195			/*
1196			 * Now we support only SCM_RIGHTS and SCM_CRED,
1197			 * so return EINVAL in any other cmsg_type
1198			 */
1199			cmsg->cmsg_type =
1200			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type);
1201			cmsg->cmsg_level =
1202			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1203			if (cmsg->cmsg_type == -1
1204			    || cmsg->cmsg_level != SOL_SOCKET)
1205				goto bad;
1206
1207			/*
1208			 * Some applications (e.g. pulseaudio) attempt to
1209			 * send ancillary data even if the underlying protocol
1210			 * doesn't support it which is not allowed in the
1211			 * FreeBSD system call interface.
1212			 */
1213			if (sa_family != AF_UNIX)
1214				continue;
1215
1216			data = LINUX_CMSG_DATA(ptr_cmsg);
1217			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1218
1219			switch (cmsg->cmsg_type)
1220			{
1221			case SCM_RIGHTS:
1222				break;
1223
1224			case SCM_CREDS:
1225				data = &cmcred;
1226				datalen = sizeof(cmcred);
1227
1228				/*
1229				 * The lower levels will fill in the structure
1230				 */
1231				bzero(data, datalen);
1232				break;
1233			}
1234
1235			cmsg->cmsg_len = CMSG_LEN(datalen);
1236
1237			error = ENOBUFS;
1238			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t) cmsg))
1239				goto bad;
1240			if (!m_append(control, datalen, (c_caddr_t) data))
1241				goto bad;
1242		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&linux_msg, ptr_cmsg)));
1243
1244		if (m_length(control, NULL) == 0) {
1245			m_freem(control);
1246			control = NULL;
1247		}
1248	}
1249
1250	msg.msg_iov = iov;
1251	msg.msg_flags = 0;
1252	error = linux_sendit(td, args->s, &msg, args->flags, control,
1253	    UIO_USERSPACE);
1254
1255bad:
1256	free(iov, M_IOV);
1257	if (cmsg)
1258		free(cmsg, M_TEMP);
1259	return (error);
1260}
1261
/* Argument block consumed by linux_recvmsg(). */
struct linux_recvmsg_args {
	int s;			/* socket descriptor to receive on */
	l_uintptr_t msg;	/* user address of the struct l_msghdr */
	int flags;		/* Linux MSG_* flags (LINUX_MSG_CMSG_CLOEXEC is tested) */
};
1267
1268static int
1269linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1270{
1271	struct cmsghdr *cm;
1272	struct cmsgcred *cmcred;
1273	struct msghdr msg;
1274	struct l_cmsghdr *linux_cmsg = NULL;
1275	struct l_ucred linux_ucred;
1276	socklen_t datalen, outlen;
1277	struct l_msghdr linux_msg;
1278	struct iovec *iov, *uiov;
1279	struct mbuf *control = NULL;
1280	struct mbuf **controlp;
1281	caddr_t outbuf;
1282	void *data;
1283	int error, i, fd, fds, *fdp;
1284
1285	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1286	if (error)
1287		return (error);
1288
1289	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1290	if (error)
1291		return (error);
1292
1293#ifdef COMPAT_LINUX32
1294	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1295	    &iov, EMSGSIZE);
1296#else
1297	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1298#endif
1299	if (error)
1300		return (error);
1301
1302	if (msg.msg_name) {
1303		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1304		    msg.msg_namelen);
1305		if (error)
1306			goto bad;
1307	}
1308
1309	uiov = msg.msg_iov;
1310	msg.msg_iov = iov;
1311	controlp = (msg.msg_control != NULL) ? &control : NULL;
1312	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1313	msg.msg_iov = uiov;
1314	if (error)
1315		goto bad;
1316
1317	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1318	if (error)
1319		goto bad;
1320
1321	if (linux_msg.msg_name) {
1322		error = bsd_to_linux_sockaddr((struct sockaddr *)
1323		    PTRIN(linux_msg.msg_name));
1324		if (error)
1325			goto bad;
1326	}
1327	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1328		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1329		if (error)
1330			goto bad;
1331	}
1332
1333	outbuf = PTRIN(linux_msg.msg_control);
1334	outlen = 0;
1335
1336	if (control) {
1337		linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1338
1339		msg.msg_control = mtod(control, struct cmsghdr *);
1340		msg.msg_controllen = control->m_len;
1341
1342		cm = CMSG_FIRSTHDR(&msg);
1343
1344		while (cm != NULL) {
1345			linux_cmsg->cmsg_type =
1346			    bsd_to_linux_cmsg_type(cm->cmsg_type);
1347			linux_cmsg->cmsg_level =
1348			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1349			if (linux_cmsg->cmsg_type == -1
1350			    || cm->cmsg_level != SOL_SOCKET)
1351			{
1352				error = EINVAL;
1353				goto bad;
1354			}
1355
1356			data = CMSG_DATA(cm);
1357			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1358
1359			switch (cm->cmsg_type)
1360			{
1361			case SCM_RIGHTS:
1362				if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
1363					fds = datalen / sizeof(int);
1364					fdp = data;
1365					for (i = 0; i < fds; i++) {
1366						fd = *fdp++;
1367						(void)kern_fcntl(td, fd,
1368						    F_SETFD, FD_CLOEXEC);
1369					}
1370				}
1371				break;
1372
1373			case SCM_CREDS:
1374				/*
1375				 * Currently LOCAL_CREDS is never in
1376				 * effect for Linux so no need to worry
1377				 * about sockcred
1378				 */
1379				if (datalen != sizeof (*cmcred)) {
1380					error = EMSGSIZE;
1381					goto bad;
1382				}
1383				cmcred = (struct cmsgcred *)data;
1384				bzero(&linux_ucred, sizeof(linux_ucred));
1385				linux_ucred.pid = cmcred->cmcred_pid;
1386				linux_ucred.uid = cmcred->cmcred_uid;
1387				linux_ucred.gid = cmcred->cmcred_gid;
1388				data = &linux_ucred;
1389				datalen = sizeof(linux_ucred);
1390				break;
1391			}
1392
1393			if (outlen + LINUX_CMSG_LEN(datalen) >
1394			    linux_msg.msg_controllen) {
1395				if (outlen == 0) {
1396					error = EMSGSIZE;
1397					goto bad;
1398				} else {
1399					linux_msg.msg_flags |=
1400					    LINUX_MSG_CTRUNC;
1401					goto out;
1402				}
1403			}
1404
1405			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1406
1407			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1408			if (error)
1409				goto bad;
1410			outbuf += L_CMSG_HDRSZ;
1411
1412			error = copyout(data, outbuf, datalen);
1413			if (error)
1414				goto bad;
1415
1416			outbuf += LINUX_CMSG_ALIGN(datalen);
1417			outlen += LINUX_CMSG_LEN(datalen);
1418
1419			cm = CMSG_NXTHDR(&msg, cm);
1420		}
1421	}
1422
1423out:
1424	linux_msg.msg_controllen = outlen;
1425	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1426
1427bad:
1428	free(iov, M_IOV);
1429	if (control != NULL)
1430		m_freem(control);
1431	if (linux_cmsg != NULL)
1432		free(linux_cmsg, M_TEMP);
1433
1434	return (error);
1435}
1436
/* Argument block consumed by linux_shutdown(). */
struct linux_shutdown_args {
	int s;		/* socket descriptor */
	int how;	/* passed unchanged to sys_shutdown() */
};
1441
1442static int
1443linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1444{
1445	struct shutdown_args /* {
1446		int s;
1447		int how;
1448	} */ bsd_args;
1449
1450	bsd_args.s = args->s;
1451	bsd_args.how = args->how;
1452	return (sys_shutdown(td, &bsd_args));
1453}
1454
/* Argument block consumed by linux_setsockopt(). */
struct linux_setsockopt_args {
	int s;			/* socket descriptor */
	int level;		/* Linux option level, translated before use */
	int optname;		/* Linux option name, translated per level */
	l_uintptr_t optval;	/* user address of the option value */
	int optlen;		/* size of the option value in bytes */
};
1462
1463static int
1464linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1465{
1466	struct setsockopt_args /* {
1467		int s;
1468		int level;
1469		int name;
1470		caddr_t val;
1471		int valsize;
1472	} */ bsd_args;
1473	l_timeval linux_tv;
1474	struct timeval tv;
1475	int error, name;
1476
1477	bsd_args.s = args->s;
1478	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1479	switch (bsd_args.level) {
1480	case SOL_SOCKET:
1481		name = linux_to_bsd_so_sockopt(args->optname);
1482		switch (name) {
1483		case SO_RCVTIMEO:
1484			/* FALLTHROUGH */
1485		case SO_SNDTIMEO:
1486			error = copyin(PTRIN(args->optval), &linux_tv,
1487			    sizeof(linux_tv));
1488			if (error)
1489				return (error);
1490			tv.tv_sec = linux_tv.tv_sec;
1491			tv.tv_usec = linux_tv.tv_usec;
1492			return (kern_setsockopt(td, args->s, bsd_args.level,
1493			    name, &tv, UIO_SYSSPACE, sizeof(tv)));
1494			/* NOTREACHED */
1495			break;
1496		default:
1497			break;
1498		}
1499		break;
1500	case IPPROTO_IP:
1501		name = linux_to_bsd_ip_sockopt(args->optname);
1502		break;
1503	case IPPROTO_TCP:
1504		/* Linux TCP option values match BSD's */
1505		name = args->optname;
1506		break;
1507	default:
1508		name = -1;
1509		break;
1510	}
1511	if (name == -1)
1512		return (ENOPROTOOPT);
1513
1514	bsd_args.name = name;
1515	bsd_args.val = PTRIN(args->optval);
1516	bsd_args.valsize = args->optlen;
1517
1518	if (name == IPV6_NEXTHOP) {
1519		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1520			bsd_args.valsize);
1521		error = sys_setsockopt(td, &bsd_args);
1522		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1523	} else
1524		error = sys_setsockopt(td, &bsd_args);
1525
1526	return (error);
1527}
1528
/* Argument block consumed by linux_getsockopt(). */
struct linux_getsockopt_args {
	int s;			/* socket descriptor */
	int level;		/* Linux option level, translated before use */
	int optname;		/* Linux option name, translated per level */
	l_uintptr_t optval;	/* user address of the buffer for the value */
	l_uintptr_t optlen;	/* user address of the value length (not the length itself) */
};
1536
1537static int
1538linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1539{
1540	struct getsockopt_args /* {
1541		int s;
1542		int level;
1543		int name;
1544		caddr_t val;
1545		int *avalsize;
1546	} */ bsd_args;
1547	l_timeval linux_tv;
1548	struct timeval tv;
1549	socklen_t tv_len, xulen;
1550	struct xucred xu;
1551	struct l_ucred lxu;
1552	int error, name;
1553
1554	bsd_args.s = args->s;
1555	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1556	switch (bsd_args.level) {
1557	case SOL_SOCKET:
1558		name = linux_to_bsd_so_sockopt(args->optname);
1559		switch (name) {
1560		case SO_RCVTIMEO:
1561			/* FALLTHROUGH */
1562		case SO_SNDTIMEO:
1563			tv_len = sizeof(tv);
1564			error = kern_getsockopt(td, args->s, bsd_args.level,
1565			    name, &tv, UIO_SYSSPACE, &tv_len);
1566			if (error)
1567				return (error);
1568			linux_tv.tv_sec = tv.tv_sec;
1569			linux_tv.tv_usec = tv.tv_usec;
1570			return (copyout(&linux_tv, PTRIN(args->optval),
1571			    sizeof(linux_tv)));
1572			/* NOTREACHED */
1573			break;
1574		case LOCAL_PEERCRED:
1575			if (args->optlen != sizeof(lxu))
1576				return (EINVAL);
1577			xulen = sizeof(xu);
1578			error = kern_getsockopt(td, args->s, bsd_args.level,
1579			    name, &xu, UIO_SYSSPACE, &xulen);
1580			if (error)
1581				return (error);
1582			/*
1583			 * XXX Use 0 for pid as the FreeBSD does not cache peer pid.
1584			 */
1585			lxu.pid = 0;
1586			lxu.uid = xu.cr_uid;
1587			lxu.gid = xu.cr_gid;
1588			return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu)));
1589			/* NOTREACHED */
1590			break;
1591		default:
1592			break;
1593		}
1594		break;
1595	case IPPROTO_IP:
1596		name = linux_to_bsd_ip_sockopt(args->optname);
1597		break;
1598	case IPPROTO_TCP:
1599		/* Linux TCP option values match BSD's */
1600		name = args->optname;
1601		break;
1602	default:
1603		name = -1;
1604		break;
1605	}
1606	if (name == -1)
1607		return (EINVAL);
1608
1609	bsd_args.name = name;
1610	bsd_args.val = PTRIN(args->optval);
1611	bsd_args.avalsize = PTRIN(args->optlen);
1612
1613	if (name == IPV6_NEXTHOP) {
1614		error = sys_getsockopt(td, &bsd_args);
1615		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1616	} else
1617		error = sys_getsockopt(td, &bsd_args);
1618
1619	return (error);
1620}
1621
1622/* Argument list sizes for linux_socketcall */
1623
1624#define LINUX_AL(x) ((x) * sizeof(l_ulong))
1625
1626static const unsigned char lxs_args[] = {
1627	LINUX_AL(0) /* unused*/,	LINUX_AL(3) /* socket */,
1628	LINUX_AL(3) /* bind */,		LINUX_AL(3) /* connect */,
1629	LINUX_AL(2) /* listen */,	LINUX_AL(3) /* accept */,
1630	LINUX_AL(3) /* getsockname */,	LINUX_AL(3) /* getpeername */,
1631	LINUX_AL(4) /* socketpair */,	LINUX_AL(4) /* send */,
1632	LINUX_AL(4) /* recv */,		LINUX_AL(6) /* sendto */,
1633	LINUX_AL(6) /* recvfrom */,	LINUX_AL(2) /* shutdown */,
1634	LINUX_AL(5) /* setsockopt */,	LINUX_AL(5) /* getsockopt */,
1635	LINUX_AL(3) /* sendmsg */,	LINUX_AL(3) /* recvmsg */,
1636	LINUX_AL(4) /* accept4 */
1637};
1638
1639#define	LINUX_AL_SIZE	sizeof(lxs_args) / sizeof(lxs_args[0]) - 1
1640
1641int
1642linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1643{
1644	l_ulong a[6];
1645	void *arg;
1646	int error;
1647
1648	if (args->what < LINUX_SOCKET || args->what > LINUX_AL_SIZE)
1649		return (EINVAL);
1650	error = copyin(PTRIN(args->args), a, lxs_args[args->what]);
1651	if (error)
1652		return (error);
1653
1654	arg = a;
1655	switch (args->what) {
1656	case LINUX_SOCKET:
1657		return (linux_socket(td, arg));
1658	case LINUX_BIND:
1659		return (linux_bind(td, arg));
1660	case LINUX_CONNECT:
1661		return (linux_connect(td, arg));
1662	case LINUX_LISTEN:
1663		return (linux_listen(td, arg));
1664	case LINUX_ACCEPT:
1665		return (linux_accept(td, arg));
1666	case LINUX_GETSOCKNAME:
1667		return (linux_getsockname(td, arg));
1668	case LINUX_GETPEERNAME:
1669		return (linux_getpeername(td, arg));
1670	case LINUX_SOCKETPAIR:
1671		return (linux_socketpair(td, arg));
1672	case LINUX_SEND:
1673		return (linux_send(td, arg));
1674	case LINUX_RECV:
1675		return (linux_recv(td, arg));
1676	case LINUX_SENDTO:
1677		return (linux_sendto(td, arg));
1678	case LINUX_RECVFROM:
1679		return (linux_recvfrom(td, arg));
1680	case LINUX_SHUTDOWN:
1681		return (linux_shutdown(td, arg));
1682	case LINUX_SETSOCKOPT:
1683		return (linux_setsockopt(td, arg));
1684	case LINUX_GETSOCKOPT:
1685		return (linux_getsockopt(td, arg));
1686	case LINUX_SENDMSG:
1687		return (linux_sendmsg(td, arg));
1688	case LINUX_RECVMSG:
1689		return (linux_recvmsg(td, arg));
1690	case LINUX_ACCEPT4:
1691		return (linux_accept4(td, arg));
1692	}
1693
1694	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1695	return (ENOSYS);
1696}
1697