linux_socket.c revision 185571
1153323Srodrigc/*-
2159451Srodrigc * Copyright (c) 1995 Søren Schmidt
3159451Srodrigc * All rights reserved.
4153323Srodrigc *
5159451Srodrigc * Redistribution and use in source and binary forms, with or without
6159451Srodrigc * modification, are permitted provided that the following conditions
7153323Srodrigc * are met:
8153323Srodrigc * 1. Redistributions of source code must retain the above copyright
9159451Srodrigc *    notice, this list of conditions and the following disclaimer
10159451Srodrigc *    in this position and unchanged.
11159451Srodrigc * 2. Redistributions in binary form must reproduce the above copyright
12159451Srodrigc *    notice, this list of conditions and the following disclaimer in the
13153323Srodrigc *    documentation and/or other materials provided with the distribution.
14159451Srodrigc * 3. The name of the author may not be used to endorse or promote products
15159451Srodrigc *    derived from this software without specific prior written permission
16159451Srodrigc *
17153323Srodrigc * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18153323Srodrigc * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19159451Srodrigc * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20153323Srodrigc * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21159451Srodrigc * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22159451Srodrigc * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23153323Srodrigc * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24153323Srodrigc * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25153323Srodrigc * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26159451Srodrigc * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27153323Srodrigc */
28153323Srodrigc
29153323Srodrigc#include <sys/cdefs.h>
30153323Srodrigc__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 185571 2008-12-02 21:37:28Z bz $");
31159451Srodrigc
32153323Srodrigc/* XXX we use functions that might not exist. */
33153323Srodrigc#include "opt_compat.h"
34153323Srodrigc#include "opt_inet6.h"
35153323Srodrigc
36153323Srodrigc#include <sys/param.h>
37159451Srodrigc#include <sys/proc.h>
38153323Srodrigc#include <sys/systm.h>
39159451Srodrigc#include <sys/sysproto.h>
40153323Srodrigc#include <sys/fcntl.h>
41159451Srodrigc#include <sys/file.h>
42159451Srodrigc#include <sys/limits.h>
43159451Srodrigc#include <sys/lock.h>
44159451Srodrigc#include <sys/malloc.h>
45153323Srodrigc#include <sys/mutex.h>
46153323Srodrigc#include <sys/mbuf.h>
47153323Srodrigc#include <sys/socket.h>
48153323Srodrigc#include <sys/socketvar.h>
49153323Srodrigc#include <sys/syscallsubr.h>
50159451Srodrigc#include <sys/uio.h>
51159451Srodrigc#include <sys/syslog.h>
52153323Srodrigc#include <sys/un.h>
53153323Srodrigc#include <sys/vimage.h>
54153323Srodrigc
55153323Srodrigc#include <net/if.h>
56159451Srodrigc#include <netinet/in.h>
57159451Srodrigc#include <netinet/in_systm.h>
58153323Srodrigc#include <netinet/ip.h>
59159451Srodrigc#ifdef INET6
60153323Srodrigc#include <netinet/ip6.h>
61153323Srodrigc#include <netinet6/ip6_var.h>
62153323Srodrigc#include <netinet6/in6_var.h>
63153323Srodrigc#include <netinet6/vinet6.h>
64153323Srodrigc#endif
65153323Srodrigc
66153323Srodrigc#ifdef COMPAT_LINUX32
67153323Srodrigc#include <machine/../linux32/linux.h>
68153323Srodrigc#include <machine/../linux32/linux32_proto.h>
69153323Srodrigc#else
70153323Srodrigc#include <machine/../linux/linux.h>
71153323Srodrigc#include <machine/../linux/linux_proto.h>
72153323Srodrigc#endif
73153323Srodrigc#include <compat/linux/linux_socket.h>
74153323Srodrigc#include <compat/linux/linux_util.h>
75153323Srodrigc
/* Forward declarations for helpers that are defined further down. */
static int	linux_to_bsd_domain(int);
static int	do_sa_get(struct sockaddr **, const struct osockaddr *, int *,
		    struct malloc_type *);
79153323Srodrigc
80153323Srodrigc/*
81153323Srodrigc * Reads a linux sockaddr and does any necessary translation.
82153323Srodrigc * Linux sockaddrs don't have a length field, only a family.
83153323Srodrigc */
84153323Srodrigcstatic int
85153323Srodrigclinux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
86153323Srodrigc{
87153323Srodrigc	int osalen = len;
88153323Srodrigc
89153323Srodrigc	return (do_sa_get(sap, osa, &osalen, M_SONAME));
90153323Srodrigc}
91153323Srodrigc
92153323Srodrigc/*
93153323Srodrigc * Copy the osockaddr structure pointed to by osa to kernel, adjust
94153323Srodrigc * family and convert to sockaddr.
95159451Srodrigc */
96159451Srodrigcstatic int
97153323Srodrigcdo_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
98153323Srodrigc    struct malloc_type *mtype)
99153323Srodrigc{
100153323Srodrigc	int error=0, bdom;
101153323Srodrigc	struct sockaddr *sa;
102153323Srodrigc	struct osockaddr *kosa;
103153323Srodrigc	int alloclen;
104153323Srodrigc#ifdef INET6
105153323Srodrigc	int oldv6size;
106153323Srodrigc	struct sockaddr_in6 *sin6;
107153323Srodrigc#endif
108159451Srodrigc
109159451Srodrigc	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
110159451Srodrigc		return (EINVAL);
111159451Srodrigc
112153323Srodrigc	alloclen = *osalen;
113159451Srodrigc#ifdef INET6
114159451Srodrigc	oldv6size = 0;
115159451Srodrigc	/*
116159451Srodrigc	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
117153323Srodrigc	 * if it's a v4-mapped address, so reserve the proper space
118153323Srodrigc	 * for it.
119153323Srodrigc	 */
120159451Srodrigc	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
121159451Srodrigc		alloclen = sizeof (struct sockaddr_in6);
122159451Srodrigc		oldv6size = 1;
123159451Srodrigc	}
124159451Srodrigc#endif
125159451Srodrigc
126159451Srodrigc	kosa = malloc(alloclen, mtype, M_WAITOK);
127159451Srodrigc
128159451Srodrigc	if ((error = copyin(osa, kosa, *osalen)))
129159451Srodrigc		goto out;
130159451Srodrigc
131159451Srodrigc	bdom = linux_to_bsd_domain(kosa->sa_family);
132159451Srodrigc	if (bdom == -1) {
133159451Srodrigc		error = EINVAL;
134153323Srodrigc		goto out;
135153323Srodrigc	}
136153323Srodrigc
137153323Srodrigc#ifdef INET6
138153323Srodrigc	/*
139153323Srodrigc	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
140153323Srodrigc	 * which lacks the scope id compared with RFC2553 one. If we detect
141153323Srodrigc	 * the situation, reject the address and write a message to system log.
142153323Srodrigc	 *
143153323Srodrigc	 * Still accept addresses for which the scope id is not used.
144153323Srodrigc	 */
145153323Srodrigc	if (oldv6size && bdom == AF_INET6) {
146153323Srodrigc		sin6 = (struct sockaddr_in6 *)kosa;
147153323Srodrigc		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
148153323Srodrigc		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
149153323Srodrigc		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
150153323Srodrigc		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
151153323Srodrigc		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
152153323Srodrigc		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
153153323Srodrigc			sin6->sin6_scope_id = 0;
154153323Srodrigc		} else {
155153323Srodrigc			log(LOG_DEBUG,
156153323Srodrigc			    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
157153323Srodrigc			error = EINVAL;
158153323Srodrigc			goto out;
159153323Srodrigc		}
160153323Srodrigc	} else
161153323Srodrigc#endif
162153323Srodrigc	if (bdom == AF_INET)
163153323Srodrigc		alloclen = sizeof(struct sockaddr_in);
164153323Srodrigc
165153323Srodrigc	sa = (struct sockaddr *) kosa;
166153323Srodrigc	sa->sa_family = bdom;
167153323Srodrigc	sa->sa_len = alloclen;
168153323Srodrigc
169153323Srodrigc	*sap = sa;
170153323Srodrigc	*osalen = alloclen;
171153323Srodrigc	return (0);
172153323Srodrigc
173153323Srodrigcout:
174153323Srodrigc	free(kosa, mtype);
175153323Srodrigc	return (error);
176153323Srodrigc}
177153323Srodrigc
178153323Srodrigcstatic int
179153323Srodrigclinux_to_bsd_domain(int domain)
180153323Srodrigc{
181153323Srodrigc
182153323Srodrigc	switch (domain) {
183153323Srodrigc	case LINUX_AF_UNSPEC:
184153323Srodrigc		return (AF_UNSPEC);
185153323Srodrigc	case LINUX_AF_UNIX:
186159451Srodrigc		return (AF_LOCAL);
187153323Srodrigc	case LINUX_AF_INET:
188153323Srodrigc		return (AF_INET);
189153323Srodrigc	case LINUX_AF_INET6:
190153323Srodrigc		return (AF_INET6);
191153323Srodrigc	case LINUX_AF_AX25:
192153323Srodrigc		return (AF_CCITT);
193153323Srodrigc	case LINUX_AF_IPX:
194153323Srodrigc		return (AF_IPX);
195153323Srodrigc	case LINUX_AF_APPLETALK:
196153323Srodrigc		return (AF_APPLETALK);
197153323Srodrigc	}
198153323Srodrigc	return (-1);
199153323Srodrigc}
200153323Srodrigc
201153323Srodrigcstatic int
202153323Srodrigcbsd_to_linux_domain(int domain)
203153323Srodrigc{
204153323Srodrigc
205153323Srodrigc	switch (domain) {
206153323Srodrigc	case AF_UNSPEC:
207159451Srodrigc		return (LINUX_AF_UNSPEC);
208159451Srodrigc	case AF_LOCAL:
209159451Srodrigc		return (LINUX_AF_UNIX);
210159451Srodrigc	case AF_INET:
211159451Srodrigc		return (LINUX_AF_INET);
212159451Srodrigc	case AF_INET6:
213159451Srodrigc		return (LINUX_AF_INET6);
214159451Srodrigc	case AF_CCITT:
215159451Srodrigc		return (LINUX_AF_AX25);
216159451Srodrigc	case AF_IPX:
217159451Srodrigc		return (LINUX_AF_IPX);
218159451Srodrigc	case AF_APPLETALK:
219153323Srodrigc		return (LINUX_AF_APPLETALK);
220153323Srodrigc	}
221153323Srodrigc	return (-1);
222153323Srodrigc}
223153323Srodrigc
224153323Srodrigcstatic int
225153323Srodrigclinux_to_bsd_sockopt_level(int level)
226153323Srodrigc{
227153323Srodrigc
228153323Srodrigc	switch (level) {
229159451Srodrigc	case LINUX_SOL_SOCKET:
230153323Srodrigc		return (SOL_SOCKET);
231159451Srodrigc	}
232153323Srodrigc	return (level);
233153323Srodrigc}
234153323Srodrigc
235153323Srodrigcstatic int
236153323Srodrigcbsd_to_linux_sockopt_level(int level)
237153323Srodrigc{
238153323Srodrigc
239153323Srodrigc	switch (level) {
240153323Srodrigc	case SOL_SOCKET:
241153323Srodrigc		return (LINUX_SOL_SOCKET);
242153323Srodrigc	}
243153323Srodrigc	return (level);
244153323Srodrigc}
245159451Srodrigc
246159451Srodrigcstatic int
247153323Srodrigclinux_to_bsd_ip_sockopt(int opt)
248153323Srodrigc{
249153323Srodrigc
250153323Srodrigc	switch (opt) {
251153323Srodrigc	case LINUX_IP_TOS:
252153323Srodrigc		return (IP_TOS);
253153323Srodrigc	case LINUX_IP_TTL:
254153323Srodrigc		return (IP_TTL);
255153323Srodrigc	case LINUX_IP_OPTIONS:
256159451Srodrigc		return (IP_OPTIONS);
257153323Srodrigc	case LINUX_IP_MULTICAST_IF:
258153323Srodrigc		return (IP_MULTICAST_IF);
259153323Srodrigc	case LINUX_IP_MULTICAST_TTL:
260153323Srodrigc		return (IP_MULTICAST_TTL);
261153323Srodrigc	case LINUX_IP_MULTICAST_LOOP:
262153323Srodrigc		return (IP_MULTICAST_LOOP);
263153323Srodrigc	case LINUX_IP_ADD_MEMBERSHIP:
264153323Srodrigc		return (IP_ADD_MEMBERSHIP);
265153323Srodrigc	case LINUX_IP_DROP_MEMBERSHIP:
266153323Srodrigc		return (IP_DROP_MEMBERSHIP);
267159451Srodrigc	case LINUX_IP_HDRINCL:
268153323Srodrigc		return (IP_HDRINCL);
269153323Srodrigc	}
270153323Srodrigc	return (-1);
271153323Srodrigc}
272159451Srodrigc
273159451Srodrigcstatic int
274159451Srodrigclinux_to_bsd_so_sockopt(int opt)
275159451Srodrigc{
276159451Srodrigc
277159451Srodrigc	switch (opt) {
278159451Srodrigc	case LINUX_SO_DEBUG:
279159451Srodrigc		return (SO_DEBUG);
280153323Srodrigc	case LINUX_SO_REUSEADDR:
281153323Srodrigc		return (SO_REUSEADDR);
282153323Srodrigc	case LINUX_SO_TYPE:
283153323Srodrigc		return (SO_TYPE);
284153323Srodrigc	case LINUX_SO_ERROR:
285153323Srodrigc		return (SO_ERROR);
286153323Srodrigc	case LINUX_SO_DONTROUTE:
287153323Srodrigc		return (SO_DONTROUTE);
288153323Srodrigc	case LINUX_SO_BROADCAST:
289153323Srodrigc		return (SO_BROADCAST);
290153323Srodrigc	case LINUX_SO_SNDBUF:
291153323Srodrigc		return (SO_SNDBUF);
292153323Srodrigc	case LINUX_SO_RCVBUF:
293159451Srodrigc		return (SO_RCVBUF);
294159451Srodrigc	case LINUX_SO_KEEPALIVE:
295153323Srodrigc		return (SO_KEEPALIVE);
296153323Srodrigc	case LINUX_SO_OOBINLINE:
297153323Srodrigc		return (SO_OOBINLINE);
298159451Srodrigc	case LINUX_SO_LINGER:
299153323Srodrigc		return (SO_LINGER);
300153323Srodrigc	case LINUX_SO_PEERCRED:
301153323Srodrigc		return (LOCAL_PEERCRED);
302153323Srodrigc	case LINUX_SO_RCVLOWAT:
303153323Srodrigc		return (SO_RCVLOWAT);
304153323Srodrigc	case LINUX_SO_SNDLOWAT:
305153323Srodrigc		return (SO_SNDLOWAT);
306153323Srodrigc	case LINUX_SO_RCVTIMEO:
307153323Srodrigc		return (SO_RCVTIMEO);
308153323Srodrigc	case LINUX_SO_SNDTIMEO:
309153323Srodrigc		return (SO_SNDTIMEO);
310153323Srodrigc	case LINUX_SO_TIMESTAMP:
311153323Srodrigc		return (SO_TIMESTAMP);
312153323Srodrigc	case LINUX_SO_ACCEPTCONN:
313159451Srodrigc		return (SO_ACCEPTCONN);
314159451Srodrigc	}
315159451Srodrigc	return (-1);
316153323Srodrigc}
317153323Srodrigc
318159451Srodrigcstatic int
319159451Srodrigclinux_to_bsd_msg_flags(int flags)
320159451Srodrigc{
321159451Srodrigc	int ret_flags = 0;
322153323Srodrigc
323159451Srodrigc	if (flags & LINUX_MSG_OOB)
324159451Srodrigc		ret_flags |= MSG_OOB;
325159451Srodrigc	if (flags & LINUX_MSG_PEEK)
326153323Srodrigc		ret_flags |= MSG_PEEK;
327153323Srodrigc	if (flags & LINUX_MSG_DONTROUTE)
328153323Srodrigc		ret_flags |= MSG_DONTROUTE;
329153323Srodrigc	if (flags & LINUX_MSG_CTRUNC)
330159451Srodrigc		ret_flags |= MSG_CTRUNC;
331153323Srodrigc	if (flags & LINUX_MSG_TRUNC)
332153323Srodrigc		ret_flags |= MSG_TRUNC;
333153323Srodrigc	if (flags & LINUX_MSG_DONTWAIT)
334153323Srodrigc		ret_flags |= MSG_DONTWAIT;
335153323Srodrigc	if (flags & LINUX_MSG_EOR)
336153323Srodrigc		ret_flags |= MSG_EOR;
337153323Srodrigc	if (flags & LINUX_MSG_WAITALL)
338153323Srodrigc		ret_flags |= MSG_WAITALL;
339153323Srodrigc	if (flags & LINUX_MSG_NOSIGNAL)
340159451Srodrigc		ret_flags |= MSG_NOSIGNAL;
341159451Srodrigc#if 0 /* not handled */
342159451Srodrigc	if (flags & LINUX_MSG_PROXY)
343159451Srodrigc		;
344153323Srodrigc	if (flags & LINUX_MSG_FIN)
345153323Srodrigc		;
346153323Srodrigc	if (flags & LINUX_MSG_SYN)
347153323Srodrigc		;
348153323Srodrigc	if (flags & LINUX_MSG_CONFIRM)
349153323Srodrigc		;
350153323Srodrigc	if (flags & LINUX_MSG_RST)
351153323Srodrigc		;
352153323Srodrigc	if (flags & LINUX_MSG_ERRQUEUE)
353153323Srodrigc		;
354159451Srodrigc#endif
355153323Srodrigc	return ret_flags;
356159451Srodrigc}
357153323Srodrigc
358159451Srodrigc/*
359159451Srodrigc* If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
360153323Srodrigc* native syscall will fault.  Thus, we don't really need to check the
361153323Srodrigc* return values for these functions.
362159451Srodrigc*/
363153323Srodrigc
364153323Srodrigcstatic int
365159451Srodrigcbsd_to_linux_sockaddr(struct sockaddr *arg)
366159451Srodrigc{
367153323Srodrigc	struct sockaddr sa;
368153323Srodrigc	size_t sa_len = sizeof(struct sockaddr);
369153323Srodrigc	int error;
370153323Srodrigc
371153323Srodrigc	if ((error = copyin(arg, &sa, sa_len)))
372153323Srodrigc		return (error);
373153323Srodrigc
374153323Srodrigc	*(u_short *)&sa = sa.sa_family;
375153323Srodrigc
376153323Srodrigc	error = copyout(&sa, arg, sa_len);
377153323Srodrigc
378153323Srodrigc	return (error);
379153323Srodrigc}
380159451Srodrigc
381159451Srodrigcstatic int
382159451Srodrigclinux_to_bsd_sockaddr(struct sockaddr *arg, int len)
383159451Srodrigc{
384153323Srodrigc	struct sockaddr sa;
385153323Srodrigc	size_t sa_len = sizeof(struct sockaddr);
386153323Srodrigc	int error;
387153323Srodrigc
388153323Srodrigc	if ((error = copyin(arg, &sa, sa_len)))
389153323Srodrigc		return (error);
390153323Srodrigc
391153323Srodrigc	sa.sa_family = *(sa_family_t *)&sa;
392153323Srodrigc	sa.sa_len = len;
393153323Srodrigc
394153323Srodrigc	error = copyout(&sa, arg, sa_len);
395153323Srodrigc
396153323Srodrigc	return (error);
397153323Srodrigc}
398153323Srodrigc
399153323Srodrigc
400153323Srodrigcstatic int
401153323Srodrigclinux_sa_put(struct osockaddr *osa)
402153323Srodrigc{
403153323Srodrigc	struct osockaddr sa;
404153323Srodrigc	int error, bdom;
405153323Srodrigc
406153323Srodrigc	/*
407153323Srodrigc	 * Only read/write the osockaddr family part, the rest is
408153323Srodrigc	 * not changed.
409153323Srodrigc	 */
410153323Srodrigc	error = copyin(osa, &sa, sizeof(sa.sa_family));
411153323Srodrigc	if (error)
412153323Srodrigc		return (error);
413153323Srodrigc
414153323Srodrigc	bdom = bsd_to_linux_domain(sa.sa_family);
415153323Srodrigc	if (bdom == -1)
416153323Srodrigc		return (EINVAL);
417153323Srodrigc
418153323Srodrigc	sa.sa_family = bdom;
419153323Srodrigc	error = copyout(&sa, osa, sizeof(sa.sa_family));
420153323Srodrigc	if (error)
421153323Srodrigc		return (error);
422153323Srodrigc
423153323Srodrigc	return (0);
424153323Srodrigc}
425153323Srodrigc
426153323Srodrigcstatic int
427153323Srodrigclinux_to_bsd_cmsg_type(int cmsg_type)
428153323Srodrigc{
429153323Srodrigc
430153323Srodrigc	switch (cmsg_type) {
431153323Srodrigc	case LINUX_SCM_RIGHTS:
432153323Srodrigc		return (SCM_RIGHTS);
433153323Srodrigc	}
434153323Srodrigc	return (-1);
435153323Srodrigc}
436153323Srodrigc
437153323Srodrigcstatic int
438153323Srodrigcbsd_to_linux_cmsg_type(int cmsg_type)
439153323Srodrigc{
440153323Srodrigc
441153323Srodrigc	switch (cmsg_type) {
442153323Srodrigc	case SCM_RIGHTS:
443153323Srodrigc		return (LINUX_SCM_RIGHTS);
444153323Srodrigc	}
445153323Srodrigc	return (-1);
446153323Srodrigc}
447153323Srodrigc
448153323Srodrigc
449153323Srodrigc
450153323Srodrigcstatic int
451153323Srodrigclinux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
452153323Srodrigc{
453159451Srodrigc	if (lhdr->msg_controllen > INT_MAX)
454159451Srodrigc		return (ENOBUFS);
455159451Srodrigc
456159451Srodrigc	bhdr->msg_name		= PTRIN(lhdr->msg_name);
457159451Srodrigc	bhdr->msg_namelen	= lhdr->msg_namelen;
458159451Srodrigc	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
459159451Srodrigc	bhdr->msg_iovlen	= lhdr->msg_iovlen;
460159451Srodrigc	bhdr->msg_control	= PTRIN(lhdr->msg_control);
461159451Srodrigc	bhdr->msg_controllen	= lhdr->msg_controllen;
462159451Srodrigc	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
463159451Srodrigc	return (0);
464159451Srodrigc}
465159451Srodrigc
466153323Srodrigcstatic int
467153323Srodrigcbsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
468153323Srodrigc{
469153323Srodrigc	lhdr->msg_name		= PTROUT(bhdr->msg_name);
470153323Srodrigc	lhdr->msg_namelen	= bhdr->msg_namelen;
471153323Srodrigc	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
472153323Srodrigc	lhdr->msg_iovlen	= bhdr->msg_iovlen;
473153323Srodrigc	lhdr->msg_control	= PTROUT(bhdr->msg_control);
474153323Srodrigc	lhdr->msg_controllen	= bhdr->msg_controllen;
475153323Srodrigc	/* msg_flags skipped */
476153323Srodrigc	return (0);
477153323Srodrigc}
478153323Srodrigc
479153323Srodrigcstatic int
480153323Srodrigclinux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
481153323Srodrigc    struct mbuf *control, enum uio_seg segflg)
482153323Srodrigc{
483153323Srodrigc	struct sockaddr *to;
484153323Srodrigc	int error;
485153323Srodrigc
486153323Srodrigc	if (mp->msg_name != NULL) {
487153323Srodrigc		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
488153323Srodrigc		if (error)
489153323Srodrigc			return (error);
490153323Srodrigc		mp->msg_name = to;
491153323Srodrigc	} else
492153323Srodrigc		to = NULL;
493153323Srodrigc
494153323Srodrigc	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
495153323Srodrigc	    segflg);
496159451Srodrigc
497159451Srodrigc	if (to)
498159451Srodrigc		free(to, M_SONAME);
499159451Srodrigc	return (error);
500159451Srodrigc}
501159451Srodrigc
502159451Srodrigc/* Return 0 if IP_HDRINCL is set for the given socket. */
503159451Srodrigcstatic int
504159451Srodrigclinux_check_hdrincl(struct thread *td, int s)
505159451Srodrigc{
506159451Srodrigc	int error, optval, size_val;
507159451Srodrigc
508153323Srodrigc	size_val = sizeof(optval);
509159451Srodrigc	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
510159451Srodrigc	    &optval, UIO_SYSSPACE, &size_val);
511159451Srodrigc	if (error)
512153323Srodrigc		return (error);
513153323Srodrigc
514153323Srodrigc	return (optval == 0);
515153323Srodrigc}
516159451Srodrigc
517153323Srodrigcstruct linux_sendto_args {
518159451Srodrigc	int s;
519159451Srodrigc	l_uintptr_t msg;
520153323Srodrigc	int len;
521159451Srodrigc	int flags;
522159451Srodrigc	l_uintptr_t to;
523159451Srodrigc	int tolen;
524159451Srodrigc};
525153323Srodrigc
526153323Srodrigc/*
527153323Srodrigc * Updated sendto() when IP_HDRINCL is set:
528153323Srodrigc * tweak endian-dependent fields in the IP packet.
529159451Srodrigc */
530159451Srodrigcstatic int
531159451Srodrigclinux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
532153323Srodrigc{
533153323Srodrigc/*
534153323Srodrigc * linux_ip_copysize defines how many bytes we should copy
535153323Srodrigc * from the beginning of the IP packet before we customize it for BSD.
536159451Srodrigc * It should include all the fields we modify (ip_len and ip_off).
537159451Srodrigc */
538159451Srodrigc#define linux_ip_copysize	8
539153323Srodrigc
540159451Srodrigc	struct ip *packet;
541159451Srodrigc	struct msghdr msg;
542159451Srodrigc	struct iovec aiov[1];
543159451Srodrigc	int error;
544159451Srodrigc
545159451Srodrigc	/* Check that the packet isn't too big or too small. */
546153323Srodrigc	if (linux_args->len < linux_ip_copysize ||
547159451Srodrigc	    linux_args->len > IP_MAXPACKET)
548159451Srodrigc		return (EINVAL);
549153323Srodrigc
550159451Srodrigc	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
551159451Srodrigc
552159451Srodrigc	/* Make kernel copy of the packet to be sent */
553159451Srodrigc	if ((error = copyin(PTRIN(linux_args->msg), packet,
554159451Srodrigc	    linux_args->len)))
555159451Srodrigc		goto goout;
556159451Srodrigc
557159451Srodrigc	/* Convert fields from Linux to BSD raw IP socket format */
558159451Srodrigc	packet->ip_len = linux_args->len;
559159451Srodrigc	packet->ip_off = ntohs(packet->ip_off);
560153323Srodrigc
561159451Srodrigc	/* Prepare the msghdr and iovec structures describing the new packet */
562153323Srodrigc	msg.msg_name = PTRIN(linux_args->to);
563159451Srodrigc	msg.msg_namelen = linux_args->tolen;
564153323Srodrigc	msg.msg_iov = aiov;
565159451Srodrigc	msg.msg_iovlen = 1;
566159451Srodrigc	msg.msg_control = NULL;
567153323Srodrigc	msg.msg_flags = 0;
568153323Srodrigc	aiov[0].iov_base = (char *)packet;
569153323Srodrigc	aiov[0].iov_len = linux_args->len;
570153323Srodrigc	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
571153323Srodrigc	    NULL, UIO_SYSSPACE);
572153323Srodrigcgoout:
573153323Srodrigc	free(packet, M_TEMP);
574153323Srodrigc	return (error);
575153323Srodrigc}
576153323Srodrigc
/* Arguments of the Linux socket() request. */
struct linux_socket_args {
	int	domain;		/* Linux address family */
	int	type;		/* socket type, passed on unchanged */
	int	protocol;	/* protocol, passed on unchanged */
};
582153323Srodrigc
583153323Srodrigcstatic int
584153323Srodrigclinux_socket(struct thread *td, struct linux_socket_args *args)
585153323Srodrigc{
586153323Srodrigc#ifdef INET6
587153323Srodrigc	INIT_VNET_INET6(curvnet);
588153323Srodrigc#endif
589159451Srodrigc	struct socket_args /* {
590153323Srodrigc		int domain;
591153323Srodrigc		int type;
592153323Srodrigc		int protocol;
593153323Srodrigc	} */ bsd_args;
594153323Srodrigc	int retval_socket;
595153323Srodrigc
596153323Srodrigc	bsd_args.protocol = args->protocol;
597153323Srodrigc	bsd_args.type = args->type;
598153323Srodrigc	bsd_args.domain = linux_to_bsd_domain(args->domain);
599153323Srodrigc	if (bsd_args.domain == -1)
600153323Srodrigc		return (EINVAL);
601153323Srodrigc
602153323Srodrigc	retval_socket = socket(td, &bsd_args);
603153323Srodrigc	if (bsd_args.type == SOCK_RAW
604153323Srodrigc	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
605153323Srodrigc	    && bsd_args.domain == AF_INET
606159451Srodrigc	    && retval_socket >= 0) {
607153323Srodrigc		/* It's a raw IP socket: set the IP_HDRINCL option. */
608153323Srodrigc		int hdrincl;
609153323Srodrigc
610153323Srodrigc		hdrincl = 1;
611153323Srodrigc		/* We ignore any error returned by kern_setsockopt() */
612153323Srodrigc		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
613153323Srodrigc		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
614153323Srodrigc	}
615153323Srodrigc#ifdef INET6
616153323Srodrigc	/*
617153323Srodrigc	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
618153323Srodrigc	 * default and some apps depend on this. So, set V6ONLY to 0
619153323Srodrigc	 * for Linux apps if the sysctl value is set to 1.
620153323Srodrigc	 */
621153323Srodrigc	if (bsd_args.domain == PF_INET6 && retval_socket >= 0
622153323Srodrigc#ifndef KLD_MODULE
623153323Srodrigc	    /*
624153323Srodrigc	     * XXX: Avoid undefined symbol error with an IPv4 only
625153323Srodrigc	     * kernel.
626153323Srodrigc	     */
627153323Srodrigc	    && V_ip6_v6only
628153323Srodrigc#endif
629153323Srodrigc	    ) {
630153323Srodrigc		int v6only;
631153323Srodrigc
632153323Srodrigc		v6only = 0;
633153323Srodrigc		/* We ignore any error returned by setsockopt() */
634153323Srodrigc		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
635153323Srodrigc		    &v6only, UIO_SYSSPACE, sizeof(v6only));
636153323Srodrigc	}
637153323Srodrigc#endif
638153323Srodrigc
639153323Srodrigc	return (retval_socket);
640153323Srodrigc}
641153323Srodrigc
642153323Srodrigcstruct linux_bind_args {
643153323Srodrigc	int s;
644153323Srodrigc	l_uintptr_t name;
645153323Srodrigc	int namelen;
646153323Srodrigc};
647153323Srodrigc
648153323Srodrigcstatic int
649153323Srodrigclinux_bind(struct thread *td, struct linux_bind_args *args)
650153323Srodrigc{
651153323Srodrigc	struct sockaddr *sa;
652153323Srodrigc	int error;
653153323Srodrigc
654153323Srodrigc	error = linux_getsockaddr(&sa, PTRIN(args->name),
655153323Srodrigc	    args->namelen);
656153323Srodrigc	if (error)
657153323Srodrigc		return (error);
658153323Srodrigc
659153323Srodrigc	error = kern_bind(td, args->s, sa);
660153323Srodrigc	free(sa, M_SONAME);
661159451Srodrigc	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
662159451Srodrigc	   	return (EINVAL);
663159451Srodrigc	return (error);
664159451Srodrigc}
665159451Srodrigc
666153323Srodrigcstruct linux_connect_args {
667159451Srodrigc	int s;
668159451Srodrigc	l_uintptr_t name;
669159451Srodrigc	int namelen;
670159451Srodrigc};
671159451Srodrigcint linux_connect(struct thread *, struct linux_connect_args *);
672159451Srodrigc
673159451Srodrigcint
674159451Srodrigclinux_connect(struct thread *td, struct linux_connect_args *args)
675159451Srodrigc{
676159451Srodrigc	struct socket *so;
677159451Srodrigc	struct sockaddr *sa;
678159451Srodrigc	u_int fflag;
679159451Srodrigc	int error;
680159451Srodrigc
681159451Srodrigc	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
682159451Srodrigc	    args->namelen);
683159451Srodrigc	if (error)
684159451Srodrigc		return (error);
685159451Srodrigc
686159451Srodrigc	error = kern_connect(td, args->s, sa);
687159451Srodrigc	free(sa, M_SONAME);
688159451Srodrigc	if (error != EISCONN)
689159451Srodrigc		return (error);
690153323Srodrigc
691153323Srodrigc	/*
692153323Srodrigc	 * Linux doesn't return EISCONN the first time it occurs,
693153323Srodrigc	 * when on a non-blocking socket. Instead it returns the
694153323Srodrigc	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
695153323Srodrigc	 *
696153323Srodrigc	 * XXXRW: Instead of using fgetsock(), check that it is a
697153323Srodrigc	 * socket and use the file descriptor reference instead of
698153323Srodrigc	 * creating a new one.
699153323Srodrigc	 */
700153323Srodrigc	error = fgetsock(td, args->s, &so, &fflag);
701153323Srodrigc	if (error == 0) {
702153323Srodrigc		error = EISCONN;
703153323Srodrigc		if (fflag & FNONBLOCK) {
704153323Srodrigc			SOCK_LOCK(so);
705159451Srodrigc			if (so->so_emuldata == 0)
706153323Srodrigc				error = so->so_error;
707159451Srodrigc			so->so_emuldata = (void *)1;
708159451Srodrigc			SOCK_UNLOCK(so);
709159451Srodrigc		}
710159451Srodrigc		fputsock(so);
711159451Srodrigc	}
712159451Srodrigc	return (error);
713159451Srodrigc}
714159451Srodrigc
/* Arguments of the Linux listen() request. */
struct linux_listen_args {
	int	s;		/* socket descriptor */
	int	backlog;	/* backlog value, passed on unchanged */
};
719159451Srodrigc
720153323Srodrigcstatic int
721159451Srodrigclinux_listen(struct thread *td, struct linux_listen_args *args)
722153323Srodrigc{
723153323Srodrigc	struct listen_args /* {
724153323Srodrigc		int s;
725153323Srodrigc		int backlog;
726153323Srodrigc	} */ bsd_args;
727153323Srodrigc
728153323Srodrigc	bsd_args.s = args->s;
729153323Srodrigc	bsd_args.backlog = args->backlog;
730153323Srodrigc	return (listen(td, &bsd_args));
731153323Srodrigc}
732153323Srodrigc
733153323Srodrigcstruct linux_accept_args {
734153323Srodrigc	int s;
735153323Srodrigc	l_uintptr_t addr;
736153323Srodrigc	l_uintptr_t namelen;
737153323Srodrigc};
738153323Srodrigc
739153323Srodrigcstatic int
740153323Srodrigclinux_accept(struct thread *td, struct linux_accept_args *args)
741153323Srodrigc{
742153323Srodrigc	struct accept_args /* {
743159451Srodrigc		int	s;
744153323Srodrigc		struct sockaddr * __restrict name;
745153323Srodrigc		socklen_t * __restrict anamelen;
746159451Srodrigc	} */ bsd_args;
747159451Srodrigc	int error, fd;
748153323Srodrigc
749153323Srodrigc	bsd_args.s = args->s;
750153323Srodrigc	/* XXX: */
751153323Srodrigc	bsd_args.name = (struct sockaddr * __restrict)PTRIN(args->addr);
752153323Srodrigc	bsd_args.anamelen = PTRIN(args->namelen);/* XXX */
753153323Srodrigc	error = accept(td, &bsd_args);
754153323Srodrigc	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
755153323Srodrigc	if (error) {
756153323Srodrigc		if (error == EFAULT && args->namelen != sizeof(struct sockaddr_in))
757153323Srodrigc			return (EINVAL);
758153323Srodrigc		return (error);
759153323Srodrigc	}
760153323Srodrigc	if (args->addr) {
761153323Srodrigc		error = linux_sa_put(PTRIN(args->addr));
762153323Srodrigc		if (error) {
763153323Srodrigc			(void)kern_close(td, td->td_retval[0]);
764153323Srodrigc			return (error);
765153323Srodrigc		}
766153323Srodrigc	}
767153323Srodrigc
768153323Srodrigc	/*
769153323Srodrigc	 * linux appears not to copy flags from the parent socket to the
770153323Srodrigc	 * accepted one, so we must clear the flags in the new descriptor.
771153323Srodrigc	 * Ignore any errors, because we already have an open fd.
772153323Srodrigc	 */
773153323Srodrigc	fd = td->td_retval[0];
774153323Srodrigc	(void)kern_fcntl(td, fd, F_SETFL, 0);
775153323Srodrigc	td->td_retval[0] = fd;
776153323Srodrigc	return (0);
777153323Srodrigc}
778153323Srodrigc
779153323Srodrigcstruct linux_getsockname_args {
780153323Srodrigc	int s;
781153323Srodrigc	l_uintptr_t addr;
782153323Srodrigc	l_uintptr_t namelen;
783153323Srodrigc};
784153323Srodrigc
785153323Srodrigcstatic int
786153323Srodrigclinux_getsockname(struct thread *td, struct linux_getsockname_args *args)
787153323Srodrigc{
788153323Srodrigc	struct getsockname_args /* {
789153323Srodrigc		int	fdes;
790153323Srodrigc		struct sockaddr * __restrict asa;
791153323Srodrigc		socklen_t * __restrict alen;
792153323Srodrigc	} */ bsd_args;
793153323Srodrigc	int error;
794153323Srodrigc
795153323Srodrigc	bsd_args.fdes = args->s;
796153323Srodrigc	/* XXX: */
797153323Srodrigc	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
798153323Srodrigc	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
799153323Srodrigc	error = getsockname(td, &bsd_args);
800153323Srodrigc	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
801153323Srodrigc	if (error)
802153323Srodrigc		return (error);
803153323Srodrigc	error = linux_sa_put(PTRIN(args->addr));
804153323Srodrigc	if (error)
805153323Srodrigc		return (error);
806153323Srodrigc	return (0);
807153323Srodrigc}
808153323Srodrigc
809153323Srodrigcstruct linux_getpeername_args {
810153323Srodrigc	int s;
811153323Srodrigc	l_uintptr_t addr;
812153323Srodrigc	l_uintptr_t namelen;
813153323Srodrigc};
814153323Srodrigc
815153323Srodrigcstatic int
816153323Srodrigclinux_getpeername(struct thread *td, struct linux_getpeername_args *args)
817153323Srodrigc{
818153323Srodrigc	struct getpeername_args /* {
819153323Srodrigc		int fdes;
820153323Srodrigc		caddr_t asa;
821153323Srodrigc		int *alen;
822153323Srodrigc	} */ bsd_args;
823153323Srodrigc	int error;
824153323Srodrigc
825153323Srodrigc	bsd_args.fdes = args->s;
826153323Srodrigc	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
827153323Srodrigc	bsd_args.alen = (int *)PTRIN(args->namelen);
828153323Srodrigc	error = getpeername(td, &bsd_args);
829153323Srodrigc	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
830153323Srodrigc	if (error)
831153323Srodrigc		return (error);
832153323Srodrigc	error = linux_sa_put(PTRIN(args->addr));
833153323Srodrigc	if (error)
834153323Srodrigc		return (error);
835153323Srodrigc	return (0);
836153323Srodrigc}
837153323Srodrigc
838153323Srodrigcstruct linux_socketpair_args {
839153323Srodrigc	int domain;
840153323Srodrigc	int type;
841153323Srodrigc	int protocol;
842153323Srodrigc	l_uintptr_t rsv;
843153323Srodrigc};
844153323Srodrigc
845153323Srodrigcstatic int
846153323Srodrigclinux_socketpair(struct thread *td, struct linux_socketpair_args *args)
847153323Srodrigc{
848153323Srodrigc	struct socketpair_args /* {
849153323Srodrigc		int domain;
850153323Srodrigc		int type;
851153323Srodrigc		int protocol;
852153323Srodrigc		int *rsv;
853153323Srodrigc	} */ bsd_args;
854153323Srodrigc
855153323Srodrigc	bsd_args.domain = linux_to_bsd_domain(args->domain);
856153323Srodrigc	if (bsd_args.domain == -1)
857153323Srodrigc		return (EINVAL);
858159451Srodrigc
859153323Srodrigc	bsd_args.type = args->type;
860153323Srodrigc	bsd_args.protocol = args->protocol;
861153323Srodrigc	bsd_args.rsv = (int *)PTRIN(args->rsv);
862153323Srodrigc	return (socketpair(td, &bsd_args));
863153323Srodrigc}
864153323Srodrigc
865153323Srodrigcstruct linux_send_args {
866153323Srodrigc	int s;
867153323Srodrigc	l_uintptr_t msg;
868153323Srodrigc	int len;
869153323Srodrigc	int flags;
870153323Srodrigc};
871153323Srodrigc
872153323Srodrigcstatic int
873153323Srodrigclinux_send(struct thread *td, struct linux_send_args *args)
874159451Srodrigc{
875159451Srodrigc	struct sendto_args /* {
876159451Srodrigc		int s;
877153323Srodrigc		caddr_t buf;
878153323Srodrigc		int len;
879153323Srodrigc		int flags;
880159451Srodrigc		caddr_t to;
881153323Srodrigc		int tolen;
882153323Srodrigc	} */ bsd_args;
883153323Srodrigc
884153323Srodrigc	bsd_args.s = args->s;
885153323Srodrigc	bsd_args.buf = (caddr_t)PTRIN(args->msg);
886153323Srodrigc	bsd_args.len = args->len;
887153323Srodrigc	bsd_args.flags = args->flags;
888153323Srodrigc	bsd_args.to = NULL;
889153323Srodrigc	bsd_args.tolen = 0;
890153323Srodrigc	return sendto(td, &bsd_args);
891153323Srodrigc}
892153323Srodrigc
893153323Srodrigcstruct linux_recv_args {
894153323Srodrigc	int s;
895153323Srodrigc	l_uintptr_t msg;
896153323Srodrigc	int len;
897153323Srodrigc	int flags;
898153323Srodrigc};
899153323Srodrigc
900153323Srodrigcstatic int
901153323Srodrigclinux_recv(struct thread *td, struct linux_recv_args *args)
902153323Srodrigc{
903153323Srodrigc	struct recvfrom_args /* {
904153323Srodrigc		int s;
905153323Srodrigc		caddr_t buf;
906153323Srodrigc		int len;
907153323Srodrigc		int flags;
908153323Srodrigc		struct sockaddr *from;
909153323Srodrigc		socklen_t fromlenaddr;
910153323Srodrigc	} */ bsd_args;
911153323Srodrigc
912153323Srodrigc	bsd_args.s = args->s;
913153323Srodrigc	bsd_args.buf = (caddr_t)PTRIN(args->msg);
914153323Srodrigc	bsd_args.len = args->len;
915159451Srodrigc	bsd_args.flags = args->flags;
916153323Srodrigc	bsd_args.from = NULL;
917153323Srodrigc	bsd_args.fromlenaddr = 0;
918159451Srodrigc	return (recvfrom(td, &bsd_args));
919153323Srodrigc}
920153323Srodrigc
921153323Srodrigcstatic int
922153323Srodrigclinux_sendto(struct thread *td, struct linux_sendto_args *args)
923153323Srodrigc{
924153323Srodrigc	struct msghdr msg;
925153323Srodrigc	struct iovec aiov;
926153323Srodrigc	int error;
927153323Srodrigc
928153323Srodrigc	if (linux_check_hdrincl(td, args->s) == 0)
929153323Srodrigc		/* IP_HDRINCL set, tweak the packet before sending */
930159451Srodrigc		return (linux_sendto_hdrincl(td, args));
931153323Srodrigc
932159451Srodrigc	msg.msg_name = PTRIN(args->to);
933159451Srodrigc	msg.msg_namelen = args->tolen;
934159451Srodrigc	msg.msg_iov = &aiov;
935159451Srodrigc	msg.msg_iovlen = 1;
936153323Srodrigc	msg.msg_control = NULL;
937153323Srodrigc	msg.msg_flags = 0;
938153323Srodrigc	aiov.iov_base = PTRIN(args->msg);
939153323Srodrigc	aiov.iov_len = args->len;
940153323Srodrigc	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
941153323Srodrigc	    UIO_USERSPACE);
942153323Srodrigc	return (error);
943159451Srodrigc}
944153323Srodrigc
945153323Srodrigcstruct linux_recvfrom_args {
946153323Srodrigc	int s;
947153323Srodrigc	l_uintptr_t buf;
948159451Srodrigc	int len;
949153323Srodrigc	int flags;
950153323Srodrigc	l_uintptr_t from;
951153323Srodrigc	l_uintptr_t fromlen;
952153323Srodrigc};
953153323Srodrigc
954153323Srodrigcstatic int
955153323Srodrigclinux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
956153323Srodrigc{
957153323Srodrigc	struct recvfrom_args /* {
958153323Srodrigc		int	s;
959153323Srodrigc		caddr_t	buf;
960153323Srodrigc		size_t	len;
961153323Srodrigc		int	flags;
962153323Srodrigc		struct sockaddr * __restrict from;
963153323Srodrigc		socklen_t * __restrict fromlenaddr;
964153323Srodrigc	} */ bsd_args;
965153323Srodrigc	size_t len;
966153323Srodrigc	int error;
967153323Srodrigc
968153323Srodrigc	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
969153323Srodrigc		return (error);
970153323Srodrigc
971153323Srodrigc	bsd_args.s = args->s;
972153323Srodrigc	bsd_args.buf = PTRIN(args->buf);
973153323Srodrigc	bsd_args.len = args->len;
974153323Srodrigc	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
975153323Srodrigc	/* XXX: */
976153323Srodrigc	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
977153323Srodrigc	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
978153323Srodrigc
979153323Srodrigc	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
980153323Srodrigc	error = recvfrom(td, &bsd_args);
981153323Srodrigc	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
982153323Srodrigc
983153323Srodrigc	if (error)
984153323Srodrigc		return (error);
985153323Srodrigc	if (args->from) {
986153323Srodrigc		error = linux_sa_put((struct osockaddr *)
987153323Srodrigc		    PTRIN(args->from));
988153323Srodrigc		if (error)
989153323Srodrigc			return (error);
990153323Srodrigc	}
991153323Srodrigc	return (0);
992153323Srodrigc}
993153323Srodrigc
994153323Srodrigcstruct linux_sendmsg_args {
995153323Srodrigc	int s;
996153323Srodrigc	l_uintptr_t msg;
997153323Srodrigc	int flags;
998153323Srodrigc};
999153323Srodrigc
1000153323Srodrigcstatic int
1001153323Srodrigclinux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1002153323Srodrigc{
1003153323Srodrigc	struct cmsghdr *cmsg;
1004153323Srodrigc	struct mbuf *control;
1005153323Srodrigc	struct msghdr msg;
1006153323Srodrigc	struct l_cmsghdr linux_cmsg;
1007153323Srodrigc	struct l_cmsghdr *ptr_cmsg;
1008153323Srodrigc	struct l_msghdr linux_msg;
1009153323Srodrigc	struct iovec *iov;
1010153323Srodrigc	socklen_t datalen;
1011153323Srodrigc	void *data;
1012153323Srodrigc	int error;
1013153323Srodrigc
1014153323Srodrigc	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1015153323Srodrigc	if (error)
1016153323Srodrigc		return (error);
1017153323Srodrigc	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1018153323Srodrigc	if (error)
1019153323Srodrigc		return (error);
1020153323Srodrigc
1021153323Srodrigc	/*
1022153323Srodrigc	 * Some Linux applications (ping) define a non-NULL control data
1023153323Srodrigc	 * pointer, but a msg_controllen of 0, which is not allowed in the
1024153323Srodrigc	 * FreeBSD system call interface.  NULL the msg_control pointer in
1025153323Srodrigc	 * order to handle this case.  This should be checked, but allows the
1026153323Srodrigc	 * Linux ping to work.
1027153323Srodrigc	 */
1028153323Srodrigc	if (msg.msg_control != NULL && msg.msg_controllen == 0)
1029153323Srodrigc		msg.msg_control = NULL;
1030153323Srodrigc
1031153323Srodrigc#ifdef COMPAT_LINUX32
1032159451Srodrigc	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1033153323Srodrigc	    &iov, EMSGSIZE);
1034153323Srodrigc#else
1035153323Srodrigc	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1036153323Srodrigc#endif
1037153323Srodrigc	if (error)
1038153323Srodrigc		return (error);
1039153323Srodrigc
1040153323Srodrigc	if (msg.msg_control != NULL) {
1041153323Srodrigc		error = ENOBUFS;
1042153323Srodrigc		cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1043153323Srodrigc		control = m_get(M_WAIT, MT_CONTROL);
1044153323Srodrigc		if (control == NULL)
1045153323Srodrigc			goto bad;
1046153323Srodrigc		ptr_cmsg = LINUX_CMSG_FIRSTHDR(&msg);
1047153323Srodrigc
1048153323Srodrigc		do {
1049153323Srodrigc			error = copyin(ptr_cmsg, &linux_cmsg,
1050153323Srodrigc			    sizeof(struct l_cmsghdr));
1051153323Srodrigc			if (error)
1052153323Srodrigc				goto bad;
1053153323Srodrigc
1054153323Srodrigc			error = EINVAL;
1055153323Srodrigc			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1056153323Srodrigc				goto bad;
1057153323Srodrigc
1058153323Srodrigc			/*
1059153323Srodrigc			 * Now we support only SCM_RIGHTS, so return EINVAL
1060153323Srodrigc			 * in any other cmsg_type
1061153323Srodrigc			 */
1062153323Srodrigc			if ((cmsg->cmsg_type =
1063153323Srodrigc			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type)) == -1)
1064153323Srodrigc				goto bad;
1065153323Srodrigc			cmsg->cmsg_level =
1066153323Srodrigc			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1067153323Srodrigc
1068153323Srodrigc			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1069153323Srodrigc			cmsg->cmsg_len = CMSG_LEN(datalen);
1070153323Srodrigc			data = LINUX_CMSG_DATA(ptr_cmsg);
1071153323Srodrigc
1072153323Srodrigc			error = ENOBUFS;
1073153323Srodrigc			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t) cmsg))
1074153323Srodrigc				goto bad;
1075153323Srodrigc			if (!m_append(control, datalen, (c_caddr_t) data))
1076153323Srodrigc				goto bad;
1077153323Srodrigc		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&msg, ptr_cmsg)));
1078153323Srodrigc	} else {
1079153323Srodrigc		control = NULL;
1080153323Srodrigc		cmsg = NULL;
1081153323Srodrigc	}
1082153323Srodrigc
1083153323Srodrigc	msg.msg_iov = iov;
1084153323Srodrigc	msg.msg_flags = 0;
1085153323Srodrigc	error = linux_sendit(td, args->s, &msg, args->flags, control,
1086153323Srodrigc	    UIO_USERSPACE);
1087153323Srodrigc
1088153323Srodrigcbad:
1089153323Srodrigc	free(iov, M_IOV);
1090153323Srodrigc	if (cmsg)
1091153323Srodrigc		free(cmsg, M_TEMP);
1092153323Srodrigc	return (error);
1093153323Srodrigc}
1094153323Srodrigc
1095153323Srodrigcstruct linux_recvmsg_args {
1096153323Srodrigc	int s;
1097153323Srodrigc	l_uintptr_t msg;
1098153323Srodrigc	int flags;
1099153323Srodrigc};
1100153323Srodrigc
1101153323Srodrigcstatic int
1102153323Srodrigclinux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1103153323Srodrigc{
1104153323Srodrigc	struct cmsghdr *cm;
1105153323Srodrigc	struct msghdr msg;
1106153323Srodrigc	struct l_cmsghdr *linux_cmsg = NULL;
1107153323Srodrigc	socklen_t datalen, outlen, clen;
1108153323Srodrigc	struct l_msghdr linux_msg;
1109153323Srodrigc	struct iovec *iov, *uiov;
1110159451Srodrigc	struct mbuf *control = NULL;
1111159451Srodrigc	struct mbuf **controlp;
1112159451Srodrigc	caddr_t outbuf;
1113159451Srodrigc	void *data;
1114159451Srodrigc	int error;
1115153323Srodrigc
1116153323Srodrigc	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1117153323Srodrigc	if (error)
1118153323Srodrigc		return (error);
1119153323Srodrigc
1120153323Srodrigc	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1121153323Srodrigc	if (error)
1122153323Srodrigc		return (error);
1123153323Srodrigc
1124153323Srodrigc#ifdef COMPAT_LINUX32
1125159451Srodrigc	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1126153323Srodrigc	    &iov, EMSGSIZE);
1127153323Srodrigc#else
1128153323Srodrigc	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1129153323Srodrigc#endif
1130153323Srodrigc	if (error)
1131153323Srodrigc		return (error);
1132153323Srodrigc
1133153323Srodrigc	if (msg.msg_name) {
1134153323Srodrigc		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1135153323Srodrigc		    msg.msg_namelen);
1136153323Srodrigc		if (error)
1137153323Srodrigc			goto bad;
1138153323Srodrigc	}
1139153323Srodrigc
1140153323Srodrigc	uiov = msg.msg_iov;
1141153323Srodrigc	msg.msg_iov = iov;
1142153323Srodrigc	controlp = (msg.msg_control != NULL) ? &control : NULL;
1143153323Srodrigc	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1144153323Srodrigc	msg.msg_iov = uiov;
1145153323Srodrigc	if (error)
1146153323Srodrigc		goto bad;
1147153323Srodrigc
1148153323Srodrigc	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1149153323Srodrigc	if (error)
1150153323Srodrigc		goto bad;
1151153323Srodrigc
1152153323Srodrigc	if (linux_msg.msg_name) {
1153159451Srodrigc		error = bsd_to_linux_sockaddr((struct sockaddr *)
1154153323Srodrigc		    PTRIN(linux_msg.msg_name));
1155153323Srodrigc		if (error)
1156153323Srodrigc			goto bad;
1157153323Srodrigc	}
1158153323Srodrigc	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1159153323Srodrigc		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1160153323Srodrigc		if (error)
1161159451Srodrigc			goto bad;
1162153323Srodrigc	}
1163159451Srodrigc
1164153323Srodrigc	if (control) {
1165153323Srodrigc
1166153323Srodrigc		linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1167159451Srodrigc		outbuf = PTRIN(linux_msg.msg_control);
1168153323Srodrigc		cm = mtod(control, struct cmsghdr *);
1169153323Srodrigc		outlen = 0;
1170153323Srodrigc		clen = control->m_len;
1171153323Srodrigc
1172153323Srodrigc		while (cm != NULL) {
1173153323Srodrigc
1174153323Srodrigc			if ((linux_cmsg->cmsg_type =
1175153323Srodrigc			    bsd_to_linux_cmsg_type(cm->cmsg_type)) == -1)
1176153323Srodrigc			{
1177153323Srodrigc				error = EINVAL;
1178153323Srodrigc				goto bad;
1179153323Srodrigc			}
1180153323Srodrigc			data = CMSG_DATA(cm);
1181153323Srodrigc			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1182153323Srodrigc
1183153323Srodrigc			if (outlen + LINUX_CMSG_LEN(datalen) >
1184153323Srodrigc			    linux_msg.msg_controllen) {
1185153323Srodrigc				if (outlen == 0) {
1186153323Srodrigc					error = EMSGSIZE;
1187153323Srodrigc					goto bad;
1188153323Srodrigc				} else {
1189153323Srodrigc					linux_msg.msg_flags |= LINUX_MSG_CTRUNC;
1190153323Srodrigc					goto out;
1191153323Srodrigc				}
1192153323Srodrigc			}
1193153323Srodrigc
1194153323Srodrigc			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1195153323Srodrigc			linux_cmsg->cmsg_level =
1196153323Srodrigc			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1197153323Srodrigc
1198153323Srodrigc			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1199153323Srodrigc			if (error)
1200153323Srodrigc				goto bad;
1201153323Srodrigc			outbuf += L_CMSG_HDRSZ;
1202153323Srodrigc
1203153323Srodrigc			error = copyout(data, outbuf, datalen);
1204153323Srodrigc			if (error)
1205153323Srodrigc				goto bad;
1206153323Srodrigc
1207153323Srodrigc			outbuf += LINUX_CMSG_ALIGN(datalen);
1208153323Srodrigc			outlen += LINUX_CMSG_LEN(datalen);
1209153323Srodrigc			linux_msg.msg_controllen = outlen;
1210153323Srodrigc
1211153323Srodrigc			if (CMSG_SPACE(datalen) < clen) {
1212153323Srodrigc				clen -= CMSG_SPACE(datalen);
1213153323Srodrigc				cm = (struct cmsghdr *)
1214153323Srodrigc				    ((caddr_t)cm + CMSG_SPACE(datalen));
1215153323Srodrigc			} else
1216153323Srodrigc				cm = NULL;
1217153323Srodrigc		}
1218153323Srodrigc	}
1219153323Srodrigc
1220153323Srodrigcout:
1221153323Srodrigc	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1222153323Srodrigc
1223153323Srodrigcbad:
1224153323Srodrigc	free(iov, M_IOV);
1225153323Srodrigc	if (control != NULL)
1226153323Srodrigc		m_freem(control);
1227153323Srodrigc	if (linux_cmsg != NULL)
1228153323Srodrigc		free(linux_cmsg, M_TEMP);
1229153323Srodrigc
1230153323Srodrigc	return (error);
1231153323Srodrigc}
1232153323Srodrigc
1233153323Srodrigcstruct linux_shutdown_args {
1234153323Srodrigc	int s;
1235153323Srodrigc	int how;
1236153323Srodrigc};
1237153323Srodrigc
1238153323Srodrigcstatic int
1239153323Srodrigclinux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1240153323Srodrigc{
1241153323Srodrigc	struct shutdown_args /* {
1242153323Srodrigc		int s;
1243153323Srodrigc		int how;
1244153323Srodrigc	} */ bsd_args;
1245153323Srodrigc
1246153323Srodrigc	bsd_args.s = args->s;
1247153323Srodrigc	bsd_args.how = args->how;
1248153323Srodrigc	return (shutdown(td, &bsd_args));
1249153323Srodrigc}
1250153323Srodrigc
1251153323Srodrigcstruct linux_setsockopt_args {
1252153323Srodrigc	int s;
1253153323Srodrigc	int level;
1254153323Srodrigc	int optname;
1255153323Srodrigc	l_uintptr_t optval;
1256153323Srodrigc	int optlen;
1257153323Srodrigc};
1258153323Srodrigc
1259153323Srodrigcstatic int
1260153323Srodrigclinux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1261153323Srodrigc{
1262153323Srodrigc	struct setsockopt_args /* {
1263153323Srodrigc		int s;
1264159451Srodrigc		int level;
1265153323Srodrigc		int name;
1266153323Srodrigc		caddr_t val;
1267153323Srodrigc		int valsize;
1268153323Srodrigc	} */ bsd_args;
1269153323Srodrigc	int error, name;
1270153323Srodrigc
1271153323Srodrigc	bsd_args.s = args->s;
1272153323Srodrigc	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1273153323Srodrigc	switch (bsd_args.level) {
1274153323Srodrigc	case SOL_SOCKET:
1275153323Srodrigc		name = linux_to_bsd_so_sockopt(args->optname);
1276153323Srodrigc		break;
1277153323Srodrigc	case IPPROTO_IP:
1278153323Srodrigc		name = linux_to_bsd_ip_sockopt(args->optname);
1279153323Srodrigc		break;
1280153323Srodrigc	case IPPROTO_TCP:
1281153323Srodrigc		/* Linux TCP option values match BSD's */
1282153323Srodrigc		name = args->optname;
1283153323Srodrigc		break;
1284153323Srodrigc	default:
1285153323Srodrigc		name = -1;
1286153323Srodrigc		break;
1287153323Srodrigc	}
1288153323Srodrigc	if (name == -1)
1289153323Srodrigc		return (ENOPROTOOPT);
1290153323Srodrigc
1291153323Srodrigc	bsd_args.name = name;
1292153323Srodrigc	bsd_args.val = PTRIN(args->optval);
1293153323Srodrigc	bsd_args.valsize = args->optlen;
1294153323Srodrigc
1295153323Srodrigc	if (name == IPV6_NEXTHOP) {
1296153323Srodrigc		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1297153323Srodrigc			bsd_args.valsize);
1298153323Srodrigc		error = setsockopt(td, &bsd_args);
1299153323Srodrigc		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1300153323Srodrigc	} else
1301153323Srodrigc		error = setsockopt(td, &bsd_args);
1302153323Srodrigc
1303153323Srodrigc	return (error);
1304153323Srodrigc}
1305153323Srodrigc
1306153323Srodrigcstruct linux_getsockopt_args {
1307153323Srodrigc	int s;
1308153323Srodrigc	int level;
1309153323Srodrigc	int optname;
1310153323Srodrigc	l_uintptr_t optval;
1311153323Srodrigc	l_uintptr_t optlen;
1312153323Srodrigc};
1313153323Srodrigc
1314153323Srodrigcstatic int
1315153323Srodrigclinux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1316153323Srodrigc{
1317153323Srodrigc	struct getsockopt_args /* {
1318153323Srodrigc		int s;
1319153323Srodrigc		int level;
1320153323Srodrigc		int name;
1321153323Srodrigc		caddr_t val;
1322153323Srodrigc		int *avalsize;
1323153323Srodrigc	} */ bsd_args;
1324153323Srodrigc	int error, name;
1325153323Srodrigc
1326153323Srodrigc	bsd_args.s = args->s;
1327153323Srodrigc	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1328153323Srodrigc	switch (bsd_args.level) {
1329153323Srodrigc	case SOL_SOCKET:
1330153323Srodrigc		name = linux_to_bsd_so_sockopt(args->optname);
1331153323Srodrigc		break;
1332153323Srodrigc	case IPPROTO_IP:
1333153323Srodrigc		name = linux_to_bsd_ip_sockopt(args->optname);
1334153323Srodrigc		break;
1335153323Srodrigc	case IPPROTO_TCP:
1336153323Srodrigc		/* Linux TCP option values match BSD's */
1337153323Srodrigc		name = args->optname;
1338153323Srodrigc		break;
1339153323Srodrigc	default:
1340153323Srodrigc		name = -1;
1341153323Srodrigc		break;
1342153323Srodrigc	}
1343153323Srodrigc	if (name == -1)
1344153323Srodrigc		return (EINVAL);
1345153323Srodrigc
1346153323Srodrigc	bsd_args.name = name;
1347153323Srodrigc	bsd_args.val = PTRIN(args->optval);
1348153323Srodrigc	bsd_args.avalsize = PTRIN(args->optlen);
1349153323Srodrigc
1350153323Srodrigc	if (name == IPV6_NEXTHOP) {
1351153323Srodrigc		error = getsockopt(td, &bsd_args);
1352153323Srodrigc		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1353153323Srodrigc	} else
1354153323Srodrigc		error = getsockopt(td, &bsd_args);
1355153323Srodrigc
1356153323Srodrigc	return (error);
1357153323Srodrigc}
1358153323Srodrigc
1359153323Srodrigcint
1360153323Srodrigclinux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1361153323Srodrigc{
1362153323Srodrigc	void *arg = (void *)(intptr_t)args->args;
1363153323Srodrigc
1364153323Srodrigc	switch (args->what) {
1365153323Srodrigc	case LINUX_SOCKET:
1366153323Srodrigc		return (linux_socket(td, arg));
1367153323Srodrigc	case LINUX_BIND:
1368153323Srodrigc		return (linux_bind(td, arg));
1369153323Srodrigc	case LINUX_CONNECT:
1370153323Srodrigc		return (linux_connect(td, arg));
1371153323Srodrigc	case LINUX_LISTEN:
1372153323Srodrigc		return (linux_listen(td, arg));
1373153323Srodrigc	case LINUX_ACCEPT:
1374153323Srodrigc		return (linux_accept(td, arg));
1375153323Srodrigc	case LINUX_GETSOCKNAME:
1376153323Srodrigc		return (linux_getsockname(td, arg));
1377153323Srodrigc	case LINUX_GETPEERNAME:
1378153323Srodrigc		return (linux_getpeername(td, arg));
1379153323Srodrigc	case LINUX_SOCKETPAIR:
1380153323Srodrigc		return (linux_socketpair(td, arg));
1381153323Srodrigc	case LINUX_SEND:
1382153323Srodrigc		return (linux_send(td, arg));
1383153323Srodrigc	case LINUX_RECV:
1384153323Srodrigc		return (linux_recv(td, arg));
1385153323Srodrigc	case LINUX_SENDTO:
1386153323Srodrigc		return (linux_sendto(td, arg));
1387153323Srodrigc	case LINUX_RECVFROM:
1388153323Srodrigc		return (linux_recvfrom(td, arg));
1389153323Srodrigc	case LINUX_SHUTDOWN:
1390153323Srodrigc		return (linux_shutdown(td, arg));
1391153323Srodrigc	case LINUX_SETSOCKOPT:
1392153323Srodrigc		return (linux_setsockopt(td, arg));
1393153323Srodrigc	case LINUX_GETSOCKOPT:
1394153323Srodrigc		return (linux_getsockopt(td, arg));
1395153323Srodrigc	case LINUX_SENDMSG:
1396153323Srodrigc		return (linux_sendmsg(td, arg));
1397153323Srodrigc	case LINUX_RECVMSG:
1398153323Srodrigc		return (linux_recvmsg(td, arg));
1399153323Srodrigc	}
1400153323Srodrigc
1401153323Srodrigc	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1402153323Srodrigc	return (ENOSYS);
1403153323Srodrigc}
1404153323Srodrigc