linux_socket.c revision 185442
1169689Skan/*-
 * Copyright (c) 1995 Søren Schmidt
3132718Skan * All rights reserved.
4132718Skan *
5132718Skan * Redistribution and use in source and binary forms, with or without
6132718Skan * modification, are permitted provided that the following conditions
7132718Skan * are met:
8132718Skan * 1. Redistributions of source code must retain the above copyright
9132718Skan *    notice, this list of conditions and the following disclaimer
10132718Skan *    in this position and unchanged.
11132718Skan * 2. Redistributions in binary form must reproduce the above copyright
12132718Skan *    notice, this list of conditions and the following disclaimer in the
13132718Skan *    documentation and/or other materials provided with the distribution.
14132718Skan * 3. The name of the author may not be used to endorse or promote products
15132718Skan *    derived from this software without specific prior written permission
16132718Skan *
17169689Skan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18169689Skan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19132718Skan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20132718Skan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21132718Skan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22132718Skan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23132718Skan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24132718Skan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25132718Skan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26132718Skan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27132718Skan */
28132718Skan
29132718Skan#include <sys/cdefs.h>
30132718Skan__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 185442 2008-11-29 17:14:06Z kib $");
31132718Skan
32132718Skan/* XXX we use functions that might not exist. */
33132718Skan#include "opt_compat.h"
34169689Skan#include "opt_inet6.h"
35169689Skan
36169689Skan#include <sys/param.h>
37132718Skan#include <sys/proc.h>
38132718Skan#include <sys/systm.h>
39132718Skan#include <sys/sysproto.h>
40132718Skan#include <sys/fcntl.h>
41132718Skan#include <sys/file.h>
42132718Skan#include <sys/limits.h>
43132718Skan#include <sys/lock.h>
44132718Skan#include <sys/malloc.h>
45132718Skan#include <sys/mutex.h>
46132718Skan#include <sys/mbuf.h>
47132718Skan#include <sys/socket.h>
48132718Skan#include <sys/socketvar.h>
49132718Skan#include <sys/syscallsubr.h>
50132718Skan#include <sys/uio.h>
51132718Skan#include <sys/syslog.h>
52132718Skan#include <sys/un.h>
53132718Skan#include <sys/vimage.h>
54132718Skan
55132718Skan#include <netinet/in.h>
56132718Skan#include <netinet/in_systm.h>
57132718Skan#include <netinet/ip.h>
58132718Skan#ifdef INET6
59132718Skan#include <netinet/ip6.h>
60132718Skan#include <netinet6/ip6_var.h>
61132718Skan#endif
62132718Skan
63132718Skan#ifdef COMPAT_LINUX32
64132718Skan#include <machine/../linux32/linux.h>
65132718Skan#include <machine/../linux32/linux32_proto.h>
66132718Skan#else
67132718Skan#include <machine/../linux/linux.h>
68132718Skan#include <machine/../linux/linux_proto.h>
69132718Skan#endif
70132718Skan#include <compat/linux/linux_socket.h>
71132718Skan#include <compat/linux/linux_util.h>
72132718Skan
73132718Skanstatic int do_sa_get(struct sockaddr **, const struct osockaddr *, int *,
74132718Skan    struct malloc_type *);
75132718Skanstatic int linux_to_bsd_domain(int);
76132718Skan
77132718Skan/*
78132718Skan * Reads a linux sockaddr and does any necessary translation.
79132718Skan * Linux sockaddrs don't have a length field, only a family.
80132718Skan */
81132718Skanstatic int
82132718Skanlinux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
83132718Skan{
84132718Skan	int osalen = len;
85132718Skan
86132718Skan	return (do_sa_get(sap, osa, &osalen, M_SONAME));
87132718Skan}
88132718Skan
89132718Skan/*
90132718Skan * Copy the osockaddr structure pointed to by osa to kernel, adjust
91132718Skan * family and convert to sockaddr.
92132718Skan */
93132718Skanstatic int
94132718Skando_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
95132718Skan    struct malloc_type *mtype)
96132718Skan{
97132718Skan	int error=0, bdom;
98132718Skan	struct sockaddr *sa;
99132718Skan	struct osockaddr *kosa;
100132718Skan	int alloclen;
101132718Skan#ifdef INET6
102132718Skan	int oldv6size;
103132718Skan	struct sockaddr_in6 *sin6;
104132718Skan#endif
105132718Skan
106132718Skan	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
107132718Skan		return (EINVAL);
108132718Skan
109132718Skan	alloclen = *osalen;
110132718Skan#ifdef INET6
111132718Skan	oldv6size = 0;
112132718Skan	/*
113132718Skan	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
114132718Skan	 * if it's a v4-mapped address, so reserve the proper space
115132718Skan	 * for it.
116132718Skan	 */
117132718Skan	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
118132718Skan		alloclen = sizeof (struct sockaddr_in6);
119132718Skan		oldv6size = 1;
120132718Skan	}
121132718Skan#endif
122132718Skan
123132718Skan	kosa = malloc(alloclen, mtype, M_WAITOK);
124132718Skan
125132718Skan	if ((error = copyin(osa, kosa, *osalen)))
126132718Skan		goto out;
127132718Skan
128132718Skan	bdom = linux_to_bsd_domain(kosa->sa_family);
129132718Skan	if (bdom == -1) {
130132718Skan		error = EINVAL;
131132718Skan		goto out;
132132718Skan	}
133132718Skan
134132718Skan#ifdef INET6
135132718Skan	/*
136132718Skan	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
137132718Skan	 * which lacks the scope id compared with RFC2553 one. If we detect
138132718Skan	 * the situation, reject the address and write a message to system log.
139132718Skan	 *
140132718Skan	 * Still accept addresses for which the scope id is not used.
141132718Skan	 */
142132718Skan	if (oldv6size && bdom == AF_INET6) {
143132718Skan		sin6 = (struct sockaddr_in6 *)kosa;
144132718Skan		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
145132718Skan		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
146132718Skan		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
147132718Skan		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
148132718Skan		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
149132718Skan		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
150132718Skan			sin6->sin6_scope_id = 0;
151132718Skan		} else {
152132718Skan			log(LOG_DEBUG,
153132718Skan			    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
154132718Skan			error = EINVAL;
155132718Skan			goto out;
156132718Skan		}
157132718Skan	} else
158132718Skan#endif
159132718Skan	if (bdom == AF_INET)
160132718Skan		alloclen = sizeof(struct sockaddr_in);
161132718Skan
162132718Skan	sa = (struct sockaddr *) kosa;
163132718Skan	sa->sa_family = bdom;
164132718Skan	sa->sa_len = alloclen;
165132718Skan
166132718Skan	*sap = sa;
167132718Skan	*osalen = alloclen;
168132718Skan	return (0);
169132718Skan
170132718Skanout:
171132718Skan	free(kosa, mtype);
172132718Skan	return (error);
173132718Skan}
174132718Skan
175132718Skanstatic int
176132718Skanlinux_to_bsd_domain(int domain)
177132718Skan{
178132718Skan
179132718Skan	switch (domain) {
180132718Skan	case LINUX_AF_UNSPEC:
181132718Skan		return (AF_UNSPEC);
182132718Skan	case LINUX_AF_UNIX:
183132718Skan		return (AF_LOCAL);
184132718Skan	case LINUX_AF_INET:
185132718Skan		return (AF_INET);
186132718Skan	case LINUX_AF_INET6:
187132718Skan		return (AF_INET6);
188132718Skan	case LINUX_AF_AX25:
189132718Skan		return (AF_CCITT);
190132718Skan	case LINUX_AF_IPX:
191132718Skan		return (AF_IPX);
192132718Skan	case LINUX_AF_APPLETALK:
193132718Skan		return (AF_APPLETALK);
194132718Skan	}
195132718Skan	return (-1);
196132718Skan}
197132718Skan
198132718Skanstatic int
199132718Skanbsd_to_linux_domain(int domain)
200132718Skan{
201132718Skan
202132718Skan	switch (domain) {
203132718Skan	case AF_UNSPEC:
204132718Skan		return (LINUX_AF_UNSPEC);
205132718Skan	case AF_LOCAL:
206132718Skan		return (LINUX_AF_UNIX);
207132718Skan	case AF_INET:
208132718Skan		return (LINUX_AF_INET);
209132718Skan	case AF_INET6:
210132718Skan		return (LINUX_AF_INET6);
211132718Skan	case AF_CCITT:
212132718Skan		return (LINUX_AF_AX25);
213132718Skan	case AF_IPX:
214132718Skan		return (LINUX_AF_IPX);
215132718Skan	case AF_APPLETALK:
216132718Skan		return (LINUX_AF_APPLETALK);
217132718Skan	}
218132718Skan	return (-1);
219132718Skan}
220132718Skan
221132718Skanstatic int
222132718Skanlinux_to_bsd_sockopt_level(int level)
223132718Skan{
224132718Skan
225132718Skan	switch (level) {
226132718Skan	case LINUX_SOL_SOCKET:
227132718Skan		return (SOL_SOCKET);
228132718Skan	}
229132718Skan	return (level);
230132718Skan}
231132718Skan
232132718Skanstatic int
233132718Skanbsd_to_linux_sockopt_level(int level)
234132718Skan{
235132718Skan
236132718Skan	switch (level) {
237132718Skan	case SOL_SOCKET:
238132718Skan		return (LINUX_SOL_SOCKET);
239132718Skan	}
240132718Skan	return (level);
241132718Skan}
242132718Skan
243132718Skanstatic int
244132718Skanlinux_to_bsd_ip_sockopt(int opt)
245132718Skan{
246132718Skan
247132718Skan	switch (opt) {
248132718Skan	case LINUX_IP_TOS:
249132718Skan		return (IP_TOS);
250132718Skan	case LINUX_IP_TTL:
251132718Skan		return (IP_TTL);
252132718Skan	case LINUX_IP_OPTIONS:
253132718Skan		return (IP_OPTIONS);
254132718Skan	case LINUX_IP_MULTICAST_IF:
255132718Skan		return (IP_MULTICAST_IF);
256132718Skan	case LINUX_IP_MULTICAST_TTL:
257132718Skan		return (IP_MULTICAST_TTL);
258132718Skan	case LINUX_IP_MULTICAST_LOOP:
259132718Skan		return (IP_MULTICAST_LOOP);
260132718Skan	case LINUX_IP_ADD_MEMBERSHIP:
261132718Skan		return (IP_ADD_MEMBERSHIP);
262132718Skan	case LINUX_IP_DROP_MEMBERSHIP:
263132718Skan		return (IP_DROP_MEMBERSHIP);
264132718Skan	case LINUX_IP_HDRINCL:
265132718Skan		return (IP_HDRINCL);
266132718Skan	}
267132718Skan	return (-1);
268132718Skan}
269132718Skan
270132718Skanstatic int
271132718Skanlinux_to_bsd_so_sockopt(int opt)
272132718Skan{
273132718Skan
274132718Skan	switch (opt) {
275132718Skan	case LINUX_SO_DEBUG:
276132718Skan		return (SO_DEBUG);
277132718Skan	case LINUX_SO_REUSEADDR:
278132718Skan		return (SO_REUSEADDR);
279132718Skan	case LINUX_SO_TYPE:
280132718Skan		return (SO_TYPE);
281132718Skan	case LINUX_SO_ERROR:
282132718Skan		return (SO_ERROR);
283132718Skan	case LINUX_SO_DONTROUTE:
284132718Skan		return (SO_DONTROUTE);
285132718Skan	case LINUX_SO_BROADCAST:
286132718Skan		return (SO_BROADCAST);
287132718Skan	case LINUX_SO_SNDBUF:
288132718Skan		return (SO_SNDBUF);
289132718Skan	case LINUX_SO_RCVBUF:
290132718Skan		return (SO_RCVBUF);
291132718Skan	case LINUX_SO_KEEPALIVE:
292132718Skan		return (SO_KEEPALIVE);
293132718Skan	case LINUX_SO_OOBINLINE:
294132718Skan		return (SO_OOBINLINE);
295132718Skan	case LINUX_SO_LINGER:
296132718Skan		return (SO_LINGER);
297132718Skan	case LINUX_SO_PEERCRED:
298132718Skan		return (LOCAL_PEERCRED);
299132718Skan	case LINUX_SO_RCVLOWAT:
300132718Skan		return (SO_RCVLOWAT);
301132718Skan	case LINUX_SO_SNDLOWAT:
302132718Skan		return (SO_SNDLOWAT);
303132718Skan	case LINUX_SO_RCVTIMEO:
304132718Skan		return (SO_RCVTIMEO);
305132718Skan	case LINUX_SO_SNDTIMEO:
306132718Skan		return (SO_SNDTIMEO);
307132718Skan	case LINUX_SO_TIMESTAMP:
308132718Skan		return (SO_TIMESTAMP);
309132718Skan	case LINUX_SO_ACCEPTCONN:
310132718Skan		return (SO_ACCEPTCONN);
311132718Skan	}
312132718Skan	return (-1);
313132718Skan}
314132718Skan
315132718Skanstatic int
316132718Skanlinux_to_bsd_msg_flags(int flags)
317132718Skan{
318132718Skan	int ret_flags = 0;
319132718Skan
320132718Skan	if (flags & LINUX_MSG_OOB)
321132718Skan		ret_flags |= MSG_OOB;
322132718Skan	if (flags & LINUX_MSG_PEEK)
323132718Skan		ret_flags |= MSG_PEEK;
324132718Skan	if (flags & LINUX_MSG_DONTROUTE)
325132718Skan		ret_flags |= MSG_DONTROUTE;
326132718Skan	if (flags & LINUX_MSG_CTRUNC)
327132718Skan		ret_flags |= MSG_CTRUNC;
328132718Skan	if (flags & LINUX_MSG_TRUNC)
329132718Skan		ret_flags |= MSG_TRUNC;
330132718Skan	if (flags & LINUX_MSG_DONTWAIT)
331132718Skan		ret_flags |= MSG_DONTWAIT;
332132718Skan	if (flags & LINUX_MSG_EOR)
333132718Skan		ret_flags |= MSG_EOR;
334132718Skan	if (flags & LINUX_MSG_WAITALL)
335132718Skan		ret_flags |= MSG_WAITALL;
336132718Skan	if (flags & LINUX_MSG_NOSIGNAL)
337132718Skan		ret_flags |= MSG_NOSIGNAL;
338132718Skan#if 0 /* not handled */
339132718Skan	if (flags & LINUX_MSG_PROXY)
340132718Skan		;
341132718Skan	if (flags & LINUX_MSG_FIN)
342132718Skan		;
343132718Skan	if (flags & LINUX_MSG_SYN)
344132718Skan		;
345132718Skan	if (flags & LINUX_MSG_CONFIRM)
346132718Skan		;
347132718Skan	if (flags & LINUX_MSG_RST)
348132718Skan		;
349132718Skan	if (flags & LINUX_MSG_ERRQUEUE)
350132718Skan		;
351132718Skan#endif
352132718Skan	return ret_flags;
353132718Skan}
354132718Skan
355132718Skan/*
356132718Skan* If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
357132718Skan* native syscall will fault.  Thus, we don't really need to check the
358132718Skan* return values for these functions.
359132718Skan*/
360132718Skan
361132718Skanstatic int
362132718Skanbsd_to_linux_sockaddr(struct sockaddr *arg)
363132718Skan{
364132718Skan	struct sockaddr sa;
365132718Skan	size_t sa_len = sizeof(struct sockaddr);
366132718Skan	int error;
367132718Skan
368132718Skan	if ((error = copyin(arg, &sa, sa_len)))
369132718Skan		return (error);
370132718Skan
371132718Skan	*(u_short *)&sa = sa.sa_family;
372132718Skan
373132718Skan	error = copyout(&sa, arg, sa_len);
374132718Skan
375132718Skan	return (error);
376132718Skan}
377132718Skan
378132718Skanstatic int
379132718Skanlinux_to_bsd_sockaddr(struct sockaddr *arg, int len)
380132718Skan{
381132718Skan	struct sockaddr sa;
382132718Skan	size_t sa_len = sizeof(struct sockaddr);
383132718Skan	int error;
384132718Skan
385132718Skan	if ((error = copyin(arg, &sa, sa_len)))
386132718Skan		return (error);
387132718Skan
388132718Skan	sa.sa_family = *(sa_family_t *)&sa;
389132718Skan	sa.sa_len = len;
390132718Skan
391132718Skan	error = copyout(&sa, arg, sa_len);
392132718Skan
393132718Skan	return (error);
394132718Skan}
395132718Skan
396132718Skan
397132718Skanstatic int
398132718Skanlinux_sa_put(struct osockaddr *osa)
399132718Skan{
400132718Skan	struct osockaddr sa;
401132718Skan	int error, bdom;
402132718Skan
403132718Skan	/*
404132718Skan	 * Only read/write the osockaddr family part, the rest is
405132718Skan	 * not changed.
406132718Skan	 */
407132718Skan	error = copyin(osa, &sa, sizeof(sa.sa_family));
408132718Skan	if (error)
409132718Skan		return (error);
410132718Skan
411132718Skan	bdom = bsd_to_linux_domain(sa.sa_family);
412132718Skan	if (bdom == -1)
413132718Skan		return (EINVAL);
414132718Skan
415132718Skan	sa.sa_family = bdom;
416132718Skan	error = copyout(&sa, osa, sizeof(sa.sa_family));
417132718Skan	if (error)
418132718Skan		return (error);
419132718Skan
420132718Skan	return (0);
421132718Skan}
422169689Skan
423132718Skanstatic int
424132718Skanlinux_to_bsd_cmsg_type(int cmsg_type)
425132718Skan{
426132718Skan
427132718Skan	switch (cmsg_type) {
428132718Skan	case LINUX_SCM_RIGHTS:
429132718Skan		return (SCM_RIGHTS);
430169689Skan	}
431132718Skan	return (-1);
432132718Skan}
433132718Skan
434132718Skanstatic int
435132718Skanbsd_to_linux_cmsg_type(int cmsg_type)
436132718Skan{
437132718Skan
438132718Skan	switch (cmsg_type) {
439132718Skan	case SCM_RIGHTS:
440132718Skan		return (LINUX_SCM_RIGHTS);
441132718Skan	}
442132718Skan	return (-1);
443132718Skan}
444132718Skan
445132718Skan
446132718Skan
447132718Skanstatic int
448132718Skanlinux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
449132718Skan{
450132718Skan	if (lhdr->msg_controllen > INT_MAX)
451132718Skan		return (ENOBUFS);
452132718Skan
453132718Skan	bhdr->msg_name		= PTRIN(lhdr->msg_name);
454132718Skan	bhdr->msg_namelen	= lhdr->msg_namelen;
455132718Skan	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
456132718Skan	bhdr->msg_iovlen	= lhdr->msg_iovlen;
457132718Skan	bhdr->msg_control	= PTRIN(lhdr->msg_control);
458132718Skan	bhdr->msg_controllen	= lhdr->msg_controllen;
459132718Skan	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
460132718Skan	return (0);
461132718Skan}
462132718Skan
463132718Skanstatic int
464132718Skanbsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
465132718Skan{
466132718Skan	lhdr->msg_name		= PTROUT(bhdr->msg_name);
467132718Skan	lhdr->msg_namelen	= bhdr->msg_namelen;
468132718Skan	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
469132718Skan	lhdr->msg_iovlen	= bhdr->msg_iovlen;
470132718Skan	lhdr->msg_control	= PTROUT(bhdr->msg_control);
471132718Skan	lhdr->msg_controllen	= bhdr->msg_controllen;
472132718Skan	/* msg_flags skipped */
473132718Skan	return (0);
474132718Skan}
475132718Skan
476132718Skanstatic int
477132718Skanlinux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
478132718Skan    struct mbuf *control, enum uio_seg segflg)
479132718Skan{
480132718Skan	struct sockaddr *to;
481132718Skan	int error;
482132718Skan
483132718Skan	if (mp->msg_name != NULL) {
484132718Skan		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
485132718Skan		if (error)
486132718Skan			return (error);
487132718Skan		mp->msg_name = to;
488132718Skan	} else
489132718Skan		to = NULL;
490132718Skan
491132718Skan	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
492132718Skan	    segflg);
493132718Skan
494132718Skan	if (to)
495132718Skan		free(to, M_SONAME);
496132718Skan	return (error);
497132718Skan}
498132718Skan
499132718Skan/* Return 0 if IP_HDRINCL is set for the given socket. */
500132718Skanstatic int
501132718Skanlinux_check_hdrincl(struct thread *td, int s)
502132718Skan{
503132718Skan	int error, optval, size_val;
504132718Skan
505132718Skan	size_val = sizeof(optval);
506132718Skan	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
507132718Skan	    &optval, UIO_SYSSPACE, &size_val);
508132718Skan	if (error)
509132718Skan		return (error);
510132718Skan
511132718Skan	return (optval == 0);
512132718Skan}
513132718Skan
514132718Skanstruct linux_sendto_args {
515132718Skan	int s;
516132718Skan	l_uintptr_t msg;
517132718Skan	int len;
518132718Skan	int flags;
519132718Skan	l_uintptr_t to;
520132718Skan	int tolen;
521132718Skan};
522132718Skan
523132718Skan/*
524132718Skan * Updated sendto() when IP_HDRINCL is set:
525132718Skan * tweak endian-dependent fields in the IP packet.
526132718Skan */
527132718Skanstatic int
528132718Skanlinux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
529132718Skan{
530132718Skan/*
531132718Skan * linux_ip_copysize defines how many bytes we should copy
532132718Skan * from the beginning of the IP packet before we customize it for BSD.
533132718Skan * It should include all the fields we modify (ip_len and ip_off).
534132718Skan */
535132718Skan#define linux_ip_copysize	8
536132718Skan
537132718Skan	struct ip *packet;
538132718Skan	struct msghdr msg;
539132718Skan	struct iovec aiov[1];
540132718Skan	int error;
541132718Skan
542132718Skan	/* Check that the packet isn't too big or too small. */
543132718Skan	if (linux_args->len < linux_ip_copysize ||
544132718Skan	    linux_args->len > IP_MAXPACKET)
545132718Skan		return (EINVAL);
546132718Skan
547132718Skan	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
548132718Skan
549132718Skan	/* Make kernel copy of the packet to be sent */
550132718Skan	if ((error = copyin(PTRIN(linux_args->msg), packet,
551132718Skan	    linux_args->len)))
552132718Skan		goto goout;
553132718Skan
554132718Skan	/* Convert fields from Linux to BSD raw IP socket format */
555132718Skan	packet->ip_len = linux_args->len;
556132718Skan	packet->ip_off = ntohs(packet->ip_off);
557132718Skan
558132718Skan	/* Prepare the msghdr and iovec structures describing the new packet */
559132718Skan	msg.msg_name = PTRIN(linux_args->to);
560132718Skan	msg.msg_namelen = linux_args->tolen;
561132718Skan	msg.msg_iov = aiov;
562132718Skan	msg.msg_iovlen = 1;
563132718Skan	msg.msg_control = NULL;
564132718Skan	msg.msg_flags = 0;
565132718Skan	aiov[0].iov_base = (char *)packet;
566132718Skan	aiov[0].iov_len = linux_args->len;
567132718Skan	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
568132718Skan	    NULL, UIO_SYSSPACE);
569132718Skangoout:
570132718Skan	free(packet, M_TEMP);
571132718Skan	return (error);
572132718Skan}
573132718Skan
574132718Skanstruct linux_socket_args {
575132718Skan	int domain;
576132718Skan	int type;
577132718Skan	int protocol;
578132718Skan};
579132718Skan
580132718Skanstatic int
581132718Skanlinux_socket(struct thread *td, struct linux_socket_args *args)
582132718Skan{
583132718Skan#ifdef INET6
584132718Skan	INIT_VNET_INET6(curvnet);
585132718Skan#endif
586132718Skan	struct socket_args /* {
587132718Skan		int domain;
588132718Skan		int type;
589132718Skan		int protocol;
590132718Skan	} */ bsd_args;
591132718Skan	int retval_socket;
592132718Skan
593132718Skan	bsd_args.protocol = args->protocol;
594132718Skan	bsd_args.type = args->type;
595132718Skan	bsd_args.domain = linux_to_bsd_domain(args->domain);
596132718Skan	if (bsd_args.domain == -1)
597132718Skan		return (EINVAL);
598132718Skan
599132718Skan	retval_socket = socket(td, &bsd_args);
600132718Skan	if (bsd_args.type == SOCK_RAW
601132718Skan	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
602132718Skan	    && bsd_args.domain == AF_INET
603132718Skan	    && retval_socket >= 0) {
604132718Skan		/* It's a raw IP socket: set the IP_HDRINCL option. */
605132718Skan		int hdrincl;
606132718Skan
607132718Skan		hdrincl = 1;
608132718Skan		/* We ignore any error returned by kern_setsockopt() */
609132718Skan		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
610132718Skan		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
611132718Skan	}
612132718Skan#ifdef INET6
613132718Skan	/*
614132718Skan	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
615132718Skan	 * default and some apps depend on this. So, set V6ONLY to 0
616132718Skan	 * for Linux apps if the sysctl value is set to 1.
617132718Skan	 */
618132718Skan	if (bsd_args.domain == PF_INET6 && retval_socket >= 0
619132718Skan#ifndef KLD_MODULE
620132718Skan	    /*
621132718Skan	     * XXX: Avoid undefined symbol error with an IPv4 only
622132718Skan	     * kernel.
623132718Skan	     */
624132718Skan	    && V_ip6_v6only
625132718Skan#endif
626132718Skan	    ) {
627132718Skan		int v6only;
628132718Skan
629132718Skan		v6only = 0;
630132718Skan		/* We ignore any error returned by setsockopt() */
631132718Skan		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
632132718Skan		    &v6only, UIO_SYSSPACE, sizeof(v6only));
633132718Skan	}
634132718Skan#endif
635132718Skan
636132718Skan	return (retval_socket);
637132718Skan}
638132718Skan
639132718Skanstruct linux_bind_args {
640132718Skan	int s;
641132718Skan	l_uintptr_t name;
642132718Skan	int namelen;
643132718Skan};
644132718Skan
645132718Skanstatic int
646132718Skanlinux_bind(struct thread *td, struct linux_bind_args *args)
647132718Skan{
648132718Skan	struct sockaddr *sa;
649132718Skan	int error;
650132718Skan
651132718Skan	error = linux_getsockaddr(&sa, PTRIN(args->name),
652132718Skan	    args->namelen);
653132718Skan	if (error)
654132718Skan		return (error);
655132718Skan
656132718Skan	error = kern_bind(td, args->s, sa);
657132718Skan	free(sa, M_SONAME);
658132718Skan	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
659132718Skan	   	return (EINVAL);
660132718Skan	return (error);
661132718Skan}
662132718Skan
663132718Skanstruct linux_connect_args {
664132718Skan	int s;
665132718Skan	l_uintptr_t name;
666132718Skan	int namelen;
667132718Skan};
668132718Skanint linux_connect(struct thread *, struct linux_connect_args *);
669132718Skan
670132718Skanint
671132718Skanlinux_connect(struct thread *td, struct linux_connect_args *args)
672132718Skan{
673132718Skan	struct socket *so;
674132718Skan	struct sockaddr *sa;
675132718Skan	u_int fflag;
676132718Skan	int error;
677132718Skan
678132718Skan	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
679132718Skan	    args->namelen);
680132718Skan	if (error)
681132718Skan		return (error);
682132718Skan
683132718Skan	error = kern_connect(td, args->s, sa);
684132718Skan	free(sa, M_SONAME);
685132718Skan	if (error != EISCONN)
686132718Skan		return (error);
687132718Skan
688132718Skan	/*
689132718Skan	 * Linux doesn't return EISCONN the first time it occurs,
690132718Skan	 * when on a non-blocking socket. Instead it returns the
691132718Skan	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
692132718Skan	 *
693132718Skan	 * XXXRW: Instead of using fgetsock(), check that it is a
694132718Skan	 * socket and use the file descriptor reference instead of
695132718Skan	 * creating a new one.
696132718Skan	 */
697132718Skan	error = fgetsock(td, args->s, &so, &fflag);
698132718Skan	if (error == 0) {
699132718Skan		error = EISCONN;
700132718Skan		if (fflag & FNONBLOCK) {
701132718Skan			SOCK_LOCK(so);
702132718Skan			if (so->so_emuldata == 0)
703132718Skan				error = so->so_error;
704132718Skan			so->so_emuldata = (void *)1;
705132718Skan			SOCK_UNLOCK(so);
706132718Skan		}
707132718Skan		fputsock(so);
708132718Skan	}
709132718Skan	return (error);
710132718Skan}
711132718Skan
712132718Skanstruct linux_listen_args {
713132718Skan	int s;
714132718Skan	int backlog;
715132718Skan};
716132718Skan
717132718Skanstatic int
718132718Skanlinux_listen(struct thread *td, struct linux_listen_args *args)
719132718Skan{
720132718Skan	struct listen_args /* {
721132718Skan		int s;
722132718Skan		int backlog;
723132718Skan	} */ bsd_args;
724132718Skan
725132718Skan	bsd_args.s = args->s;
726132718Skan	bsd_args.backlog = args->backlog;
727132718Skan	return (listen(td, &bsd_args));
728132718Skan}
729132718Skan
730132718Skanstruct linux_accept_args {
731132718Skan	int s;
732132718Skan	l_uintptr_t addr;
733132718Skan	l_uintptr_t namelen;
734132718Skan};
735132718Skan
736132718Skanstatic int
737132718Skanlinux_accept(struct thread *td, struct linux_accept_args *args)
738132718Skan{
739132718Skan	struct accept_args /* {
740132718Skan		int	s;
741132718Skan		struct sockaddr * __restrict name;
742132718Skan		socklen_t * __restrict anamelen;
743132718Skan	} */ bsd_args;
744132718Skan	int error, fd;
745132718Skan
746132718Skan	bsd_args.s = args->s;
747132718Skan	/* XXX: */
748132718Skan	bsd_args.name = (struct sockaddr * __restrict)PTRIN(args->addr);
749132718Skan	bsd_args.anamelen = PTRIN(args->namelen);/* XXX */
750132718Skan	error = accept(td, &bsd_args);
751132718Skan	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
752132718Skan	if (error) {
753132718Skan		if (error == EFAULT && args->namelen != sizeof(struct sockaddr_in))
754132718Skan			return (EINVAL);
755132718Skan		return (error);
756132718Skan	}
757132718Skan	if (args->addr) {
758132718Skan		error = linux_sa_put(PTRIN(args->addr));
759132718Skan		if (error) {
760132718Skan			(void)kern_close(td, td->td_retval[0]);
761132718Skan			return (error);
762132718Skan		}
763132718Skan	}
764132718Skan
765132718Skan	/*
766132718Skan	 * linux appears not to copy flags from the parent socket to the
767132718Skan	 * accepted one, so we must clear the flags in the new descriptor.
768132718Skan	 * Ignore any errors, because we already have an open fd.
769132718Skan	 */
770132718Skan	fd = td->td_retval[0];
771132718Skan	(void)kern_fcntl(td, fd, F_SETFL, 0);
772132718Skan	td->td_retval[0] = fd;
773132718Skan	return (0);
774132718Skan}
775132718Skan
776132718Skanstruct linux_getsockname_args {
777132718Skan	int s;
778132718Skan	l_uintptr_t addr;
779132718Skan	l_uintptr_t namelen;
780132718Skan};
781132718Skan
782132718Skanstatic int
783132718Skanlinux_getsockname(struct thread *td, struct linux_getsockname_args *args)
784132718Skan{
785132718Skan	struct getsockname_args /* {
786132718Skan		int	fdes;
787132718Skan		struct sockaddr * __restrict asa;
788132718Skan		socklen_t * __restrict alen;
789132718Skan	} */ bsd_args;
790132718Skan	int error;
791132718Skan
792132718Skan	bsd_args.fdes = args->s;
793132718Skan	/* XXX: */
794132718Skan	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
795132718Skan	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
796132718Skan	error = getsockname(td, &bsd_args);
797132718Skan	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
798132718Skan	if (error)
799132718Skan		return (error);
800132718Skan	error = linux_sa_put(PTRIN(args->addr));
801132718Skan	if (error)
802132718Skan		return (error);
803132718Skan	return (0);
804132718Skan}
805132718Skan
806132718Skanstruct linux_getpeername_args {
807132718Skan	int s;
808132718Skan	l_uintptr_t addr;
809132718Skan	l_uintptr_t namelen;
810132718Skan};
811132718Skan
812132718Skanstatic int
813132718Skanlinux_getpeername(struct thread *td, struct linux_getpeername_args *args)
814132718Skan{
815132718Skan	struct getpeername_args /* {
816132718Skan		int fdes;
817132718Skan		caddr_t asa;
818132718Skan		int *alen;
819132718Skan	} */ bsd_args;
820132718Skan	int error;
821132718Skan
822132718Skan	bsd_args.fdes = args->s;
823132718Skan	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
824132718Skan	bsd_args.alen = (int *)PTRIN(args->namelen);
825132718Skan	error = getpeername(td, &bsd_args);
826132718Skan	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
827132718Skan	if (error)
828132718Skan		return (error);
829132718Skan	error = linux_sa_put(PTRIN(args->addr));
830132718Skan	if (error)
831132718Skan		return (error);
832132718Skan	return (0);
833132718Skan}
834132718Skan
835132718Skanstruct linux_socketpair_args {
836132718Skan	int domain;
837132718Skan	int type;
838132718Skan	int protocol;
839132718Skan	l_uintptr_t rsv;
840132718Skan};
841132718Skan
842132718Skanstatic int
843132718Skanlinux_socketpair(struct thread *td, struct linux_socketpair_args *args)
844132718Skan{
845132718Skan	struct socketpair_args /* {
846132718Skan		int domain;
847132718Skan		int type;
848132718Skan		int protocol;
849132718Skan		int *rsv;
850132718Skan	} */ bsd_args;
851132718Skan
852132718Skan	bsd_args.domain = linux_to_bsd_domain(args->domain);
853132718Skan	if (bsd_args.domain == -1)
854132718Skan		return (EINVAL);
855132718Skan
856132718Skan	bsd_args.type = args->type;
857132718Skan	bsd_args.protocol = args->protocol;
858132718Skan	bsd_args.rsv = (int *)PTRIN(args->rsv);
859132718Skan	return (socketpair(td, &bsd_args));
860132718Skan}
861132718Skan
862132718Skanstruct linux_send_args {
863132718Skan	int s;
864132718Skan	l_uintptr_t msg;
865132718Skan	int len;
866132718Skan	int flags;
867132718Skan};
868132718Skan
869132718Skanstatic int
870132718Skanlinux_send(struct thread *td, struct linux_send_args *args)
871132718Skan{
872132718Skan	struct sendto_args /* {
873132718Skan		int s;
874132718Skan		caddr_t buf;
875132718Skan		int len;
876132718Skan		int flags;
877132718Skan		caddr_t to;
878132718Skan		int tolen;
879132718Skan	} */ bsd_args;
880132718Skan
881132718Skan	bsd_args.s = args->s;
882132718Skan	bsd_args.buf = (caddr_t)PTRIN(args->msg);
883132718Skan	bsd_args.len = args->len;
884132718Skan	bsd_args.flags = args->flags;
885132718Skan	bsd_args.to = NULL;
886132718Skan	bsd_args.tolen = 0;
887132718Skan	return sendto(td, &bsd_args);
888132718Skan}
889132718Skan
890132718Skanstruct linux_recv_args {
891132718Skan	int s;
892132718Skan	l_uintptr_t msg;
893132718Skan	int len;
894132718Skan	int flags;
895132718Skan};
896132718Skan
897132718Skanstatic int
898132718Skanlinux_recv(struct thread *td, struct linux_recv_args *args)
899132718Skan{
900132718Skan	struct recvfrom_args /* {
901132718Skan		int s;
902132718Skan		caddr_t buf;
903132718Skan		int len;
904132718Skan		int flags;
905132718Skan		struct sockaddr *from;
906132718Skan		socklen_t fromlenaddr;
907132718Skan	} */ bsd_args;
908132718Skan
909132718Skan	bsd_args.s = args->s;
910132718Skan	bsd_args.buf = (caddr_t)PTRIN(args->msg);
911132718Skan	bsd_args.len = args->len;
912132718Skan	bsd_args.flags = args->flags;
913132718Skan	bsd_args.from = NULL;
914132718Skan	bsd_args.fromlenaddr = 0;
915132718Skan	return (recvfrom(td, &bsd_args));
916132718Skan}
917132718Skan
918132718Skanstatic int
919132718Skanlinux_sendto(struct thread *td, struct linux_sendto_args *args)
920132718Skan{
921132718Skan	struct msghdr msg;
922132718Skan	struct iovec aiov;
923132718Skan	int error;
924132718Skan
925132718Skan	if (linux_check_hdrincl(td, args->s) == 0)
926132718Skan		/* IP_HDRINCL set, tweak the packet before sending */
927132718Skan		return (linux_sendto_hdrincl(td, args));
928132718Skan
929132718Skan	msg.msg_name = PTRIN(args->to);
930132718Skan	msg.msg_namelen = args->tolen;
931132718Skan	msg.msg_iov = &aiov;
932132718Skan	msg.msg_iovlen = 1;
933132718Skan	msg.msg_control = NULL;
934132718Skan	msg.msg_flags = 0;
935132718Skan	aiov.iov_base = PTRIN(args->msg);
936132718Skan	aiov.iov_len = args->len;
937132718Skan	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
938132718Skan	    UIO_USERSPACE);
939132718Skan	return (error);
940132718Skan}
941132718Skan
942132718Skanstruct linux_recvfrom_args {
943132718Skan	int s;
944132718Skan	l_uintptr_t buf;
945132718Skan	int len;
946132718Skan	int flags;
947132718Skan	l_uintptr_t from;
948132718Skan	l_uintptr_t fromlen;
949132718Skan};
950132718Skan
951132718Skanstatic int
952132718Skanlinux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
953132718Skan{
954132718Skan	struct recvfrom_args /* {
955132718Skan		int	s;
956132718Skan		caddr_t	buf;
957132718Skan		size_t	len;
958132718Skan		int	flags;
959132718Skan		struct sockaddr * __restrict from;
960132718Skan		socklen_t * __restrict fromlenaddr;
961132718Skan	} */ bsd_args;
962132718Skan	size_t len;
963132718Skan	int error;
964132718Skan
965132718Skan	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
966132718Skan		return (error);
967132718Skan
968132718Skan	bsd_args.s = args->s;
969132718Skan	bsd_args.buf = PTRIN(args->buf);
970132718Skan	bsd_args.len = args->len;
971132718Skan	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
972132718Skan	/* XXX: */
973132718Skan	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
974132718Skan	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
975132718Skan
976132718Skan	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
977132718Skan	error = recvfrom(td, &bsd_args);
978132718Skan	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
979132718Skan
980132718Skan	if (error)
981132718Skan		return (error);
982132718Skan	if (args->from) {
983132718Skan		error = linux_sa_put((struct osockaddr *)
984132718Skan		    PTRIN(args->from));
985132718Skan		if (error)
986132718Skan			return (error);
987132718Skan	}
988132718Skan	return (0);
989169689Skan}
990132718Skan
991132718Skanstruct linux_sendmsg_args {
992132718Skan	int s;
993169689Skan	l_uintptr_t msg;
994169689Skan	int flags;
995169689Skan};
996169689Skan
997169689Skanstatic int
998169689Skanlinux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
999169689Skan{
1000169689Skan	struct cmsghdr *cmsg;
1001132718Skan	struct mbuf *control;
1002132718Skan	struct msghdr msg;
1003132718Skan	struct l_cmsghdr linux_cmsg;
1004132718Skan	struct l_cmsghdr *ptr_cmsg;
1005132718Skan	struct l_msghdr linux_msg;
1006132718Skan	struct iovec *iov;
1007132718Skan	socklen_t datalen;
1008132718Skan	void *data;
1009132718Skan	int error;
1010132718Skan
1011132718Skan	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1012132718Skan	if (error)
1013132718Skan		return (error);
1014132718Skan	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1015132718Skan	if (error)
1016132718Skan		return (error);
1017132718Skan
1018132718Skan	/*
1019132718Skan	 * Some Linux applications (ping) define a non-NULL control data
1020132718Skan	 * pointer, but a msg_controllen of 0, which is not allowed in the
1021132718Skan	 * FreeBSD system call interface.  NULL the msg_control pointer in
1022132718Skan	 * order to handle this case.  This should be checked, but allows the
1023132718Skan	 * Linux ping to work.
1024132718Skan	 */
1025132718Skan	if (msg.msg_control != NULL && msg.msg_controllen == 0)
1026132718Skan		msg.msg_control = NULL;
1027132718Skan
1028132718Skan#ifdef COMPAT_LINUX32
1029132718Skan	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1030132718Skan	    &iov, EMSGSIZE);
1031132718Skan#else
1032132718Skan	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1033132718Skan#endif
1034132718Skan	if (error)
1035132718Skan		return (error);
1036132718Skan
1037132718Skan	if (msg.msg_control != NULL) {
1038132718Skan		error = ENOBUFS;
1039132718Skan		cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1040132718Skan		control = m_get(M_WAIT, MT_CONTROL);
1041132718Skan		if (control == NULL)
1042132718Skan			goto bad;
1043132718Skan		ptr_cmsg = LINUX_CMSG_FIRSTHDR(&msg);
1044132718Skan
1045132718Skan		do {
1046132718Skan			error = copyin(ptr_cmsg, &linux_cmsg,
1047132718Skan			    sizeof(struct l_cmsghdr));
1048132718Skan			if (error)
1049132718Skan				goto bad;
1050132718Skan
1051132718Skan			error = EINVAL;
1052132718Skan			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1053132718Skan				goto bad;
1054132718Skan
1055132718Skan			/*
1056132718Skan			 * Now we support only SCM_RIGHTS, so return EINVAL
1057132718Skan			 * in any other cmsg_type
1058132718Skan			 */
1059132718Skan			if ((cmsg->cmsg_type =
1060132718Skan			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type)) == -1)
1061132718Skan				goto bad;
1062132718Skan			cmsg->cmsg_level =
1063132718Skan			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1064132718Skan
1065132718Skan			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1066132718Skan			cmsg->cmsg_len = CMSG_LEN(datalen);
1067132718Skan			data = LINUX_CMSG_DATA(ptr_cmsg);
1068132718Skan
1069132718Skan			error = ENOBUFS;
1070132718Skan			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t) cmsg))
1071132718Skan				goto bad;
1072132718Skan			if (!m_append(control, datalen, (c_caddr_t) data))
1073132718Skan				goto bad;
1074132718Skan		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&msg, ptr_cmsg)));
1075132718Skan	} else {
1076132718Skan		control = NULL;
1077132718Skan		cmsg = NULL;
1078132718Skan	}
1079132718Skan
1080132718Skan	msg.msg_iov = iov;
1081132718Skan	msg.msg_flags = 0;
1082132718Skan	error = linux_sendit(td, args->s, &msg, args->flags, control,
1083132718Skan	    UIO_USERSPACE);
1084132718Skan
1085132718Skanbad:
1086132718Skan	free(iov, M_IOV);
1087132718Skan	if (cmsg)
1088132718Skan		free(cmsg, M_TEMP);
1089132718Skan	return (error);
1090132718Skan}
1091132718Skan
1092132718Skanstruct linux_recvmsg_args {
1093132718Skan	int s;
1094132718Skan	l_uintptr_t msg;
1095132718Skan	int flags;
1096132718Skan};
1097132718Skan
1098132718Skanstatic int
1099132718Skanlinux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1100132718Skan{
1101132718Skan	struct cmsghdr *cm;
1102132718Skan	struct msghdr msg;
1103132718Skan	struct l_cmsghdr *linux_cmsg = NULL;
1104132718Skan	socklen_t datalen, outlen, clen;
1105132718Skan	struct l_msghdr linux_msg;
1106132718Skan	struct iovec *iov, *uiov;
1107132718Skan	struct mbuf *control = NULL;
1108132718Skan	struct mbuf **controlp;
1109132718Skan	caddr_t outbuf;
1110132718Skan	void *data;
1111132718Skan	int error;
1112132718Skan
1113132718Skan	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1114132718Skan	if (error)
1115132718Skan		return (error);
1116132718Skan
1117132718Skan	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1118132718Skan	if (error)
1119132718Skan		return (error);
1120132718Skan
1121132718Skan#ifdef COMPAT_LINUX32
1122132718Skan	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1123132718Skan	    &iov, EMSGSIZE);
1124132718Skan#else
1125132718Skan	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1126132718Skan#endif
1127132718Skan	if (error)
1128132718Skan		return (error);
1129132718Skan
1130132718Skan	if (msg.msg_name) {
1131132718Skan		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1132132718Skan		    msg.msg_namelen);
1133132718Skan		if (error)
1134132718Skan			goto bad;
1135132718Skan	}
1136132718Skan
1137132718Skan	uiov = msg.msg_iov;
1138132718Skan	msg.msg_iov = iov;
1139132718Skan	controlp = (msg.msg_control != NULL) ? &control : NULL;
1140132718Skan	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1141132718Skan	msg.msg_iov = uiov;
1142132718Skan	if (error)
1143132718Skan		goto bad;
1144132718Skan
1145132718Skan	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1146132718Skan	if (error)
1147132718Skan		goto bad;
1148132718Skan
1149132718Skan	if (linux_msg.msg_name) {
1150132718Skan		error = bsd_to_linux_sockaddr((struct sockaddr *)
1151132718Skan		    PTRIN(linux_msg.msg_name));
1152132718Skan		if (error)
1153132718Skan			goto bad;
1154132718Skan	}
1155132718Skan	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1156132718Skan		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1157132718Skan		if (error)
1158132718Skan			goto bad;
1159132718Skan	}
1160132718Skan
1161132718Skan	if (control) {
1162132718Skan
1163132718Skan		linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1164132718Skan		outbuf = PTRIN(linux_msg.msg_control);
1165132718Skan		cm = mtod(control, struct cmsghdr *);
1166132718Skan		outlen = 0;
1167132718Skan		clen = control->m_len;
1168132718Skan
1169132718Skan		while (cm != NULL) {
1170132718Skan
1171132718Skan			if ((linux_cmsg->cmsg_type =
1172132718Skan			    bsd_to_linux_cmsg_type(cm->cmsg_type)) == -1)
1173132718Skan			{
1174132718Skan				error = EINVAL;
1175132718Skan				goto bad;
1176132718Skan			}
1177132718Skan			data = CMSG_DATA(cm);
1178132718Skan			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1179132718Skan
1180132718Skan			if (outlen + LINUX_CMSG_LEN(datalen) >
1181132718Skan			    linux_msg.msg_controllen) {
1182132718Skan				if (outlen == 0) {
1183132718Skan					error = EMSGSIZE;
1184132718Skan					goto bad;
1185132718Skan				} else {
1186132718Skan					linux_msg.msg_flags |= LINUX_MSG_CTRUNC;
1187132718Skan					goto out;
1188132718Skan				}
1189132718Skan			}
1190132718Skan
1191132718Skan			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1192132718Skan			linux_cmsg->cmsg_level =
1193132718Skan			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1194132718Skan
1195132718Skan			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1196132718Skan			if (error)
1197132718Skan				goto bad;
1198132718Skan			outbuf += L_CMSG_HDRSZ;
1199132718Skan
1200132718Skan			error = copyout(data, outbuf, datalen);
1201132718Skan			if (error)
1202132718Skan				goto bad;
1203132718Skan
1204132718Skan			outbuf += LINUX_CMSG_ALIGN(datalen);
1205132718Skan			outlen += LINUX_CMSG_LEN(datalen);
1206132718Skan			linux_msg.msg_controllen = outlen;
1207132718Skan
1208132718Skan			if (CMSG_SPACE(datalen) < clen) {
1209132718Skan				clen -= CMSG_SPACE(datalen);
1210132718Skan				cm = (struct cmsghdr *)
1211132718Skan				    ((caddr_t)cm + CMSG_SPACE(datalen));
1212132718Skan			} else
1213132718Skan				cm = NULL;
1214132718Skan		}
1215132718Skan	}
1216132718Skan
1217132718Skanout:
1218132718Skan	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1219132718Skan
1220132718Skanbad:
1221132718Skan	free(iov, M_IOV);
1222132718Skan	if (control != NULL)
1223132718Skan		m_freem(control);
1224132718Skan	if (linux_cmsg != NULL)
1225132718Skan		free(linux_cmsg, M_TEMP);
1226132718Skan
1227132718Skan	return (error);
1228132718Skan}
1229132718Skan
/* Arguments consumed by linux_shutdown(). */
struct linux_shutdown_args {
	int	s;	/* socket descriptor */
	int	how;	/* shutdown mode, passed on unchanged */
};
1234132718Skan
1235132718Skanstatic int
1236132718Skanlinux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1237132718Skan{
1238132718Skan	struct shutdown_args /* {
1239132718Skan		int s;
1240132718Skan		int how;
1241132718Skan	} */ bsd_args;
1242132718Skan
1243132718Skan	bsd_args.s = args->s;
1244132718Skan	bsd_args.how = args->how;
1245132718Skan	return (shutdown(td, &bsd_args));
1246132718Skan}
1247132718Skan
1248132718Skanstruct linux_setsockopt_args {
1249132718Skan	int s;
1250132718Skan	int level;
1251132718Skan	int optname;
1252132718Skan	l_uintptr_t optval;
1253132718Skan	int optlen;
1254132718Skan};
1255132718Skan
1256132718Skanstatic int
1257132718Skanlinux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1258{
1259	struct setsockopt_args /* {
1260		int s;
1261		int level;
1262		int name;
1263		caddr_t val;
1264		int valsize;
1265	} */ bsd_args;
1266	int error, name;
1267
1268	bsd_args.s = args->s;
1269	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1270	switch (bsd_args.level) {
1271	case SOL_SOCKET:
1272		name = linux_to_bsd_so_sockopt(args->optname);
1273		break;
1274	case IPPROTO_IP:
1275		name = linux_to_bsd_ip_sockopt(args->optname);
1276		break;
1277	case IPPROTO_TCP:
1278		/* Linux TCP option values match BSD's */
1279		name = args->optname;
1280		break;
1281	default:
1282		name = -1;
1283		break;
1284	}
1285	if (name == -1)
1286		return (ENOPROTOOPT);
1287
1288	bsd_args.name = name;
1289	bsd_args.val = PTRIN(args->optval);
1290	bsd_args.valsize = args->optlen;
1291
1292	if (name == IPV6_NEXTHOP) {
1293		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1294			bsd_args.valsize);
1295		error = setsockopt(td, &bsd_args);
1296		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1297	} else
1298		error = setsockopt(td, &bsd_args);
1299
1300	return (error);
1301}
1302
1303struct linux_getsockopt_args {
1304	int s;
1305	int level;
1306	int optname;
1307	l_uintptr_t optval;
1308	l_uintptr_t optlen;
1309};
1310
1311static int
1312linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1313{
1314	struct getsockopt_args /* {
1315		int s;
1316		int level;
1317		int name;
1318		caddr_t val;
1319		int *avalsize;
1320	} */ bsd_args;
1321	int error, name;
1322
1323	bsd_args.s = args->s;
1324	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1325	switch (bsd_args.level) {
1326	case SOL_SOCKET:
1327		name = linux_to_bsd_so_sockopt(args->optname);
1328		break;
1329	case IPPROTO_IP:
1330		name = linux_to_bsd_ip_sockopt(args->optname);
1331		break;
1332	case IPPROTO_TCP:
1333		/* Linux TCP option values match BSD's */
1334		name = args->optname;
1335		break;
1336	default:
1337		name = -1;
1338		break;
1339	}
1340	if (name == -1)
1341		return (EINVAL);
1342
1343	bsd_args.name = name;
1344	bsd_args.val = PTRIN(args->optval);
1345	bsd_args.avalsize = PTRIN(args->optlen);
1346
1347	if (name == IPV6_NEXTHOP) {
1348		error = getsockopt(td, &bsd_args);
1349		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1350	} else
1351		error = getsockopt(td, &bsd_args);
1352
1353	return (error);
1354}
1355
1356int
1357linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1358{
1359	void *arg = (void *)(intptr_t)args->args;
1360
1361	switch (args->what) {
1362	case LINUX_SOCKET:
1363		return (linux_socket(td, arg));
1364	case LINUX_BIND:
1365		return (linux_bind(td, arg));
1366	case LINUX_CONNECT:
1367		return (linux_connect(td, arg));
1368	case LINUX_LISTEN:
1369		return (linux_listen(td, arg));
1370	case LINUX_ACCEPT:
1371		return (linux_accept(td, arg));
1372	case LINUX_GETSOCKNAME:
1373		return (linux_getsockname(td, arg));
1374	case LINUX_GETPEERNAME:
1375		return (linux_getpeername(td, arg));
1376	case LINUX_SOCKETPAIR:
1377		return (linux_socketpair(td, arg));
1378	case LINUX_SEND:
1379		return (linux_send(td, arg));
1380	case LINUX_RECV:
1381		return (linux_recv(td, arg));
1382	case LINUX_SENDTO:
1383		return (linux_sendto(td, arg));
1384	case LINUX_RECVFROM:
1385		return (linux_recvfrom(td, arg));
1386	case LINUX_SHUTDOWN:
1387		return (linux_shutdown(td, arg));
1388	case LINUX_SETSOCKOPT:
1389		return (linux_setsockopt(td, arg));
1390	case LINUX_GETSOCKOPT:
1391		return (linux_getsockopt(td, arg));
1392	case LINUX_SENDMSG:
1393		return (linux_sendmsg(td, arg));
1394	case LINUX_RECVMSG:
1395		return (linux_recvmsg(td, arg));
1396	}
1397
1398	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1399	return (ENOSYS);
1400}
1401