linux_socket.c revision 121008
1162485Sjulian/*-
2162485Sjulian * Copyright (c) 1995 Søren Schmidt
3162485Sjulian * All rights reserved.
4162485Sjulian *
5162485Sjulian * Redistribution and use in source and binary forms, with or without
6162485Sjulian * modification, are permitted provided that the following conditions
7162485Sjulian * are met:
8162485Sjulian * 1. Redistributions of source code must retain the above copyright
9162485Sjulian *    notice, this list of conditions and the following disclaimer
10162485Sjulian *    in this position and unchanged.
11162485Sjulian * 2. Redistributions in binary form must reproduce the above copyright
12162485Sjulian *    notice, this list of conditions and the following disclaimer in the
13162485Sjulian *    documentation and/or other materials provided with the distribution.
14162485Sjulian * 3. The name of the author may not be used to endorse or promote products
15162485Sjulian *    derived from this software without specific prior written permission
16162485Sjulian *
17162485Sjulian * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18162485Sjulian * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19162485Sjulian * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20162485Sjulian * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21162485Sjulian * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22162485Sjulian * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23162485Sjulian * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24162485Sjulian * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25162485Sjulian * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26162485Sjulian * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27162485Sjulian */
28162485Sjulian
29162485Sjulian#include <sys/cdefs.h>
30162485Sjulian__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 121008 2003-10-11 15:08:32Z iwasaki $");
31162485Sjulian
32162485Sjulian/* XXX we use functions that might not exist. */
33162485Sjulian#include "opt_compat.h"
34162485Sjulian#include "opt_inet6.h"
35162485Sjulian
36162485Sjulian#ifndef COMPAT_43
37162485Sjulian#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
38162485Sjulian#endif
39162485Sjulian
40162485Sjulian#include <sys/param.h>
41162485Sjulian#include <sys/proc.h>
42162485Sjulian#include <sys/systm.h>
43162485Sjulian#include <sys/sysproto.h>
44162485Sjulian#include <sys/fcntl.h>
45162485Sjulian#include <sys/file.h>
46162485Sjulian#include <sys/limits.h>
47162485Sjulian#include <sys/malloc.h>
48162485Sjulian#include <sys/socket.h>
49162485Sjulian#include <sys/socketvar.h>
50162485Sjulian#include <sys/syscallsubr.h>
51162485Sjulian#include <sys/uio.h>
52162485Sjulian#include <sys/syslog.h>
53162485Sjulian
54162485Sjulian#include <netinet/in.h>
55162485Sjulian#include <netinet/in_systm.h>
56162485Sjulian#include <netinet/ip.h>
57162485Sjulian#ifdef INET6
58162485Sjulian#include <netinet/ip6.h>
59162485Sjulian#include <netinet6/ip6_var.h>
60162485Sjulian#endif
61162485Sjulian
62162485Sjulian#include <machine/../linux/linux.h>
63162485Sjulian#include <machine/../linux/linux_proto.h>
64162485Sjulian#include <compat/linux/linux_socket.h>
65162485Sjulian#include <compat/linux/linux_util.h>
66162485Sjulian
67162485Sjulianstatic int do_sa_get(struct sockaddr **, const struct osockaddr *, int *,
68162485Sjulian    struct malloc_type *);
69162485Sjulianstatic int linux_to_bsd_domain(int);
70162485Sjulian
71162485Sjulian/*
72162485Sjulian * Reads a linux sockaddr and does any necessary translation.
73162485Sjulian * Linux sockaddrs don't have a length field, only a family.
74162485Sjulian */
75162485Sjulianstatic int
76162485Sjulianlinux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
77162485Sjulian{
78162485Sjulian	int osalen = len;
79162485Sjulian
80162485Sjulian	return (do_sa_get(sap, osa, &osalen, M_SONAME));
81162485Sjulian}
82162485Sjulian
83162485Sjulian/*
84162485Sjulian * Copy the osockaddr structure pointed to by osa to kernel, adjust
85162485Sjulian * family and convert to sockaddr.
86162485Sjulian */
87162485Sjulianstatic int
88162485Sjuliando_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
89162485Sjulian    struct malloc_type *mtype)
90162485Sjulian{
91162485Sjulian	int error=0, bdom;
92162485Sjulian	struct sockaddr *sa;
93162485Sjulian	struct osockaddr *kosa;
94162485Sjulian	int alloclen;
95162485Sjulian#ifdef INET6
96162485Sjulian	int oldv6size;
97162485Sjulian	struct sockaddr_in6 *sin6;
98162485Sjulian#endif
99162485Sjulian
100162485Sjulian	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
101162485Sjulian		return (EINVAL);
102162485Sjulian
103162485Sjulian	alloclen = *osalen;
104162485Sjulian#ifdef INET6
105162485Sjulian	oldv6size = 0;
106162485Sjulian	/*
107162485Sjulian	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
108162485Sjulian	 * if it's a v4-mapped address, so reserve the proper space
109162485Sjulian	 * for it.
110162485Sjulian	 */
111162485Sjulian	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
112162485Sjulian		alloclen = sizeof (struct sockaddr_in6);
113162485Sjulian		oldv6size = 1;
114162485Sjulian	}
115162485Sjulian#endif
116162485Sjulian
117162485Sjulian	MALLOC(kosa, struct osockaddr *, alloclen, mtype, M_WAITOK);
118162485Sjulian
119162485Sjulian	if ((error = copyin(osa, kosa, *osalen)))
120162485Sjulian		goto out;
121162485Sjulian
122162485Sjulian	bdom = linux_to_bsd_domain(kosa->sa_family);
123162485Sjulian	if (bdom == -1) {
124162485Sjulian		error = EINVAL;
125162485Sjulian		goto out;
126162485Sjulian	}
127162485Sjulian
128162485Sjulian#ifdef INET6
129162485Sjulian	/*
130162485Sjulian	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
131162485Sjulian	 * which lacks the scope id compared with RFC2553 one. If we detect
132162485Sjulian	 * the situation, reject the address and write a message to system log.
133162485Sjulian	 *
134162485Sjulian	 * Still accept addresses for which the scope id is not used.
135162485Sjulian	 */
136162485Sjulian	if (oldv6size && bdom == AF_INET6) {
137162485Sjulian		sin6 = (struct sockaddr_in6 *)kosa;
138162485Sjulian		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
139162485Sjulian		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
140162485Sjulian		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
141162485Sjulian		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
142162485Sjulian		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
143162485Sjulian		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
144162485Sjulian			sin6->sin6_scope_id = 0;
145162485Sjulian		} else {
146162485Sjulian			log(LOG_DEBUG,
147162485Sjulian			    "obsolete pre-RFC2553 sockaddr_in6 rejected");
148162485Sjulian			error = EINVAL;
149162485Sjulian			goto out;
150162485Sjulian		}
151162485Sjulian	} else
152162485Sjulian#endif
153162485Sjulian	if (bdom == AF_INET)
154162485Sjulian		alloclen = sizeof(struct sockaddr_in);
155162485Sjulian
156162485Sjulian	sa = (struct sockaddr *) kosa;
157162485Sjulian	sa->sa_family = bdom;
158162485Sjulian	sa->sa_len = alloclen;
159162485Sjulian
160162485Sjulian	*sap = sa;
161162485Sjulian	*osalen = alloclen;
162162485Sjulian	return (0);
163162485Sjulian
164162485Sjulianout:
165162485Sjulian	FREE(kosa, mtype);
166162485Sjulian	return (error);
167162485Sjulian}
168162485Sjulian
169162485Sjulianstatic int
170162485Sjulianlinux_to_bsd_domain(int domain)
171162485Sjulian{
172162485Sjulian
173162485Sjulian	switch (domain) {
174162485Sjulian	case LINUX_AF_UNSPEC:
175162485Sjulian		return (AF_UNSPEC);
176162485Sjulian	case LINUX_AF_UNIX:
177162485Sjulian		return (AF_LOCAL);
178162485Sjulian	case LINUX_AF_INET:
179162485Sjulian		return (AF_INET);
180162485Sjulian	case LINUX_AF_INET6:
181162485Sjulian		return (AF_INET6);
182162485Sjulian	case LINUX_AF_AX25:
183162485Sjulian		return (AF_CCITT);
184162485Sjulian	case LINUX_AF_IPX:
185162485Sjulian		return (AF_IPX);
186162485Sjulian	case LINUX_AF_APPLETALK:
187162485Sjulian		return (AF_APPLETALK);
188162485Sjulian	}
189162485Sjulian	return (-1);
190162485Sjulian}
191162485Sjulian
192162485Sjulian#ifndef __alpha__
193162485Sjulianstatic int
194162485Sjulianbsd_to_linux_domain(int domain)
195162485Sjulian{
196162485Sjulian
197162485Sjulian	switch (domain) {
198162485Sjulian	case AF_UNSPEC:
199162485Sjulian		return (LINUX_AF_UNSPEC);
200162485Sjulian	case AF_LOCAL:
201162485Sjulian		return (LINUX_AF_UNIX);
202162485Sjulian	case AF_INET:
203162485Sjulian		return (LINUX_AF_INET);
204162485Sjulian	case AF_INET6:
205162485Sjulian		return (LINUX_AF_INET6);
206162485Sjulian	case AF_CCITT:
207162485Sjulian		return (LINUX_AF_AX25);
208162485Sjulian	case AF_IPX:
209162485Sjulian		return (LINUX_AF_IPX);
210162485Sjulian	case AF_APPLETALK:
211162485Sjulian		return (LINUX_AF_APPLETALK);
212162485Sjulian	}
213162485Sjulian	return (-1);
214162485Sjulian}
215162485Sjulian
216162485Sjulianstatic int
217162485Sjulianlinux_to_bsd_sockopt_level(int level)
218162485Sjulian{
219162485Sjulian
220162485Sjulian	switch (level) {
221162485Sjulian	case LINUX_SOL_SOCKET:
222162485Sjulian		return (SOL_SOCKET);
223162485Sjulian	}
224162485Sjulian	return (level);
225162485Sjulian}
226162485Sjulian
227162485Sjulianstatic int
228162485Sjulianbsd_to_linux_sockopt_level(int level)
229162485Sjulian{
230162485Sjulian
231162485Sjulian	switch (level) {
232162485Sjulian	case SOL_SOCKET:
233162485Sjulian		return (LINUX_SOL_SOCKET);
234162485Sjulian	}
235162485Sjulian	return (level);
236162485Sjulian}
237162485Sjulian
238162485Sjulianstatic int
239162485Sjulianlinux_to_bsd_ip_sockopt(int opt)
240162485Sjulian{
241162485Sjulian
242162485Sjulian	switch (opt) {
243162485Sjulian	case LINUX_IP_TOS:
244162485Sjulian		return (IP_TOS);
245162485Sjulian	case LINUX_IP_TTL:
246162485Sjulian		return (IP_TTL);
247162485Sjulian	case LINUX_IP_OPTIONS:
248162485Sjulian		return (IP_OPTIONS);
249162485Sjulian	case LINUX_IP_MULTICAST_IF:
250162485Sjulian		return (IP_MULTICAST_IF);
251162485Sjulian	case LINUX_IP_MULTICAST_TTL:
252162485Sjulian		return (IP_MULTICAST_TTL);
253162485Sjulian	case LINUX_IP_MULTICAST_LOOP:
254162485Sjulian		return (IP_MULTICAST_LOOP);
255162485Sjulian	case LINUX_IP_ADD_MEMBERSHIP:
256162485Sjulian		return (IP_ADD_MEMBERSHIP);
257162485Sjulian	case LINUX_IP_DROP_MEMBERSHIP:
258162485Sjulian		return (IP_DROP_MEMBERSHIP);
259162485Sjulian	case LINUX_IP_HDRINCL:
260162485Sjulian		return (IP_HDRINCL);
261162485Sjulian	}
262162485Sjulian	return (-1);
263162485Sjulian}
264162485Sjulian
265162485Sjulianstatic int
266162485Sjulianlinux_to_bsd_so_sockopt(int opt)
267162485Sjulian{
268162485Sjulian
269162485Sjulian	switch (opt) {
270162485Sjulian	case LINUX_SO_DEBUG:
271162485Sjulian		return (SO_DEBUG);
272162485Sjulian	case LINUX_SO_REUSEADDR:
273162485Sjulian		return (SO_REUSEADDR);
274162485Sjulian	case LINUX_SO_TYPE:
275162485Sjulian		return (SO_TYPE);
276162485Sjulian	case LINUX_SO_ERROR:
277162485Sjulian		return (SO_ERROR);
278162485Sjulian	case LINUX_SO_DONTROUTE:
279162485Sjulian		return (SO_DONTROUTE);
280162485Sjulian	case LINUX_SO_BROADCAST:
281162485Sjulian		return (SO_BROADCAST);
282162485Sjulian	case LINUX_SO_SNDBUF:
283162485Sjulian		return (SO_SNDBUF);
284162485Sjulian	case LINUX_SO_RCVBUF:
285162485Sjulian		return (SO_RCVBUF);
286162485Sjulian	case LINUX_SO_KEEPALIVE:
287162485Sjulian		return (SO_KEEPALIVE);
288162485Sjulian	case LINUX_SO_OOBINLINE:
289162485Sjulian		return (SO_OOBINLINE);
290162485Sjulian	case LINUX_SO_LINGER:
291162485Sjulian		return (SO_LINGER);
292162485Sjulian	}
293162485Sjulian	return (-1);
294162485Sjulian}
295162485Sjulian
296162485Sjulianstatic int
297162485Sjulianlinux_to_bsd_msg_flags(int flags)
298162485Sjulian{
299162485Sjulian	int ret_flags = 0;
300162485Sjulian
301162485Sjulian	if (flags & LINUX_MSG_OOB)
302162485Sjulian		ret_flags |= MSG_OOB;
303	if (flags & LINUX_MSG_PEEK)
304		ret_flags |= MSG_PEEK;
305	if (flags & LINUX_MSG_DONTROUTE)
306		ret_flags |= MSG_DONTROUTE;
307	if (flags & LINUX_MSG_CTRUNC)
308		ret_flags |= MSG_CTRUNC;
309	if (flags & LINUX_MSG_TRUNC)
310		ret_flags |= MSG_TRUNC;
311	if (flags & LINUX_MSG_DONTWAIT)
312		ret_flags |= MSG_DONTWAIT;
313	if (flags & LINUX_MSG_EOR)
314		ret_flags |= MSG_EOR;
315	if (flags & LINUX_MSG_WAITALL)
316		ret_flags |= MSG_WAITALL;
317#if 0 /* not handled */
318	if (flags & LINUX_MSG_PROXY)
319		;
320	if (flags & LINUX_MSG_FIN)
321		;
322	if (flags & LINUX_MSG_SYN)
323		;
324	if (flags & LINUX_MSG_CONFIRM)
325		;
326	if (flags & LINUX_MSG_RST)
327		;
328	if (flags & LINUX_MSG_ERRQUEUE)
329		;
330	if (flags & LINUX_MSG_NOSIGNAL)
331		;
332#endif
333	return ret_flags;
334}
335
336/*
337 * Allocate stackgap and put the converted sockaddr structure
338 * there, address on stackgap returned in sap.
339 */
340static int
341linux_sa_get(caddr_t *sgp, struct sockaddr **sap,
342	     const struct osockaddr *osa, int *osalen)
343{
344	struct sockaddr *sa, *usa;
345	int alloclen, error;
346
347	alloclen = *osalen;
348	error = do_sa_get(&sa, osa, &alloclen, M_TEMP);
349	if (error)
350		return (error);
351
352	usa = (struct sockaddr *) stackgap_alloc(sgp, alloclen);
353	if (!usa) {
354		error = ENOMEM;
355		goto out;
356	}
357
358	if ((error = copyout(sa, usa, alloclen)))
359		goto out;
360
361	*sap = usa;
362	*osalen = alloclen;
363
364out:
365	FREE(sa, M_TEMP);
366	return (error);
367}
368
369static int
370linux_sa_put(struct osockaddr *osa)
371{
372	struct osockaddr sa;
373	int error, bdom;
374
375	/*
376	 * Only read/write the osockaddr family part, the rest is
377	 * not changed.
378	 */
379	error = copyin(osa, &sa, sizeof(sa.sa_family));
380	if (error)
381		return (error);
382
383	bdom = bsd_to_linux_domain(sa.sa_family);
384	if (bdom == -1)
385		return (EINVAL);
386
387	sa.sa_family = bdom;
388	error = copyout(&sa, osa, sizeof(sa.sa_family));
389	if (error)
390		return (error);
391
392	return (0);
393}
394
395/* Return 0 if IP_HDRINCL is set for the given socket. */
396static int
397linux_check_hdrincl(struct thread *td, caddr_t *sg, int s)
398{
399	struct getsockopt_args /* {
400		int s;
401		int level;
402		int name;
403		caddr_t val;
404		int *avalsize;
405	} */ bsd_args;
406	int error;
407	caddr_t val, valsize;
408	int size_val = sizeof val;
409	int optval;
410
411	val = stackgap_alloc(sg, sizeof(int));
412	valsize = stackgap_alloc(sg, sizeof(int));
413
414	if ((error = copyout(&size_val, valsize, sizeof(size_val))))
415		return (error);
416
417	bsd_args.s = s;
418	bsd_args.level = IPPROTO_IP;
419	bsd_args.name = IP_HDRINCL;
420	bsd_args.val = val;
421	bsd_args.avalsize = (int *)valsize;
422	if ((error = getsockopt(td, &bsd_args)))
423		return (error);
424
425	if ((error = copyin(val, &optval, sizeof(optval))))
426		return (error);
427
428	return (optval == 0);
429}
430
431/*
432 * Updated sendto() when IP_HDRINCL is set:
433 * tweak endian-dependent fields in the IP packet.
434 */
435static int
436linux_sendto_hdrincl(struct thread *td, caddr_t *sg, struct sendto_args *bsd_args)
437{
438/*
439 * linux_ip_copysize defines how many bytes we should copy
440 * from the beginning of the IP packet before we customize it for BSD.
441 * It should include all the fields we modify (ip_len and ip_off)
442 * and be as small as possible to minimize copying overhead.
443 */
444#define linux_ip_copysize	8
445
446	struct ip *packet;
447	struct msghdr *msg;
448	struct iovec *iov;
449
450	int error;
451	struct  sendmsg_args /* {
452		int s;
453		caddr_t msg;
454		int flags;
455	} */ sendmsg_args;
456
457	/* Check the packet isn't too small before we mess with it */
458	if (bsd_args->len < linux_ip_copysize)
459		return (EINVAL);
460
461	/*
462	 * Tweaking the user buffer in place would be bad manners.
463	 * We create a corrected IP header with just the needed length,
464	 * then use an iovec to glue it to the rest of the user packet
465	 * when calling sendmsg().
466	 */
467	packet = (struct ip *)stackgap_alloc(sg, linux_ip_copysize);
468	msg = (struct msghdr *)stackgap_alloc(sg, sizeof(*msg));
469	iov = (struct iovec *)stackgap_alloc(sg, sizeof(*iov)*2);
470
471	/* Make a copy of the beginning of the packet to be sent */
472	if ((error = copyin(bsd_args->buf, packet, linux_ip_copysize)))
473		return (error);
474
475	/* Convert fields from Linux to BSD raw IP socket format */
476	packet->ip_len = bsd_args->len;
477	packet->ip_off = ntohs(packet->ip_off);
478
479	/* Prepare the msghdr and iovec structures describing the new packet */
480	msg->msg_name = bsd_args->to;
481	msg->msg_namelen = bsd_args->tolen;
482	msg->msg_iov = iov;
483	msg->msg_iovlen = 2;
484	msg->msg_control = NULL;
485	msg->msg_controllen = 0;
486	msg->msg_flags = 0;
487	iov[0].iov_base = (char *)packet;
488	iov[0].iov_len = linux_ip_copysize;
489	iov[1].iov_base = (char *)(bsd_args->buf) + linux_ip_copysize;
490	iov[1].iov_len = bsd_args->len - linux_ip_copysize;
491
492	sendmsg_args.s = bsd_args->s;
493	sendmsg_args.msg = (caddr_t)msg;
494	sendmsg_args.flags = bsd_args->flags;
495	return (sendmsg(td, &sendmsg_args));
496}
497
498struct linux_socket_args {
499	int domain;
500	int type;
501	int protocol;
502};
503
504static int
505linux_socket(struct thread *td, struct linux_socket_args *args)
506{
507	struct linux_socket_args linux_args;
508	struct socket_args /* {
509		int domain;
510		int type;
511		int protocol;
512	} */ bsd_args;
513	struct setsockopt_args /* {
514		int s;
515		int level;
516		int name;
517		caddr_t val;
518		int valsize;
519	} */ bsd_setsockopt_args;
520	int error;
521	int retval_socket;
522
523	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
524		return (error);
525
526	bsd_args.protocol = linux_args.protocol;
527	bsd_args.type = linux_args.type;
528	bsd_args.domain = linux_to_bsd_domain(linux_args.domain);
529	if (bsd_args.domain == -1)
530		return (EINVAL);
531
532	retval_socket = socket(td, &bsd_args);
533	if (bsd_args.type == SOCK_RAW
534	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
535	    && bsd_args.domain == AF_INET
536	    && retval_socket >= 0) {
537		/* It's a raw IP socket: set the IP_HDRINCL option. */
538		caddr_t sg;
539		int *hdrincl;
540
541		sg = stackgap_init();
542		hdrincl = (int *)stackgap_alloc(&sg, sizeof(*hdrincl));
543		*hdrincl = 1;
544		bsd_setsockopt_args.s = td->td_retval[0];
545		bsd_setsockopt_args.level = IPPROTO_IP;
546		bsd_setsockopt_args.name = IP_HDRINCL;
547		bsd_setsockopt_args.val = (caddr_t)hdrincl;
548		bsd_setsockopt_args.valsize = sizeof(*hdrincl);
549		/* We ignore any error returned by setsockopt() */
550		setsockopt(td, &bsd_setsockopt_args);
551		/* Copy back the return value from socket() */
552		td->td_retval[0] = bsd_setsockopt_args.s;
553	}
554#ifdef INET6
555	/*
556	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
557	 * default and some apps depend on this. So, set V6ONLY to 0
558	 * for Linux apps if the sysctl value is set to 1.
559	 */
560	if (bsd_args.domain == PF_INET6 && retval_socket >= 0
561#ifndef KLD_MODULE
562	    /*
563	     * XXX: Avoid undefined symbol error with an IPv4 only
564	     * kernel.
565	     */
566	    && ip6_v6only
567#endif
568	    ) {
569		caddr_t sg;
570		int *v6only;
571
572		sg = stackgap_init();
573		v6only = (int *)stackgap_alloc(&sg, sizeof(*v6only));
574		*v6only = 0;
575		bsd_setsockopt_args.s = td->td_retval[0];
576		bsd_setsockopt_args.level = IPPROTO_IPV6;
577		bsd_setsockopt_args.name = IPV6_V6ONLY;
578		bsd_setsockopt_args.val = (caddr_t)v6only;
579		bsd_setsockopt_args.valsize = sizeof(*v6only);
580		/* We ignore any error returned by setsockopt() */
581		setsockopt(td, &bsd_setsockopt_args);
582		/* Copy back the return value from socket() */
583		td->td_retval[0] = bsd_setsockopt_args.s;
584	}
585#endif
586
587	return (retval_socket);
588}
589
/* Argument block for bind(), copied in from the Linux caller. */
struct linux_bind_args {
	int s;
	struct osockaddr *name;
	int namelen;
};

/*
 * Linux bind(): convert the Linux sockaddr to native form and pass it to
 * kern_bind().  The converted address is not released here.
 * NOTE(review): presumably kern_bind() takes ownership of it; confirm.
 */
static int
linux_bind(struct thread *td, struct linux_bind_args *args)
{
	struct linux_bind_args largs;
	struct sockaddr *sa;
	int error;

	error = copyin(args, &largs, sizeof(largs));
	if (error == 0)
		error = linux_getsockaddr(&sa, largs.name, largs.namelen);
	if (error != 0)
		return (error);

	return (kern_bind(td, largs.s, sa));
}
612
/*
 * Argument block for connect().  This definition and the prototype below
 * are only compiled when __alpha__ is not defined.
 */
struct linux_connect_args {
	int			 s;
	struct osockaddr	*name;
	int			 namelen;
};

int	linux_connect(struct thread *, struct linux_connect_args *);
619#endif /* !__alpha__*/
620
621int
622linux_connect(struct thread *td, struct linux_connect_args *args)
623{
624	struct linux_connect_args linux_args;
625	struct socket *so;
626	struct sockaddr *sa;
627	u_int fflag;
628	int error;
629
630#ifdef __alpha__
631	bcopy(args, &linux_args, sizeof(linux_args));
632#else
633	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
634		return (error);
635#endif /* __alpha__ */
636
637	error = linux_getsockaddr(&sa, (struct osockaddr *)linux_args.name,
638	    linux_args.namelen);
639	if (error)
640		return (error);
641
642	error = kern_connect(td, linux_args.s, sa);
643	if (error != EISCONN)
644		return (error);
645
646	/*
647	 * Linux doesn't return EISCONN the first time it occurs,
648	 * when on a non-blocking socket. Instead it returns the
649	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
650	 */
651	if ((error = fgetsock(td, linux_args.s, &so, &fflag)) != 0)
652		return(error);
653	error = EISCONN;
654	if (fflag & FNONBLOCK) {
655		if (so->so_emuldata == 0)
656			error = so->so_error;
657		so->so_emuldata = (void *)1;
658	}
659	fputsock(so);
660	return (error);
661}
662
663#ifndef __alpha__
664
665struct linux_listen_args {
666	int s;
667	int backlog;
668};
669
670static int
671linux_listen(struct thread *td, struct linux_listen_args *args)
672{
673	struct linux_listen_args linux_args;
674	struct listen_args /* {
675		int s;
676		int backlog;
677	} */ bsd_args;
678	int error;
679
680	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
681		return (error);
682
683	bsd_args.s = linux_args.s;
684	bsd_args.backlog = linux_args.backlog;
685	return (listen(td, &bsd_args));
686}
687
688struct linux_accept_args {
689	int s;
690	struct osockaddr *addr;
691	int *namelen;
692};
693
694static int
695linux_accept(struct thread *td, struct linux_accept_args *args)
696{
697	struct linux_accept_args linux_args;
698	struct accept_args /* {
699		int s;
700		caddr_t name;
701		int *anamelen;
702	} */ bsd_args;
703	struct close_args /* {
704		int     fd;
705	} */ c_args;
706	struct fcntl_args /* {
707		int fd;
708		int cmd;
709		long arg;
710	} */ f_args;
711	int error;
712
713	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
714		return (error);
715
716	bsd_args.s = linux_args.s;
717	bsd_args.name = (caddr_t)linux_args.addr;
718	bsd_args.anamelen = linux_args.namelen;
719	error = oaccept(td, &bsd_args);
720	if (error)
721		return (error);
722	if (linux_args.addr) {
723		error = linux_sa_put(linux_args.addr);
724		if (error) {
725			c_args.fd = td->td_retval[0];
726			(void)close(td, &c_args);
727			return (error);
728		}
729	}
730
731	/*
732	 * linux appears not to copy flags from the parent socket to the
733	 * accepted one, so we must clear the flags in the new descriptor.
734	 * Ignore any errors, because we already have an open fd.
735	 */
736	f_args.fd = td->td_retval[0];
737	f_args.cmd = F_SETFL;
738	f_args.arg = 0;
739	(void)fcntl(td, &f_args);
740	td->td_retval[0] = f_args.fd;
741	return (0);
742}
743
744struct linux_getsockname_args {
745	int s;
746	struct osockaddr *addr;
747	int *namelen;
748};
749
750static int
751linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
752{
753	struct linux_getsockname_args linux_args;
754	struct getsockname_args /* {
755		int fdes;
756		caddr_t asa;
757		int *alen;
758	} */ bsd_args;
759	int error;
760
761	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
762		return (error);
763
764	bsd_args.fdes = linux_args.s;
765	bsd_args.asa = (caddr_t) linux_args.addr;
766	bsd_args.alen = linux_args.namelen;
767	error = ogetsockname(td, &bsd_args);
768	if (error)
769		return (error);
770	error = linux_sa_put(linux_args.addr);
771	if (error)
772		return (error);
773	return (0);
774}
775
776struct linux_getpeername_args {
777	int s;
778	struct osockaddr *addr;
779	int *namelen;
780};
781
782static int
783linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
784{
785	struct linux_getpeername_args linux_args;
786	struct ogetpeername_args /* {
787		int fdes;
788		caddr_t asa;
789		int *alen;
790	} */ bsd_args;
791	int error;
792
793	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
794		return (error);
795
796	bsd_args.fdes = linux_args.s;
797	bsd_args.asa = (caddr_t) linux_args.addr;
798	bsd_args.alen = linux_args.namelen;
799	error = ogetpeername(td, &bsd_args);
800	if (error)
801		return (error);
802	error = linux_sa_put(linux_args.addr);
803	if (error)
804		return (error);
805	return (0);
806}
807
808struct linux_socketpair_args {
809	int domain;
810	int type;
811	int protocol;
812	int *rsv;
813};
814
815static int
816linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
817{
818	struct linux_socketpair_args linux_args;
819	struct socketpair_args /* {
820		int domain;
821		int type;
822		int protocol;
823		int *rsv;
824	} */ bsd_args;
825	int error;
826
827	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
828		return (error);
829
830	bsd_args.domain = linux_to_bsd_domain(linux_args.domain);
831	if (bsd_args.domain == -1)
832		return (EINVAL);
833
834	bsd_args.type = linux_args.type;
835	bsd_args.protocol = linux_args.protocol;
836	bsd_args.rsv = linux_args.rsv;
837	return (socketpair(td, &bsd_args));
838}
839
840struct linux_send_args {
841	int s;
842	void *msg;
843	int len;
844	int flags;
845};
846
847static int
848linux_send(struct thread *td, struct linux_send_args *args)
849{
850	struct linux_send_args linux_args;
851	struct osend_args /* {
852		int s;
853		caddr_t buf;
854		int len;
855		int flags;
856	} */ bsd_args;
857	int error;
858
859	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
860		return (error);
861
862	bsd_args.s = linux_args.s;
863	bsd_args.buf = linux_args.msg;
864	bsd_args.len = linux_args.len;
865	bsd_args.flags = linux_args.flags;
866	return (osend(td, &bsd_args));
867}
868
869struct linux_recv_args {
870	int s;
871	void *msg;
872	int len;
873	int flags;
874};
875
876static int
877linux_recv(struct thread *td, struct linux_recv_args *args)
878{
879	struct linux_recv_args linux_args;
880	struct orecv_args /* {
881		int s;
882		caddr_t buf;
883		int len;
884		int flags;
885	} */ bsd_args;
886	int error;
887
888	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
889		return (error);
890
891	bsd_args.s = linux_args.s;
892	bsd_args.buf = linux_args.msg;
893	bsd_args.len = linux_args.len;
894	bsd_args.flags = linux_args.flags;
895	return (orecv(td, &bsd_args));
896}
897
898struct linux_sendto_args {
899	int s;
900	void *msg;
901	int len;
902	int flags;
903	caddr_t to;
904	int tolen;
905};
906
907static int
908linux_sendto(struct thread *td, struct linux_sendto_args *args)
909{
910	struct linux_sendto_args linux_args;
911	struct sendto_args /* {
912		int s;
913		caddr_t buf;
914		size_t len;
915		int flags;
916		caddr_t to;
917		int tolen;
918	} */ bsd_args;
919	caddr_t sg = stackgap_init();
920	struct sockaddr *to;
921	int tolen, error;
922
923	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
924		return (error);
925
926	tolen = linux_args.tolen;
927	if (linux_args.to) {
928		error = linux_sa_get(&sg, &to,
929		    (struct osockaddr *) linux_args.to, &tolen);
930		if (error)
931			return (error);
932	} else
933		to = NULL;
934
935	bsd_args.s = linux_args.s;
936	bsd_args.buf = linux_args.msg;
937	bsd_args.len = linux_args.len;
938	bsd_args.flags = linux_args.flags;
939	bsd_args.to = (caddr_t) to;
940	bsd_args.tolen = (unsigned int) tolen;
941
942	if (linux_check_hdrincl(td, &sg, linux_args.s) == 0)
943		/* IP_HDRINCL set, tweak the packet before sending */
944		return (linux_sendto_hdrincl(td, &sg, &bsd_args));
945
946	return (sendto(td, &bsd_args));
947}
948
949struct linux_recvfrom_args {
950	int s;
951	void *buf;
952	int len;
953	int flags;
954	caddr_t from;
955	int *fromlen;
956};
957
958static int
959linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
960{
961	struct linux_recvfrom_args linux_args;
962	struct recvfrom_args /* {
963		int s;
964		caddr_t buf;
965		size_t len;
966		int flags;
967		caddr_t from;
968		int *fromlenaddr;
969	} */ bsd_args;
970	int error;
971
972	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
973		return (error);
974
975	bsd_args.s = linux_args.s;
976	bsd_args.buf = linux_args.buf;
977	bsd_args.len = linux_args.len;
978	bsd_args.flags = linux_to_bsd_msg_flags(linux_args.flags);
979	bsd_args.from = linux_args.from;
980	bsd_args.fromlenaddr = linux_args.fromlen;
981	error = orecvfrom(td, &bsd_args);
982	if (error)
983		return (error);
984	if (linux_args.from) {
985		error = linux_sa_put((struct osockaddr *) linux_args.from);
986		if (error)
987			return (error);
988	}
989	return (0);
990}
991
992struct linux_sendmsg_args {
993	int s;
994	const struct msghdr *msg;
995	int flags;
996};
997
998static int
999linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1000{
1001	struct linux_sendmsg_args linux_args;
1002	struct sendmsg_args /* {
1003		int s;
1004		const struct msghdr *msg;
1005		int flags;
1006	} */ bsd_args;
1007	struct msghdr msg;
1008	struct msghdr *nmsg = NULL;
1009	struct cmsghdr *cmsg;
1010	int error;
1011	caddr_t sg;
1012
1013	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1014		return (error);
1015
1016	error = copyin(linux_args.msg, &msg, sizeof(msg));
1017	if (error)
1018		return (error);
1019
1020	sg = stackgap_init();
1021	nmsg = (struct msghdr *)stackgap_alloc(&sg, sizeof(struct msghdr));
1022	if (nmsg == NULL)
1023		return (ENOMEM);
1024
1025	bcopy(&msg, nmsg, sizeof(struct msghdr));
1026
1027	if (msg.msg_name != NULL) {
1028		struct sockaddr *sa;
1029
1030		error = linux_sa_get(&sg, &sa,
1031		    (struct osockaddr *) msg.msg_name, &msg.msg_namelen);
1032		if (error)
1033			return (error);
1034
1035		nmsg->msg_name = sa;
1036	}
1037
1038	if (msg.msg_control != NULL) {
1039		nmsg->msg_control = (struct cmsghdr *)stackgap_alloc(&sg,
1040		    msg.msg_controllen);
1041		if (nmsg->msg_control == NULL)
1042			return (ENOMEM);
1043
1044		bcopy(msg.msg_control, nmsg->msg_control, msg.msg_controllen);
1045		cmsg = (struct cmsghdr*)nmsg->msg_control;
1046
1047		cmsg->cmsg_level = linux_to_bsd_sockopt_level(cmsg->cmsg_level);
1048	}
1049
1050	bsd_args.s = linux_args.s;
1051	bsd_args.msg = (caddr_t)nmsg;
1052	bsd_args.flags = linux_to_bsd_msg_flags(linux_args.flags);
1053	return (sendmsg(td, &bsd_args));
1054}
1055
1056struct linux_recvmsg_args {
1057	int s;
1058	struct msghdr *msg;
1059	int flags;
1060};
1061
1062static int
1063linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1064{
1065	struct linux_recvmsg_args linux_args;
1066	struct recvmsg_args /* {
1067		int	s;
1068		struct	msghdr *msg;
1069		int	flags;
1070	} */ bsd_args;
1071	struct msghdr msg;
1072	struct cmsghdr *cmsg;
1073	int error;
1074
1075	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1076		return (error);
1077
1078	bsd_args.s = linux_args.s;
1079	bsd_args.msg = linux_args.msg;
1080	bsd_args.flags = linux_to_bsd_msg_flags(linux_args.flags);
1081	error = recvmsg(td, &bsd_args);
1082	if (error)
1083		return (error);
1084
1085	if (bsd_args.msg->msg_control != NULL) {
1086		cmsg = (struct cmsghdr*)bsd_args.msg->msg_control;
1087		cmsg->cmsg_level = bsd_to_linux_sockopt_level(cmsg->cmsg_level);
1088	}
1089
1090	error = copyin(linux_args.msg, &msg, sizeof(msg));
1091	if (error)
1092		return (error);
1093	if (msg.msg_name && msg.msg_namelen > 2)
1094		error = linux_sa_put(msg.msg_name);
1095	return (error);
1096}
1097
/*
 * Argument block for the Linux shutdown() socketcall.  linux_shutdown()
 * copies it in with copyin() and forwards both members unchanged.
 */
struct linux_shutdown_args {
	int	s;	/* copied to shutdown_args.s */
	int	how;	/* copied to shutdown_args.how */
};
1102
1103static int
1104linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1105{
1106	struct linux_shutdown_args linux_args;
1107	struct shutdown_args /* {
1108		int s;
1109		int how;
1110	} */ bsd_args;
1111	int error;
1112
1113	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1114		return (error);
1115
1116	bsd_args.s = linux_args.s;
1117	bsd_args.how = linux_args.how;
1118	return (shutdown(td, &bsd_args));
1119}
1120
/*
 * Argument block for the Linux setsockopt() socketcall.
 * linux_setsockopt() copies it in with copyin() before translating the
 * level and option name.
 */
struct linux_setsockopt_args {
	int	s;		/* copied to setsockopt_args.s */
	int	level;		/* run through linux_to_bsd_sockopt_level() */
	int	optname;	/* translated according to the level */
	void	*optval;	/* copied to setsockopt_args.val */
	int	optlen;		/* copied to setsockopt_args.valsize */
};
1128
1129static int
1130linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1131{
1132	struct linux_setsockopt_args linux_args;
1133	struct setsockopt_args /* {
1134		int s;
1135		int level;
1136		int name;
1137		caddr_t val;
1138		int valsize;
1139	} */ bsd_args;
1140	int error, name;
1141
1142	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1143		return (error);
1144
1145	bsd_args.s = linux_args.s;
1146	bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level);
1147	switch (bsd_args.level) {
1148	case SOL_SOCKET:
1149		name = linux_to_bsd_so_sockopt(linux_args.optname);
1150		break;
1151	case IPPROTO_IP:
1152		name = linux_to_bsd_ip_sockopt(linux_args.optname);
1153		break;
1154	case IPPROTO_TCP:
1155		/* Linux TCP option values match BSD's */
1156		name = linux_args.optname;
1157		break;
1158	default:
1159		name = -1;
1160		break;
1161	}
1162	if (name == -1)
1163		return (EINVAL);
1164
1165	bsd_args.name = name;
1166	bsd_args.val = linux_args.optval;
1167	bsd_args.valsize = linux_args.optlen;
1168	return (setsockopt(td, &bsd_args));
1169}
1170
/*
 * Argument block for the Linux getsockopt() socketcall.
 * linux_getsockopt() copies it in with copyin() before translating the
 * level and option name.
 */
struct linux_getsockopt_args {
	int	s;		/* copied to getsockopt_args.s */
	int	level;		/* run through linux_to_bsd_sockopt_level() */
	int	optname;	/* translated according to the level */
	void	*optval;	/* copied to getsockopt_args.val */
	int	*optlen;	/* copied to getsockopt_args.avalsize */
};
1178
1179static int
1180linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1181{
1182	struct linux_getsockopt_args linux_args;
1183	struct getsockopt_args /* {
1184		int s;
1185		int level;
1186		int name;
1187		caddr_t val;
1188		int *avalsize;
1189	} */ bsd_args;
1190	int error, name;
1191
1192	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1193		return (error);
1194
1195	bsd_args.s = linux_args.s;
1196	bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level);
1197	switch (bsd_args.level) {
1198	case SOL_SOCKET:
1199		name = linux_to_bsd_so_sockopt(linux_args.optname);
1200		break;
1201	case IPPROTO_IP:
1202		name = linux_to_bsd_ip_sockopt(linux_args.optname);
1203		break;
1204	case IPPROTO_TCP:
1205		/* Linux TCP option values match BSD's */
1206		name = linux_args.optname;
1207		break;
1208	default:
1209		name = -1;
1210		break;
1211	}
1212	if (name == -1)
1213		return (EINVAL);
1214
1215	bsd_args.name = name;
1216	bsd_args.val = linux_args.optval;
1217	bsd_args.avalsize = linux_args.optlen;
1218	return (getsockopt(td, &bsd_args));
1219}
1220
1221int
1222linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1223{
1224	void *arg = (void *)args->args;
1225
1226	switch (args->what) {
1227	case LINUX_SOCKET:
1228		return (linux_socket(td, arg));
1229	case LINUX_BIND:
1230		return (linux_bind(td, arg));
1231	case LINUX_CONNECT:
1232		return (linux_connect(td, arg));
1233	case LINUX_LISTEN:
1234		return (linux_listen(td, arg));
1235	case LINUX_ACCEPT:
1236		return (linux_accept(td, arg));
1237	case LINUX_GETSOCKNAME:
1238		return (linux_getsockname(td, arg));
1239	case LINUX_GETPEERNAME:
1240		return (linux_getpeername(td, arg));
1241	case LINUX_SOCKETPAIR:
1242		return (linux_socketpair(td, arg));
1243	case LINUX_SEND:
1244		return (linux_send(td, arg));
1245	case LINUX_RECV:
1246		return (linux_recv(td, arg));
1247	case LINUX_SENDTO:
1248		return (linux_sendto(td, arg));
1249	case LINUX_RECVFROM:
1250		return (linux_recvfrom(td, arg));
1251	case LINUX_SHUTDOWN:
1252		return (linux_shutdown(td, arg));
1253	case LINUX_SETSOCKOPT:
1254		return (linux_setsockopt(td, arg));
1255	case LINUX_GETSOCKOPT:
1256		return (linux_getsockopt(td, arg));
1257	case LINUX_SENDMSG:
1258		return (linux_sendmsg(td, arg));
1259	case LINUX_RECVMSG:
1260		return (linux_recvmsg(td, arg));
1261	}
1262
1263	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1264	return (ENOSYS);
1265}
1266#endif	/*!__alpha__*/
1267