linux_socket.c revision 193262
1161754Sru/*-
288276Smarkm * Copyright (c) 1995 Søren Schmidt
388276Smarkm * All rights reserved.
47527Sjkh *
57527Sjkh * Redistribution and use in source and binary forms, with or without
67527Sjkh * modification, are permitted provided that the following conditions
77527Sjkh * are met:
87527Sjkh * 1. Redistributions of source code must retain the above copyright
97527Sjkh *    notice, this list of conditions and the following disclaimer
107527Sjkh *    in this position and unchanged.
117527Sjkh * 2. Redistributions in binary form must reproduce the above copyright
127527Sjkh *    notice, this list of conditions and the following disclaimer in the
137527Sjkh *    documentation and/or other materials provided with the distribution.
147527Sjkh * 3. The name of the author may not be used to endorse or promote products
157527Sjkh *    derived from this software without specific prior written permission
167527Sjkh *
17161754Sru * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
187527Sjkh * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
197527Sjkh * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
207527Sjkh * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
217527Sjkh * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
227527Sjkh * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
237527Sjkh * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
247527Sjkh * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
257527Sjkh * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
267527Sjkh * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
277527Sjkh */
287527Sjkh
297527Sjkh#include <sys/cdefs.h>
307527Sjkh__FBSDID("$FreeBSD: head/sys/compat/linux/linux_socket.c 193262 2009-06-01 20:42:27Z dchagin $");
317527Sjkh
327527Sjkh/* XXX we use functions that might not exist. */
337527Sjkh#include "opt_compat.h"
3488276Smarkm#include "opt_inet6.h"
3588276Smarkm
3688276Smarkm#include <sys/param.h>
377527Sjkh#include <sys/proc.h>
38161754Sru#include <sys/systm.h>
397527Sjkh#include <sys/sysproto.h>
407527Sjkh#include <sys/fcntl.h>
417527Sjkh#include <sys/file.h>
427527Sjkh#include <sys/limits.h>
437527Sjkh#include <sys/lock.h>
4428365Scharnier#include <sys/malloc.h>
457527Sjkh#include <sys/mutex.h>
46161754Sru#include <sys/mbuf.h>
4728365Scharnier#include <sys/socket.h>
487527Sjkh#include <sys/socketvar.h>
497527Sjkh#include <sys/syscallsubr.h>
507527Sjkh#include <sys/uio.h>
517527Sjkh#include <sys/syslog.h>
527527Sjkh#include <sys/un.h>
5388276Smarkm#include <sys/vimage.h>
547527Sjkh
557527Sjkh#include <net/if.h>
567527Sjkh#include <netinet/in.h>
577527Sjkh#include <netinet/in_systm.h>
587527Sjkh#include <netinet/ip.h>
597527Sjkh#ifdef INET6
607527Sjkh#include <netinet/ip6.h>
617527Sjkh#include <netinet6/ip6_var.h>
6288276Smarkm#include <netinet6/in6_var.h>
637527Sjkh#include <netinet6/vinet6.h>
647527Sjkh#endif
657527Sjkh
667527Sjkh#ifdef COMPAT_LINUX32
6788276Smarkm#include <machine/../linux32/linux.h>
687527Sjkh#include <machine/../linux32/linux32_proto.h>
697527Sjkh#else
707527Sjkh#include <machine/../linux/linux.h>
717527Sjkh#include <machine/../linux/linux_proto.h>
727527Sjkh#endif
737527Sjkh#include <compat/linux/linux_socket.h>
74161754Sru#include <compat/linux/linux_util.h>
75161754Sru
/* Forward declarations for helpers that are used before they are defined. */
static int	do_sa_get(struct sockaddr **, const struct osockaddr *, int *,
		    struct malloc_type *);
static int	linux_to_bsd_domain(int);
79161754Sru
807527Sjkh/*
817527Sjkh * Reads a linux sockaddr and does any necessary translation.
827527Sjkh * Linux sockaddrs don't have a length field, only a family.
8388276Smarkm */
847527Sjkhstatic int
857527Sjkhlinux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
867527Sjkh{
877527Sjkh	int osalen = len;
887527Sjkh
897527Sjkh	return (do_sa_get(sap, osa, &osalen, M_SONAME));
907527Sjkh}
9188276Smarkm
927527Sjkh/*
937527Sjkh * Copy the osockaddr structure pointed to by osa to kernel, adjust
94161754Sru * family and convert to sockaddr.
95161754Sru */
967527Sjkhstatic int
977527Sjkhdo_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
987527Sjkh    struct malloc_type *mtype)
997527Sjkh{
1007527Sjkh	int error=0, bdom;
1017527Sjkh	struct sockaddr *sa;
1027527Sjkh	struct osockaddr *kosa;
1037527Sjkh	int alloclen;
1047527Sjkh#ifdef INET6
105161754Sru	int oldv6size;
1067527Sjkh	struct sockaddr_in6 *sin6;
10788276Smarkm#endif
10888276Smarkm
1097527Sjkh	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
1107527Sjkh		return (EINVAL);
111161754Sru
11288276Smarkm	alloclen = *osalen;
11388276Smarkm#ifdef INET6
1147527Sjkh	oldv6size = 0;
1157527Sjkh	/*
116161754Sru	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
11788276Smarkm	 * if it's a v4-mapped address, so reserve the proper space
1187527Sjkh	 * for it.
1197527Sjkh	 */
1207527Sjkh	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
1217527Sjkh		alloclen = sizeof (struct sockaddr_in6);
1227527Sjkh		oldv6size = 1;
1237527Sjkh	}
1247527Sjkh#endif
1257527Sjkh
1267527Sjkh	kosa = malloc(alloclen, mtype, M_WAITOK);
1277527Sjkh
128161754Sru	if ((error = copyin(osa, kosa, *osalen)))
129161754Sru		goto out;
1307527Sjkh
1317527Sjkh	bdom = linux_to_bsd_domain(kosa->sa_family);
1327527Sjkh	if (bdom == -1) {
1337527Sjkh		error = EINVAL;
1347527Sjkh		goto out;
1357527Sjkh	}
1367527Sjkh
1377527Sjkh#ifdef INET6
1387527Sjkh	/*
1397527Sjkh	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
1407527Sjkh	 * which lacks the scope id compared with RFC2553 one. If we detect
1417527Sjkh	 * the situation, reject the address and write a message to system log.
1427527Sjkh	 *
1437527Sjkh	 * Still accept addresses for which the scope id is not used.
1447527Sjkh	 */
1457527Sjkh	if (oldv6size && bdom == AF_INET6) {
14688276Smarkm		sin6 = (struct sockaddr_in6 *)kosa;
14788276Smarkm		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
14888276Smarkm		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
14988276Smarkm		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
1507527Sjkh		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
1517527Sjkh		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
1527527Sjkh		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
1537527Sjkh			sin6->sin6_scope_id = 0;
1547527Sjkh		} else {
1557527Sjkh			log(LOG_DEBUG,
1567527Sjkh			    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
1577527Sjkh			error = EINVAL;
1587527Sjkh			goto out;
1597527Sjkh		}
1607527Sjkh	} else
1617527Sjkh#endif
1627527Sjkh	if (bdom == AF_INET)
1637527Sjkh		alloclen = sizeof(struct sockaddr_in);
16488276Smarkm
1657527Sjkh	sa = (struct sockaddr *) kosa;
16688276Smarkm	sa->sa_family = bdom;
1677527Sjkh	sa->sa_len = alloclen;
16888276Smarkm
1697527Sjkh	*sap = sa;
17088276Smarkm	*osalen = alloclen;
1717527Sjkh	return (0);
17288276Smarkm
1737527Sjkhout:
17488276Smarkm	free(kosa, mtype);
1757527Sjkh	return (error);
17688276Smarkm}
1777527Sjkh
17888276Smarkmstatic int
1797527Sjkhlinux_to_bsd_domain(int domain)
18088276Smarkm{
1817527Sjkh
18288276Smarkm	switch (domain) {
1837527Sjkh	case LINUX_AF_UNSPEC:
18488276Smarkm		return (AF_UNSPEC);
1857527Sjkh	case LINUX_AF_UNIX:
18688276Smarkm		return (AF_LOCAL);
18788276Smarkm	case LINUX_AF_INET:
18888276Smarkm		return (AF_INET);
189161754Sru	case LINUX_AF_INET6:
190161754Sru		return (AF_INET6);
1917527Sjkh	case LINUX_AF_AX25:
1927527Sjkh		return (AF_CCITT);
1937527Sjkh	case LINUX_AF_IPX:
1947527Sjkh		return (AF_IPX);
1957527Sjkh	case LINUX_AF_APPLETALK:
1967527Sjkh		return (AF_APPLETALK);
1977527Sjkh	}
1987527Sjkh	return (-1);
1997527Sjkh}
2007527Sjkh
2017527Sjkhstatic int
2027527Sjkhbsd_to_linux_domain(int domain)
2037527Sjkh{
2047527Sjkh
2057527Sjkh	switch (domain) {
2067527Sjkh	case AF_UNSPEC:
2077527Sjkh		return (LINUX_AF_UNSPEC);
2087527Sjkh	case AF_LOCAL:
2097527Sjkh		return (LINUX_AF_UNIX);
2107527Sjkh	case AF_INET:
2117527Sjkh		return (LINUX_AF_INET);
212161754Sru	case AF_INET6:
2137527Sjkh		return (LINUX_AF_INET6);
21488276Smarkm	case AF_CCITT:
2157527Sjkh		return (LINUX_AF_AX25);
2167527Sjkh	case AF_IPX:
2177527Sjkh		return (LINUX_AF_IPX);
2187527Sjkh	case AF_APPLETALK:
21988276Smarkm		return (LINUX_AF_APPLETALK);
22088276Smarkm	}
22188276Smarkm	return (-1);
22288276Smarkm}
2237527Sjkh
2247527Sjkhstatic int
2257527Sjkhlinux_to_bsd_sockopt_level(int level)
2267527Sjkh{
2277527Sjkh
2287527Sjkh	switch (level) {
2297527Sjkh	case LINUX_SOL_SOCKET:
2307527Sjkh		return (SOL_SOCKET);
2317527Sjkh	}
2327527Sjkh	return (level);
23388276Smarkm}
2347527Sjkh
2357527Sjkhstatic int
2367527Sjkhbsd_to_linux_sockopt_level(int level)
2377527Sjkh{
2387527Sjkh
2397527Sjkh	switch (level) {
2407527Sjkh	case SOL_SOCKET:
2417527Sjkh		return (LINUX_SOL_SOCKET);
2427527Sjkh	}
243	return (level);
244}
245
246static int
247linux_to_bsd_ip_sockopt(int opt)
248{
249
250	switch (opt) {
251	case LINUX_IP_TOS:
252		return (IP_TOS);
253	case LINUX_IP_TTL:
254		return (IP_TTL);
255	case LINUX_IP_OPTIONS:
256		return (IP_OPTIONS);
257	case LINUX_IP_MULTICAST_IF:
258		return (IP_MULTICAST_IF);
259	case LINUX_IP_MULTICAST_TTL:
260		return (IP_MULTICAST_TTL);
261	case LINUX_IP_MULTICAST_LOOP:
262		return (IP_MULTICAST_LOOP);
263	case LINUX_IP_ADD_MEMBERSHIP:
264		return (IP_ADD_MEMBERSHIP);
265	case LINUX_IP_DROP_MEMBERSHIP:
266		return (IP_DROP_MEMBERSHIP);
267	case LINUX_IP_HDRINCL:
268		return (IP_HDRINCL);
269	}
270	return (-1);
271}
272
273static int
274linux_to_bsd_so_sockopt(int opt)
275{
276
277	switch (opt) {
278	case LINUX_SO_DEBUG:
279		return (SO_DEBUG);
280	case LINUX_SO_REUSEADDR:
281		return (SO_REUSEADDR);
282	case LINUX_SO_TYPE:
283		return (SO_TYPE);
284	case LINUX_SO_ERROR:
285		return (SO_ERROR);
286	case LINUX_SO_DONTROUTE:
287		return (SO_DONTROUTE);
288	case LINUX_SO_BROADCAST:
289		return (SO_BROADCAST);
290	case LINUX_SO_SNDBUF:
291		return (SO_SNDBUF);
292	case LINUX_SO_RCVBUF:
293		return (SO_RCVBUF);
294	case LINUX_SO_KEEPALIVE:
295		return (SO_KEEPALIVE);
296	case LINUX_SO_OOBINLINE:
297		return (SO_OOBINLINE);
298	case LINUX_SO_LINGER:
299		return (SO_LINGER);
300	case LINUX_SO_PEERCRED:
301		return (LOCAL_PEERCRED);
302	case LINUX_SO_RCVLOWAT:
303		return (SO_RCVLOWAT);
304	case LINUX_SO_SNDLOWAT:
305		return (SO_SNDLOWAT);
306	case LINUX_SO_RCVTIMEO:
307		return (SO_RCVTIMEO);
308	case LINUX_SO_SNDTIMEO:
309		return (SO_SNDTIMEO);
310	case LINUX_SO_TIMESTAMP:
311		return (SO_TIMESTAMP);
312	case LINUX_SO_ACCEPTCONN:
313		return (SO_ACCEPTCONN);
314	}
315	return (-1);
316}
317
318static int
319linux_to_bsd_msg_flags(int flags)
320{
321	int ret_flags = 0;
322
323	if (flags & LINUX_MSG_OOB)
324		ret_flags |= MSG_OOB;
325	if (flags & LINUX_MSG_PEEK)
326		ret_flags |= MSG_PEEK;
327	if (flags & LINUX_MSG_DONTROUTE)
328		ret_flags |= MSG_DONTROUTE;
329	if (flags & LINUX_MSG_CTRUNC)
330		ret_flags |= MSG_CTRUNC;
331	if (flags & LINUX_MSG_TRUNC)
332		ret_flags |= MSG_TRUNC;
333	if (flags & LINUX_MSG_DONTWAIT)
334		ret_flags |= MSG_DONTWAIT;
335	if (flags & LINUX_MSG_EOR)
336		ret_flags |= MSG_EOR;
337	if (flags & LINUX_MSG_WAITALL)
338		ret_flags |= MSG_WAITALL;
339	if (flags & LINUX_MSG_NOSIGNAL)
340		ret_flags |= MSG_NOSIGNAL;
341#if 0 /* not handled */
342	if (flags & LINUX_MSG_PROXY)
343		;
344	if (flags & LINUX_MSG_FIN)
345		;
346	if (flags & LINUX_MSG_SYN)
347		;
348	if (flags & LINUX_MSG_CONFIRM)
349		;
350	if (flags & LINUX_MSG_RST)
351		;
352	if (flags & LINUX_MSG_ERRQUEUE)
353		;
354#endif
355	return ret_flags;
356}
357
358/*
359* If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
360* native syscall will fault.  Thus, we don't really need to check the
361* return values for these functions.
362*/
363
364static int
365bsd_to_linux_sockaddr(struct sockaddr *arg)
366{
367	struct sockaddr sa;
368	size_t sa_len = sizeof(struct sockaddr);
369	int error;
370
371	if ((error = copyin(arg, &sa, sa_len)))
372		return (error);
373
374	*(u_short *)&sa = sa.sa_family;
375
376	error = copyout(&sa, arg, sa_len);
377
378	return (error);
379}
380
381static int
382linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
383{
384	struct sockaddr sa;
385	size_t sa_len = sizeof(struct sockaddr);
386	int error;
387
388	if ((error = copyin(arg, &sa, sa_len)))
389		return (error);
390
391	sa.sa_family = *(sa_family_t *)&sa;
392	sa.sa_len = len;
393
394	error = copyout(&sa, arg, sa_len);
395
396	return (error);
397}
398
399
400static int
401linux_sa_put(struct osockaddr *osa)
402{
403	struct osockaddr sa;
404	int error, bdom;
405
406	/*
407	 * Only read/write the osockaddr family part, the rest is
408	 * not changed.
409	 */
410	error = copyin(osa, &sa, sizeof(sa.sa_family));
411	if (error)
412		return (error);
413
414	bdom = bsd_to_linux_domain(sa.sa_family);
415	if (bdom == -1)
416		return (EINVAL);
417
418	sa.sa_family = bdom;
419	error = copyout(&sa, osa, sizeof(sa.sa_family));
420	if (error)
421		return (error);
422
423	return (0);
424}
425
426static int
427linux_to_bsd_cmsg_type(int cmsg_type)
428{
429
430	switch (cmsg_type) {
431	case LINUX_SCM_RIGHTS:
432		return (SCM_RIGHTS);
433	}
434	return (-1);
435}
436
437static int
438bsd_to_linux_cmsg_type(int cmsg_type)
439{
440
441	switch (cmsg_type) {
442	case SCM_RIGHTS:
443		return (LINUX_SCM_RIGHTS);
444	}
445	return (-1);
446}
447
448static int
449linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr)
450{
451	if (lhdr->msg_controllen > INT_MAX)
452		return (ENOBUFS);
453
454	bhdr->msg_name		= PTRIN(lhdr->msg_name);
455	bhdr->msg_namelen	= lhdr->msg_namelen;
456	bhdr->msg_iov		= PTRIN(lhdr->msg_iov);
457	bhdr->msg_iovlen	= lhdr->msg_iovlen;
458	bhdr->msg_control	= PTRIN(lhdr->msg_control);
459	bhdr->msg_controllen	= lhdr->msg_controllen;
460	bhdr->msg_flags		= linux_to_bsd_msg_flags(lhdr->msg_flags);
461	return (0);
462}
463
464static int
465bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
466{
467	lhdr->msg_name		= PTROUT(bhdr->msg_name);
468	lhdr->msg_namelen	= bhdr->msg_namelen;
469	lhdr->msg_iov		= PTROUT(bhdr->msg_iov);
470	lhdr->msg_iovlen	= bhdr->msg_iovlen;
471	lhdr->msg_control	= PTROUT(bhdr->msg_control);
472	lhdr->msg_controllen	= bhdr->msg_controllen;
473	/* msg_flags skipped */
474	return (0);
475}
476
477static int
478linux_set_socket_flags(struct thread *td, int s, int flags)
479{
480	int error;
481
482	if (flags & LINUX_SOCK_NONBLOCK) {
483		error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK);
484		if (error)
485			return (error);
486	}
487	if (flags & LINUX_SOCK_CLOEXEC) {
488		error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC);
489		if (error)
490			return (error);
491	}
492	return (0);
493}
494
495static int
496linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
497    struct mbuf *control, enum uio_seg segflg)
498{
499	struct sockaddr *to;
500	int error;
501
502	if (mp->msg_name != NULL) {
503		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
504		if (error)
505			return (error);
506		mp->msg_name = to;
507	} else
508		to = NULL;
509
510	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
511	    segflg);
512
513	if (to)
514		free(to, M_SONAME);
515	return (error);
516}
517
518/* Return 0 if IP_HDRINCL is set for the given socket. */
519static int
520linux_check_hdrincl(struct thread *td, int s)
521{
522	int error, optval, size_val;
523
524	size_val = sizeof(optval);
525	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
526	    &optval, UIO_SYSSPACE, &size_val);
527	if (error)
528		return (error);
529
530	return (optval == 0);
531}
532
533struct linux_sendto_args {
534	int s;
535	l_uintptr_t msg;
536	int len;
537	int flags;
538	l_uintptr_t to;
539	int tolen;
540};
541
542/*
543 * Updated sendto() when IP_HDRINCL is set:
544 * tweak endian-dependent fields in the IP packet.
545 */
546static int
547linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
548{
549/*
550 * linux_ip_copysize defines how many bytes we should copy
551 * from the beginning of the IP packet before we customize it for BSD.
552 * It should include all the fields we modify (ip_len and ip_off).
553 */
554#define linux_ip_copysize	8
555
556	struct ip *packet;
557	struct msghdr msg;
558	struct iovec aiov[1];
559	int error;
560
561	/* Check that the packet isn't too big or too small. */
562	if (linux_args->len < linux_ip_copysize ||
563	    linux_args->len > IP_MAXPACKET)
564		return (EINVAL);
565
566	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
567
568	/* Make kernel copy of the packet to be sent */
569	if ((error = copyin(PTRIN(linux_args->msg), packet,
570	    linux_args->len)))
571		goto goout;
572
573	/* Convert fields from Linux to BSD raw IP socket format */
574	packet->ip_len = linux_args->len;
575	packet->ip_off = ntohs(packet->ip_off);
576
577	/* Prepare the msghdr and iovec structures describing the new packet */
578	msg.msg_name = PTRIN(linux_args->to);
579	msg.msg_namelen = linux_args->tolen;
580	msg.msg_iov = aiov;
581	msg.msg_iovlen = 1;
582	msg.msg_control = NULL;
583	msg.msg_flags = 0;
584	aiov[0].iov_base = (char *)packet;
585	aiov[0].iov_len = linux_args->len;
586	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
587	    NULL, UIO_SYSSPACE);
588goout:
589	free(packet, M_TEMP);
590	return (error);
591}
592
/* Arguments of the Linux socket() call as unpacked for this file. */
struct linux_socket_args {
	int	domain;		/* Linux address family */
	int	type;		/* socket type, optionally or'ed with
				   LINUX_SOCK_CLOEXEC/LINUX_SOCK_NONBLOCK */
	int	protocol;	/* protocol number, passed through as is */
};
598
599static int
600linux_socket(struct thread *td, struct linux_socket_args *args)
601{
602#ifdef INET6
603#ifndef KLD_MODULE
604	INIT_VNET_INET6(curvnet);
605#endif
606#endif
607	struct socket_args /* {
608		int domain;
609		int type;
610		int protocol;
611	} */ bsd_args;
612	int retval_socket, socket_flags;
613
614	bsd_args.protocol = args->protocol;
615	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
616	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
617		return (EINVAL);
618	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
619	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
620		return (EINVAL);
621	bsd_args.domain = linux_to_bsd_domain(args->domain);
622	if (bsd_args.domain == -1)
623		return (EAFNOSUPPORT);
624
625	retval_socket = socket(td, &bsd_args);
626	if (retval_socket)
627		return (retval_socket);
628
629	retval_socket = linux_set_socket_flags(td, td->td_retval[0],
630	    socket_flags);
631	if (retval_socket) {
632		(void)kern_close(td, td->td_retval[0]);
633		goto out;
634	}
635
636	if (bsd_args.type == SOCK_RAW
637	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
638	    && bsd_args.domain == PF_INET) {
639		/* It's a raw IP socket: set the IP_HDRINCL option. */
640		int hdrincl;
641
642		hdrincl = 1;
643		/* We ignore any error returned by kern_setsockopt() */
644		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
645		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
646	}
647#ifdef INET6
648	/*
649	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
650	 * default and some apps depend on this. So, set V6ONLY to 0
651	 * for Linux apps if the sysctl value is set to 1.
652	 */
653	if (bsd_args.domain == PF_INET6
654#ifndef KLD_MODULE
655	    /*
656	     * XXX: Avoid undefined symbol error with an IPv4 only
657	     * kernel.
658	     */
659	    && V_ip6_v6only
660#endif
661	    ) {
662		int v6only;
663
664		v6only = 0;
665		/* We ignore any error returned by setsockopt() */
666		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
667		    &v6only, UIO_SYSSPACE, sizeof(v6only));
668	}
669#endif
670
671out:
672	return (retval_socket);
673}
674
675struct linux_bind_args {
676	int s;
677	l_uintptr_t name;
678	int namelen;
679};
680
681static int
682linux_bind(struct thread *td, struct linux_bind_args *args)
683{
684	struct sockaddr *sa;
685	int error;
686
687	error = linux_getsockaddr(&sa, PTRIN(args->name),
688	    args->namelen);
689	if (error)
690		return (error);
691
692	error = kern_bind(td, args->s, sa);
693	free(sa, M_SONAME);
694	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
695	   	return (EINVAL);
696	return (error);
697}
698
699struct linux_connect_args {
700	int s;
701	l_uintptr_t name;
702	int namelen;
703};
704int linux_connect(struct thread *, struct linux_connect_args *);
705
706int
707linux_connect(struct thread *td, struct linux_connect_args *args)
708{
709	struct socket *so;
710	struct sockaddr *sa;
711	u_int fflag;
712	int error;
713
714	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
715	    args->namelen);
716	if (error)
717		return (error);
718
719	error = kern_connect(td, args->s, sa);
720	free(sa, M_SONAME);
721	if (error != EISCONN)
722		return (error);
723
724	/*
725	 * Linux doesn't return EISCONN the first time it occurs,
726	 * when on a non-blocking socket. Instead it returns the
727	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
728	 *
729	 * XXXRW: Instead of using fgetsock(), check that it is a
730	 * socket and use the file descriptor reference instead of
731	 * creating a new one.
732	 */
733	error = fgetsock(td, args->s, &so, &fflag);
734	if (error == 0) {
735		error = EISCONN;
736		if (fflag & FNONBLOCK) {
737			SOCK_LOCK(so);
738			if (so->so_emuldata == 0)
739				error = so->so_error;
740			so->so_emuldata = (void *)1;
741			SOCK_UNLOCK(so);
742		}
743		fputsock(so);
744	}
745	return (error);
746}
747
/* Arguments of the Linux listen() call as unpacked for this file. */
struct linux_listen_args {
	int	s;		/* socket descriptor */
	int	backlog;	/* backlog value, passed through as is */
};
752
753static int
754linux_listen(struct thread *td, struct linux_listen_args *args)
755{
756	struct listen_args /* {
757		int s;
758		int backlog;
759	} */ bsd_args;
760
761	bsd_args.s = args->s;
762	bsd_args.backlog = args->backlog;
763	return (listen(td, &bsd_args));
764}
765
766static int
767linux_accept_common(struct thread *td, int s, l_uintptr_t addr,
768    l_uintptr_t namelen)
769{
770	struct accept_args /* {
771		int	s;
772		struct sockaddr * __restrict name;
773		socklen_t * __restrict anamelen;
774	} */ bsd_args;
775	int error, fd;
776
777	bsd_args.s = s;
778	/* XXX: */
779	bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr);
780	bsd_args.anamelen = PTRIN(namelen);/* XXX */
781	error = accept(td, &bsd_args);
782	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
783	if (error) {
784		if (error == EFAULT && namelen != sizeof(struct sockaddr_in))
785			return (EINVAL);
786		return (error);
787	}
788	if (addr) {
789		error = linux_sa_put(PTRIN(addr));
790		if (error) {
791			(void)kern_close(td, td->td_retval[0]);
792			return (error);
793		}
794	}
795
796	/*
797	 * linux appears not to copy flags from the parent socket to the
798	 * accepted one, so we must clear the flags in the new descriptor.
799	 * Ignore any errors, because we already have an open fd.
800	 */
801	fd = td->td_retval[0];
802	(void)kern_fcntl(td, fd, F_SETFL, 0);
803	td->td_retval[0] = fd;
804	return (0);
805}
806
807struct linux_accept_args {
808	int s;
809	l_uintptr_t addr;
810	l_uintptr_t namelen;
811};
812
813static int
814linux_accept(struct thread *td, struct linux_accept_args *args)
815{
816
817	return (linux_accept_common(td, args->s, args->addr,
818	    args->namelen));
819}
820
821struct linux_getsockname_args {
822	int s;
823	l_uintptr_t addr;
824	l_uintptr_t namelen;
825};
826
827static int
828linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
829{
830	struct getsockname_args /* {
831		int	fdes;
832		struct sockaddr * __restrict asa;
833		socklen_t * __restrict alen;
834	} */ bsd_args;
835	int error;
836
837	bsd_args.fdes = args->s;
838	/* XXX: */
839	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
840	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
841	error = getsockname(td, &bsd_args);
842	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
843	if (error)
844		return (error);
845	error = linux_sa_put(PTRIN(args->addr));
846	if (error)
847		return (error);
848	return (0);
849}
850
851struct linux_getpeername_args {
852	int s;
853	l_uintptr_t addr;
854	l_uintptr_t namelen;
855};
856
857static int
858linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
859{
860	struct getpeername_args /* {
861		int fdes;
862		caddr_t asa;
863		int *alen;
864	} */ bsd_args;
865	int error;
866
867	bsd_args.fdes = args->s;
868	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
869	bsd_args.alen = (int *)PTRIN(args->namelen);
870	error = getpeername(td, &bsd_args);
871	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
872	if (error)
873		return (error);
874	error = linux_sa_put(PTRIN(args->addr));
875	if (error)
876		return (error);
877	return (0);
878}
879
880struct linux_socketpair_args {
881	int domain;
882	int type;
883	int protocol;
884	l_uintptr_t rsv;
885};
886
887static int
888linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
889{
890	struct socketpair_args /* {
891		int domain;
892		int type;
893		int protocol;
894		int *rsv;
895	} */ bsd_args;
896	int error, socket_flags;
897	int sv[2];
898
899	bsd_args.domain = linux_to_bsd_domain(args->domain);
900	if (bsd_args.domain != PF_LOCAL)
901		return (EAFNOSUPPORT);
902
903	socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
904	if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
905		return (EINVAL);
906	bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
907	if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
908		return (EINVAL);
909
910	if (args->protocol != 0 && args->protocol != PF_UNIX)
911
912		/*
913		 * Use of PF_UNIX as protocol argument is not right,
914		 * but Linux does it.
915		 * Do not map PF_UNIX as its Linux value is identical
916		 * to FreeBSD one.
917		 */
918		return (EPROTONOSUPPORT);
919	else
920		bsd_args.protocol = 0;
921	bsd_args.rsv = (int *)PTRIN(args->rsv);
922	error = kern_socketpair(td, bsd_args.domain, bsd_args.type,
923	    bsd_args.protocol, sv);
924	if (error)
925		return (error);
926	error = linux_set_socket_flags(td, sv[0], socket_flags);
927	if (error)
928		goto out;
929	error = linux_set_socket_flags(td, sv[1], socket_flags);
930	if (error)
931		goto out;
932
933	error = copyout(sv, bsd_args.rsv, 2 * sizeof(int));
934
935out:
936	if (error) {
937		(void)kern_close(td, sv[0]);
938		(void)kern_close(td, sv[1]);
939	}
940	return (error);
941}
942
943struct linux_send_args {
944	int s;
945	l_uintptr_t msg;
946	int len;
947	int flags;
948};
949
950static int
951linux_send(struct thread *td, struct linux_send_args *args)
952{
953	struct sendto_args /* {
954		int s;
955		caddr_t buf;
956		int len;
957		int flags;
958		caddr_t to;
959		int tolen;
960	} */ bsd_args;
961
962	bsd_args.s = args->s;
963	bsd_args.buf = (caddr_t)PTRIN(args->msg);
964	bsd_args.len = args->len;
965	bsd_args.flags = args->flags;
966	bsd_args.to = NULL;
967	bsd_args.tolen = 0;
968	return sendto(td, &bsd_args);
969}
970
971struct linux_recv_args {
972	int s;
973	l_uintptr_t msg;
974	int len;
975	int flags;
976};
977
978static int
979linux_recv(struct thread *td, struct linux_recv_args *args)
980{
981	struct recvfrom_args /* {
982		int s;
983		caddr_t buf;
984		int len;
985		int flags;
986		struct sockaddr *from;
987		socklen_t fromlenaddr;
988	} */ bsd_args;
989
990	bsd_args.s = args->s;
991	bsd_args.buf = (caddr_t)PTRIN(args->msg);
992	bsd_args.len = args->len;
993	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
994	bsd_args.from = NULL;
995	bsd_args.fromlenaddr = 0;
996	return (recvfrom(td, &bsd_args));
997}
998
999static int
1000linux_sendto(struct thread *td, struct linux_sendto_args *args)
1001{
1002	struct msghdr msg;
1003	struct iovec aiov;
1004	int error;
1005
1006	if (linux_check_hdrincl(td, args->s) == 0)
1007		/* IP_HDRINCL set, tweak the packet before sending */
1008		return (linux_sendto_hdrincl(td, args));
1009
1010	msg.msg_name = PTRIN(args->to);
1011	msg.msg_namelen = args->tolen;
1012	msg.msg_iov = &aiov;
1013	msg.msg_iovlen = 1;
1014	msg.msg_control = NULL;
1015	msg.msg_flags = 0;
1016	aiov.iov_base = PTRIN(args->msg);
1017	aiov.iov_len = args->len;
1018	error = linux_sendit(td, args->s, &msg, args->flags, NULL,
1019	    UIO_USERSPACE);
1020	return (error);
1021}
1022
1023struct linux_recvfrom_args {
1024	int s;
1025	l_uintptr_t buf;
1026	int len;
1027	int flags;
1028	l_uintptr_t from;
1029	l_uintptr_t fromlen;
1030};
1031
1032static int
1033linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
1034{
1035	struct recvfrom_args /* {
1036		int	s;
1037		caddr_t	buf;
1038		size_t	len;
1039		int	flags;
1040		struct sockaddr * __restrict from;
1041		socklen_t * __restrict fromlenaddr;
1042	} */ bsd_args;
1043	size_t len;
1044	int error;
1045
1046	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
1047		return (error);
1048
1049	bsd_args.s = args->s;
1050	bsd_args.buf = PTRIN(args->buf);
1051	bsd_args.len = args->len;
1052	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1053	/* XXX: */
1054	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
1055	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
1056
1057	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
1058	error = recvfrom(td, &bsd_args);
1059	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
1060
1061	if (error)
1062		return (error);
1063	if (args->from) {
1064		error = linux_sa_put((struct osockaddr *)
1065		    PTRIN(args->from));
1066		if (error)
1067			return (error);
1068	}
1069	return (0);
1070}
1071
1072struct linux_sendmsg_args {
1073	int s;
1074	l_uintptr_t msg;
1075	int flags;
1076};
1077
1078static int
1079linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
1080{
1081	struct cmsghdr *cmsg;
1082	struct mbuf *control;
1083	struct msghdr msg;
1084	struct l_cmsghdr linux_cmsg;
1085	struct l_cmsghdr *ptr_cmsg;
1086	struct l_msghdr linux_msg;
1087	struct iovec *iov;
1088	socklen_t datalen;
1089	void *data;
1090	int error;
1091
1092	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1093	if (error)
1094		return (error);
1095	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1096	if (error)
1097		return (error);
1098
1099	/*
1100	 * Some Linux applications (ping) define a non-NULL control data
1101	 * pointer, but a msg_controllen of 0, which is not allowed in the
1102	 * FreeBSD system call interface.  NULL the msg_control pointer in
1103	 * order to handle this case.  This should be checked, but allows the
1104	 * Linux ping to work.
1105	 */
1106	if (msg.msg_control != NULL && msg.msg_controllen == 0)
1107		msg.msg_control = NULL;
1108
1109#ifdef COMPAT_LINUX32
1110	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1111	    &iov, EMSGSIZE);
1112#else
1113	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1114#endif
1115	if (error)
1116		return (error);
1117
1118	if (msg.msg_control != NULL) {
1119		error = ENOBUFS;
1120		cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1121		control = m_get(M_WAIT, MT_CONTROL);
1122		if (control == NULL)
1123			goto bad;
1124		ptr_cmsg = LINUX_CMSG_FIRSTHDR(&msg);
1125
1126		do {
1127			error = copyin(ptr_cmsg, &linux_cmsg,
1128			    sizeof(struct l_cmsghdr));
1129			if (error)
1130				goto bad;
1131
1132			error = EINVAL;
1133			if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
1134				goto bad;
1135
1136			/*
1137			 * Now we support only SCM_RIGHTS, so return EINVAL
1138			 * in any other cmsg_type
1139			 */
1140			if ((cmsg->cmsg_type =
1141			    linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type)) == -1)
1142				goto bad;
1143			cmsg->cmsg_level =
1144			    linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level);
1145
1146			datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
1147			cmsg->cmsg_len = CMSG_LEN(datalen);
1148			data = LINUX_CMSG_DATA(ptr_cmsg);
1149
1150			error = ENOBUFS;
1151			if (!m_append(control, CMSG_HDRSZ, (c_caddr_t) cmsg))
1152				goto bad;
1153			if (!m_append(control, datalen, (c_caddr_t) data))
1154				goto bad;
1155		} while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&msg, ptr_cmsg)));
1156	} else {
1157		control = NULL;
1158		cmsg = NULL;
1159	}
1160
1161	msg.msg_iov = iov;
1162	msg.msg_flags = 0;
1163	error = linux_sendit(td, args->s, &msg, args->flags, control,
1164	    UIO_USERSPACE);
1165
1166bad:
1167	free(iov, M_IOV);
1168	if (cmsg)
1169		free(cmsg, M_TEMP);
1170	return (error);
1171}
1172
1173struct linux_recvmsg_args {
1174	int s;
1175	l_uintptr_t msg;
1176	int flags;
1177};
1178
1179static int
1180linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1181{
1182	struct cmsghdr *cm;
1183	struct msghdr msg;
1184	struct l_cmsghdr *linux_cmsg = NULL;
1185	socklen_t datalen, outlen, clen;
1186	struct l_msghdr linux_msg;
1187	struct iovec *iov, *uiov;
1188	struct mbuf *control = NULL;
1189	struct mbuf **controlp;
1190	caddr_t outbuf;
1191	void *data;
1192	int error, i, fd, fds, *fdp;
1193
1194	error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
1195	if (error)
1196		return (error);
1197
1198	error = linux_to_bsd_msghdr(&msg, &linux_msg);
1199	if (error)
1200		return (error);
1201
1202#ifdef COMPAT_LINUX32
1203	error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
1204	    &iov, EMSGSIZE);
1205#else
1206	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1207#endif
1208	if (error)
1209		return (error);
1210
1211	if (msg.msg_name) {
1212		error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1213		    msg.msg_namelen);
1214		if (error)
1215			goto bad;
1216	}
1217
1218	uiov = msg.msg_iov;
1219	msg.msg_iov = iov;
1220	controlp = (msg.msg_control != NULL) ? &control : NULL;
1221	error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
1222	msg.msg_iov = uiov;
1223	if (error)
1224		goto bad;
1225
1226	error = bsd_to_linux_msghdr(&msg, &linux_msg);
1227	if (error)
1228		goto bad;
1229
1230	if (linux_msg.msg_name) {
1231		error = bsd_to_linux_sockaddr((struct sockaddr *)
1232		    PTRIN(linux_msg.msg_name));
1233		if (error)
1234			goto bad;
1235	}
1236	if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
1237		error = linux_sa_put(PTRIN(linux_msg.msg_name));
1238		if (error)
1239			goto bad;
1240	}
1241
1242	if (control) {
1243
1244		linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
1245		outbuf = PTRIN(linux_msg.msg_control);
1246		cm = mtod(control, struct cmsghdr *);
1247		outlen = 0;
1248		clen = control->m_len;
1249
1250		while (cm != NULL) {
1251
1252			if ((linux_cmsg->cmsg_type =
1253			    bsd_to_linux_cmsg_type(cm->cmsg_type)) == -1)
1254			{
1255				error = EINVAL;
1256				goto bad;
1257			}
1258			data = CMSG_DATA(cm);
1259			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1260
1261			switch (linux_cmsg->cmsg_type)
1262			{
1263			case LINUX_SCM_RIGHTS:
1264				if (outlen + LINUX_CMSG_LEN(datalen) >
1265				    linux_msg.msg_controllen) {
1266					if (outlen == 0) {
1267						error = EMSGSIZE;
1268						goto bad;
1269					} else {
1270						linux_msg.msg_flags |=
1271						    LINUX_MSG_CTRUNC;
1272						goto out;
1273					}
1274				}
1275				if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
1276					fds = datalen / sizeof(int);
1277					fdp = data;
1278					for (i = 0; i < fds; i++) {
1279						fd = *fdp++;
1280						(void)kern_fcntl(td, fd,
1281						    F_SETFD, FD_CLOEXEC);
1282					}
1283				}
1284				break;
1285			}
1286
1287			linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
1288			linux_cmsg->cmsg_level =
1289			    bsd_to_linux_sockopt_level(cm->cmsg_level);
1290
1291			error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
1292			if (error)
1293				goto bad;
1294			outbuf += L_CMSG_HDRSZ;
1295
1296			error = copyout(data, outbuf, datalen);
1297			if (error)
1298				goto bad;
1299
1300			outbuf += LINUX_CMSG_ALIGN(datalen);
1301			outlen += LINUX_CMSG_LEN(datalen);
1302			linux_msg.msg_controllen = outlen;
1303
1304			if (CMSG_SPACE(datalen) < clen) {
1305				clen -= CMSG_SPACE(datalen);
1306				cm = (struct cmsghdr *)
1307				    ((caddr_t)cm + CMSG_SPACE(datalen));
1308			} else
1309				cm = NULL;
1310		}
1311	}
1312
1313out:
1314	error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
1315
1316bad:
1317	free(iov, M_IOV);
1318	if (control != NULL)
1319		m_freem(control);
1320	if (linux_cmsg != NULL)
1321		free(linux_cmsg, M_TEMP);
1322
1323	return (error);
1324}
1325
/*
 * Argument block of the Linux shutdown socket call, as read by
 * linux_shutdown().  Both members are forwarded unchanged to the native
 * shutdown().
 */
struct linux_shutdown_args {
	int s;		/* socket descriptor */
	int how;	/* "how" value handed to shutdown() */
};
1330
1331static int
1332linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1333{
1334	struct shutdown_args /* {
1335		int s;
1336		int how;
1337	} */ bsd_args;
1338
1339	bsd_args.s = args->s;
1340	bsd_args.how = args->how;
1341	return (shutdown(td, &bsd_args));
1342}
1343
1344struct linux_setsockopt_args {
1345	int s;
1346	int level;
1347	int optname;
1348	l_uintptr_t optval;
1349	int optlen;
1350};
1351
1352static int
1353linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1354{
1355	struct setsockopt_args /* {
1356		int s;
1357		int level;
1358		int name;
1359		caddr_t val;
1360		int valsize;
1361	} */ bsd_args;
1362	l_timeval linux_tv;
1363	struct timeval tv;
1364	int error, name;
1365
1366	bsd_args.s = args->s;
1367	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1368	switch (bsd_args.level) {
1369	case SOL_SOCKET:
1370		name = linux_to_bsd_so_sockopt(args->optname);
1371		switch (name) {
1372		case SO_RCVTIMEO:
1373			/* FALLTHROUGH */
1374		case SO_SNDTIMEO:
1375			error = copyin(PTRIN(args->optval), &linux_tv,
1376			    sizeof(linux_tv));
1377			if (error)
1378				return (error);
1379			tv.tv_sec = linux_tv.tv_sec;
1380			tv.tv_usec = linux_tv.tv_usec;
1381			return (kern_setsockopt(td, args->s, bsd_args.level,
1382			    name, &tv, UIO_SYSSPACE, sizeof(tv)));
1383			/* NOTREACHED */
1384			break;
1385		default:
1386			break;
1387		}
1388		break;
1389	case IPPROTO_IP:
1390		name = linux_to_bsd_ip_sockopt(args->optname);
1391		break;
1392	case IPPROTO_TCP:
1393		/* Linux TCP option values match BSD's */
1394		name = args->optname;
1395		break;
1396	default:
1397		name = -1;
1398		break;
1399	}
1400	if (name == -1)
1401		return (ENOPROTOOPT);
1402
1403	bsd_args.name = name;
1404	bsd_args.val = PTRIN(args->optval);
1405	bsd_args.valsize = args->optlen;
1406
1407	if (name == IPV6_NEXTHOP) {
1408		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1409			bsd_args.valsize);
1410		error = setsockopt(td, &bsd_args);
1411		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1412	} else
1413		error = setsockopt(td, &bsd_args);
1414
1415	return (error);
1416}
1417
1418struct linux_getsockopt_args {
1419	int s;
1420	int level;
1421	int optname;
1422	l_uintptr_t optval;
1423	l_uintptr_t optlen;
1424};
1425
1426static int
1427linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1428{
1429	struct getsockopt_args /* {
1430		int s;
1431		int level;
1432		int name;
1433		caddr_t val;
1434		int *avalsize;
1435	} */ bsd_args;
1436	l_timeval linux_tv;
1437	struct timeval tv;
1438	socklen_t tv_len, xulen;
1439	struct xucred xu;
1440	struct l_ucred lxu;
1441	int error, name;
1442
1443	bsd_args.s = args->s;
1444	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1445	switch (bsd_args.level) {
1446	case SOL_SOCKET:
1447		name = linux_to_bsd_so_sockopt(args->optname);
1448		switch (name) {
1449		case SO_RCVTIMEO:
1450			/* FALLTHROUGH */
1451		case SO_SNDTIMEO:
1452			tv_len = sizeof(tv);
1453			error = kern_getsockopt(td, args->s, bsd_args.level,
1454			    name, &tv, UIO_SYSSPACE, &tv_len);
1455			if (error)
1456				return (error);
1457			linux_tv.tv_sec = tv.tv_sec;
1458			linux_tv.tv_usec = tv.tv_usec;
1459			return (copyout(&linux_tv, PTRIN(args->optval),
1460			    sizeof(linux_tv)));
1461			/* NOTREACHED */
1462			break;
1463		case LOCAL_PEERCRED:
1464			if (args->optlen != sizeof(lxu))
1465				return (EINVAL);
1466			xulen = sizeof(xu);
1467			error = kern_getsockopt(td, args->s, bsd_args.level,
1468			    name, &xu, UIO_SYSSPACE, &xulen);
1469			if (error)
1470				return (error);
1471			/*
1472			 * XXX Use 0 for pid as the FreeBSD does not cache peer pid.
1473			 */
1474			lxu.pid = 0;
1475			lxu.uid = xu.cr_uid;
1476			lxu.gid = xu.cr_gid;
1477			return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu)));
1478			/* NOTREACHED */
1479			break;
1480		default:
1481			break;
1482		}
1483		break;
1484	case IPPROTO_IP:
1485		name = linux_to_bsd_ip_sockopt(args->optname);
1486		break;
1487	case IPPROTO_TCP:
1488		/* Linux TCP option values match BSD's */
1489		name = args->optname;
1490		break;
1491	default:
1492		name = -1;
1493		break;
1494	}
1495	if (name == -1)
1496		return (EINVAL);
1497
1498	bsd_args.name = name;
1499	bsd_args.val = PTRIN(args->optval);
1500	bsd_args.avalsize = PTRIN(args->optlen);
1501
1502	if (name == IPV6_NEXTHOP) {
1503		error = getsockopt(td, &bsd_args);
1504		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1505	} else
1506		error = getsockopt(td, &bsd_args);
1507
1508	return (error);
1509}
1510
1511/* Argument list sizes for linux_socketcall */
1512
1513#define LINUX_AL(x) ((x) * sizeof(l_ulong))
1514
1515static const unsigned char lxs_args[] = {
1516	LINUX_AL(0) /* unused*/,	LINUX_AL(3) /* socket */,
1517	LINUX_AL(3) /* bind */,		LINUX_AL(3) /* connect */,
1518	LINUX_AL(2) /* listen */,	LINUX_AL(3) /* accept */,
1519	LINUX_AL(3) /* getsockname */,	LINUX_AL(3) /* getpeername */,
1520	LINUX_AL(4) /* socketpair */,	LINUX_AL(4) /* send */,
1521	LINUX_AL(4) /* recv */,		LINUX_AL(6) /* sendto */,
1522	LINUX_AL(6) /* recvfrom */,	LINUX_AL(2) /* shutdown */,
1523	LINUX_AL(5) /* setsockopt */,	LINUX_AL(5) /* getsockopt */,
1524	LINUX_AL(3) /* sendmsg */,	LINUX_AL(3) /* recvmsg */
1525};
1526
1527#define	LINUX_AL_SIZE	sizeof(lxs_args) / sizeof(lxs_args[0]) - 1
1528
1529int
1530linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1531{
1532	l_ulong a[6];
1533	void *arg;
1534	int error;
1535
1536	if (args->what < LINUX_SOCKET || args->what > LINUX_AL_SIZE)
1537		return (EINVAL);
1538	error = copyin(PTRIN(args->args), a, lxs_args[args->what]);
1539	if (error)
1540		return (error);
1541
1542	arg = a;
1543	switch (args->what) {
1544	case LINUX_SOCKET:
1545		return (linux_socket(td, arg));
1546	case LINUX_BIND:
1547		return (linux_bind(td, arg));
1548	case LINUX_CONNECT:
1549		return (linux_connect(td, arg));
1550	case LINUX_LISTEN:
1551		return (linux_listen(td, arg));
1552	case LINUX_ACCEPT:
1553		return (linux_accept(td, arg));
1554	case LINUX_GETSOCKNAME:
1555		return (linux_getsockname(td, arg));
1556	case LINUX_GETPEERNAME:
1557		return (linux_getpeername(td, arg));
1558	case LINUX_SOCKETPAIR:
1559		return (linux_socketpair(td, arg));
1560	case LINUX_SEND:
1561		return (linux_send(td, arg));
1562	case LINUX_RECV:
1563		return (linux_recv(td, arg));
1564	case LINUX_SENDTO:
1565		return (linux_sendto(td, arg));
1566	case LINUX_RECVFROM:
1567		return (linux_recvfrom(td, arg));
1568	case LINUX_SHUTDOWN:
1569		return (linux_shutdown(td, arg));
1570	case LINUX_SETSOCKOPT:
1571		return (linux_setsockopt(td, arg));
1572	case LINUX_GETSOCKOPT:
1573		return (linux_getsockopt(td, arg));
1574	case LINUX_SENDMSG:
1575		return (linux_sendmsg(td, arg));
1576	case LINUX_RECVMSG:
1577		return (linux_recvmsg(td, arg));
1578	}
1579
1580	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1581	return (ENOSYS);
1582}
1583