1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 *    notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 *    notice, this list of conditions and the following disclaimer in the
42 *    documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 *    must display the following acknowledgement:
45 *	This product includes software developed by the University of
46 *	California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 *    may be used to endorse or promote products derived from this software
49 *    without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
64 */
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections.  This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
77#include <sys/vnode_internal.h>
78#include <sys/malloc.h>
79#include <sys/mcache.h>
80#include <sys/mbuf.h>
81#include <kern/lock.h>
82#include <sys/domain.h>
83#include <sys/protosw.h>
84#include <sys/signalvar.h>
85#include <sys/socket.h>
86#include <sys/socketvar.h>
87#include <sys/kernel.h>
88#include <sys/uio_internal.h>
89#include <sys/kauth.h>
90#include <kern/task.h>
91#include <sys/priv.h>
92
93#include <security/audit/audit.h>
94
95#include <sys/kdebug.h>
96#include <sys/sysproto.h>
97#include <netinet/in.h>
98#include <net/route.h>
99#include <netinet/in_pcb.h>
100
101#if CONFIG_MACF_SOCKET_SUBSET
102#include <security/mac_framework.h>
103#endif /* MAC_SOCKET_SUBSET */
104
/*
 * Shorthand accessors for struct fileproc: each name below expands to
 * the matching field reached through the f_fglob pointer, so that
 * "fp->f_flag" reads as "fp->f_fglob->fg_flag".
 */
#define	f_cred		f_fglob->fg_cred
#define	f_data		f_fglob->fg_data
#define	f_flag		f_fglob->fg_flag
#define	f_msgcount	f_fglob->fg_msgcount
#define	f_offset	f_fglob->fg_offset
#define	f_ops		f_fglob->fg_ops
#define	f_type		f_fglob->fg_ops->fo_type
112
113
/*
 * Trace codes built with NETDBG_CODE() in the DBG_NETSOCK class.
 * Listed in ascending order of the code value within each group.
 */

/* Layer begin/end markers */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)

/* Per-function codes: send side */
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* Per-function codes: receive side */
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))

/* Per-function codes: sendfile and its sub-phases */
#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
128
129
130/* TODO: should be in header file */
131int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
132
133static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
134    int32_t *);
135static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
136    int32_t *);
137static int connectit(struct socket *, struct sockaddr *);
138static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
139    size_t, boolean_t);
140static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
141    user_addr_t, size_t, boolean_t);
142static int getsockaddrlist(struct socket *, struct sockaddr_list **,
143    user_addr_t, socklen_t, boolean_t);
144#if SENDFILE
145static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
146    boolean_t);
147#endif /* SENDFILE */
148static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
149static int connectitx(struct socket *, struct sockaddr_list **,
150    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *);
151static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
152static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
153    int *);
154static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
155
156/*
157 * System call interface to the socket abstraction.
158 */
159
160extern const struct fileops socketops;
161
162/*
163 * Returns:	0			Success
164 *		EACCES			Mandatory Access Control failure
165 *	falloc:ENFILE
166 *	falloc:EMFILE
167 *	falloc:ENOMEM
168 *	socreate:EAFNOSUPPORT
169 *	socreate:EPROTOTYPE
170 *	socreate:EPROTONOSUPPORT
171 *	socreate:ENOBUFS
172 *	socreate:ENOMEM
173 *	socreate:???			[other protocol families, IPSEC]
174 */
175int
176socket(struct proc *p,
177	struct socket_args *uap,
178	int32_t *retval)
179{
180	return (socket_common(p, uap->domain, uap->type, uap->protocol,
181	    proc_selfpid(), retval, 0));
182}
183
184int
185socket_delegate(struct proc *p,
186		struct socket_delegate_args *uap,
187		int32_t *retval)
188{
189	return socket_common(p, uap->domain, uap->type, uap->protocol,
190	    uap->epid, retval, 1);
191}
192
193static int
194socket_common(struct proc *p,
195		int domain,
196		int type,
197		int protocol,
198		pid_t epid,
199		int32_t *retval,
200		int delegate)
201{
202	struct socket *so;
203	struct fileproc *fp;
204	int fd, error;
205
206	AUDIT_ARG(socket, domain, type, protocol);
207#if CONFIG_MACF_SOCKET_SUBSET
208	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
209	    type, protocol)) != 0)
210		return (error);
211#endif /* MAC_SOCKET_SUBSET */
212
213	if (delegate) {
214		error = priv_check_cred(kauth_cred_get(),
215		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
216		if (error)
217			return (EACCES);
218	}
219
220	error = falloc(p, &fp, &fd, vfs_context_current());
221	if (error) {
222		return (error);
223	}
224	fp->f_flag = FREAD|FWRITE;
225	fp->f_ops = &socketops;
226
227	if (delegate)
228		error = socreate_delegate(domain, &so, type, protocol, epid);
229	else
230		error = socreate(domain, &so, type, protocol);
231
232	if (error) {
233		fp_free(p, fd, fp);
234	} else {
235		fp->f_data = (caddr_t)so;
236
237		proc_fdlock(p);
238		procfdtbl_releasefd(p, fd, NULL);
239
240		fp_drop(p, fd, fp, 1);
241		proc_fdunlock(p);
242
243		*retval = fd;
244	}
245	return (error);
246}
247
248/*
249 * Returns:	0			Success
250 *		EDESTADDRREQ		Destination address required
251 *		EBADF			Bad file descriptor
252 *		EACCES			Mandatory Access Control failure
253 *	file_socket:ENOTSOCK
254 *	file_socket:EBADF
255 *	getsockaddr:ENAMETOOLONG	Filename too long
256 *	getsockaddr:EINVAL		Invalid argument
257 *	getsockaddr:ENOMEM		Not enough space
258 *	getsockaddr:EFAULT		Bad address
259 *	sobindlock:???
260 */
261/* ARGSUSED */
262int
263bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
264{
265	struct sockaddr_storage ss;
266	struct sockaddr *sa = NULL;
267	struct socket *so;
268	boolean_t want_free = TRUE;
269	int error;
270
271	AUDIT_ARG(fd, uap->s);
272	error = file_socket(uap->s, &so);
273	if (error != 0)
274		return (error);
275	if (so == NULL) {
276		error = EBADF;
277		goto out;
278	}
279	if (uap->name == USER_ADDR_NULL) {
280		error = EDESTADDRREQ;
281		goto out;
282	}
283	if (uap->namelen > sizeof (ss)) {
284		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
285	} else {
286		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
287		if (error == 0) {
288			sa = (struct sockaddr *)&ss;
289			want_free = FALSE;
290		}
291	}
292	if (error != 0)
293		goto out;
294	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
295#if CONFIG_MACF_SOCKET_SUBSET
296	if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
297		error = sobindlock(so, sa, 1);	/* will lock socket */
298#else
299		error = sobindlock(so, sa, 1);	/* will lock socket */
300#endif /* MAC_SOCKET_SUBSET */
301	if (want_free)
302		FREE(sa, M_SONAME);
303out:
304	file_drop(uap->s);
305	return (error);
306}
307
308/*
309 * Returns:	0			Success
310 *		EBADF
311 *		EACCES			Mandatory Access Control failure
312 *	file_socket:ENOTSOCK
313 *	file_socket:EBADF
314 *	solisten:EINVAL
315 *	solisten:EOPNOTSUPP
316 *	solisten:???
317 */
318int
319listen(__unused struct proc *p, struct listen_args *uap,
320    __unused int32_t *retval)
321{
322	int error;
323	struct socket *so;
324
325	AUDIT_ARG(fd, uap->s);
326	error = file_socket(uap->s, &so);
327	if (error)
328		return (error);
329	if (so != NULL)
330#if CONFIG_MACF_SOCKET_SUBSET
331	{
332		error = mac_socket_check_listen(kauth_cred_get(), so);
333		if (error == 0)
334			error = solisten(so, uap->backlog);
335	}
336#else
337		error =  solisten(so, uap->backlog);
338#endif /* MAC_SOCKET_SUBSET */
339	else
340		error = EBADF;
341
342	file_drop(uap->s);
343	return (error);
344}
345
346/*
347 * Returns:	fp_getfsock:EBADF	Bad file descriptor
348 *		fp_getfsock:EOPNOTSUPP	...
349 *		xlate => :ENOTSOCK	Socket operation on non-socket
350 *		:EFAULT			Bad address on copyin/copyout
351 *		:EBADF			Bad file descriptor
352 *		:EOPNOTSUPP		Operation not supported on socket
353 *		:EINVAL			Invalid argument
354 *		:EWOULDBLOCK		Operation would block
355 *		:ECONNABORTED		Connection aborted
356 *		:EINTR			Interrupted function
357 *		:EACCES			Mandatory Access Control failure
358 *		falloc_locked:ENFILE	Too many files open in system
 *		falloc_locked:EMFILE	Too many open files
 *		falloc_locked:ENOMEM	Not enough space
361 *		0			Success
362 */
363int
364accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
365    int32_t *retval)
366{
367	struct fileproc *fp;
368	struct sockaddr *sa = NULL;
369	socklen_t namelen;
370	int error;
371	struct socket *head, *so = NULL;
372	lck_mtx_t *mutex_held;
373	int fd = uap->s;
374	int newfd;
375	short fflag;		/* type must match fp->f_flag */
376	int dosocklock = 0;
377
378	*retval = -1;
379
380	AUDIT_ARG(fd, uap->s);
381
382	if (uap->name) {
383		error = copyin(uap->anamelen, (caddr_t)&namelen,
384		    sizeof (socklen_t));
385		if (error)
386			return (error);
387	}
388	error = fp_getfsock(p, fd, &fp, &head);
389	if (error) {
390		if (error == EOPNOTSUPP)
391			error = ENOTSOCK;
392		return (error);
393	}
394	if (head == NULL) {
395		error = EBADF;
396		goto out;
397	}
398#if CONFIG_MACF_SOCKET_SUBSET
399	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
400		goto out;
401#endif /* MAC_SOCKET_SUBSET */
402
403	socket_lock(head, 1);
404
405	if (head->so_proto->pr_getlock != NULL)  {
406		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
407		dosocklock = 1;
408	} else {
409		mutex_held = head->so_proto->pr_domain->dom_mtx;
410		dosocklock = 0;
411	}
412
413	if ((head->so_options & SO_ACCEPTCONN) == 0) {
414		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
415			error = EOPNOTSUPP;
416		} else {
417			/* POSIX: The socket is not accepting connections */
418			error = EINVAL;
419		}
420		socket_unlock(head, 1);
421		goto out;
422	}
423	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
424		socket_unlock(head, 1);
425		error = EWOULDBLOCK;
426		goto out;
427	}
428	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
429		if (head->so_state & SS_CANTRCVMORE) {
430			head->so_error = ECONNABORTED;
431			break;
432		}
433		if (head->so_usecount < 1)
434			panic("accept: head=%p refcount=%d\n", head,
435			    head->so_usecount);
436		error = msleep((caddr_t)&head->so_timeo, mutex_held,
437		    PSOCK | PCATCH, "accept", 0);
438		if (head->so_usecount < 1)
439			panic("accept: 2 head=%p refcount=%d\n", head,
440			    head->so_usecount);
441		if ((head->so_state & SS_DRAINING)) {
442			error = ECONNABORTED;
443		}
444		if (error) {
445			socket_unlock(head, 1);
446			goto out;
447		}
448	}
449	if (head->so_error) {
450		error = head->so_error;
451		head->so_error = 0;
452		socket_unlock(head, 1);
453		goto out;
454	}
455
456
457	/*
458	 * At this point we know that there is at least one connection
459	 * ready to be accepted. Remove it from the queue prior to
460	 * allocating the file descriptor for it since falloc() may
461	 * block allowing another process to accept the connection
462	 * instead.
463	 */
464	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
465	so = TAILQ_FIRST(&head->so_comp);
466	TAILQ_REMOVE(&head->so_comp, so, so_list);
467	head->so_qlen--;
468	/* unlock head to avoid deadlock with select, keep a ref on head */
469	socket_unlock(head, 0);
470
471#if CONFIG_MACF_SOCKET_SUBSET
472	/*
473	 * Pass the pre-accepted socket to the MAC framework. This is
474	 * cheaper than allocating a file descriptor for the socket,
475	 * calling the protocol accept callback, and possibly freeing
476	 * the file descriptor should the MAC check fails.
477	 */
478	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
479		socket_lock(so, 1);
480		so->so_state &= ~(SS_NOFDREF | SS_COMP);
481		so->so_head = NULL;
482		socket_unlock(so, 1);
483		soclose(so);
484		/* Drop reference on listening socket */
485		sodereference(head);
486		goto out;
487	}
488#endif /* MAC_SOCKET_SUBSET */
489
490	/*
491	 * Pass the pre-accepted socket to any interested socket filter(s).
492	 * Upon failure, the socket would have been closed by the callee.
493	 */
494	if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
495		/* Drop reference on listening socket */
496		sodereference(head);
497		/* Propagate socket filter's error code to the caller */
498		goto out;
499	}
500
501	fflag = fp->f_flag;
502	error = falloc(p, &fp, &newfd, vfs_context_current());
503	if (error) {
504		/*
505		 * Probably ran out of file descriptors.
506		 *
507		 * <rdar://problem/8554930>
508		 * Don't put this back on the socket like we used to, that
509		 * just causes the client to spin. Drop the socket.
510		 */
511		socket_lock(so, 1);
512		so->so_state &= ~(SS_NOFDREF | SS_COMP);
513		so->so_head = NULL;
514		socket_unlock(so, 1);
515		soclose(so);
516		sodereference(head);
517		goto out;
518	}
519	*retval = newfd;
520	fp->f_flag = fflag;
521	fp->f_ops = &socketops;
522	fp->f_data = (caddr_t)so;
523	socket_lock(head, 0);
524	if (dosocklock)
525		socket_lock(so, 1);
526	so->so_state &= ~SS_COMP;
527	so->so_head = NULL;
528	(void) soacceptlock(so, &sa, 0);
529	socket_unlock(head, 1);
530	if (sa == NULL) {
531		namelen = 0;
532		if (uap->name)
533			goto gotnoname;
534		error = 0;
535		goto releasefd;
536	}
537	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
538
539	if (uap->name) {
540		socklen_t	sa_len;
541
542		/* save sa_len before it is destroyed */
543		sa_len = sa->sa_len;
544		namelen = MIN(namelen, sa_len);
545		error = copyout(sa, uap->name, namelen);
546		if (!error)
547			/* return the actual, untruncated address length */
548			namelen = sa_len;
549gotnoname:
550		error = copyout((caddr_t)&namelen, uap->anamelen,
551		    sizeof (socklen_t));
552	}
553	FREE(sa, M_SONAME);
554
555releasefd:
556	/*
557	 * If the socket has been marked as inactive by sosetdefunct(),
558	 * disallow further operations on it.
559	 */
560	if (so->so_flags & SOF_DEFUNCT) {
561		sodefunct(current_proc(), so,
562		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
563	}
564
565	if (dosocklock)
566		socket_unlock(so, 1);
567
568	proc_fdlock(p);
569	procfdtbl_releasefd(p, newfd, NULL);
570	fp_drop(p, newfd, fp, 1);
571	proc_fdunlock(p);
572
573out:
574	file_drop(fd);
575	return (error);
576}
577
578int
579accept(struct proc *p, struct accept_args *uap, int32_t *retval)
580{
581	__pthread_testcancel(1);
582	return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
583}
584
585/*
586 * Returns:	0			Success
587 *		EBADF			Bad file descriptor
588 *		EALREADY		Connection already in progress
589 *		EINPROGRESS		Operation in progress
590 *		ECONNABORTED		Connection aborted
591 *		EINTR			Interrupted function
592 *		EACCES			Mandatory Access Control failure
593 *	file_socket:ENOTSOCK
594 *	file_socket:EBADF
595 *	getsockaddr:ENAMETOOLONG	Filename too long
596 *	getsockaddr:EINVAL		Invalid argument
597 *	getsockaddr:ENOMEM		Not enough space
598 *	getsockaddr:EFAULT		Bad address
599 *	soconnectlock:EOPNOTSUPP
600 *	soconnectlock:EISCONN
601 *	soconnectlock:???		[depends on protocol, filters]
602 *	msleep:EINTR
603 *
604 * Imputed:	so_error		error may be set from so_error, which
605 *					may have been set by soconnectlock.
606 */
607/* ARGSUSED */
608int
609connect(struct proc *p, struct connect_args *uap, int32_t *retval)
610{
611	__pthread_testcancel(1);
612	return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
613}
614
615int
616connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
617{
618#pragma unused(p, retval)
619	struct socket *so;
620	struct sockaddr_storage ss;
621	struct sockaddr *sa = NULL;
622	int error;
623	int fd = uap->s;
624	boolean_t dgram;
625
626	AUDIT_ARG(fd, uap->s);
627	error = file_socket(fd, &so);
628	if (error != 0)
629		return (error);
630	if (so == NULL) {
631		error = EBADF;
632		goto out;
633	}
634
635	/*
636	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
637	 * if this is a datagram socket; translate for other types.
638	 */
639	dgram = (so->so_type == SOCK_DGRAM);
640
641	/* Get socket address now before we obtain socket lock */
642	if (uap->namelen > sizeof (ss)) {
643		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
644	} else {
645		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
646		if (error == 0)
647			sa = (struct sockaddr *)&ss;
648	}
649	if (error != 0)
650		goto out;
651
652	error = connectit(so, sa);
653
654	if (sa != NULL && sa != SA(&ss))
655		FREE(sa, M_SONAME);
656	if (error == ERESTART)
657		error = EINTR;
658out:
659	file_drop(fd);
660	return (error);
661}
662
663static int
664connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
665{
666#pragma unused(p, retval)
667	struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
668	struct socket *so;
669	int error, fd = uap->s;
670	boolean_t dgram;
671	connid_t cid = CONNID_ANY;
672
673	AUDIT_ARG(fd, uap->s);
674	error = file_socket(fd, &so);
675	if (error != 0)
676		return (error);
677	if (so == NULL) {
678		error = EBADF;
679		goto out;
680	}
681
682	/*
683	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
684	 * if this is a datagram socket; translate for other types.
685	 */
686	dgram = (so->so_type == SOCK_DGRAM);
687
688	/*
689	 * Get socket address(es) now before we obtain socket lock; use
690	 * sockaddr_list for src address for convenience, if present,
691	 * even though it won't hold more than one.
692	 */
693	if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so,
694	    &src_sl, uap->src, uap->srclen, dgram)) != 0)
695		goto out;
696
697	error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram);
698	if (error != 0)
699		goto out;
700
701	VERIFY(dst_sl != NULL &&
702	    !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
703
704	error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope,
705	    uap->aid, &cid);
706	if (error == ERESTART)
707		error = EINTR;
708
709	if (uap->cid != USER_ADDR_NULL)
710		(void) copyout(&cid, uap->cid, sizeof (cid));
711
712out:
713	file_drop(fd);
714	if (src_sl != NULL)
715		sockaddrlist_free(src_sl);
716	if (dst_sl != NULL)
717		sockaddrlist_free(dst_sl);
718	return (error);
719}
720
/*
 * connectx: cancellable front end for connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return (error);
}
731
732static int
733connectit(struct socket *so, struct sockaddr *sa)
734{
735	int error;
736
737	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
738#if CONFIG_MACF_SOCKET_SUBSET
739	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
740		return (error);
741#endif /* MAC_SOCKET_SUBSET */
742
743	socket_lock(so, 1);
744	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
745		error = EALREADY;
746		goto out;
747	}
748	error = soconnectlock(so, sa, 0);
749	if (error != 0) {
750		so->so_state &= ~SS_ISCONNECTING;
751		goto out;
752	}
753	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
754		error = EINPROGRESS;
755		goto out;
756	}
757	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
758		lck_mtx_t *mutex_held;
759
760		if (so->so_proto->pr_getlock != NULL)
761			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
762		else
763			mutex_held = so->so_proto->pr_domain->dom_mtx;
764		error = msleep((caddr_t)&so->so_timeo, mutex_held,
765		    PSOCK | PCATCH, __func__, 0);
766		if (so->so_state & SS_DRAINING) {
767			error = ECONNABORTED;
768		}
769		if (error != 0)
770			break;
771	}
772	if (error == 0) {
773		error = so->so_error;
774		so->so_error = 0;
775	}
776out:
777	socket_unlock(so, 1);
778	return (error);
779}
780
781static int
782connectitx(struct socket *so, struct sockaddr_list **src_sl,
783    struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
784    associd_t aid, connid_t *pcid)
785{
786	struct sockaddr_entry *se;
787	int error;
788
789	VERIFY(dst_sl != NULL && *dst_sl != NULL);
790
791	TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) {
792		VERIFY(se->se_addr != NULL);
793		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
794		    se->se_addr);
795#if CONFIG_MACF_SOCKET_SUBSET
796		if ((error = mac_socket_check_connect(kauth_cred_get(),
797		    so, se->se_addr)) != 0)
798			return (error);
799#endif /* MAC_SOCKET_SUBSET */
800	}
801
802	socket_lock(so, 1);
803	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
804		error = EALREADY;
805		goto out;
806	}
807	error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
808	    aid, pcid, 0, NULL, 0);
809	if (error != 0) {
810		so->so_state &= ~SS_ISCONNECTING;
811		goto out;
812	}
813	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
814		error = EINPROGRESS;
815		goto out;
816	}
817	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
818		lck_mtx_t *mutex_held;
819
820		if (so->so_proto->pr_getlock != NULL)
821			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
822		else
823			mutex_held = so->so_proto->pr_domain->dom_mtx;
824		error = msleep((caddr_t)&so->so_timeo, mutex_held,
825		    PSOCK | PCATCH, __func__, 0);
826		if (so->so_state & SS_DRAINING) {
827			error = ECONNABORTED;
828		}
829		if (error != 0)
830			break;
831	}
832	if (error == 0) {
833		error = so->so_error;
834		so->so_error = 0;
835	}
836out:
837	socket_unlock(so, 1);
838	return (error);
839}
840
/*
 * peeloff: cancellable front end for peeloff_nocancel().
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = peeloff_nocancel(p, uap, retval);
	return (error);
}
851
852static int
853peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
854{
855	struct fileproc *fp;
856	struct socket *mp_so, *so = NULL;
857	int newfd, fd = uap->s;
858	short fflag;		/* type must match fp->f_flag */
859	int error;
860
861	*retval = -1;
862
863	error = fp_getfsock(p, fd, &fp, &mp_so);
864	if (error != 0) {
865		if (error == EOPNOTSUPP)
866			error = ENOTSOCK;
867		goto out_nofile;
868	}
869	if (mp_so == NULL) {
870		error = EBADF;
871		goto out;
872	}
873
874	socket_lock(mp_so, 1);
875	error = sopeelofflocked(mp_so, uap->aid, &so);
876	if (error != 0) {
877		socket_unlock(mp_so, 1);
878		goto out;
879	}
880	VERIFY(so != NULL);
881	socket_unlock(mp_so, 0);		/* keep ref on mp_so for us */
882
883	fflag = fp->f_flag;
884	error = falloc(p, &fp, &newfd, vfs_context_current());
885	if (error != 0) {
886		/* drop this socket (probably ran out of file descriptors) */
887		soclose(so);
888		sodereference(mp_so);		/* our mp_so ref */
889		goto out;
890	}
891
892	fp->f_flag = fflag;
893	fp->f_ops = &socketops;
894	fp->f_data = (caddr_t)so;
895
896	/*
897	 * If the socket has been marked as inactive by sosetdefunct(),
898	 * disallow further operations on it.
899	 */
900	if (so->so_flags & SOF_DEFUNCT) {
901		sodefunct(current_proc(), so,
902		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
903	}
904
905	proc_fdlock(p);
906	procfdtbl_releasefd(p, newfd, NULL);
907	fp_drop(p, newfd, fp, 1);
908	proc_fdunlock(p);
909
910	sodereference(mp_so);			/* our mp_so ref */
911	*retval = newfd;
912
913out:
914	file_drop(fd);
915
916out_nofile:
917	return (error);
918}
919
/*
 * disconnectx: cancellable front end for disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return (error);
}
930
931static int
932disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
933{
934#pragma unused(p, retval)
935	struct socket *so;
936	int fd = uap->s;
937	int error;
938
939	error = file_socket(fd, &so);
940	if (error != 0)
941		return (error);
942	if (so == NULL) {
943		error = EBADF;
944		goto out;
945	}
946
947	error = sodisconnectx(so, uap->aid, uap->cid);
948out:
949	file_drop(fd);
950	return (error);
951}
952
953/*
954 * Returns:	0			Success
955 *	socreate:EAFNOSUPPORT
956 *	socreate:EPROTOTYPE
957 *	socreate:EPROTONOSUPPORT
958 *	socreate:ENOBUFS
959 *	socreate:ENOMEM
960 *	socreate:EISCONN
961 *	socreate:???			[other protocol families, IPSEC]
962 *	falloc:ENFILE
963 *	falloc:EMFILE
964 *	falloc:ENOMEM
965 *	copyout:EFAULT
966 *	soconnect2:EINVAL
967 *	soconnect2:EPROTOTYPE
 *	soconnect2:???			[other protocol families]
969 */
970int
971socketpair(struct proc *p, struct socketpair_args *uap,
972    __unused int32_t *retval)
973{
974	struct fileproc *fp1, *fp2;
975	struct socket *so1, *so2;
976	int fd, error, sv[2];
977
978	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
979	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
980	if (error)
981		return (error);
982	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
983	if (error)
984		goto free1;
985
986	error = falloc(p, &fp1, &fd, vfs_context_current());
987	if (error) {
988		goto free2;
989	}
990	fp1->f_flag = FREAD|FWRITE;
991	fp1->f_ops = &socketops;
992	fp1->f_data = (caddr_t)so1;
993	sv[0] = fd;
994
995	error = falloc(p, &fp2, &fd, vfs_context_current());
996	if (error) {
997		goto free3;
998	}
999	fp2->f_flag = FREAD|FWRITE;
1000	fp2->f_ops = &socketops;
1001	fp2->f_data = (caddr_t)so2;
1002	sv[1] = fd;
1003
1004	error = soconnect2(so1, so2);
1005	if (error) {
1006		goto free4;
1007	}
1008	if (uap->type == SOCK_DGRAM) {
1009		/*
1010		 * Datagram socket connection is asymmetric.
1011		 */
1012		error = soconnect2(so2, so1);
1013		if (error) {
1014			goto free4;
1015		}
1016	}
1017
1018	if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1019		goto free4;
1020
1021	proc_fdlock(p);
1022	procfdtbl_releasefd(p, sv[0], NULL);
1023	procfdtbl_releasefd(p, sv[1], NULL);
1024	fp_drop(p, sv[0], fp1, 1);
1025	fp_drop(p, sv[1], fp2, 1);
1026	proc_fdunlock(p);
1027
1028	return (0);
1029free4:
1030	fp_free(p, sv[1], fp2);
1031free3:
1032	fp_free(p, sv[0], fp1);
1033free2:
1034	(void) soclose(so2);
1035free1:
1036	(void) soclose(so1);
1037	return (error);
1038}
1039
1040/*
1041 * Returns:	0			Success
1042 *		EINVAL
1043 *		ENOBUFS
1044 *		EBADF
1045 *		EPIPE
1046 *		EACCES			Mandatory Access Control failure
1047 *	file_socket:ENOTSOCK
1048 *	file_socket:EBADF
1049 *	getsockaddr:ENAMETOOLONG	Filename too long
1050 *	getsockaddr:EINVAL		Invalid argument
1051 *	getsockaddr:ENOMEM		Not enough space
1052 *	getsockaddr:EFAULT		Bad address
1053 *	<pru_sosend>:EACCES[TCP]
1054 *	<pru_sosend>:EADDRINUSE[TCP]
1055 *	<pru_sosend>:EADDRNOTAVAIL[TCP]
1056 *	<pru_sosend>:EAFNOSUPPORT[TCP]
1057 *	<pru_sosend>:EAGAIN[TCP]
1058 *	<pru_sosend>:EBADF
1059 *	<pru_sosend>:ECONNRESET[TCP]
1060 *	<pru_sosend>:EFAULT
1061 *	<pru_sosend>:EHOSTUNREACH[TCP]
1062 *	<pru_sosend>:EINTR
1063 *	<pru_sosend>:EINVAL
1064 *	<pru_sosend>:EISCONN[AF_INET]
1065 *	<pru_sosend>:EMSGSIZE[TCP]
1066 *	<pru_sosend>:ENETDOWN[TCP]
1067 *	<pru_sosend>:ENETUNREACH[TCP]
1068 *	<pru_sosend>:ENOBUFS
1069 *	<pru_sosend>:ENOMEM[TCP]
1070 *	<pru_sosend>:ENOTCONN[AF_INET]
1071 *	<pru_sosend>:EOPNOTSUPP
1072 *	<pru_sosend>:EPERM[TCP]
1073 *	<pru_sosend>:EPIPE
1074 *	<pru_sosend>:EWOULDBLOCK
1075 *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1076 *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
1077 *	<pru_sosend>:???		[value from so_error]
1078 *	sockargs:???
1079 */
1080static int
1081sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1082    int flags, int32_t *retval)
1083{
1084	struct mbuf *control = NULL;
1085	struct sockaddr_storage ss;
1086	struct sockaddr *to = NULL;
1087	boolean_t want_free = TRUE;
1088	int error;
1089	struct socket *so;
1090	user_ssize_t len;
1091
1092	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1093
1094	error = file_socket(s, &so);
1095	if (error) {
1096		KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1097		return (error);
1098	}
1099	if (so == NULL) {
1100		error = EBADF;
1101		goto out;
1102	}
1103	if (mp->msg_name != USER_ADDR_NULL) {
1104		if (mp->msg_namelen > sizeof (ss)) {
1105			error = getsockaddr(so, &to, mp->msg_name,
1106			    mp->msg_namelen, TRUE);
1107		} else {
1108			error = getsockaddr_s(so, &ss, mp->msg_name,
1109			    mp->msg_namelen, TRUE);
1110			if (error == 0) {
1111				to = (struct sockaddr *)&ss;
1112				want_free = FALSE;
1113			}
1114		}
1115		if (error != 0)
1116			goto out;
1117		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1118	}
1119	if (mp->msg_control != USER_ADDR_NULL) {
1120		if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1121			error = EINVAL;
1122			goto bad;
1123		}
1124		error = sockargs(&control, mp->msg_control,
1125		    mp->msg_controllen, MT_CONTROL);
1126		if (error != 0)
1127			goto bad;
1128	}
1129
1130#if CONFIG_MACF_SOCKET_SUBSET
1131	/*
1132	 * We check the state without holding the socket lock;
1133	 * if a race condition occurs, it would simply result
1134	 * in an extra call to the MAC check function.
1135	 */
1136	if ( to != NULL &&
1137	    !(so->so_state & SS_DEFUNCT) &&
1138	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1139		goto bad;
1140#endif /* MAC_SOCKET_SUBSET */
1141
1142	len = uio_resid(uiop);
1143	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1144		control, flags);
1145	if (error != 0) {
1146		if (uio_resid(uiop) != len && (error == ERESTART ||
1147		    error == EINTR || error == EWOULDBLOCK))
1148			error = 0;
1149		/* Generation of SIGPIPE can be controlled per socket */
1150		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1151			psignal(p, SIGPIPE);
1152	}
1153	if (error == 0)
1154		*retval = (int)(len - uio_resid(uiop));
1155bad:
1156	if (to != NULL && want_free)
1157		FREE(to, M_SONAME);
1158out:
1159	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1160	file_drop(s);
1161	return (error);
1162}
1163
1164/*
1165 * Returns:	0			Success
1166 *		ENOMEM
1167 *	sendit:???			[see sendit definition in this file]
1168 *	write:???			[4056224: applicable for pipes]
1169 */
1170int
1171sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1172{
1173	__pthread_testcancel(1);
1174	return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
1175}
1176
1177int
1178sendto_nocancel(struct proc *p,
1179		struct sendto_nocancel_args *uap,
1180		int32_t *retval)
1181{
1182	struct user_msghdr msg;
1183	int error;
1184	uio_t auio = NULL;
1185
1186	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1187	AUDIT_ARG(fd, uap->s);
1188
1189	auio = uio_create(1, 0,
1190	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1191	    UIO_WRITE);
1192	if (auio == NULL) {
1193		return (ENOMEM);
1194	}
1195	uio_addiov(auio, uap->buf, uap->len);
1196
1197	msg.msg_name = uap->to;
1198	msg.msg_namelen = uap->tolen;
1199	/* no need to set up msg_iov.  sendit uses uio_t we send it */
1200	msg.msg_iov = 0;
1201	msg.msg_iovlen = 0;
1202	msg.msg_control = 0;
1203	msg.msg_flags = 0;
1204
1205	error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
1206
1207	if (auio != NULL) {
1208		uio_free(auio);
1209	}
1210
1211	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1212
1213	return (error);
1214}
1215
1216/*
1217 * Returns:	0			Success
1218 *		ENOBUFS
1219 *	copyin:EFAULT
1220 *	sendit:???			[see sendit definition in this file]
1221 */
1222int
1223sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1224{
1225	__pthread_testcancel(1);
1226	return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
1227}
1228
1229int
1230sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
1231{
1232	struct user32_msghdr msg32;
1233	struct user64_msghdr msg64;
1234	struct user_msghdr user_msg;
1235	caddr_t msghdrp;
1236	int	size_of_msghdr;
1237	int error;
1238	uio_t auio = NULL;
1239	struct user_iovec *iovp;
1240
1241	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1242	AUDIT_ARG(fd, uap->s);
1243	if (IS_64BIT_PROCESS(p)) {
1244		msghdrp = (caddr_t)&msg64;
1245		size_of_msghdr = sizeof (msg64);
1246	} else {
1247		msghdrp = (caddr_t)&msg32;
1248		size_of_msghdr = sizeof (msg32);
1249	}
1250	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1251	if (error) {
1252		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1253		return (error);
1254	}
1255
1256	if (IS_64BIT_PROCESS(p)) {
1257		user_msg.msg_flags = msg64.msg_flags;
1258		user_msg.msg_controllen = msg64.msg_controllen;
1259		user_msg.msg_control = msg64.msg_control;
1260		user_msg.msg_iovlen = msg64.msg_iovlen;
1261		user_msg.msg_iov = msg64.msg_iov;
1262		user_msg.msg_namelen = msg64.msg_namelen;
1263		user_msg.msg_name = msg64.msg_name;
1264	} else {
1265		user_msg.msg_flags = msg32.msg_flags;
1266		user_msg.msg_controllen = msg32.msg_controllen;
1267		user_msg.msg_control = msg32.msg_control;
1268		user_msg.msg_iovlen = msg32.msg_iovlen;
1269		user_msg.msg_iov = msg32.msg_iov;
1270		user_msg.msg_namelen = msg32.msg_namelen;
1271		user_msg.msg_name = msg32.msg_name;
1272	}
1273
1274	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1275		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1276		    0, 0, 0, 0);
1277		return (EMSGSIZE);
1278	}
1279
1280	/* allocate a uio large enough to hold the number of iovecs passed */
1281	auio = uio_create(user_msg.msg_iovlen, 0,
1282	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1283	    UIO_WRITE);
1284	if (auio == NULL) {
1285		error = ENOBUFS;
1286		goto done;
1287	}
1288
1289	if (user_msg.msg_iovlen) {
1290		/*
1291		 * get location of iovecs within the uio.
1292		 * then copyin the iovecs from user space.
1293		 */
1294		iovp = uio_iovsaddr(auio);
1295		if (iovp == NULL) {
1296			error = ENOBUFS;
1297			goto done;
1298		}
1299		error = copyin_user_iovec_array(user_msg.msg_iov,
1300			IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1301			user_msg.msg_iovlen, iovp);
1302		if (error)
1303			goto done;
1304		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1305
1306		/* finish setup of uio_t */
1307		error = uio_calculateresid(auio);
1308		if (error) {
1309			goto done;
1310		}
1311	} else {
1312		user_msg.msg_iov = 0;
1313	}
1314
1315	/* msg_flags is ignored for send */
1316	user_msg.msg_flags = 0;
1317
1318	error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1319done:
1320	if (auio != NULL) {
1321		uio_free(auio);
1322	}
1323	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1324
1325	return (error);
1326}
1327
1328/*
1329 * Returns:	0			Success
1330 *		ENOTSOCK
1331 *		EINVAL
1332 *		EBADF
1333 *		EACCES			Mandatory Access Control failure
1334 *	copyout:EFAULT
1335 *	fp_lookup:EBADF
1336 *	<pru_soreceive>:ENOBUFS
1337 *	<pru_soreceive>:ENOTCONN
1338 *	<pru_soreceive>:EWOULDBLOCK
1339 *	<pru_soreceive>:EFAULT
1340 *	<pru_soreceive>:EINTR
1341 *	<pru_soreceive>:EBADF
1342 *	<pru_soreceive>:EINVAL
1343 *	<pru_soreceive>:EMSGSIZE
1344 *	<pru_soreceive>:???
1345 *
1346 * Notes:	Additional return values from calls through <pru_soreceive>
1347 *		depend on protocols other than TCP or AF_UNIX, which are
1348 *		documented above.
1349 */
1350static int
1351recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1352    user_addr_t namelenp, int32_t *retval)
1353{
1354	ssize_t len;
1355	int error;
1356	struct mbuf *m, *control = 0;
1357	user_addr_t ctlbuf;
1358	struct socket *so;
1359	struct sockaddr *fromsa = 0;
1360	struct fileproc *fp;
1361
1362	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1363	proc_fdlock(p);
1364	if ((error = fp_lookup(p, s, &fp, 1))) {
1365		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1366		proc_fdunlock(p);
1367		return (error);
1368	}
1369	if (fp->f_type != DTYPE_SOCKET) {
1370		fp_drop(p, s, fp, 1);
1371		proc_fdunlock(p);
1372		return (ENOTSOCK);
1373	}
1374
1375	so = (struct socket *)fp->f_data;
1376	if (so == NULL) {
1377		fp_drop(p, s, fp, 1);
1378		proc_fdunlock(p);
1379		return (EBADF);
1380	}
1381
1382	proc_fdunlock(p);
1383
1384#if CONFIG_MACF_SOCKET_SUBSET
1385	/*
1386	 * We check the state without holding the socket lock;
1387	 * if a race condition occurs, it would simply result
1388	 * in an extra call to the MAC check function.
1389	 */
1390	if (!(so->so_state & SS_DEFUNCT) &&
1391	    !(so->so_state & SS_ISCONNECTED) &&
1392	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1393	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1394		goto out1;
1395#endif /* MAC_SOCKET_SUBSET */
1396	if (uio_resid(uiop) < 0) {
1397		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1398		error = EINVAL;
1399		goto out1;
1400	}
1401
1402	len = uio_resid(uiop);
1403	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1404	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1405	    &mp->msg_flags);
1406	if (fromsa)
1407		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1408		    fromsa);
1409	if (error) {
1410		if (uio_resid(uiop) != len && (error == ERESTART ||
1411		    error == EINTR || error == EWOULDBLOCK))
1412			error = 0;
1413	}
1414
1415	if (error)
1416		goto out;
1417
1418	*retval = len - uio_resid(uiop);
1419	if (mp->msg_name) {
1420		socklen_t sa_len = 0;
1421
1422		len = mp->msg_namelen;
1423		if (len <= 0 || fromsa == 0) {
1424			len = 0;
1425		} else {
1426#ifndef MIN
1427#define	MIN(a, b) ((a) > (b) ? (b) : (a))
1428#endif
1429			sa_len = fromsa->sa_len;
1430			len = MIN((unsigned int)len, sa_len);
1431			error = copyout(fromsa, mp->msg_name, (unsigned)len);
1432			if (error)
1433				goto out;
1434		}
1435		mp->msg_namelen = sa_len;
1436		/* return the actual, untruncated address length */
1437		if (namelenp &&
1438		    (error = copyout((caddr_t)&sa_len, namelenp,
1439		    sizeof (int)))) {
1440			goto out;
1441		}
1442	}
1443	if (mp->msg_control) {
1444		len = mp->msg_controllen;
1445		m = control;
1446		mp->msg_controllen = 0;
1447		ctlbuf = mp->msg_control;
1448
1449		while (m && len > 0) {
1450			unsigned int tocopy;
1451			struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1452			int cp_size = CMSG_ALIGN(cp->cmsg_len);
1453			int buflen = m->m_len;
1454
1455			while (buflen > 0 && len > 0) {
1456
1457				/*
1458				 SCM_TIMESTAMP hack because  struct timeval has a
1459				 * different size for 32 bits and 64 bits processes
1460				 */
1461				if (cp->cmsg_level == SOL_SOCKET &&  cp->cmsg_type == SCM_TIMESTAMP) {
1462					unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1463					struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1464					int tmp_space;
1465					struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1466
1467					tmp_cp->cmsg_level = SOL_SOCKET;
1468					tmp_cp->cmsg_type = SCM_TIMESTAMP;
1469
1470					if (proc_is64bit(p)) {
1471						struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1472
1473						tv64->tv_sec = tv->tv_sec;
1474						tv64->tv_usec = tv->tv_usec;
1475
1476						tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1477						tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1478					} else {
1479						struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1480
1481						tv32->tv_sec = tv->tv_sec;
1482						tv32->tv_usec = tv->tv_usec;
1483
1484						tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1485						tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1486					}
1487					if (len >= tmp_space) {
1488						tocopy = tmp_space;
1489					} else {
1490						mp->msg_flags |= MSG_CTRUNC;
1491						tocopy = len;
1492					}
1493					error = copyout(tmp_buffer, ctlbuf, tocopy);
1494					if (error)
1495						goto out;
1496
1497				} else {
1498
1499					if (cp_size > buflen) {
1500						panic("cp_size > buflen, something wrong with alignment!");
1501					}
1502
1503					if (len >= cp_size) {
1504						tocopy = cp_size;
1505					} else {
1506						mp->msg_flags |= MSG_CTRUNC;
1507						tocopy = len;
1508					}
1509
1510					error = copyout((caddr_t) cp, ctlbuf,
1511									tocopy);
1512					if (error)
1513						goto out;
1514				}
1515
1516
1517				ctlbuf += tocopy;
1518				len -= tocopy;
1519
1520				buflen -= cp_size;
1521				cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size);
1522				cp_size = CMSG_ALIGN(cp->cmsg_len);
1523			}
1524
1525			m = m->m_next;
1526		}
1527		mp->msg_controllen = ctlbuf - mp->msg_control;
1528	}
1529out:
1530	if (fromsa)
1531		FREE(fromsa, M_SONAME);
1532	if (control)
1533		m_freem(control);
1534	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1535out1:
1536	fp_drop(p, s, fp, 0);
1537	return (error);
1538}
1539
1540/*
1541 * Returns:	0			Success
1542 *		ENOMEM
1543 *	copyin:EFAULT
1544 *	recvit:???
1545 *	read:???			[4056224: applicable for pipes]
1546 *
1547 * Notes:	The read entry point is only called as part of support for
 *		binary backward compatibility; new code should use read
1549 *		instead of recv or recvfrom when attempting to read data
1550 *		from pipes.
1551 *
1552 *		For full documentation of the return codes from recvit, see
1553 *		the block header for the recvit function.
1554 */
1555int
1556recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1557{
1558	__pthread_testcancel(1);
1559	return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1560}
1561
1562int
1563recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
1564{
1565	struct user_msghdr msg;
1566	int error;
1567	uio_t auio = NULL;
1568
1569	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
1570	AUDIT_ARG(fd, uap->s);
1571
1572	if (uap->fromlenaddr) {
1573		error = copyin(uap->fromlenaddr,
1574		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1575		if (error)
1576			return (error);
1577	} else {
1578		msg.msg_namelen = 0;
1579	}
1580	msg.msg_name = uap->from;
1581	auio = uio_create(1, 0,
1582	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1583	    UIO_READ);
1584	if (auio == NULL) {
1585		return (ENOMEM);
1586	}
1587
1588	uio_addiov(auio, uap->buf, uap->len);
1589	/* no need to set up msg_iov.  recvit uses uio_t we send it */
1590	msg.msg_iov = 0;
1591	msg.msg_iovlen = 0;
1592	msg.msg_control = 0;
1593	msg.msg_controllen = 0;
1594	msg.msg_flags = uap->flags;
1595	error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1596	if (auio != NULL) {
1597		uio_free(auio);
1598	}
1599
1600	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1601
1602	return (error);
1603}
1604
1605/*
1606 * Returns:	0			Success
1607 *		EMSGSIZE
1608 *		ENOMEM
1609 *	copyin:EFAULT
1610 *	copyout:EFAULT
1611 *	recvit:???
1612 *
1613 * Notes:	For full documentation of the return codes from recvit, see
1614 *		the block header for the recvit function.
1615 */
1616int
1617recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1618{
1619	__pthread_testcancel(1);
1620	return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1621}
1622
1623int
1624recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
1625{
1626	struct user32_msghdr msg32;
1627	struct user64_msghdr msg64;
1628	struct user_msghdr user_msg;
1629	caddr_t msghdrp;
1630	int	size_of_msghdr;
1631	user_addr_t uiov;
1632	int error;
1633	uio_t auio = NULL;
1634	struct user_iovec *iovp;
1635
1636	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1637	AUDIT_ARG(fd, uap->s);
1638	if (IS_64BIT_PROCESS(p)) {
1639		msghdrp = (caddr_t)&msg64;
1640		size_of_msghdr = sizeof (msg64);
1641	} else {
1642		msghdrp = (caddr_t)&msg32;
1643		size_of_msghdr = sizeof (msg32);
1644	}
1645	error = copyin(uap->msg, msghdrp, size_of_msghdr);
1646	if (error) {
1647		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1648		return (error);
1649	}
1650
1651	/* only need to copy if user process is not 64-bit */
1652	if (IS_64BIT_PROCESS(p)) {
1653		user_msg.msg_flags = msg64.msg_flags;
1654		user_msg.msg_controllen = msg64.msg_controllen;
1655		user_msg.msg_control = msg64.msg_control;
1656		user_msg.msg_iovlen = msg64.msg_iovlen;
1657		user_msg.msg_iov = msg64.msg_iov;
1658		user_msg.msg_namelen = msg64.msg_namelen;
1659		user_msg.msg_name = msg64.msg_name;
1660	} else {
1661		user_msg.msg_flags = msg32.msg_flags;
1662		user_msg.msg_controllen = msg32.msg_controllen;
1663		user_msg.msg_control = msg32.msg_control;
1664		user_msg.msg_iovlen = msg32.msg_iovlen;
1665		user_msg.msg_iov = msg32.msg_iov;
1666		user_msg.msg_namelen = msg32.msg_namelen;
1667		user_msg.msg_name = msg32.msg_name;
1668	}
1669
1670	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1671		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1672		    0, 0, 0, 0);
1673		return (EMSGSIZE);
1674	}
1675
1676	user_msg.msg_flags = uap->flags;
1677
1678	/* allocate a uio large enough to hold the number of iovecs passed */
1679	auio = uio_create(user_msg.msg_iovlen, 0,
1680	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1681	    UIO_READ);
1682	if (auio == NULL) {
1683		error = ENOMEM;
1684		goto done;
1685	}
1686
1687	/*
1688	 * get location of iovecs within the uio.  then copyin the iovecs from
1689	 * user space.
1690	 */
1691	iovp = uio_iovsaddr(auio);
1692	if (iovp == NULL) {
1693		error = ENOMEM;
1694		goto done;
1695	}
1696	uiov = user_msg.msg_iov;
1697	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1698	error = copyin_user_iovec_array(uiov,
1699		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1700		user_msg.msg_iovlen, iovp);
1701	if (error)
1702		goto done;
1703
1704	/* finish setup of uio_t */
1705	error = uio_calculateresid(auio);
1706	if (error) {
1707		goto done;
1708	}
1709
1710	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1711	if (!error) {
1712		user_msg.msg_iov = uiov;
1713		if (IS_64BIT_PROCESS(p)) {
1714			msg64.msg_flags = user_msg.msg_flags;
1715			msg64.msg_controllen = user_msg.msg_controllen;
1716			msg64.msg_control = user_msg.msg_control;
1717			msg64.msg_iovlen = user_msg.msg_iovlen;
1718			msg64.msg_iov = user_msg.msg_iov;
1719			msg64.msg_namelen = user_msg.msg_namelen;
1720			msg64.msg_name = user_msg.msg_name;
1721		} else {
1722			msg32.msg_flags = user_msg.msg_flags;
1723			msg32.msg_controllen = user_msg.msg_controllen;
1724			msg32.msg_control = user_msg.msg_control;
1725			msg32.msg_iovlen = user_msg.msg_iovlen;
1726			msg32.msg_iov = user_msg.msg_iov;
1727			msg32.msg_namelen = user_msg.msg_namelen;
1728			msg32.msg_name = user_msg.msg_name;
1729		}
1730		error = copyout(msghdrp, uap->msg, size_of_msghdr);
1731	}
1732done:
1733	if (auio != NULL) {
1734		uio_free(auio);
1735	}
1736	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1737	return (error);
1738}
1739
1740/*
1741 * Returns:	0			Success
1742 *		EBADF
1743 *	file_socket:ENOTSOCK
1744 *	file_socket:EBADF
1745 *	soshutdown:EINVAL
1746 *	soshutdown:ENOTCONN
1747 *	soshutdown:EADDRNOTAVAIL[TCP]
1748 *	soshutdown:ENOBUFS[TCP]
1749 *	soshutdown:EMSGSIZE[TCP]
1750 *	soshutdown:EHOSTUNREACH[TCP]
1751 *	soshutdown:ENETUNREACH[TCP]
1752 *	soshutdown:ENETDOWN[TCP]
1753 *	soshutdown:ENOMEM[TCP]
1754 *	soshutdown:EACCES[TCP]
1755 *	soshutdown:EMSGSIZE[TCP]
1756 *	soshutdown:ENOBUFS[TCP]
1757 *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
1758 *	soshutdown:???			[other protocol families]
1759 */
1760/* ARGSUSED */
1761int
1762shutdown(__unused struct proc *p, struct shutdown_args *uap,
1763    __unused int32_t *retval)
1764{
1765	struct socket *so;
1766	int error;
1767
1768	AUDIT_ARG(fd, uap->s);
1769	error = file_socket(uap->s, &so);
1770	if (error)
1771		return (error);
1772	if (so == NULL) {
1773		error = EBADF;
1774		goto out;
1775	}
1776	error =  soshutdown((struct socket *)so, uap->how);
1777out:
1778	file_drop(uap->s);
1779	return (error);
1780}
1781
1782/*
1783 * Returns:	0			Success
1784 *		EFAULT
1785 *		EINVAL
1786 *		EACCES			Mandatory Access Control failure
1787 *	file_socket:ENOTSOCK
1788 *	file_socket:EBADF
1789 *	sosetopt:EINVAL
1790 *	sosetopt:ENOPROTOOPT
1791 *	sosetopt:ENOBUFS
1792 *	sosetopt:EDOM
1793 *	sosetopt:EFAULT
1794 *	sosetopt:EOPNOTSUPP[AF_UNIX]
1795 *	sosetopt:???
1796 */
1797/* ARGSUSED */
1798int
1799setsockopt(struct proc *p, struct setsockopt_args *uap,
1800    __unused int32_t *retval)
1801{
1802	struct socket *so;
1803	struct sockopt sopt;
1804	int error;
1805
1806	AUDIT_ARG(fd, uap->s);
1807	if (uap->val == 0 && uap->valsize != 0)
1808		return (EFAULT);
1809	/* No bounds checking on size (it's unsigned) */
1810
1811	error = file_socket(uap->s, &so);
1812	if (error)
1813		return (error);
1814
1815	sopt.sopt_dir = SOPT_SET;
1816	sopt.sopt_level = uap->level;
1817	sopt.sopt_name = uap->name;
1818	sopt.sopt_val = uap->val;
1819	sopt.sopt_valsize = uap->valsize;
1820	sopt.sopt_p = p;
1821
1822	if (so == NULL) {
1823		error = EINVAL;
1824		goto out;
1825	}
1826#if CONFIG_MACF_SOCKET_SUBSET
1827	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1828	    &sopt)) != 0)
1829		goto out;
1830#endif /* MAC_SOCKET_SUBSET */
1831	error = sosetoptlock(so, &sopt, 1);	/* will lock socket */
1832out:
1833	file_drop(uap->s);
1834	return (error);
1835}
1836
1837
1838
1839/*
1840 * Returns:	0			Success
1841 *		EINVAL
1842 *		EBADF
1843 *		EACCES			Mandatory Access Control failure
1844 *	copyin:EFAULT
1845 *	copyout:EFAULT
1846 *	file_socket:ENOTSOCK
1847 *	file_socket:EBADF
1848 *	sogetopt:???
1849 */
1850int
1851getsockopt(struct proc *p, struct getsockopt_args  *uap,
1852    __unused int32_t *retval)
1853{
1854	int		error;
1855	socklen_t	valsize;
1856	struct sockopt	sopt;
1857	struct socket *so;
1858
1859	error = file_socket(uap->s, &so);
1860	if (error)
1861		return (error);
1862	if (uap->val) {
1863		error = copyin(uap->avalsize, (caddr_t)&valsize,
1864		    sizeof (valsize));
1865		if (error)
1866			goto out;
1867		/* No bounds checking on size (it's unsigned) */
1868	} else {
1869		valsize = 0;
1870	}
1871	sopt.sopt_dir = SOPT_GET;
1872	sopt.sopt_level = uap->level;
1873	sopt.sopt_name = uap->name;
1874	sopt.sopt_val = uap->val;
1875	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1876	sopt.sopt_p = p;
1877
1878	if (so == NULL) {
1879		error = EBADF;
1880		goto out;
1881	}
1882#if CONFIG_MACF_SOCKET_SUBSET
1883	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1884	    &sopt)) != 0)
1885		goto out;
1886#endif /* MAC_SOCKET_SUBSET */
1887	error = sogetoptlock((struct socket *)so, &sopt, 1);	/* will lock */
1888	if (error == 0) {
1889		valsize = sopt.sopt_valsize;
1890		error = copyout((caddr_t)&valsize, uap->avalsize,
1891		    sizeof (valsize));
1892	}
1893out:
1894	file_drop(uap->s);
1895	return (error);
1896}
1897
1898
1899/*
1900 * Get socket name.
1901 *
1902 * Returns:	0			Success
1903 *		EBADF
1904 *	file_socket:ENOTSOCK
1905 *	file_socket:EBADF
1906 *	copyin:EFAULT
1907 *	copyout:EFAULT
1908 *	<pru_sockaddr>:ENOBUFS[TCP]
1909 *	<pru_sockaddr>:ECONNRESET[TCP]
1910 *	<pru_sockaddr>:EINVAL[AF_UNIX]
1911 *	<sf_getsockname>:???
1912 */
1913/* ARGSUSED */
1914int
1915getsockname(__unused struct proc *p, struct getsockname_args *uap,
1916    __unused int32_t *retval)
1917{
1918	struct socket *so;
1919	struct sockaddr *sa;
1920	socklen_t len;
1921	socklen_t sa_len;
1922	int error;
1923
1924	error = file_socket(uap->fdes, &so);
1925	if (error)
1926		return (error);
1927	error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1928	if (error)
1929		goto out;
1930	if (so == NULL) {
1931		error = EBADF;
1932		goto out;
1933	}
1934	sa = 0;
1935	socket_lock(so, 1);
1936	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1937	if (error == 0) {
1938		error = sflt_getsockname(so, &sa);
1939		if (error == EJUSTRETURN)
1940			error = 0;
1941	}
1942	socket_unlock(so, 1);
1943	if (error)
1944		goto bad;
1945	if (sa == 0) {
1946		len = 0;
1947		goto gotnothing;
1948	}
1949
1950	sa_len = sa->sa_len;
1951	len = MIN(len, sa_len);
1952	error = copyout((caddr_t)sa, uap->asa, len);
1953	if (error)
1954		goto bad;
1955	/* return the actual, untruncated address length */
1956	len = sa_len;
1957gotnothing:
1958		error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1959bad:
1960	if (sa)
1961		FREE(sa, M_SONAME);
1962out:
1963	file_drop(uap->fdes);
1964	return (error);
1965}
1966
1967/*
1968 * Get name of peer for connected socket.
1969 *
1970 * Returns:	0			Success
1971 *		EBADF
1972 *		EINVAL
1973 *		ENOTCONN
1974 *	file_socket:ENOTSOCK
1975 *	file_socket:EBADF
1976 *	copyin:EFAULT
1977 *	copyout:EFAULT
1978 *	<pru_peeraddr>:???
1979 *	<sf_getpeername>:???
1980 */
1981/* ARGSUSED */
1982int
1983getpeername(__unused struct proc *p, struct getpeername_args *uap,
1984    __unused int32_t *retval)
1985{
1986	struct socket *so;
1987	struct sockaddr *sa;
1988	socklen_t len;
1989	socklen_t sa_len;
1990	int error;
1991
1992	error = file_socket(uap->fdes, &so);
1993	if (error)
1994		return (error);
1995	if (so == NULL) {
1996		error = EBADF;
1997		goto out;
1998	}
1999
2000	socket_lock(so, 1);
2001
2002	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2003	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2004		/* the socket has been shutdown, no more getpeername's */
2005		socket_unlock(so, 1);
2006		error = EINVAL;
2007		goto out;
2008	}
2009
2010	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2011		socket_unlock(so, 1);
2012		error = ENOTCONN;
2013		goto out;
2014	}
2015	error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
2016	if (error) {
2017		socket_unlock(so, 1);
2018		goto out;
2019	}
2020	sa = 0;
2021	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2022	if (error == 0) {
2023		error = sflt_getpeername(so, &sa);
2024		if (error == EJUSTRETURN)
2025			error = 0;
2026	}
2027	socket_unlock(so, 1);
2028	if (error)
2029		goto bad;
2030	if (sa == 0) {
2031		len = 0;
2032		goto gotnothing;
2033	}
2034	sa_len = sa->sa_len;
2035	len = MIN(len, sa_len);
2036	error = copyout(sa, uap->asa, len);
2037	if (error)
2038		goto bad;
2039	/* return the actual, untruncated address length */
2040	len = sa_len;
2041gotnothing:
2042	error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
2043bad:
2044	if (sa) FREE(sa, M_SONAME);
2045out:
2046	file_drop(uap->fdes);
2047	return (error);
2048}
2049
2050int
2051sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2052{
2053	struct sockaddr *sa;
2054	struct mbuf *m;
2055	int error;
2056
2057	size_t alloc_buflen = (size_t)buflen;
2058
2059	if(alloc_buflen > INT_MAX/2)
2060		return (EINVAL);
2061#ifdef __LP64__
2062	/* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
2063	if(type == MT_CONTROL)
2064		alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
2065#endif
2066	if (alloc_buflen > MLEN) {
2067		if (type == MT_SONAME && alloc_buflen <= 112)
2068			alloc_buflen = MLEN;		/* unix domain compat. hack */
2069		else if (alloc_buflen > MCLBYTES)
2070			return (EINVAL);
2071	}
2072	m = m_get(M_WAIT, type);
2073	if (m == NULL)
2074		return (ENOBUFS);
2075	if (alloc_buflen > MLEN) {
2076		MCLGET(m, M_WAIT);
2077		if ((m->m_flags & M_EXT) == 0) {
2078			m_free(m);
2079			return (ENOBUFS);
2080		}
2081	}
2082	/* K64: We still copyin the original buflen because it gets expanded later
2083	 * and we lie about the size of the mbuf because it only affects unp_* functions
2084	 */
2085	m->m_len = buflen;
2086	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2087	if (error) {
2088		(void) m_free(m);
2089	} else {
2090		*mp = m;
2091		if (type == MT_SONAME) {
2092			sa = mtod(m, struct sockaddr *);
2093			sa->sa_len = buflen;
2094		}
2095	}
2096	return (error);
2097}
2098
2099/*
2100 * Given a user_addr_t of length len, allocate and fill out a *sa.
2101 *
2102 * Returns:	0			Success
2103 *		ENAMETOOLONG		Filename too long
2104 *		EINVAL			Invalid argument
2105 *		ENOMEM			Not enough space
2106 *		copyin:EFAULT		Bad address
2107 */
2108static int
2109getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2110    size_t len, boolean_t translate_unspec)
2111{
2112	struct sockaddr *sa;
2113	int error;
2114
2115	if (len > SOCK_MAXADDRLEN)
2116		return (ENAMETOOLONG);
2117
2118	if (len < offsetof(struct sockaddr, sa_data[0]))
2119		return (EINVAL);
2120
2121	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2122	if (sa == NULL) {
2123		return (ENOMEM);
2124	}
2125	error = copyin(uaddr, (caddr_t)sa, len);
2126	if (error) {
2127		FREE(sa, M_SONAME);
2128	} else {
2129		/*
2130		 * Force sa_family to AF_INET on AF_INET sockets to handle
2131		 * legacy applications that use AF_UNSPEC (0).  On all other
2132		 * sockets we leave it unchanged and let the lower layer
2133		 * handle it.
2134		 */
2135		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2136		    SOCK_CHECK_DOM(so, PF_INET) &&
2137		    len == sizeof (struct sockaddr_in))
2138			sa->sa_family = AF_INET;
2139
2140		sa->sa_len = len;
2141		*namp = sa;
2142	}
2143	return (error);
2144}
2145
2146static int
2147getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2148    user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2149{
2150	int error;
2151
2152	if (ss == NULL || uaddr == USER_ADDR_NULL ||
2153	    len < offsetof(struct sockaddr, sa_data[0]))
2154		return (EINVAL);
2155
2156	/*
2157	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2158	 * so the check here is inclusive.
2159	 */
2160	if (len > sizeof (*ss))
2161		return (ENAMETOOLONG);
2162
2163	bzero(ss, sizeof (*ss));
2164	error = copyin(uaddr, (caddr_t)ss, len);
2165	if (error == 0) {
2166		/*
2167		 * Force sa_family to AF_INET on AF_INET sockets to handle
2168		 * legacy applications that use AF_UNSPEC (0).  On all other
2169		 * sockets we leave it unchanged and let the lower layer
2170		 * handle it.
2171		 */
2172		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2173		    SOCK_CHECK_DOM(so, PF_INET) &&
2174		    len == sizeof (struct sockaddr_in))
2175			ss->ss_family = AF_INET;
2176
2177		ss->ss_len = len;
2178	}
2179	return (error);
2180}
2181
2182/*
2183 * Hard limit on the number of source and/or destination addresses
2184 * that can be specified by an application.
2185 */
2186#define	SOCKADDRLIST_MAX_ENTRIES	64
2187
2188static int
2189getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
2190    user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec)
2191{
2192	struct sockaddr_list *sl;
2193	int error = 0;
2194
2195	*slp = NULL;
2196
2197	if (uaddr == USER_ADDR_NULL || uaddrlen == 0)
2198		return (EINVAL);
2199
2200	sl = sockaddrlist_alloc(M_WAITOK);
2201	if (sl == NULL)
2202		return (ENOMEM);
2203
2204	VERIFY(sl->sl_cnt == 0);
2205	while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) {
2206		struct sockaddr_storage ss;
2207		struct sockaddr_entry *se;
2208		struct sockaddr *sa;
2209
2210		if (uaddrlen < sizeof (struct sockaddr)) {
2211			error = EINVAL;
2212			break;
2213		}
2214
2215		bzero(&ss, sizeof (ss));
2216		error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr));
2217		if (error != 0)
2218			break;
2219
2220		/* getsockaddr does the same but we need them now */
2221		if (uaddrlen < ss.ss_len ||
2222		    ss.ss_len < offsetof(struct sockaddr, sa_data[0])) {
2223			error = EINVAL;
2224			break;
2225		} else if (ss.ss_len > sizeof (ss)) {
2226			/*
2227			 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2228			 * so the check here is inclusive.  We could user the
2229			 * latter instead, but seems like an overkill for now.
2230			 */
2231			error = ENAMETOOLONG;
2232			break;
2233		}
2234
2235		se = sockaddrentry_alloc(M_WAITOK);
2236		if (se == NULL)
2237			break;
2238
2239		sockaddrlist_insert(sl, se);
2240
2241		error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec);
2242		if (error != 0)
2243			break;
2244
2245		VERIFY(sa != NULL && sa->sa_len == ss.ss_len);
2246		se->se_addr = sa;
2247
2248		uaddr += ss.ss_len;
2249		VERIFY(((signed)uaddrlen - ss.ss_len) >= 0);
2250		uaddrlen -= ss.ss_len;
2251	}
2252
2253	if (error != 0)
2254		sockaddrlist_free(sl);
2255	else
2256		*slp = sl;
2257
2258	return (error);
2259}
2260
2261#if SENDFILE
2262
2263#define	SFUIOBUFS 64
2264
2265/* Macros to compute the number of mbufs needed depending on cluster size */
2266#define	HOWMANY_16K(n)	((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
2267#define	HOWMANY_4K(n)	((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
2268
2269/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
2270#define SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)
2271
2272/* Upper send limit in the number of mbuf clusters */
2273#define	SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
2274#define	SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
2275
2276size_t mbuf_pkt_maxlen(mbuf_t m);
2277
2278__private_extern__ size_t
2279mbuf_pkt_maxlen(mbuf_t m)
2280{
2281	size_t maxlen = 0;
2282
2283	while (m) {
2284		maxlen += mbuf_maxlen(m);
2285		m = mbuf_next(m);
2286	}
2287	return (maxlen);
2288}
2289
2290static void
2291alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
2292    struct mbuf **m, boolean_t jumbocl)
2293{
2294	unsigned int needed;
2295
2296	if (pktlen == 0)
2297		panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
2298
2299	/*
2300	 * Try to allocate for the whole thing.  Since we want full control
2301	 * over the buffer size and be able to accept partial result, we can't
2302	 * use mbuf_allocpacket().  The logic below is similar to sosend().
2303	 */
2304	*m = NULL;
2305	if (pktlen > MBIGCLBYTES && jumbocl) {
2306		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
2307		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
2308	}
2309	if (*m == NULL) {
2310		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
2311		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2312	}
2313
2314	/*
2315	 * Our previous attempt(s) at allocation had failed; the system
2316	 * may be short on mbufs, and we want to block until they are
2317	 * available.  This time, ask just for 1 mbuf and don't return
2318	 * until we get it.
2319	 */
2320	if (*m == NULL) {
2321		needed = 1;
2322		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
2323	}
2324	if (*m == NULL)
2325		panic("%s: blocking allocation returned NULL\n", __func__);
2326
2327	*maxchunks = needed;
2328}
2329
2330/*
2331 * sendfile(2).
2332 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2333 *	 struct sf_hdtr *hdtr, int flags)
2334 *
2335 * Send a file specified by 'fd' and starting at 'offset' to a socket
2336 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2337 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2338 * output. If specified, write the total number of bytes sent into *nbytes.
2339 */
2340int
2341sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
2342{
2343	struct fileproc *fp;
2344	struct vnode *vp;
2345	struct socket *so;
2346	struct writev_nocancel_args nuap;
2347	user_ssize_t writev_retval;
2348	struct user_sf_hdtr user_hdtr;
2349	struct user32_sf_hdtr user32_hdtr;
2350	struct user64_sf_hdtr user64_hdtr;
2351	off_t off, xfsize;
2352	off_t nbytes = 0, sbytes = 0;
2353	int error = 0;
2354	size_t sizeof_hdtr;
2355	off_t file_size;
2356	struct vfs_context context = *vfs_context_current();
2357#define ENXIO_10146739_DBG(err_str) {	\
2358	if (error == ENXIO) {		\
2359		printf(err_str,		\
2360		__func__,		\
2361		"File a radar related to rdar://10146739 \n");	\
2362	}				\
2363}
2364	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2365	    0, 0, 0, 0);
2366
2367	AUDIT_ARG(fd, uap->fd);
2368	AUDIT_ARG(value32, uap->s);
2369
2370	/*
2371	 * Do argument checking. Must be a regular file in, stream
2372	 * type and connected socket out, positive offset.
2373	 */
2374	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
2375		ENXIO_10146739_DBG("%s: fp_getfvp error. %s");
2376		goto done;
2377	}
2378	if ((fp->f_flag & FREAD) == 0) {
2379		error = EBADF;
2380		goto done1;
2381	}
2382	if (vnode_isreg(vp) == 0) {
2383		error = ENOTSUP;
2384		goto done1;
2385	}
2386	error = file_socket(uap->s, &so);
2387	if (error) {
2388		ENXIO_10146739_DBG("%s: file_socket error. %s");
2389		goto done1;
2390	}
2391	if (so == NULL) {
2392		error = EBADF;
2393		goto done2;
2394	}
2395	if (so->so_type != SOCK_STREAM) {
2396		error = EINVAL;
2397		goto done2;
2398	}
2399	if ((so->so_state & SS_ISCONNECTED) == 0) {
2400		error = ENOTCONN;
2401		goto done2;
2402	}
2403	if (uap->offset < 0) {
2404		error = EINVAL;
2405		goto done2;
2406	}
2407	if (uap->nbytes == USER_ADDR_NULL) {
2408		error = EINVAL;
2409		goto done2;
2410	}
2411	if (uap->flags != 0) {
2412		error = EINVAL;
2413		goto done2;
2414	}
2415
2416	context.vc_ucred = fp->f_fglob->fg_cred;
2417
2418#if CONFIG_MACF_SOCKET_SUBSET
2419	/* JMM - fetch connected sockaddr? */
2420	error = mac_socket_check_send(context.vc_ucred, so, NULL);
2421	if (error)
2422		goto done2;
2423#endif
2424
2425	/*
2426	 * Get number of bytes to send
2427	 * Should it applies to size of header and trailer?
2428	 * JMM - error handling?
2429	 */
2430	copyin(uap->nbytes, &nbytes, sizeof (off_t));
2431
2432	/*
2433	 * If specified, get the pointer to the sf_hdtr struct for
2434	 * any headers/trailers.
2435	 */
2436	if (uap->hdtr != USER_ADDR_NULL) {
2437		caddr_t hdtrp;
2438
2439		bzero(&user_hdtr, sizeof (user_hdtr));
2440		if (IS_64BIT_PROCESS(p)) {
2441			hdtrp = (caddr_t)&user64_hdtr;
2442			sizeof_hdtr = sizeof (user64_hdtr);
2443		} else {
2444			hdtrp = (caddr_t)&user32_hdtr;
2445			sizeof_hdtr = sizeof (user32_hdtr);
2446		}
2447		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
2448		if (error)
2449			goto done2;
2450		if (IS_64BIT_PROCESS(p)) {
2451			user_hdtr.headers = user64_hdtr.headers;
2452			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
2453			user_hdtr.trailers = user64_hdtr.trailers;
2454			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
2455		} else {
2456			user_hdtr.headers = user32_hdtr.headers;
2457			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
2458			user_hdtr.trailers = user32_hdtr.trailers;
2459			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2460		}
2461
2462		/*
2463		 * Send any headers. Wimp out and use writev(2).
2464		 */
2465		if (user_hdtr.headers != USER_ADDR_NULL) {
2466			bzero(&nuap, sizeof (struct writev_args));
2467			nuap.fd = uap->s;
2468			nuap.iovp = user_hdtr.headers;
2469			nuap.iovcnt = user_hdtr.hdr_cnt;
2470			error = writev_nocancel(p, &nuap, &writev_retval);
2471			if (error) {
2472				ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
2473				goto done2;
2474			}
2475			sbytes += writev_retval;
2476		}
2477	}
2478
2479	/*
2480	 * Get the file size for 2 reasons:
2481	 *  1. We don't want to allocate more mbufs than necessary
2482	 *  2. We don't want to read past the end of file
2483	 */
2484	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2485		ENXIO_10146739_DBG("%s: vnode_size error. %s");
2486		goto done2;
2487	}
2488
2489	/*
2490	 * Simply read file data into a chain of mbufs that used with scatter
2491	 * gather reads. We're not (yet?) setup to use zero copy external
2492	 * mbufs that point to the file pages.
2493	 */
2494	socket_lock(so, 1);
2495	error = sblock(&so->so_snd, SBL_WAIT);
2496	if (error) {
2497		socket_unlock(so, 1);
2498		goto done2;
2499	}
2500	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2501		mbuf_t	m0 = NULL, m;
2502		unsigned int	nbufs = SFUIOBUFS, i;
2503		uio_t	auio;
2504		char	uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2505		size_t	uiolen;
2506		user_ssize_t	rlen;
2507		off_t	pgoff;
2508		size_t	pktlen;
2509		boolean_t jumbocl;
2510
2511		/*
2512		 * Calculate the amount to transfer.
2513		 * Align to round number of pages.
2514		 * Not to exceed send socket buffer,
2515		 * the EOF, or the passed in nbytes.
2516		 */
2517		xfsize = sbspace(&so->so_snd);
2518
2519		if (xfsize <= 0) {
2520			if (so->so_state & SS_CANTSENDMORE) {
2521				error = EPIPE;
2522				goto done3;
2523			} else if ((so->so_state & SS_NBIO)) {
2524				error = EAGAIN;
2525				goto done3;
2526			} else {
2527				xfsize = PAGE_SIZE;
2528			}
2529		}
2530
2531		if (xfsize > SENDFILE_MAX_BYTES)
2532			xfsize = SENDFILE_MAX_BYTES;
2533		else if (xfsize > PAGE_SIZE)
2534			xfsize = trunc_page(xfsize);
2535		pgoff = off & PAGE_MASK_64;
2536		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
2537			xfsize = PAGE_SIZE_64 - pgoff;
2538		if (nbytes && xfsize > (nbytes - sbytes))
2539			xfsize = nbytes - sbytes;
2540		if (xfsize <= 0)
2541			break;
2542		if (off + xfsize > file_size)
2543			xfsize = file_size - off;
2544		if (xfsize <= 0)
2545			break;
2546
2547		/*
2548		 * Attempt to use larger than system page-size clusters for
2549		 * large writes only if there is a jumbo cluster pool and
2550		 * if the socket is marked accordingly.
2551		 */
2552		jumbocl = sosendjcl && njcl > 0 &&
2553		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2554
2555		socket_unlock(so, 0);
2556		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2557		pktlen = mbuf_pkt_maxlen(m0);
2558		if (pktlen < (size_t)xfsize)
2559			xfsize = pktlen;
2560
2561		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2562		    UIO_READ, &uio_buf[0], sizeof (uio_buf));
2563		if (auio == NULL) {
2564			printf("sendfile failed. nbufs = %d. %s", nbufs,
2565				"File a radar related to rdar://10146739.\n");
2566			mbuf_freem(m0);
2567			error = ENXIO;
2568			socket_lock(so, 0);
2569			goto done3;
2570		}
2571
2572		for (i = 0, m = m0, uiolen = 0;
2573		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2574		    i++, m = mbuf_next(m)) {
2575			size_t mlen = mbuf_maxlen(m);
2576
2577			if (mlen + uiolen > (size_t)xfsize)
2578				mlen = xfsize - uiolen;
2579			mbuf_setlen(m, mlen);
2580			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2581			    mlen);
2582			uiolen += mlen;
2583		}
2584
2585		if (xfsize != uio_resid(auio))
2586			printf("sendfile: xfsize: %lld != uio_resid(auio): "
2587				"%lld\n", xfsize, (long long)uio_resid(auio));
2588
2589		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2590		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2591		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2592		error = fo_read(fp, auio, FOF_OFFSET, &context);
2593		socket_lock(so, 0);
2594		if (error != 0) {
2595			if (uio_resid(auio) != xfsize && (error == ERESTART ||
2596			    error == EINTR || error == EWOULDBLOCK)) {
2597				error = 0;
2598			} else {
2599				ENXIO_10146739_DBG("%s: fo_read error. %s");
2600				mbuf_freem(m0);
2601				goto done3;
2602			}
2603		}
2604		xfsize -= uio_resid(auio);
2605		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2606		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2607		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2608
2609		if (xfsize == 0) {
2610			//printf("sendfile: fo_read 0 bytes, EOF\n");
2611			break;
2612		}
2613		if (xfsize + off > file_size)
2614			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2615			    "%lld\n", xfsize, off, file_size);
2616		for (i = 0, m = m0, rlen = 0;
2617		    i < nbufs && m != NULL && rlen < xfsize;
2618		    i++, m = mbuf_next(m)) {
2619			size_t mlen = mbuf_maxlen(m);
2620
2621			if (rlen + mlen > (size_t)xfsize)
2622				mlen = xfsize - rlen;
2623			mbuf_setlen(m, mlen);
2624
2625			rlen += mlen;
2626		}
2627		mbuf_pkthdr_setlen(m0, xfsize);
2628
2629retry_space:
2630		/*
2631		 * Make sure that the socket is still able to take more data.
2632		 * CANTSENDMORE being true usually means that the connection
2633		 * was closed. so_error is true when an error was sensed after
2634		 * a previous send.
2635		 * The state is checked after the page mapping and buffer
2636		 * allocation above since those operations may block and make
2637		 * any socket checks stale. From this point forward, nothing
2638		 * blocks before the pru_send (or more accurately, any blocking
2639		 * results in a loop back to here to re-check).
2640		 */
2641		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2642			if (so->so_state & SS_CANTSENDMORE) {
2643				error = EPIPE;
2644			} else {
2645				error = so->so_error;
2646				so->so_error = 0;
2647			}
2648			m_freem(m0);
2649			ENXIO_10146739_DBG("%s: Unexpected socket error. %s");
2650			goto done3;
2651		}
2652		/*
2653		 * Wait for socket space to become available. We do this just
2654		 * after checking the connection state above in order to avoid
2655		 * a race condition with sbwait().
2656		 */
2657		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
2658			if (so->so_state & SS_NBIO) {
2659				m_freem(m0);
2660				error = EAGAIN;
2661				goto done3;
2662			}
2663			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2664			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
2665			error = sbwait(&so->so_snd);
2666			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2667			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
2668			/*
2669			 * An error from sbwait usually indicates that we've
2670			 * been interrupted by a signal. If we've sent anything
2671			 * then return bytes sent, otherwise return the error.
2672			 */
2673			if (error) {
2674				m_freem(m0);
2675				goto done3;
2676			}
2677			goto retry_space;
2678		}
2679
2680		struct mbuf *control = NULL;
2681		{
2682			/*
2683			 * Socket filter processing
2684			 */
2685
2686			error = sflt_data_out(so, NULL, &m0, &control, 0);
2687			if (error) {
2688				if (error == EJUSTRETURN) {
2689					error = 0;
2690					continue;
2691				}
2692				ENXIO_10146739_DBG("%s: sflt_data_out error. %s");
2693				goto done3;
2694			}
2695			/*
2696			 * End Socket filter processing
2697			 */
2698		}
2699		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2700		    uap->s, 0, 0, 0, 0);
2701		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2702		    0, control, p);
2703		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2704		    uap->s, 0, 0, 0, 0);
2705		if (error) {
2706			ENXIO_10146739_DBG("%s: pru_send error. %s");
2707			goto done3;
2708		}
2709	}
2710	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
2711	/*
2712	 * Send trailers. Wimp out and use writev(2).
2713	 */
2714	if (uap->hdtr != USER_ADDR_NULL &&
2715	    user_hdtr.trailers != USER_ADDR_NULL) {
2716		bzero(&nuap, sizeof (struct writev_args));
2717		nuap.fd = uap->s;
2718		nuap.iovp = user_hdtr.trailers;
2719		nuap.iovcnt = user_hdtr.trl_cnt;
2720		error = writev_nocancel(p, &nuap, &writev_retval);
2721		if (error) {
2722			ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
2723			goto done2;
2724		}
2725		sbytes += writev_retval;
2726	}
2727done2:
2728	file_drop(uap->s);
2729done1:
2730	file_drop(uap->fd);
2731done:
2732	if (uap->nbytes != USER_ADDR_NULL) {
2733		/* XXX this appears bogus for some early failure conditions */
2734		copyout(&sbytes, uap->nbytes, sizeof (off_t));
2735	}
2736	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2737	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2738	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
2739	return (error);
2740done3:
2741	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
2742	goto done2;
2743}
2744
2745
2746#endif /* SENDFILE */
2747