kern_sendfile.c revision 253823
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 253823 2013-07-30 23:26:05Z scottl $");
37
38#include "opt_capsicum.h"
39#include "opt_inet.h"
40#include "opt_inet6.h"
41#include "opt_sctp.h"
42#include "opt_compat.h"
43#include "opt_ktrace.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/capability.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/mutex.h>
51#include <sys/sysproto.h>
52#include <sys/malloc.h>
53#include <sys/filedesc.h>
54#include <sys/event.h>
55#include <sys/proc.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/filio.h>
59#include <sys/jail.h>
60#include <sys/mount.h>
61#include <sys/mbuf.h>
62#include <sys/protosw.h>
63#include <sys/rwlock.h>
64#include <sys/sf_buf.h>
65#include <sys/sysent.h>
66#include <sys/socket.h>
67#include <sys/socketvar.h>
68#include <sys/signalvar.h>
69#include <sys/syscallsubr.h>
70#include <sys/sysctl.h>
71#include <sys/uio.h>
72#include <sys/vnode.h>
73#ifdef KTRACE
74#include <sys/ktrace.h>
75#endif
76#ifdef COMPAT_FREEBSD32
77#include <compat/freebsd32/freebsd32_util.h>
78#endif
79
80#include <net/vnet.h>
81
82#include <security/audit/audit.h>
83#include <security/mac/mac_framework.h>
84
85#include <vm/vm.h>
86#include <vm/vm_param.h>
87#include <vm/vm_object.h>
88#include <vm/vm_page.h>
89#include <vm/vm_pageout.h>
90#include <vm/vm_kern.h>
91#include <vm/vm_extern.h>
92
93#if defined(INET) || defined(INET6)
94#ifdef SCTP
95#include <netinet/sctp.h>
96#include <netinet/sctp_peeloff.h>
97#endif /* SCTP */
98#endif /* INET || INET6 */
99
100/*
101 * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
102 * and SOCK_NONBLOCK.
 *
 * ACCEPT4_INHERIT: kern_accept4() copies the listening socket's SS_NBIO
 * state and so_sigio owner to the accepted socket and leaves the inherited
 * file flags untouched; without it, only SOCK_NONBLOCK decides the
 * non-blocking state and FASYNC is cleared.
 * ACCEPT4_COMPAT: accept1() rewrites the returned address in the old
 * osockaddr layout; only acted on when COMPAT_OLDSOCK is defined.
103 */
104#define	ACCEPT4_INHERIT	0x1
105#define	ACCEPT4_COMPAT	0x2
106
/*
 * Forward declarations for file-local helpers.  sendit(), recvit() and
 * accept1() are the user-space front ends for the send, receive and accept
 * system call families.
 */
107static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
108static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
109
110static int accept1(struct thread *td, int s, struct sockaddr *uname,
111		   socklen_t *anamelen, int flags);
/* NOTE(review): the three helpers below are defined outside this excerpt. */
112static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
113static int getsockname1(struct thread *td, struct getsockname_args *uap,
114			int compat);
115static int getpeername1(struct thread *td, struct getpeername_args *uap,
116			int compat);
117
/*
 * One counter per uint64_t-sized slot of struct sfstat; allocated by
 * sfstat_init() and exported by sfstat_sysctl().
 */
118counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
119/*
120 * NSFBUFS-related variables and associated sysctls
 *
 * All four values are exported under kern.ipc.  Only sfreadahead is
 * writable at run time (CTLFLAG_RW); it defaults to MAXPHYS / MAXBSIZE.
121 */
122int nsfbufs;
123int nsfbufspeak;
124int nsfbufsused;
125static int sfreadahead = MAXPHYS / MAXBSIZE;
126
127SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
128    "Maximum number of sendfile(2) sf_bufs available");
129SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
130    "Number of sendfile(2) sf_bufs at peak usage");
131SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
132    "Number of sendfile(2) sf_bufs in use");
133SYSCTL_INT(_kern_ipc, OID_AUTO, sfreadahead, CTLFLAG_RW, &sfreadahead, 0,
134    "Number of sendfile(2) read-ahead MAXBSIZE blocks");
135
136
137static void
138sfstat_init(const void *unused)
139{
140
141	COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
142	    M_WAITOK);
143}
144SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
145
146static int
147sfstat_sysctl(SYSCTL_HANDLER_ARGS)
148{
149	struct sfstat s;
150
151	COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
152	if (req->newptr)
153		COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
154	return (SYSCTL_OUT(req, &s, sizeof(s)));
155}
156SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
157    NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
158/*
159 * Convert a user file descriptor to a kernel file entry and check if required
160 * capability rights are present.
161 * A reference on the file entry is held upon returning.
162 */
163static int
164getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
165    struct file **fpp, u_int *fflagp)
166{
167	struct file *fp;
168	int error;
169
170	error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL);
171	if (error != 0)
172		return (error);
173	if (fp->f_type != DTYPE_SOCKET) {
174		fdrop(fp, curthread);
175		return (ENOTSOCK);
176	}
177	if (fflagp != NULL)
178		*fflagp = fp->f_flag;
179	*fpp = fp;
180	return (0);
181}
182
183/*
184 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK is turned on by COMPAT_43.  In this file it enables the
 * old-style entry points (oaccept, osend, osendmsg, orecv, orecvfrom, ...)
 * and the osockaddr / MSG_COMPAT conversions in the shared helpers.
185 */
186#if defined(COMPAT_43)
187#define COMPAT_OLDSOCK
188#endif
189
190int
191sys_socket(td, uap)
192	struct thread *td;
193	struct socket_args /* {
194		int	domain;
195		int	type;
196		int	protocol;
197	} */ *uap;
198{
199	struct socket *so;
200	struct file *fp;
201	int fd, error, type, oflag, fflag;
202
203	AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
204
205	type = uap->type;
206	oflag = 0;
207	fflag = 0;
208	if ((type & SOCK_CLOEXEC) != 0) {
209		type &= ~SOCK_CLOEXEC;
210		oflag |= O_CLOEXEC;
211	}
212	if ((type & SOCK_NONBLOCK) != 0) {
213		type &= ~SOCK_NONBLOCK;
214		fflag |= FNONBLOCK;
215	}
216
217#ifdef MAC
218	error = mac_socket_check_create(td->td_ucred, uap->domain, type,
219	    uap->protocol);
220	if (error)
221		return (error);
222#endif
223	error = falloc(td, &fp, &fd, oflag);
224	if (error)
225		return (error);
226	/* An extra reference on `fp' has been held for us by falloc(). */
227	error = socreate(uap->domain, &so, type, uap->protocol,
228	    td->td_ucred, td);
229	if (error) {
230		fdclose(td->td_proc->p_fd, fp, fd, td);
231	} else {
232		finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
233		if ((fflag & FNONBLOCK) != 0)
234			(void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
235		td->td_retval[0] = fd;
236	}
237	fdrop(fp, td);
238	return (error);
239}
240
241/* ARGSUSED */
242int
243sys_bind(td, uap)
244	struct thread *td;
245	struct bind_args /* {
246		int	s;
247		caddr_t	name;
248		int	namelen;
249	} */ *uap;
250{
251	struct sockaddr *sa;
252	int error;
253
254	error = getsockaddr(&sa, uap->name, uap->namelen);
255	if (error == 0) {
256		error = kern_bind(td, uap->s, sa);
257		free(sa, M_SONAME);
258	}
259	return (error);
260}
261
262static int
263kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
264{
265	struct socket *so;
266	struct file *fp;
267	int error;
268
269	AUDIT_ARG_FD(fd);
270	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
271	error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
272	if (error)
273		return (error);
274	so = fp->f_data;
275#ifdef KTRACE
276	if (KTRPOINT(td, KTR_STRUCT))
277		ktrsockaddr(sa);
278#endif
279#ifdef MAC
280	error = mac_socket_check_bind(td->td_ucred, so, sa);
281	if (error == 0) {
282#endif
283		if (dirfd == AT_FDCWD)
284			error = sobind(so, sa, td);
285		else
286			error = sobindat(dirfd, so, sa, td);
287#ifdef MAC
288	}
289#endif
290	fdrop(fp, td);
291	return (error);
292}
293
294int
295kern_bind(struct thread *td, int fd, struct sockaddr *sa)
296{
297
298	return (kern_bindat(td, AT_FDCWD, fd, sa));
299}
300
301/* ARGSUSED */
302int
303sys_bindat(td, uap)
304	struct thread *td;
305	struct bindat_args /* {
306		int	fd;
307		int	s;
308		caddr_t	name;
309		int	namelen;
310	} */ *uap;
311{
312	struct sockaddr *sa;
313	int error;
314
315	error = getsockaddr(&sa, uap->name, uap->namelen);
316	if (error == 0) {
317		error = kern_bindat(td, uap->fd, uap->s, sa);
318		free(sa, M_SONAME);
319	}
320	return (error);
321}
322
323/* ARGSUSED */
324int
325sys_listen(td, uap)
326	struct thread *td;
327	struct listen_args /* {
328		int	s;
329		int	backlog;
330	} */ *uap;
331{
332	struct socket *so;
333	struct file *fp;
334	int error;
335
336	AUDIT_ARG_FD(uap->s);
337	error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
338	if (error == 0) {
339		so = fp->f_data;
340#ifdef MAC
341		error = mac_socket_check_listen(td->td_ucred, so);
342		if (error == 0)
343#endif
344			error = solisten(so, uap->backlog, td);
345		fdrop(fp, td);
346	}
347	return(error);
348}
349
350/*
351 * accept1()
352 */
353static int
354accept1(td, s, uname, anamelen, flags)
355	struct thread *td;
356	int s;
357	struct sockaddr *uname;
358	socklen_t *anamelen;
359	int flags;
360{
361	struct sockaddr *name;
362	socklen_t namelen;
363	struct file *fp;
364	int error;
365
366	if (uname == NULL)
367		return (kern_accept4(td, s, NULL, NULL, flags, NULL));
368
369	error = copyin(anamelen, &namelen, sizeof (namelen));
370	if (error)
371		return (error);
372
373	error = kern_accept4(td, s, &name, &namelen, flags, &fp);
374
375	/*
376	 * return a namelen of zero for older code which might
377	 * ignore the return value from accept.
378	 */
379	if (error) {
380		(void) copyout(&namelen, anamelen, sizeof(*anamelen));
381		return (error);
382	}
383
384	if (error == 0 && uname != NULL) {
385#ifdef COMPAT_OLDSOCK
386		if (flags & ACCEPT4_COMPAT)
387			((struct osockaddr *)name)->sa_family =
388			    name->sa_family;
389#endif
390		error = copyout(name, uname, namelen);
391	}
392	if (error == 0)
393		error = copyout(&namelen, anamelen,
394		    sizeof(namelen));
395	if (error)
396		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
397	fdrop(fp, td);
398	free(name, M_SONAME);
399	return (error);
400}
401
402int
403kern_accept(struct thread *td, int s, struct sockaddr **name,
404    socklen_t *namelen, struct file **fp)
405{
406	return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp));
407}
408
409int
410kern_accept4(struct thread *td, int s, struct sockaddr **name,
411    socklen_t *namelen, int flags, struct file **fp)
412{
413	struct filedesc *fdp;
414	struct file *headfp, *nfp = NULL;
415	struct sockaddr *sa = NULL;
416	int error;
417	struct socket *head, *so;
418	int fd;
419	u_int fflag;
420	pid_t pgid;
421	int tmp;
422
423	if (name)
424		*name = NULL;
425
426	AUDIT_ARG_FD(s);
427	fdp = td->td_proc->p_fd;
428	error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
429	if (error)
430		return (error);
431	head = headfp->f_data;
432	if ((head->so_options & SO_ACCEPTCONN) == 0) {
433		error = EINVAL;
434		goto done;
435	}
436#ifdef MAC
437	error = mac_socket_check_accept(td->td_ucred, head);
438	if (error != 0)
439		goto done;
440#endif
441	error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0);
442	if (error)
443		goto done;
444	ACCEPT_LOCK();
445	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
446		ACCEPT_UNLOCK();
447		error = EWOULDBLOCK;
448		goto noconnection;
449	}
450	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
451		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
452			head->so_error = ECONNABORTED;
453			break;
454		}
455		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
456		    "accept", 0);
457		if (error) {
458			ACCEPT_UNLOCK();
459			goto noconnection;
460		}
461	}
462	if (head->so_error) {
463		error = head->so_error;
464		head->so_error = 0;
465		ACCEPT_UNLOCK();
466		goto noconnection;
467	}
468	so = TAILQ_FIRST(&head->so_comp);
469	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
470	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
471
472	/*
473	 * Before changing the flags on the socket, we have to bump the
474	 * reference count.  Otherwise, if the protocol calls sofree(),
475	 * the socket will be released due to a zero refcount.
476	 */
477	SOCK_LOCK(so);			/* soref() and so_state update */
478	soref(so);			/* file descriptor reference */
479
480	TAILQ_REMOVE(&head->so_comp, so, so_list);
481	head->so_qlen--;
482	if (flags & ACCEPT4_INHERIT)
483		so->so_state |= (head->so_state & SS_NBIO);
484	else
485		so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
486	so->so_qstate &= ~SQ_COMP;
487	so->so_head = NULL;
488
489	SOCK_UNLOCK(so);
490	ACCEPT_UNLOCK();
491
492	/* An extra reference on `nfp' has been held for us by falloc(). */
493	td->td_retval[0] = fd;
494
495	/* connection has been removed from the listen queue */
496	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
497
498	if (flags & ACCEPT4_INHERIT) {
499		pgid = fgetown(&head->so_sigio);
500		if (pgid != 0)
501			fsetown(pgid, &so->so_sigio);
502	} else {
503		fflag &= ~(FNONBLOCK | FASYNC);
504		if (flags & SOCK_NONBLOCK)
505			fflag |= FNONBLOCK;
506	}
507
508	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
509	/* Sync socket nonblocking/async state with file flags */
510	tmp = fflag & FNONBLOCK;
511	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
512	tmp = fflag & FASYNC;
513	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
514	sa = 0;
515	error = soaccept(so, &sa);
516	if (error) {
517		/*
518		 * return a namelen of zero for older code which might
519		 * ignore the return value from accept.
520		 */
521		if (name)
522			*namelen = 0;
523		goto noconnection;
524	}
525	if (sa == NULL) {
526		if (name)
527			*namelen = 0;
528		goto done;
529	}
530	AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
531	if (name) {
532		/* check sa_len before it is destroyed */
533		if (*namelen > sa->sa_len)
534			*namelen = sa->sa_len;
535#ifdef KTRACE
536		if (KTRPOINT(td, KTR_STRUCT))
537			ktrsockaddr(sa);
538#endif
539		*name = sa;
540		sa = NULL;
541	}
542noconnection:
543	if (sa)
544		free(sa, M_SONAME);
545
546	/*
547	 * close the new descriptor, assuming someone hasn't ripped it
548	 * out from under us.
549	 */
550	if (error)
551		fdclose(fdp, nfp, fd, td);
552
553	/*
554	 * Release explicitly held references before returning.  We return
555	 * a reference on nfp to the caller on success if they request it.
556	 */
557done:
558	if (fp != NULL) {
559		if (error == 0) {
560			*fp = nfp;
561			nfp = NULL;
562		} else
563			*fp = NULL;
564	}
565	if (nfp != NULL)
566		fdrop(nfp, td);
567	fdrop(headfp, td);
568	return (error);
569}
570
571int
572sys_accept(td, uap)
573	struct thread *td;
574	struct accept_args *uap;
575{
576
577	return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
578}
579
580int
581sys_accept4(td, uap)
582	struct thread *td;
583	struct accept4_args *uap;
584{
585	if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
586		return (EINVAL);
587
588	return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
589}
590
591#ifdef COMPAT_OLDSOCK
592int
593oaccept(td, uap)
594	struct thread *td;
595	struct accept_args *uap;
596{
597
598	return (accept1(td, uap->s, uap->name, uap->anamelen,
599	    ACCEPT4_INHERIT | ACCEPT4_COMPAT));
600}
601#endif /* COMPAT_OLDSOCK */
602
603/* ARGSUSED */
604int
605sys_connect(td, uap)
606	struct thread *td;
607	struct connect_args /* {
608		int	s;
609		caddr_t	name;
610		int	namelen;
611	} */ *uap;
612{
613	struct sockaddr *sa;
614	int error;
615
616	error = getsockaddr(&sa, uap->name, uap->namelen);
617	if (error == 0) {
618		error = kern_connect(td, uap->s, sa);
619		free(sa, M_SONAME);
620	}
621	return (error);
622}
623
624static int
625kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
626{
627	struct socket *so;
628	struct file *fp;
629	int error;
630	int interrupted = 0;
631
632	AUDIT_ARG_FD(fd);
633	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
634	error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
635	if (error)
636		return (error);
637	so = fp->f_data;
638	if (so->so_state & SS_ISCONNECTING) {
639		error = EALREADY;
640		goto done1;
641	}
642#ifdef KTRACE
643	if (KTRPOINT(td, KTR_STRUCT))
644		ktrsockaddr(sa);
645#endif
646#ifdef MAC
647	error = mac_socket_check_connect(td->td_ucred, so, sa);
648	if (error)
649		goto bad;
650#endif
651	if (dirfd == AT_FDCWD)
652		error = soconnect(so, sa, td);
653	else
654		error = soconnectat(dirfd, so, sa, td);
655	if (error)
656		goto bad;
657	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
658		error = EINPROGRESS;
659		goto done1;
660	}
661	SOCK_LOCK(so);
662	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
663		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
664		    "connec", 0);
665		if (error) {
666			if (error == EINTR || error == ERESTART)
667				interrupted = 1;
668			break;
669		}
670	}
671	if (error == 0) {
672		error = so->so_error;
673		so->so_error = 0;
674	}
675	SOCK_UNLOCK(so);
676bad:
677	if (!interrupted)
678		so->so_state &= ~SS_ISCONNECTING;
679	if (error == ERESTART)
680		error = EINTR;
681done1:
682	fdrop(fp, td);
683	return (error);
684}
685
686int
687kern_connect(struct thread *td, int fd, struct sockaddr *sa)
688{
689
690	return (kern_connectat(td, AT_FDCWD, fd, sa));
691}
692
693/* ARGSUSED */
694int
695sys_connectat(td, uap)
696	struct thread *td;
697	struct connectat_args /* {
698		int	fd;
699		int	s;
700		caddr_t	name;
701		int	namelen;
702	} */ *uap;
703{
704	struct sockaddr *sa;
705	int error;
706
707	error = getsockaddr(&sa, uap->name, uap->namelen);
708	if (error == 0) {
709		error = kern_connectat(td, uap->fd, uap->s, sa);
710		free(sa, M_SONAME);
711	}
712	return (error);
713}
714
715int
716kern_socketpair(struct thread *td, int domain, int type, int protocol,
717    int *rsv)
718{
719	struct filedesc *fdp = td->td_proc->p_fd;
720	struct file *fp1, *fp2;
721	struct socket *so1, *so2;
722	int fd, error, oflag, fflag;
723
724	AUDIT_ARG_SOCKET(domain, type, protocol);
725
726	oflag = 0;
727	fflag = 0;
728	if ((type & SOCK_CLOEXEC) != 0) {
729		type &= ~SOCK_CLOEXEC;
730		oflag |= O_CLOEXEC;
731	}
732	if ((type & SOCK_NONBLOCK) != 0) {
733		type &= ~SOCK_NONBLOCK;
734		fflag |= FNONBLOCK;
735	}
736#ifdef MAC
737	/* We might want to have a separate check for socket pairs. */
738	error = mac_socket_check_create(td->td_ucred, domain, type,
739	    protocol);
740	if (error)
741		return (error);
742#endif
743	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
744	if (error)
745		return (error);
746	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
747	if (error)
748		goto free1;
749	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
750	error = falloc(td, &fp1, &fd, oflag);
751	if (error)
752		goto free2;
753	rsv[0] = fd;
754	fp1->f_data = so1;	/* so1 already has ref count */
755	error = falloc(td, &fp2, &fd, oflag);
756	if (error)
757		goto free3;
758	fp2->f_data = so2;	/* so2 already has ref count */
759	rsv[1] = fd;
760	error = soconnect2(so1, so2);
761	if (error)
762		goto free4;
763	if (type == SOCK_DGRAM) {
764		/*
765		 * Datagram socket connection is asymmetric.
766		 */
767		 error = soconnect2(so2, so1);
768		 if (error)
769			goto free4;
770	}
771	finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
772	    &socketops);
773	finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
774	    &socketops);
775	if ((fflag & FNONBLOCK) != 0) {
776		(void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
777		(void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
778	}
779	fdrop(fp1, td);
780	fdrop(fp2, td);
781	return (0);
782free4:
783	fdclose(fdp, fp2, rsv[1], td);
784	fdrop(fp2, td);
785free3:
786	fdclose(fdp, fp1, rsv[0], td);
787	fdrop(fp1, td);
788free2:
789	if (so2 != NULL)
790		(void)soclose(so2);
791free1:
792	if (so1 != NULL)
793		(void)soclose(so1);
794	return (error);
795}
796
797int
798sys_socketpair(struct thread *td, struct socketpair_args *uap)
799{
800	int error, sv[2];
801
802	error = kern_socketpair(td, uap->domain, uap->type,
803	    uap->protocol, sv);
804	if (error)
805		return (error);
806	error = copyout(sv, uap->rsv, 2 * sizeof(int));
807	if (error) {
808		(void)kern_close(td, sv[0]);
809		(void)kern_close(td, sv[1]);
810	}
811	return (error);
812}
813
814static int
815sendit(td, s, mp, flags)
816	struct thread *td;
817	int s;
818	struct msghdr *mp;
819	int flags;
820{
821	struct mbuf *control;
822	struct sockaddr *to;
823	int error;
824
825#ifdef CAPABILITY_MODE
826	if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
827		return (ECAPMODE);
828#endif
829
830	if (mp->msg_name != NULL) {
831		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
832		if (error) {
833			to = NULL;
834			goto bad;
835		}
836		mp->msg_name = to;
837	} else {
838		to = NULL;
839	}
840
841	if (mp->msg_control) {
842		if (mp->msg_controllen < sizeof(struct cmsghdr)
843#ifdef COMPAT_OLDSOCK
844		    && mp->msg_flags != MSG_COMPAT
845#endif
846		) {
847			error = EINVAL;
848			goto bad;
849		}
850		error = sockargs(&control, mp->msg_control,
851		    mp->msg_controllen, MT_CONTROL);
852		if (error)
853			goto bad;
854#ifdef COMPAT_OLDSOCK
855		if (mp->msg_flags == MSG_COMPAT) {
856			struct cmsghdr *cm;
857
858			M_PREPEND(control, sizeof(*cm), M_WAITOK);
859			cm = mtod(control, struct cmsghdr *);
860			cm->cmsg_len = control->m_len;
861			cm->cmsg_level = SOL_SOCKET;
862			cm->cmsg_type = SCM_RIGHTS;
863		}
864#endif
865	} else {
866		control = NULL;
867	}
868
869	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
870
871bad:
872	if (to)
873		free(to, M_SONAME);
874	return (error);
875}
876
877int
878kern_sendit(td, s, mp, flags, control, segflg)
879	struct thread *td;
880	int s;
881	struct msghdr *mp;
882	int flags;
883	struct mbuf *control;
884	enum uio_seg segflg;
885{
886	struct file *fp;
887	struct uio auio;
888	struct iovec *iov;
889	struct socket *so;
890	int i, error;
891	ssize_t len;
892	cap_rights_t rights;
893#ifdef KTRACE
894	struct uio *ktruio = NULL;
895#endif
896
897	AUDIT_ARG_FD(s);
898	rights = CAP_SEND;
899	if (mp->msg_name != NULL) {
900		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
901		rights |= CAP_CONNECT;
902	}
903	error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
904	if (error)
905		return (error);
906	so = (struct socket *)fp->f_data;
907
908#ifdef KTRACE
909	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
910		ktrsockaddr(mp->msg_name);
911#endif
912#ifdef MAC
913	if (mp->msg_name != NULL) {
914		error = mac_socket_check_connect(td->td_ucred, so,
915		    mp->msg_name);
916		if (error)
917			goto bad;
918	}
919	error = mac_socket_check_send(td->td_ucred, so);
920	if (error)
921		goto bad;
922#endif
923
924	auio.uio_iov = mp->msg_iov;
925	auio.uio_iovcnt = mp->msg_iovlen;
926	auio.uio_segflg = segflg;
927	auio.uio_rw = UIO_WRITE;
928	auio.uio_td = td;
929	auio.uio_offset = 0;			/* XXX */
930	auio.uio_resid = 0;
931	iov = mp->msg_iov;
932	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
933		if ((auio.uio_resid += iov->iov_len) < 0) {
934			error = EINVAL;
935			goto bad;
936		}
937	}
938#ifdef KTRACE
939	if (KTRPOINT(td, KTR_GENIO))
940		ktruio = cloneuio(&auio);
941#endif
942	len = auio.uio_resid;
943	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
944	if (error) {
945		if (auio.uio_resid != len && (error == ERESTART ||
946		    error == EINTR || error == EWOULDBLOCK))
947			error = 0;
948		/* Generation of SIGPIPE can be controlled per socket */
949		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
950		    !(flags & MSG_NOSIGNAL)) {
951			PROC_LOCK(td->td_proc);
952			tdsignal(td, SIGPIPE);
953			PROC_UNLOCK(td->td_proc);
954		}
955	}
956	if (error == 0)
957		td->td_retval[0] = len - auio.uio_resid;
958#ifdef KTRACE
959	if (ktruio != NULL) {
960		ktruio->uio_resid = td->td_retval[0];
961		ktrgenio(s, UIO_WRITE, ktruio, error);
962	}
963#endif
964bad:
965	fdrop(fp, td);
966	return (error);
967}
968
969int
970sys_sendto(td, uap)
971	struct thread *td;
972	struct sendto_args /* {
973		int	s;
974		caddr_t	buf;
975		size_t	len;
976		int	flags;
977		caddr_t	to;
978		int	tolen;
979	} */ *uap;
980{
981	struct msghdr msg;
982	struct iovec aiov;
983	int error;
984
985	msg.msg_name = uap->to;
986	msg.msg_namelen = uap->tolen;
987	msg.msg_iov = &aiov;
988	msg.msg_iovlen = 1;
989	msg.msg_control = 0;
990#ifdef COMPAT_OLDSOCK
991	msg.msg_flags = 0;
992#endif
993	aiov.iov_base = uap->buf;
994	aiov.iov_len = uap->len;
995	error = sendit(td, uap->s, &msg, uap->flags);
996	return (error);
997}
998
999#ifdef COMPAT_OLDSOCK
1000int
1001osend(td, uap)
1002	struct thread *td;
1003	struct osend_args /* {
1004		int	s;
1005		caddr_t	buf;
1006		int	len;
1007		int	flags;
1008	} */ *uap;
1009{
1010	struct msghdr msg;
1011	struct iovec aiov;
1012	int error;
1013
1014	msg.msg_name = 0;
1015	msg.msg_namelen = 0;
1016	msg.msg_iov = &aiov;
1017	msg.msg_iovlen = 1;
1018	aiov.iov_base = uap->buf;
1019	aiov.iov_len = uap->len;
1020	msg.msg_control = 0;
1021	msg.msg_flags = 0;
1022	error = sendit(td, uap->s, &msg, uap->flags);
1023	return (error);
1024}
1025
1026int
1027osendmsg(td, uap)
1028	struct thread *td;
1029	struct osendmsg_args /* {
1030		int	s;
1031		caddr_t	msg;
1032		int	flags;
1033	} */ *uap;
1034{
1035	struct msghdr msg;
1036	struct iovec *iov;
1037	int error;
1038
1039	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1040	if (error)
1041		return (error);
1042	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1043	if (error)
1044		return (error);
1045	msg.msg_iov = iov;
1046	msg.msg_flags = MSG_COMPAT;
1047	error = sendit(td, uap->s, &msg, uap->flags);
1048	free(iov, M_IOV);
1049	return (error);
1050}
1051#endif
1052
1053int
1054sys_sendmsg(td, uap)
1055	struct thread *td;
1056	struct sendmsg_args /* {
1057		int	s;
1058		caddr_t	msg;
1059		int	flags;
1060	} */ *uap;
1061{
1062	struct msghdr msg;
1063	struct iovec *iov;
1064	int error;
1065
1066	error = copyin(uap->msg, &msg, sizeof (msg));
1067	if (error)
1068		return (error);
1069	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1070	if (error)
1071		return (error);
1072	msg.msg_iov = iov;
1073#ifdef COMPAT_OLDSOCK
1074	msg.msg_flags = 0;
1075#endif
1076	error = sendit(td, uap->s, &msg, uap->flags);
1077	free(iov, M_IOV);
1078	return (error);
1079}
1080
1081int
1082kern_recvit(td, s, mp, fromseg, controlp)
1083	struct thread *td;
1084	int s;
1085	struct msghdr *mp;
1086	enum uio_seg fromseg;
1087	struct mbuf **controlp;
1088{
1089	struct uio auio;
1090	struct iovec *iov;
1091	int i;
1092	ssize_t len;
1093	int error;
1094	struct mbuf *m, *control = NULL;
1095	caddr_t ctlbuf;
1096	struct file *fp;
1097	struct socket *so;
1098	struct sockaddr *fromsa = NULL;
1099#ifdef KTRACE
1100	struct uio *ktruio = NULL;
1101#endif
1102
1103	if (controlp != NULL)
1104		*controlp = NULL;
1105
1106	AUDIT_ARG_FD(s);
1107	error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL);
1108	if (error)
1109		return (error);
1110	so = fp->f_data;
1111
1112#ifdef MAC
1113	error = mac_socket_check_receive(td->td_ucred, so);
1114	if (error) {
1115		fdrop(fp, td);
1116		return (error);
1117	}
1118#endif
1119
1120	auio.uio_iov = mp->msg_iov;
1121	auio.uio_iovcnt = mp->msg_iovlen;
1122	auio.uio_segflg = UIO_USERSPACE;
1123	auio.uio_rw = UIO_READ;
1124	auio.uio_td = td;
1125	auio.uio_offset = 0;			/* XXX */
1126	auio.uio_resid = 0;
1127	iov = mp->msg_iov;
1128	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
1129		if ((auio.uio_resid += iov->iov_len) < 0) {
1130			fdrop(fp, td);
1131			return (EINVAL);
1132		}
1133	}
1134#ifdef KTRACE
1135	if (KTRPOINT(td, KTR_GENIO))
1136		ktruio = cloneuio(&auio);
1137#endif
1138	len = auio.uio_resid;
1139	error = soreceive(so, &fromsa, &auio, NULL,
1140	    (mp->msg_control || controlp) ? &control : NULL,
1141	    &mp->msg_flags);
1142	if (error) {
1143		if (auio.uio_resid != len && (error == ERESTART ||
1144		    error == EINTR || error == EWOULDBLOCK))
1145			error = 0;
1146	}
1147	if (fromsa != NULL)
1148		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
1149#ifdef KTRACE
1150	if (ktruio != NULL) {
1151		ktruio->uio_resid = len - auio.uio_resid;
1152		ktrgenio(s, UIO_READ, ktruio, error);
1153	}
1154#endif
1155	if (error)
1156		goto out;
1157	td->td_retval[0] = len - auio.uio_resid;
1158	if (mp->msg_name) {
1159		len = mp->msg_namelen;
1160		if (len <= 0 || fromsa == NULL)
1161			len = 0;
1162		else {
1163			/* save sa_len before it is destroyed by MSG_COMPAT */
1164			len = MIN(len, fromsa->sa_len);
1165#ifdef COMPAT_OLDSOCK
1166			if (mp->msg_flags & MSG_COMPAT)
1167				((struct osockaddr *)fromsa)->sa_family =
1168				    fromsa->sa_family;
1169#endif
1170			if (fromseg == UIO_USERSPACE) {
1171				error = copyout(fromsa, mp->msg_name,
1172				    (unsigned)len);
1173				if (error)
1174					goto out;
1175			} else
1176				bcopy(fromsa, mp->msg_name, len);
1177		}
1178		mp->msg_namelen = len;
1179	}
1180	if (mp->msg_control && controlp == NULL) {
1181#ifdef COMPAT_OLDSOCK
1182		/*
1183		 * We assume that old recvmsg calls won't receive access
1184		 * rights and other control info, esp. as control info
1185		 * is always optional and those options didn't exist in 4.3.
1186		 * If we receive rights, trim the cmsghdr; anything else
1187		 * is tossed.
1188		 */
1189		if (control && mp->msg_flags & MSG_COMPAT) {
1190			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1191			    SOL_SOCKET ||
1192			    mtod(control, struct cmsghdr *)->cmsg_type !=
1193			    SCM_RIGHTS) {
1194				mp->msg_controllen = 0;
1195				goto out;
1196			}
1197			control->m_len -= sizeof (struct cmsghdr);
1198			control->m_data += sizeof (struct cmsghdr);
1199		}
1200#endif
1201		len = mp->msg_controllen;
1202		m = control;
1203		mp->msg_controllen = 0;
1204		ctlbuf = mp->msg_control;
1205
1206		while (m && len > 0) {
1207			unsigned int tocopy;
1208
1209			if (len >= m->m_len)
1210				tocopy = m->m_len;
1211			else {
1212				mp->msg_flags |= MSG_CTRUNC;
1213				tocopy = len;
1214			}
1215
1216			if ((error = copyout(mtod(m, caddr_t),
1217					ctlbuf, tocopy)) != 0)
1218				goto out;
1219
1220			ctlbuf += tocopy;
1221			len -= tocopy;
1222			m = m->m_next;
1223		}
1224		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1225	}
1226out:
1227	fdrop(fp, td);
1228#ifdef KTRACE
1229	if (fromsa && KTRPOINT(td, KTR_STRUCT))
1230		ktrsockaddr(fromsa);
1231#endif
1232	if (fromsa)
1233		free(fromsa, M_SONAME);
1234
1235	if (error == 0 && controlp != NULL)
1236		*controlp = control;
1237	else  if (control)
1238		m_freem(control);
1239
1240	return (error);
1241}
1242
1243static int
1244recvit(td, s, mp, namelenp)
1245	struct thread *td;
1246	int s;
1247	struct msghdr *mp;
1248	void *namelenp;
1249{
1250	int error;
1251
1252	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1253	if (error)
1254		return (error);
1255	if (namelenp) {
1256		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1257#ifdef COMPAT_OLDSOCK
1258		if (mp->msg_flags & MSG_COMPAT)
1259			error = 0;	/* old recvfrom didn't check */
1260#endif
1261	}
1262	return (error);
1263}
1264
1265int
1266sys_recvfrom(td, uap)
1267	struct thread *td;
1268	struct recvfrom_args /* {
1269		int	s;
1270		caddr_t	buf;
1271		size_t	len;
1272		int	flags;
1273		struct sockaddr * __restrict	from;
1274		socklen_t * __restrict fromlenaddr;
1275	} */ *uap;
1276{
1277	struct msghdr msg;
1278	struct iovec aiov;
1279	int error;
1280
1281	if (uap->fromlenaddr) {
1282		error = copyin(uap->fromlenaddr,
1283		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1284		if (error)
1285			goto done2;
1286	} else {
1287		msg.msg_namelen = 0;
1288	}
1289	msg.msg_name = uap->from;
1290	msg.msg_iov = &aiov;
1291	msg.msg_iovlen = 1;
1292	aiov.iov_base = uap->buf;
1293	aiov.iov_len = uap->len;
1294	msg.msg_control = 0;
1295	msg.msg_flags = uap->flags;
1296	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1297done2:
1298	return(error);
1299}
1300
1301#ifdef COMPAT_OLDSOCK
1302int
1303orecvfrom(td, uap)
1304	struct thread *td;
1305	struct recvfrom_args *uap;
1306{
1307
1308	uap->flags |= MSG_COMPAT;
1309	return (sys_recvfrom(td, uap));
1310}
1311#endif
1312
1313#ifdef COMPAT_OLDSOCK
1314int
1315orecv(td, uap)
1316	struct thread *td;
1317	struct orecv_args /* {
1318		int	s;
1319		caddr_t	buf;
1320		int	len;
1321		int	flags;
1322	} */ *uap;
1323{
1324	struct msghdr msg;
1325	struct iovec aiov;
1326	int error;
1327
1328	msg.msg_name = 0;
1329	msg.msg_namelen = 0;
1330	msg.msg_iov = &aiov;
1331	msg.msg_iovlen = 1;
1332	aiov.iov_base = uap->buf;
1333	aiov.iov_len = uap->len;
1334	msg.msg_control = 0;
1335	msg.msg_flags = uap->flags;
1336	error = recvit(td, uap->s, &msg, NULL);
1337	return (error);
1338}
1339
1340/*
1341 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1342 * overlays the new one, missing only the flags, and with the (old) access
1343 * rights where the control fields are now.
1344 */
1345int
1346orecvmsg(td, uap)
1347	struct thread *td;
1348	struct orecvmsg_args /* {
1349		int	s;
1350		struct	omsghdr *msg;
1351		int	flags;
1352	} */ *uap;
1353{
1354	struct msghdr msg;
1355	struct iovec *iov;
1356	int error;
1357
1358	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1359	if (error)
1360		return (error);
1361	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1362	if (error)
1363		return (error);
1364	msg.msg_flags = uap->flags | MSG_COMPAT;
1365	msg.msg_iov = iov;
1366	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1367	if (msg.msg_controllen && error == 0)
1368		error = copyout(&msg.msg_controllen,
1369		    &uap->msg->msg_accrightslen, sizeof (int));
1370	free(iov, M_IOV);
1371	return (error);
1372}
1373#endif
1374
1375int
1376sys_recvmsg(td, uap)
1377	struct thread *td;
1378	struct recvmsg_args /* {
1379		int	s;
1380		struct	msghdr *msg;
1381		int	flags;
1382	} */ *uap;
1383{
1384	struct msghdr msg;
1385	struct iovec *uiov, *iov;
1386	int error;
1387
1388	error = copyin(uap->msg, &msg, sizeof (msg));
1389	if (error)
1390		return (error);
1391	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1392	if (error)
1393		return (error);
1394	msg.msg_flags = uap->flags;
1395#ifdef COMPAT_OLDSOCK
1396	msg.msg_flags &= ~MSG_COMPAT;
1397#endif
1398	uiov = msg.msg_iov;
1399	msg.msg_iov = iov;
1400	error = recvit(td, uap->s, &msg, NULL);
1401	if (error == 0) {
1402		msg.msg_iov = uiov;
1403		error = copyout(&msg, uap->msg, sizeof(msg));
1404	}
1405	free(iov, M_IOV);
1406	return (error);
1407}
1408
1409/* ARGSUSED */
1410int
1411sys_shutdown(td, uap)
1412	struct thread *td;
1413	struct shutdown_args /* {
1414		int	s;
1415		int	how;
1416	} */ *uap;
1417{
1418	struct socket *so;
1419	struct file *fp;
1420	int error;
1421
1422	AUDIT_ARG_FD(uap->s);
1423	error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
1424	    NULL);
1425	if (error == 0) {
1426		so = fp->f_data;
1427		error = soshutdown(so, uap->how);
1428		fdrop(fp, td);
1429	}
1430	return (error);
1431}
1432
1433/* ARGSUSED */
1434int
1435sys_setsockopt(td, uap)
1436	struct thread *td;
1437	struct setsockopt_args /* {
1438		int	s;
1439		int	level;
1440		int	name;
1441		caddr_t	val;
1442		int	valsize;
1443	} */ *uap;
1444{
1445
1446	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1447	    uap->val, UIO_USERSPACE, uap->valsize));
1448}
1449
1450int
1451kern_setsockopt(td, s, level, name, val, valseg, valsize)
1452	struct thread *td;
1453	int s;
1454	int level;
1455	int name;
1456	void *val;
1457	enum uio_seg valseg;
1458	socklen_t valsize;
1459{
1460	int error;
1461	struct socket *so;
1462	struct file *fp;
1463	struct sockopt sopt;
1464
1465	if (val == NULL && valsize != 0)
1466		return (EFAULT);
1467	if ((int)valsize < 0)
1468		return (EINVAL);
1469
1470	sopt.sopt_dir = SOPT_SET;
1471	sopt.sopt_level = level;
1472	sopt.sopt_name = name;
1473	sopt.sopt_val = val;
1474	sopt.sopt_valsize = valsize;
1475	switch (valseg) {
1476	case UIO_USERSPACE:
1477		sopt.sopt_td = td;
1478		break;
1479	case UIO_SYSSPACE:
1480		sopt.sopt_td = NULL;
1481		break;
1482	default:
1483		panic("kern_setsockopt called with bad valseg");
1484	}
1485
1486	AUDIT_ARG_FD(s);
1487	error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
1488	if (error == 0) {
1489		so = fp->f_data;
1490		error = sosetopt(so, &sopt);
1491		fdrop(fp, td);
1492	}
1493	return(error);
1494}
1495
1496/* ARGSUSED */
1497int
1498sys_getsockopt(td, uap)
1499	struct thread *td;
1500	struct getsockopt_args /* {
1501		int	s;
1502		int	level;
1503		int	name;
1504		void * __restrict	val;
1505		socklen_t * __restrict avalsize;
1506	} */ *uap;
1507{
1508	socklen_t valsize;
1509	int	error;
1510
1511	if (uap->val) {
1512		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1513		if (error)
1514			return (error);
1515	}
1516
1517	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1518	    uap->val, UIO_USERSPACE, &valsize);
1519
1520	if (error == 0)
1521		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1522	return (error);
1523}
1524
1525/*
1526 * Kernel version of getsockopt.
1527 * optval can be a userland or userspace. optlen is always a kernel pointer.
1528 */
1529int
1530kern_getsockopt(td, s, level, name, val, valseg, valsize)
1531	struct thread *td;
1532	int s;
1533	int level;
1534	int name;
1535	void *val;
1536	enum uio_seg valseg;
1537	socklen_t *valsize;
1538{
1539	int error;
1540	struct  socket *so;
1541	struct file *fp;
1542	struct	sockopt sopt;
1543
1544	if (val == NULL)
1545		*valsize = 0;
1546	if ((int)*valsize < 0)
1547		return (EINVAL);
1548
1549	sopt.sopt_dir = SOPT_GET;
1550	sopt.sopt_level = level;
1551	sopt.sopt_name = name;
1552	sopt.sopt_val = val;
1553	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1554	switch (valseg) {
1555	case UIO_USERSPACE:
1556		sopt.sopt_td = td;
1557		break;
1558	case UIO_SYSSPACE:
1559		sopt.sopt_td = NULL;
1560		break;
1561	default:
1562		panic("kern_getsockopt called with bad valseg");
1563	}
1564
1565	AUDIT_ARG_FD(s);
1566	error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
1567	if (error == 0) {
1568		so = fp->f_data;
1569		error = sogetopt(so, &sopt);
1570		*valsize = sopt.sopt_valsize;
1571		fdrop(fp, td);
1572	}
1573	return (error);
1574}
1575
1576/*
1577 * getsockname1() - Get socket name.
1578 */
1579/* ARGSUSED */
1580static int
1581getsockname1(td, uap, compat)
1582	struct thread *td;
1583	struct getsockname_args /* {
1584		int	fdes;
1585		struct sockaddr * __restrict asa;
1586		socklen_t * __restrict alen;
1587	} */ *uap;
1588	int compat;
1589{
1590	struct sockaddr *sa;
1591	socklen_t len;
1592	int error;
1593
1594	error = copyin(uap->alen, &len, sizeof(len));
1595	if (error)
1596		return (error);
1597
1598	error = kern_getsockname(td, uap->fdes, &sa, &len);
1599	if (error)
1600		return (error);
1601
1602	if (len != 0) {
1603#ifdef COMPAT_OLDSOCK
1604		if (compat)
1605			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1606#endif
1607		error = copyout(sa, uap->asa, (u_int)len);
1608	}
1609	free(sa, M_SONAME);
1610	if (error == 0)
1611		error = copyout(&len, uap->alen, sizeof(len));
1612	return (error);
1613}
1614
1615int
1616kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1617    socklen_t *alen)
1618{
1619	struct socket *so;
1620	struct file *fp;
1621	socklen_t len;
1622	int error;
1623
1624	AUDIT_ARG_FD(fd);
1625	error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
1626	if (error)
1627		return (error);
1628	so = fp->f_data;
1629	*sa = NULL;
1630	CURVNET_SET(so->so_vnet);
1631	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1632	CURVNET_RESTORE();
1633	if (error)
1634		goto bad;
1635	if (*sa == NULL)
1636		len = 0;
1637	else
1638		len = MIN(*alen, (*sa)->sa_len);
1639	*alen = len;
1640#ifdef KTRACE
1641	if (KTRPOINT(td, KTR_STRUCT))
1642		ktrsockaddr(*sa);
1643#endif
1644bad:
1645	fdrop(fp, td);
1646	if (error && *sa) {
1647		free(*sa, M_SONAME);
1648		*sa = NULL;
1649	}
1650	return (error);
1651}
1652
/*
 * getsockname(2) system call: getsockname1() without the old-sockaddr
 * conversion.
 */
int
sys_getsockname(struct thread *td, struct getsockname_args *uap)
{
	int rc;

	rc = getsockname1(td, uap, 0);
	return (rc);
}
1661
1662#ifdef COMPAT_OLDSOCK
/*
 * COMPAT_OLDSOCK getsockname: getsockname1() with the old-sockaddr
 * conversion enabled.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int rc;

	rc = getsockname1(td, uap, 1);
	return (rc);
}
1671#endif /* COMPAT_OLDSOCK */
1672
1673/*
1674 * getpeername1() - Get name of peer for connected socket.
1675 */
1676/* ARGSUSED */
1677static int
1678getpeername1(td, uap, compat)
1679	struct thread *td;
1680	struct getpeername_args /* {
1681		int	fdes;
1682		struct sockaddr * __restrict	asa;
1683		socklen_t * __restrict	alen;
1684	} */ *uap;
1685	int compat;
1686{
1687	struct sockaddr *sa;
1688	socklen_t len;
1689	int error;
1690
1691	error = copyin(uap->alen, &len, sizeof (len));
1692	if (error)
1693		return (error);
1694
1695	error = kern_getpeername(td, uap->fdes, &sa, &len);
1696	if (error)
1697		return (error);
1698
1699	if (len != 0) {
1700#ifdef COMPAT_OLDSOCK
1701		if (compat)
1702			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1703#endif
1704		error = copyout(sa, uap->asa, (u_int)len);
1705	}
1706	free(sa, M_SONAME);
1707	if (error == 0)
1708		error = copyout(&len, uap->alen, sizeof(len));
1709	return (error);
1710}
1711
1712int
1713kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1714    socklen_t *alen)
1715{
1716	struct socket *so;
1717	struct file *fp;
1718	socklen_t len;
1719	int error;
1720
1721	AUDIT_ARG_FD(fd);
1722	error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
1723	if (error)
1724		return (error);
1725	so = fp->f_data;
1726	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1727		error = ENOTCONN;
1728		goto done;
1729	}
1730	*sa = NULL;
1731	CURVNET_SET(so->so_vnet);
1732	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1733	CURVNET_RESTORE();
1734	if (error)
1735		goto bad;
1736	if (*sa == NULL)
1737		len = 0;
1738	else
1739		len = MIN(*alen, (*sa)->sa_len);
1740	*alen = len;
1741#ifdef KTRACE
1742	if (KTRPOINT(td, KTR_STRUCT))
1743		ktrsockaddr(*sa);
1744#endif
1745bad:
1746	if (error && *sa) {
1747		free(*sa, M_SONAME);
1748		*sa = NULL;
1749	}
1750done:
1751	fdrop(fp, td);
1752	return (error);
1753}
1754
/*
 * getpeername(2) system call: getpeername1() without the old-sockaddr
 * conversion.
 */
int
sys_getpeername(struct thread *td, struct getpeername_args *uap)
{
	int rc;

	rc = getpeername1(td, uap, 0);
	return (rc);
}
1763
1764#ifdef COMPAT_OLDSOCK
/*
 * COMPAT_OLDSOCK getpeername: getpeername1() with the old-sockaddr
 * conversion enabled.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1774#endif /* COMPAT_OLDSOCK */
1775
1776int
1777sockargs(mp, buf, buflen, type)
1778	struct mbuf **mp;
1779	caddr_t buf;
1780	int buflen, type;
1781{
1782	struct sockaddr *sa;
1783	struct mbuf *m;
1784	int error;
1785
1786	if (buflen > MLEN) {
1787#ifdef COMPAT_OLDSOCK
1788		if (type == MT_SONAME && buflen <= 112)
1789			buflen = MLEN;		/* unix domain compat. hack */
1790		else
1791#endif
1792			if (buflen > MCLBYTES)
1793				return (EINVAL);
1794	}
1795	m = m_get2(buflen, M_WAITOK, type, 0);
1796	m->m_len = buflen;
1797	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1798	if (error)
1799		(void) m_free(m);
1800	else {
1801		*mp = m;
1802		if (type == MT_SONAME) {
1803			sa = mtod(m, struct sockaddr *);
1804
1805#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1806			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1807				sa->sa_family = sa->sa_len;
1808#endif
1809			sa->sa_len = buflen;
1810		}
1811	}
1812	return (error);
1813}
1814
1815int
1816getsockaddr(namp, uaddr, len)
1817	struct sockaddr **namp;
1818	caddr_t uaddr;
1819	size_t len;
1820{
1821	struct sockaddr *sa;
1822	int error;
1823
1824	if (len > SOCK_MAXADDRLEN)
1825		return (ENAMETOOLONG);
1826	if (len < offsetof(struct sockaddr, sa_data[0]))
1827		return (EINVAL);
1828	sa = malloc(len, M_SONAME, M_WAITOK);
1829	error = copyin(uaddr, sa, len);
1830	if (error) {
1831		free(sa, M_SONAME);
1832	} else {
1833#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1834		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1835			sa->sa_family = sa->sa_len;
1836#endif
1837		sa->sa_len = len;
1838		*namp = sa;
1839	}
1840	return (error);
1841}
1842
1843#include <sys/condvar.h>
1844
1845struct sendfile_sync {
1846	struct mtx	mtx;
1847	struct cv	cv;
1848	unsigned	count;
1849};
1850
1851/*
1852 * Detach mapped page and release resources back to the system.
1853 */
1854void
1855sf_buf_mext(void *addr, void *args)
1856{
1857	vm_page_t m;
1858	struct sendfile_sync *sfs;
1859
1860	m = sf_buf_page(args);
1861	sf_buf_free(args);
1862	vm_page_lock(m);
1863	vm_page_unwire(m, 0);
1864	/*
1865	 * Check for the object going away on us. This can
1866	 * happen since we don't hold a reference to it.
1867	 * If so, we're responsible for freeing the page.
1868	 */
1869	if (m->wire_count == 0 && m->object == NULL)
1870		vm_page_free(m);
1871	vm_page_unlock(m);
1872	if (addr == NULL)
1873		return;
1874	sfs = addr;
1875	mtx_lock(&sfs->mtx);
1876	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1877	if (--sfs->count == 0)
1878		cv_signal(&sfs->cv);
1879	mtx_unlock(&sfs->mtx);
1880}
1881
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file referred to by 'fd', beginning at 'offset', over the
 * socket 's'.  'nbytes' limits how much of the file is sent; a value of 0
 * means send through EOF.  A header and/or trailer may optionally be
 * added to the socket output.  When 'sbytes' is supplied, the total
 * number of bytes sent is stored in *sbytes.
 */
int
sys_sendfile(struct thread *td, struct sendfile_args *uap)
{
	int rc;

	/* compat == 0: nbytes does not include the header length. */
	rc = do_sendfile(td, uap, 0);
	return (rc);
}
1899
1900static int
1901do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1902{
1903	struct sf_hdtr hdtr;
1904	struct uio *hdr_uio, *trl_uio;
1905	int error;
1906
1907	hdr_uio = trl_uio = NULL;
1908
1909	if (uap->hdtr != NULL) {
1910		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1911		if (error)
1912			goto out;
1913		if (hdtr.headers != NULL) {
1914			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1915			if (error)
1916				goto out;
1917		}
1918		if (hdtr.trailers != NULL) {
1919			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1920			if (error)
1921				goto out;
1922
1923		}
1924	}
1925
1926	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1927out:
1928	if (hdr_uio)
1929		free(hdr_uio, M_IOV);
1930	if (trl_uio)
1931		free(trl_uio, M_IOV);
1932	return (error);
1933}
1934
1935#ifdef COMPAT_FREEBSD4
1936int
1937freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1938{
1939	struct sendfile_args args;
1940
1941	args.fd = uap->fd;
1942	args.s = uap->s;
1943	args.offset = uap->offset;
1944	args.nbytes = uap->nbytes;
1945	args.hdtr = uap->hdtr;
1946	args.sbytes = uap->sbytes;
1947	args.flags = uap->flags;
1948
1949	return (do_sendfile(td, &args, 1));
1950}
1951#endif /* COMPAT_FREEBSD4 */
1952
1953int
1954kern_sendfile(struct thread *td, struct sendfile_args *uap,
1955    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1956{
1957	struct file *sock_fp;
1958	struct vnode *vp;
1959	struct vm_object *obj = NULL;
1960	struct socket *so = NULL;
1961	struct mbuf *m = NULL;
1962	struct sf_buf *sf;
1963	struct vm_page *pg;
1964	struct vattr va;
1965	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1966	int error, hdrlen = 0, mnw = 0;
1967	int bsize;
1968	struct sendfile_sync *sfs = NULL;
1969
1970	/*
1971	 * The file descriptor must be a regular file and have a
1972	 * backing VM object.
1973	 * File offset must be positive.  If it goes beyond EOF
1974	 * we send only the header/trailer and no payload data.
1975	 */
1976	AUDIT_ARG_FD(uap->fd);
1977	/*
1978	 * sendfile(2) can start at any offset within a file so we require
1979	 * CAP_READ+CAP_SEEK = CAP_PREAD.
1980	 */
1981	if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0)
1982		goto out;
1983	vn_lock(vp, LK_SHARED | LK_RETRY);
1984	if (vp->v_type == VREG) {
1985		bsize = vp->v_mount->mnt_stat.f_iosize;
1986		if (uap->nbytes == 0) {
1987			error = VOP_GETATTR(vp, &va, td->td_ucred);
1988			if (error != 0) {
1989				VOP_UNLOCK(vp, 0);
1990				obj = NULL;
1991				goto out;
1992			}
1993			rem = va.va_size;
1994		} else
1995			rem = uap->nbytes;
1996		obj = vp->v_object;
1997		if (obj != NULL) {
1998			/*
1999			 * Temporarily increase the backing VM
2000			 * object's reference count so that a forced
2001			 * reclamation of its vnode does not
2002			 * immediately destroy it.
2003			 */
2004			VM_OBJECT_WLOCK(obj);
2005			if ((obj->flags & OBJ_DEAD) == 0) {
2006				vm_object_reference_locked(obj);
2007				VM_OBJECT_WUNLOCK(obj);
2008			} else {
2009				VM_OBJECT_WUNLOCK(obj);
2010				obj = NULL;
2011			}
2012		}
2013	} else
2014		bsize = 0;	/* silence gcc */
2015	VOP_UNLOCK(vp, 0);
2016	if (obj == NULL) {
2017		error = EINVAL;
2018		goto out;
2019	}
2020	if (uap->offset < 0) {
2021		error = EINVAL;
2022		goto out;
2023	}
2024
2025	/*
2026	 * The socket must be a stream socket and connected.
2027	 * Remember if it a blocking or non-blocking socket.
2028	 */
2029	if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND,
2030	    &sock_fp, NULL)) != 0)
2031		goto out;
2032	so = sock_fp->f_data;
2033	if (so->so_type != SOCK_STREAM) {
2034		error = EINVAL;
2035		goto out;
2036	}
2037	if ((so->so_state & SS_ISCONNECTED) == 0) {
2038		error = ENOTCONN;
2039		goto out;
2040	}
2041	/*
2042	 * Do not wait on memory allocations but return ENOMEM for
2043	 * caller to retry later.
2044	 * XXX: Experimental.
2045	 */
2046	if (uap->flags & SF_MNOWAIT)
2047		mnw = 1;
2048
2049	if (uap->flags & SF_SYNC) {
2050		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
2051		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
2052		cv_init(&sfs->cv, "sendfile");
2053	}
2054
2055#ifdef MAC
2056	error = mac_socket_check_send(td->td_ucred, so);
2057	if (error)
2058		goto out;
2059#endif
2060
2061	/* If headers are specified copy them into mbufs. */
2062	if (hdr_uio != NULL) {
2063		hdr_uio->uio_td = td;
2064		hdr_uio->uio_rw = UIO_WRITE;
2065		if (hdr_uio->uio_resid > 0) {
2066			/*
2067			 * In FBSD < 5.0 the nbytes to send also included
2068			 * the header.  If compat is specified subtract the
2069			 * header size from nbytes.
2070			 */
2071			if (compat) {
2072				if (uap->nbytes > hdr_uio->uio_resid)
2073					uap->nbytes -= hdr_uio->uio_resid;
2074				else
2075					uap->nbytes = 0;
2076			}
2077			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
2078			    0, 0, 0);
2079			if (m == NULL) {
2080				error = mnw ? EAGAIN : ENOBUFS;
2081				goto out;
2082			}
2083			hdrlen = m_length(m, NULL);
2084		}
2085	}
2086
2087	/*
2088	 * Protect against multiple writers to the socket.
2089	 *
2090	 * XXXRW: Historically this has assumed non-interruptibility, so now
2091	 * we implement that, but possibly shouldn't.
2092	 */
2093	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
2094
2095	/*
2096	 * Loop through the pages of the file, starting with the requested
2097	 * offset. Get a file page (do I/O if necessary), map the file page
2098	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
2099	 * it on the socket.
2100	 * This is done in two loops.  The inner loop turns as many pages
2101	 * as it can, up to available socket buffer space, without blocking
2102	 * into mbufs to have it bulk delivered into the socket send buffer.
2103	 * The outer loop checks the state and available space of the socket
2104	 * and takes care of the overall progress.
2105	 */
2106	for (off = uap->offset; ; ) {
2107		struct mbuf *mtail;
2108		int loopbytes;
2109		int space;
2110		int done;
2111
2112		if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
2113		    (uap->nbytes == 0 && va.va_size == fsbytes))
2114			break;
2115
2116		mtail = NULL;
2117		loopbytes = 0;
2118		space = 0;
2119		done = 0;
2120
2121		/*
2122		 * Check the socket state for ongoing connection,
2123		 * no errors and space in socket buffer.
2124		 * If space is low allow for the remainder of the
2125		 * file to be processed if it fits the socket buffer.
2126		 * Otherwise block in waiting for sufficient space
2127		 * to proceed, or if the socket is nonblocking, return
2128		 * to userland with EAGAIN while reporting how far
2129		 * we've come.
2130		 * We wait until the socket buffer has significant free
2131		 * space to do bulk sends.  This makes good use of file
2132		 * system read ahead and allows packet segmentation
2133		 * offloading hardware to take over lots of work.  If
2134		 * we were not careful here we would send off only one
2135		 * sfbuf at a time.
2136		 */
2137		SOCKBUF_LOCK(&so->so_snd);
2138		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
2139			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
2140retry_space:
2141		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2142			error = EPIPE;
2143			SOCKBUF_UNLOCK(&so->so_snd);
2144			goto done;
2145		} else if (so->so_error) {
2146			error = so->so_error;
2147			so->so_error = 0;
2148			SOCKBUF_UNLOCK(&so->so_snd);
2149			goto done;
2150		}
2151		space = sbspace(&so->so_snd);
2152		if (space < rem &&
2153		    (space <= 0 ||
2154		     space < so->so_snd.sb_lowat)) {
2155			if (so->so_state & SS_NBIO) {
2156				SOCKBUF_UNLOCK(&so->so_snd);
2157				error = EAGAIN;
2158				goto done;
2159			}
2160			/*
2161			 * sbwait drops the lock while sleeping.
2162			 * When we loop back to retry_space the
2163			 * state may have changed and we retest
2164			 * for it.
2165			 */
2166			error = sbwait(&so->so_snd);
2167			/*
2168			 * An error from sbwait usually indicates that we've
2169			 * been interrupted by a signal. If we've sent anything
2170			 * then return bytes sent, otherwise return the error.
2171			 */
2172			if (error) {
2173				SOCKBUF_UNLOCK(&so->so_snd);
2174				goto done;
2175			}
2176			goto retry_space;
2177		}
2178		SOCKBUF_UNLOCK(&so->so_snd);
2179
2180		/*
2181		 * Reduce space in the socket buffer by the size of
2182		 * the header mbuf chain.
2183		 * hdrlen is set to 0 after the first loop.
2184		 */
2185		space -= hdrlen;
2186
2187		error = vn_lock(vp, LK_SHARED);
2188		if (error != 0)
2189			goto done;
2190		error = VOP_GETATTR(vp, &va, td->td_ucred);
2191		if (error != 0 || off >= va.va_size) {
2192			VOP_UNLOCK(vp, 0);
2193			goto done;
2194		}
2195
2196		/*
2197		 * Loop and construct maximum sized mbuf chain to be bulk
2198		 * dumped into socket buffer.
2199		 */
2200		while (space > loopbytes) {
2201			vm_pindex_t pindex;
2202			vm_offset_t pgoff;
2203			struct mbuf *m0;
2204
2205			/*
2206			 * Calculate the amount to transfer.
2207			 * Not to exceed a page, the EOF,
2208			 * or the passed in nbytes.
2209			 */
2210			pgoff = (vm_offset_t)(off & PAGE_MASK);
2211			if (uap->nbytes)
2212				rem = (uap->nbytes - fsbytes - loopbytes);
2213			else
2214				rem = va.va_size -
2215				    uap->offset - fsbytes - loopbytes;
2216			xfsize = omin(PAGE_SIZE - pgoff, rem);
2217			xfsize = omin(space - loopbytes, xfsize);
2218			if (xfsize <= 0) {
2219				done = 1;		/* all data sent */
2220				break;
2221			}
2222
2223			/*
2224			 * Attempt to look up the page.  Allocate
2225			 * if not found or wait and loop if busy.
2226			 */
2227			pindex = OFF_TO_IDX(off);
2228			VM_OBJECT_WLOCK(obj);
2229			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2230			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2231
2232			/*
2233			 * Check if page is valid for what we need,
2234			 * otherwise initiate I/O.
2235			 * If we already turned some pages into mbufs,
2236			 * send them off before we come here again and
2237			 * block.
2238			 */
2239			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2240				VM_OBJECT_WUNLOCK(obj);
2241			else if (m != NULL)
2242				error = EAGAIN;	/* send what we already got */
2243			else if (uap->flags & SF_NODISKIO)
2244				error = EBUSY;
2245			else {
2246				ssize_t resid;
2247				int readahead = sfreadahead * MAXBSIZE;
2248
2249				/*
2250				 * Ensure that our page is still around
2251				 * when the I/O completes.
2252				 */
2253				vm_page_io_start(pg);
2254				VM_OBJECT_WUNLOCK(obj);
2255
2256				/*
2257				 * Get the page from backing store.
2258				 * XXXMAC: Because we don't have fp->f_cred
2259				 * here, we pass in NOCRED.  This is probably
2260				 * wrong, but is consistent with our original
2261				 * implementation.
2262				 */
2263				error = vn_rdwr(UIO_READ, vp, NULL, readahead,
2264				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2265				    IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT),
2266				    td->td_ucred, NOCRED, &resid, td);
2267				VM_OBJECT_WLOCK(obj);
2268				vm_page_io_finish(pg);
2269				if (!error)
2270					VM_OBJECT_WUNLOCK(obj);
2271				SFSTAT_INC(sf_iocnt);
2272			}
2273			if (error) {
2274				vm_page_lock(pg);
2275				vm_page_unwire(pg, 0);
2276				/*
2277				 * See if anyone else might know about
2278				 * this page.  If not and it is not valid,
2279				 * then free it.
2280				 */
2281				if (pg->wire_count == 0 && pg->valid == 0 &&
2282				    pg->busy == 0 && !(pg->oflags & VPO_BUSY))
2283					vm_page_free(pg);
2284				vm_page_unlock(pg);
2285				VM_OBJECT_WUNLOCK(obj);
2286				if (error == EAGAIN)
2287					error = 0;	/* not a real error */
2288				break;
2289			}
2290
2291			/*
2292			 * Get a sendfile buf.  When allocating the
2293			 * first buffer for mbuf chain, we usually
2294			 * wait as long as necessary, but this wait
2295			 * can be interrupted.  For consequent
2296			 * buffers, do not sleep, since several
2297			 * threads might exhaust the buffers and then
2298			 * deadlock.
2299			 */
2300			sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
2301			    SFB_CATCH);
2302			if (sf == NULL) {
2303				SFSTAT_INC(sf_allocfail);
2304				vm_page_lock(pg);
2305				vm_page_unwire(pg, 0);
2306				KASSERT(pg->object != NULL,
2307				    ("kern_sendfile: object disappeared"));
2308				vm_page_unlock(pg);
2309				if (m == NULL)
2310					error = (mnw ? EAGAIN : EINTR);
2311				break;
2312			}
2313
2314			/*
2315			 * Get an mbuf and set it up as having
2316			 * external storage.
2317			 */
2318			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2319			if (m0 == NULL) {
2320				error = (mnw ? EAGAIN : ENOBUFS);
2321				sf_buf_mext(NULL, sf);
2322				break;
2323			}
2324			if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE,
2325			    sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF,
2326			    (mnw ? M_NOWAIT : M_WAITOK)) != 0) {
2327				error = (mnw ? EAGAIN : ENOBUFS);
2328				sf_buf_mext(NULL, sf);
2329				m_freem(m0);
2330				break;
2331			}
2332			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2333			m0->m_len = xfsize;
2334
2335			/* Append to mbuf chain. */
2336			if (mtail != NULL)
2337				mtail->m_next = m0;
2338			else if (m != NULL)
2339				m_last(m)->m_next = m0;
2340			else
2341				m = m0;
2342			mtail = m0;
2343
2344			/* Keep track of bits processed. */
2345			loopbytes += xfsize;
2346			off += xfsize;
2347
2348			if (sfs != NULL) {
2349				mtx_lock(&sfs->mtx);
2350				sfs->count++;
2351				mtx_unlock(&sfs->mtx);
2352			}
2353		}
2354
2355		VOP_UNLOCK(vp, 0);
2356
2357		/* Add the buffer chain to the socket buffer. */
2358		if (m != NULL) {
2359			int mlen, err;
2360
2361			mlen = m_length(m, NULL);
2362			SOCKBUF_LOCK(&so->so_snd);
2363			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2364				error = EPIPE;
2365				SOCKBUF_UNLOCK(&so->so_snd);
2366				goto done;
2367			}
2368			SOCKBUF_UNLOCK(&so->so_snd);
2369			CURVNET_SET(so->so_vnet);
2370			/* Avoid error aliasing. */
2371			err = (*so->so_proto->pr_usrreqs->pru_send)
2372				    (so, 0, m, NULL, NULL, td);
2373			CURVNET_RESTORE();
2374			if (err == 0) {
2375				/*
2376				 * We need two counters to get the
2377				 * file offset and nbytes to send
2378				 * right:
2379				 * - sbytes contains the total amount
2380				 *   of bytes sent, including headers.
2381				 * - fsbytes contains the total amount
2382				 *   of bytes sent from the file.
2383				 */
2384				sbytes += mlen;
2385				fsbytes += mlen;
2386				if (hdrlen) {
2387					fsbytes -= hdrlen;
2388					hdrlen = 0;
2389				}
2390			} else if (error == 0)
2391				error = err;
2392			m = NULL;	/* pru_send always consumes */
2393		}
2394
2395		/* Quit outer loop on error or when we're done. */
2396		if (done)
2397			break;
2398		if (error)
2399			goto done;
2400	}
2401
2402	/*
2403	 * Send trailers. Wimp out and use writev(2).
2404	 */
2405	if (trl_uio != NULL) {
2406		sbunlock(&so->so_snd);
2407		error = kern_writev(td, uap->s, trl_uio);
2408		if (error == 0)
2409			sbytes += td->td_retval[0];
2410		goto out;
2411	}
2412
2413done:
2414	sbunlock(&so->so_snd);
2415out:
2416	/*
2417	 * If there was no error we have to clear td->td_retval[0]
2418	 * because it may have been set by writev.
2419	 */
2420	if (error == 0) {
2421		td->td_retval[0] = 0;
2422	}
2423	if (uap->sbytes != NULL) {
2424		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2425	}
2426	if (obj != NULL)
2427		vm_object_deallocate(obj);
2428	if (vp != NULL)
2429		vrele(vp);
2430	if (so)
2431		fdrop(sock_fp, td);
2432	if (m)
2433		m_freem(m);
2434
2435	if (sfs != NULL) {
2436		mtx_lock(&sfs->mtx);
2437		if (sfs->count != 0)
2438			cv_wait(&sfs->cv, &sfs->mtx);
2439		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2440		cv_destroy(&sfs->cv);
2441		mtx_destroy(&sfs->mtx);
2442		free(sfs, M_TEMP);
2443	}
2444
2445	if (error == ERESTART)
2446		error = EINTR;
2447
2448	return (error);
2449}
2450
2451/*
2452 * SCTP syscalls.
2453 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2454 * otherwise all return EOPNOTSUPP.
2455 * XXX: We should make this loadable one day.
2456 */
2457int
2458sys_sctp_peeloff(td, uap)
2459	struct thread *td;
2460	struct sctp_peeloff_args /* {
2461		int	sd;
2462		caddr_t	name;
2463	} */ *uap;
2464{
2465#if (defined(INET) || defined(INET6)) && defined(SCTP)
2466	struct file *nfp = NULL;
2467	int error;
2468	struct socket *head, *so;
2469	int fd;
2470	u_int fflag;
2471
2472	AUDIT_ARG_FD(uap->sd);
2473	error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
2474	if (error)
2475		goto done2;
2476	if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
2477		error = EOPNOTSUPP;
2478		goto done;
2479	}
2480	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2481	if (error)
2482		goto done;
2483	/*
2484	 * At this point we know we do have a assoc to pull
2485	 * we proceed to get the fd setup. This may block
2486	 * but that is ok.
2487	 */
2488
2489	error = falloc(td, &nfp, &fd, 0);
2490	if (error)
2491		goto done;
2492	td->td_retval[0] = fd;
2493
2494	CURVNET_SET(head->so_vnet);
2495	so = sonewconn(head, SS_ISCONNECTED);
2496	if (so == NULL) {
2497		error = ENOMEM;
2498		goto noconnection;
2499	}
2500	/*
2501	 * Before changing the flags on the socket, we have to bump the
2502	 * reference count.  Otherwise, if the protocol calls sofree(),
2503	 * the socket will be released due to a zero refcount.
2504	 */
2505        SOCK_LOCK(so);
2506        soref(so);                      /* file descriptor reference */
2507        SOCK_UNLOCK(so);
2508
2509	ACCEPT_LOCK();
2510
2511	TAILQ_REMOVE(&head->so_comp, so, so_list);
2512	head->so_qlen--;
2513	so->so_state |= (head->so_state & SS_NBIO);
2514	so->so_state &= ~SS_NOFDREF;
2515	so->so_qstate &= ~SQ_COMP;
2516	so->so_head = NULL;
2517	ACCEPT_UNLOCK();
2518	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2519	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2520	if (error)
2521		goto noconnection;
2522	if (head->so_sigio != NULL)
2523		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2524
2525noconnection:
2526	/*
2527	 * close the new descriptor, assuming someone hasn't ripped it
2528	 * out from under us.
2529	 */
2530	if (error)
2531		fdclose(td->td_proc->p_fd, nfp, fd, td);
2532
2533	/*
2534	 * Release explicitly held references before returning.
2535	 */
2536	CURVNET_RESTORE();
2537done:
2538	if (nfp != NULL)
2539		fdrop(nfp, td);
2540	fputsock(head);
2541done2:
2542	return (error);
2543#else  /* SCTP */
2544	return (EOPNOTSUPP);
2545#endif /* SCTP */
2546}
2547
2548int
2549sys_sctp_generic_sendmsg (td, uap)
2550	struct thread *td;
2551	struct sctp_generic_sendmsg_args /* {
2552		int sd,
2553		caddr_t msg,
2554		int mlen,
2555		caddr_t to,
2556		__socklen_t tolen,
2557		struct sctp_sndrcvinfo *sinfo,
2558		int flags
2559	} */ *uap;
2560{
2561#if (defined(INET) || defined(INET6)) && defined(SCTP)
2562	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2563	struct socket *so;
2564	struct file *fp = NULL;
2565	int error = 0, len;
2566	struct sockaddr *to = NULL;
2567#ifdef KTRACE
2568	struct uio *ktruio = NULL;
2569#endif
2570	struct uio auio;
2571	struct iovec iov[1];
2572	cap_rights_t rights;
2573
2574	if (uap->sinfo) {
2575		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2576		if (error)
2577			return (error);
2578		u_sinfo = &sinfo;
2579	}
2580
2581	rights = CAP_SEND;
2582	if (uap->tolen) {
2583		error = getsockaddr(&to, uap->to, uap->tolen);
2584		if (error) {
2585			to = NULL;
2586			goto sctp_bad2;
2587		}
2588		rights |= CAP_CONNECT;
2589	}
2590
2591	AUDIT_ARG_FD(uap->sd);
2592	error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
2593	if (error)
2594		goto sctp_bad;
2595#ifdef KTRACE
2596	if (to && (KTRPOINT(td, KTR_STRUCT)))
2597		ktrsockaddr(to);
2598#endif
2599
2600	iov[0].iov_base = uap->msg;
2601	iov[0].iov_len = uap->mlen;
2602
2603	so = (struct socket *)fp->f_data;
2604	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2605		error = EOPNOTSUPP;
2606		goto sctp_bad;
2607	}
2608#ifdef MAC
2609	error = mac_socket_check_send(td->td_ucred, so);
2610	if (error)
2611		goto sctp_bad;
2612#endif /* MAC */
2613
2614	auio.uio_iov =  iov;
2615	auio.uio_iovcnt = 1;
2616	auio.uio_segflg = UIO_USERSPACE;
2617	auio.uio_rw = UIO_WRITE;
2618	auio.uio_td = td;
2619	auio.uio_offset = 0;			/* XXX */
2620	auio.uio_resid = 0;
2621	len = auio.uio_resid = uap->mlen;
2622	CURVNET_SET(so->so_vnet);
2623	error = sctp_lower_sosend(so, to, &auio,
2624		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2625		    uap->flags, u_sinfo, td);
2626	CURVNET_RESTORE();
2627	if (error) {
2628		if (auio.uio_resid != len && (error == ERESTART ||
2629		    error == EINTR || error == EWOULDBLOCK))
2630			error = 0;
2631		/* Generation of SIGPIPE can be controlled per socket. */
2632		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2633		    !(uap->flags & MSG_NOSIGNAL)) {
2634			PROC_LOCK(td->td_proc);
2635			tdsignal(td, SIGPIPE);
2636			PROC_UNLOCK(td->td_proc);
2637		}
2638	}
2639	if (error == 0)
2640		td->td_retval[0] = len - auio.uio_resid;
2641#ifdef KTRACE
2642	if (ktruio != NULL) {
2643		ktruio->uio_resid = td->td_retval[0];
2644		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2645	}
2646#endif /* KTRACE */
2647sctp_bad:
2648	if (fp)
2649		fdrop(fp, td);
2650sctp_bad2:
2651	if (to)
2652		free(to, M_SONAME);
2653	return (error);
2654#else  /* SCTP */
2655	return (EOPNOTSUPP);
2656#endif /* SCTP */
2657}
2658
2659int
2660sys_sctp_generic_sendmsg_iov(td, uap)
2661	struct thread *td;
2662	struct sctp_generic_sendmsg_iov_args /* {
2663		int sd,
2664		struct iovec *iov,
2665		int iovlen,
2666		caddr_t to,
2667		__socklen_t tolen,
2668		struct sctp_sndrcvinfo *sinfo,
2669		int flags
2670	} */ *uap;
2671{
2672#if (defined(INET) || defined(INET6)) && defined(SCTP)
2673	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2674	struct socket *so;
2675	struct file *fp = NULL;
2676	int error=0, i;
2677	ssize_t len;
2678	struct sockaddr *to = NULL;
2679#ifdef KTRACE
2680	struct uio *ktruio = NULL;
2681#endif
2682	struct uio auio;
2683	struct iovec *iov, *tiov;
2684	cap_rights_t rights;
2685
2686	if (uap->sinfo) {
2687		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2688		if (error)
2689			return (error);
2690		u_sinfo = &sinfo;
2691	}
2692	rights = CAP_SEND;
2693	if (uap->tolen) {
2694		error = getsockaddr(&to, uap->to, uap->tolen);
2695		if (error) {
2696			to = NULL;
2697			goto sctp_bad2;
2698		}
2699		rights |= CAP_CONNECT;
2700	}
2701
2702	AUDIT_ARG_FD(uap->sd);
2703	error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
2704	if (error)
2705		goto sctp_bad1;
2706
2707#ifdef COMPAT_FREEBSD32
2708	if (SV_CURPROC_FLAG(SV_ILP32))
2709		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
2710		    uap->iovlen, &iov, EMSGSIZE);
2711	else
2712#endif
2713		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2714	if (error)
2715		goto sctp_bad1;
2716#ifdef KTRACE
2717	if (to && (KTRPOINT(td, KTR_STRUCT)))
2718		ktrsockaddr(to);
2719#endif
2720
2721	so = (struct socket *)fp->f_data;
2722	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2723		error = EOPNOTSUPP;
2724		goto sctp_bad;
2725	}
2726#ifdef MAC
2727	error = mac_socket_check_send(td->td_ucred, so);
2728	if (error)
2729		goto sctp_bad;
2730#endif /* MAC */
2731
2732	auio.uio_iov = iov;
2733	auio.uio_iovcnt = uap->iovlen;
2734	auio.uio_segflg = UIO_USERSPACE;
2735	auio.uio_rw = UIO_WRITE;
2736	auio.uio_td = td;
2737	auio.uio_offset = 0;			/* XXX */
2738	auio.uio_resid = 0;
2739	tiov = iov;
2740	for (i = 0; i <uap->iovlen; i++, tiov++) {
2741		if ((auio.uio_resid += tiov->iov_len) < 0) {
2742			error = EINVAL;
2743			goto sctp_bad;
2744		}
2745	}
2746	len = auio.uio_resid;
2747	CURVNET_SET(so->so_vnet);
2748	error = sctp_lower_sosend(so, to, &auio,
2749		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2750		    uap->flags, u_sinfo, td);
2751	CURVNET_RESTORE();
2752	if (error) {
2753		if (auio.uio_resid != len && (error == ERESTART ||
2754		    error == EINTR || error == EWOULDBLOCK))
2755			error = 0;
2756		/* Generation of SIGPIPE can be controlled per socket */
2757		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2758		    !(uap->flags & MSG_NOSIGNAL)) {
2759			PROC_LOCK(td->td_proc);
2760			tdsignal(td, SIGPIPE);
2761			PROC_UNLOCK(td->td_proc);
2762		}
2763	}
2764	if (error == 0)
2765		td->td_retval[0] = len - auio.uio_resid;
2766#ifdef KTRACE
2767	if (ktruio != NULL) {
2768		ktruio->uio_resid = td->td_retval[0];
2769		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2770	}
2771#endif /* KTRACE */
2772sctp_bad:
2773	free(iov, M_IOV);
2774sctp_bad1:
2775	if (fp)
2776		fdrop(fp, td);
2777sctp_bad2:
2778	if (to)
2779		free(to, M_SONAME);
2780	return (error);
2781#else  /* SCTP */
2782	return (EOPNOTSUPP);
2783#endif /* SCTP */
2784}
2785
2786int
2787sys_sctp_generic_recvmsg(td, uap)
2788	struct thread *td;
2789	struct sctp_generic_recvmsg_args /* {
2790		int sd,
2791		struct iovec *iov,
2792		int iovlen,
2793		struct sockaddr *from,
2794		__socklen_t *fromlenaddr,
2795		struct sctp_sndrcvinfo *sinfo,
2796		int *msg_flags
2797	} */ *uap;
2798{
2799#if (defined(INET) || defined(INET6)) && defined(SCTP)
2800	uint8_t sockbufstore[256];
2801	struct uio auio;
2802	struct iovec *iov, *tiov;
2803	struct sctp_sndrcvinfo sinfo;
2804	struct socket *so;
2805	struct file *fp = NULL;
2806	struct sockaddr *fromsa;
2807	int fromlen;
2808	ssize_t len;
2809	int i, msg_flags;
2810	int error = 0;
2811#ifdef KTRACE
2812	struct uio *ktruio = NULL;
2813#endif
2814
2815	AUDIT_ARG_FD(uap->sd);
2816	error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL);
2817	if (error) {
2818		return (error);
2819	}
2820#ifdef COMPAT_FREEBSD32
2821	if (SV_CURPROC_FLAG(SV_ILP32))
2822		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
2823		    uap->iovlen, &iov, EMSGSIZE);
2824	else
2825#endif
2826		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2827	if (error)
2828		goto out1;
2829
2830	so = fp->f_data;
2831	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2832		error = EOPNOTSUPP;
2833		goto out;
2834	}
2835#ifdef MAC
2836	error = mac_socket_check_receive(td->td_ucred, so);
2837	if (error) {
2838		goto out;
2839	}
2840#endif /* MAC */
2841
2842	if (uap->fromlenaddr) {
2843		error = copyin(uap->fromlenaddr,
2844		    &fromlen, sizeof (fromlen));
2845		if (error) {
2846			goto out;
2847		}
2848	} else {
2849		fromlen = 0;
2850	}
2851	if (uap->msg_flags) {
2852		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2853		if (error) {
2854			goto out;
2855		}
2856	} else {
2857		msg_flags = 0;
2858	}
2859	auio.uio_iov = iov;
2860	auio.uio_iovcnt = uap->iovlen;
2861	auio.uio_segflg = UIO_USERSPACE;
2862	auio.uio_rw = UIO_READ;
2863	auio.uio_td = td;
2864	auio.uio_offset = 0;			/* XXX */
2865	auio.uio_resid = 0;
2866	tiov = iov;
2867	for (i = 0; i <uap->iovlen; i++, tiov++) {
2868		if ((auio.uio_resid += tiov->iov_len) < 0) {
2869			error = EINVAL;
2870			goto out;
2871		}
2872	}
2873	len = auio.uio_resid;
2874	fromsa = (struct sockaddr *)sockbufstore;
2875
2876#ifdef KTRACE
2877	if (KTRPOINT(td, KTR_GENIO))
2878		ktruio = cloneuio(&auio);
2879#endif /* KTRACE */
2880	memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
2881	CURVNET_SET(so->so_vnet);
2882	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2883		    fromsa, fromlen, &msg_flags,
2884		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2885	CURVNET_RESTORE();
2886	if (error) {
2887		if (auio.uio_resid != len && (error == ERESTART ||
2888		    error == EINTR || error == EWOULDBLOCK))
2889			error = 0;
2890	} else {
2891		if (uap->sinfo)
2892			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2893	}
2894#ifdef KTRACE
2895	if (ktruio != NULL) {
2896		ktruio->uio_resid = len - auio.uio_resid;
2897		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2898	}
2899#endif /* KTRACE */
2900	if (error)
2901		goto out;
2902	td->td_retval[0] = len - auio.uio_resid;
2903
2904	if (fromlen && uap->from) {
2905		len = fromlen;
2906		if (len <= 0 || fromsa == 0)
2907			len = 0;
2908		else {
2909			len = MIN(len, fromsa->sa_len);
2910			error = copyout(fromsa, uap->from, (size_t)len);
2911			if (error)
2912				goto out;
2913		}
2914		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2915		if (error) {
2916			goto out;
2917		}
2918	}
2919#ifdef KTRACE
2920	if (KTRPOINT(td, KTR_STRUCT))
2921		ktrsockaddr(fromsa);
2922#endif
2923	if (uap->msg_flags) {
2924		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2925		if (error) {
2926			goto out;
2927		}
2928	}
2929out:
2930	free(iov, M_IOV);
2931out1:
2932	if (fp)
2933		fdrop(fp, td);
2934
2935	return (error);
2936#else  /* SCTP */
2937	return (EOPNOTSUPP);
2938#endif /* SCTP */
2939}
2940