kern_sendfile.c revision 254842
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 254842 2013-08-25 10:57:09Z andre $");
37
38#include "opt_capsicum.h"
39#include "opt_inet.h"
40#include "opt_inet6.h"
41#include "opt_sctp.h"
42#include "opt_compat.h"
43#include "opt_ktrace.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/capability.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/mutex.h>
51#include <sys/sysproto.h>
52#include <sys/malloc.h>
53#include <sys/filedesc.h>
54#include <sys/event.h>
55#include <sys/proc.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/filio.h>
59#include <sys/jail.h>
60#include <sys/mount.h>
61#include <sys/mbuf.h>
62#include <sys/protosw.h>
63#include <sys/rwlock.h>
64#include <sys/sf_buf.h>
65#include <sys/sysent.h>
66#include <sys/socket.h>
67#include <sys/socketvar.h>
68#include <sys/signalvar.h>
69#include <sys/syscallsubr.h>
70#include <sys/sysctl.h>
71#include <sys/uio.h>
72#include <sys/vnode.h>
73#ifdef KTRACE
74#include <sys/ktrace.h>
75#endif
76#ifdef COMPAT_FREEBSD32
77#include <compat/freebsd32/freebsd32_util.h>
78#endif
79
80#include <net/vnet.h>
81
82#include <security/audit/audit.h>
83#include <security/mac/mac_framework.h>
84
85#include <vm/vm.h>
86#include <vm/vm_param.h>
87#include <vm/vm_object.h>
88#include <vm/vm_page.h>
89#include <vm/vm_pageout.h>
90#include <vm/vm_kern.h>
91#include <vm/vm_extern.h>
92
93#if defined(INET) || defined(INET6)
94#ifdef SCTP
95#include <netinet/sctp.h>
96#include <netinet/sctp_peeloff.h>
97#endif /* SCTP */
98#endif /* INET || INET6 */
99
/*
 * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
 * and SOCK_NONBLOCK.
 *
 * ACCEPT4_INHERIT: the accepted socket takes over the listener's SS_NBIO
 * state, its SIGIO owner and its file flags (see kern_accept4()).
 * ACCEPT4_COMPAT: accept1() rewrites the returned address through
 * struct osockaddr before copying it out (COMPAT_OLDSOCK only).
 */
#define	ACCEPT4_INHERIT	0x1
#define	ACCEPT4_COMPAT	0x2
106
/* Forward declarations of file-local helpers. */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

static int accept1(struct thread *td, int s, struct sockaddr *uname,
		   socklen_t *anamelen, int flags);
static int do_sendfile(struct thread *td, struct sendfile_args *uap,
		   int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
118
/*
 * sendfile(2) statistics: one counter per uint64_t-sized slot of
 * struct sfstat.  The counters are allocated by sfstat_init() and
 * exported (and optionally reset) by sfstat_sysctl().
 */
counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];

/*
 * sendfile(2)-related variables and associated sysctls
 */
int nsfbufs;		/* exported read-only/tunable as kern.ipc.nsfbufs */
int nsfbufspeak;	/* exported read-only as kern.ipc.nsfbufspeak */
int nsfbufsused;	/* exported read-only as kern.ipc.nsfbufsused */
static int sfreadahead = 1;	/* exported read-write as kern.ipc.sfreadahead */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
SYSCTL_INT(_kern_ipc, OID_AUTO, sfreadahead, CTLFLAG_RW, &sfreadahead, 0,
    "Number of sendfile(2) read-ahead MAXBSIZE blocks");
137
138
139static void
140sfstat_init(const void *unused)
141{
142
143	COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
144	    M_WAITOK);
145}
146SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
147
148static int
149sfstat_sysctl(SYSCTL_HANDLER_ARGS)
150{
151	struct sfstat s;
152
153	COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
154	if (req->newptr)
155		COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
156	return (SYSCTL_OUT(req, &s, sizeof(s)));
157}
158SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
159    NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
160
161/*
162 * Convert a user file descriptor to a kernel file entry and check if required
163 * capability rights are present.
164 * A reference on the file entry is held upon returning.
165 */
166static int
167getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
168    struct file **fpp, u_int *fflagp)
169{
170	struct file *fp;
171	int error;
172
173	error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL);
174	if (error != 0)
175		return (error);
176	if (fp->f_type != DTYPE_SOCKET) {
177		fdrop(fp, curthread);
178		return (ENOTSOCK);
179	}
180	if (fflagp != NULL)
181		*fflagp = fp->f_flag;
182	*fpp = fp;
183	return (0);
184}
185
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_43 kernels enable COMPAT_OLDSOCK, which guards the old-style
 * entry points in this file (oaccept(), osend(), osendmsg(), orecv(), ...)
 * and the MSG_COMPAT / struct osockaddr handling.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
192
193int
194sys_socket(td, uap)
195	struct thread *td;
196	struct socket_args /* {
197		int	domain;
198		int	type;
199		int	protocol;
200	} */ *uap;
201{
202	struct socket *so;
203	struct file *fp;
204	int fd, error, type, oflag, fflag;
205
206	AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
207
208	type = uap->type;
209	oflag = 0;
210	fflag = 0;
211	if ((type & SOCK_CLOEXEC) != 0) {
212		type &= ~SOCK_CLOEXEC;
213		oflag |= O_CLOEXEC;
214	}
215	if ((type & SOCK_NONBLOCK) != 0) {
216		type &= ~SOCK_NONBLOCK;
217		fflag |= FNONBLOCK;
218	}
219
220#ifdef MAC
221	error = mac_socket_check_create(td->td_ucred, uap->domain, type,
222	    uap->protocol);
223	if (error)
224		return (error);
225#endif
226	error = falloc(td, &fp, &fd, oflag);
227	if (error)
228		return (error);
229	/* An extra reference on `fp' has been held for us by falloc(). */
230	error = socreate(uap->domain, &so, type, uap->protocol,
231	    td->td_ucred, td);
232	if (error) {
233		fdclose(td->td_proc->p_fd, fp, fd, td);
234	} else {
235		finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
236		if ((fflag & FNONBLOCK) != 0)
237			(void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
238		td->td_retval[0] = fd;
239	}
240	fdrop(fp, td);
241	return (error);
242}
243
244/* ARGSUSED */
245int
246sys_bind(td, uap)
247	struct thread *td;
248	struct bind_args /* {
249		int	s;
250		caddr_t	name;
251		int	namelen;
252	} */ *uap;
253{
254	struct sockaddr *sa;
255	int error;
256
257	error = getsockaddr(&sa, uap->name, uap->namelen);
258	if (error == 0) {
259		error = kern_bind(td, uap->s, sa);
260		free(sa, M_SONAME);
261	}
262	return (error);
263}
264
265static int
266kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
267{
268	struct socket *so;
269	struct file *fp;
270	int error;
271
272	AUDIT_ARG_FD(fd);
273	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
274	error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
275	if (error)
276		return (error);
277	so = fp->f_data;
278#ifdef KTRACE
279	if (KTRPOINT(td, KTR_STRUCT))
280		ktrsockaddr(sa);
281#endif
282#ifdef MAC
283	error = mac_socket_check_bind(td->td_ucred, so, sa);
284	if (error == 0) {
285#endif
286		if (dirfd == AT_FDCWD)
287			error = sobind(so, sa, td);
288		else
289			error = sobindat(dirfd, so, sa, td);
290#ifdef MAC
291	}
292#endif
293	fdrop(fp, td);
294	return (error);
295}
296
297int
298kern_bind(struct thread *td, int fd, struct sockaddr *sa)
299{
300
301	return (kern_bindat(td, AT_FDCWD, fd, sa));
302}
303
304/* ARGSUSED */
305int
306sys_bindat(td, uap)
307	struct thread *td;
308	struct bindat_args /* {
309		int	fd;
310		int	s;
311		caddr_t	name;
312		int	namelen;
313	} */ *uap;
314{
315	struct sockaddr *sa;
316	int error;
317
318	error = getsockaddr(&sa, uap->name, uap->namelen);
319	if (error == 0) {
320		error = kern_bindat(td, uap->fd, uap->s, sa);
321		free(sa, M_SONAME);
322	}
323	return (error);
324}
325
326/* ARGSUSED */
327int
328sys_listen(td, uap)
329	struct thread *td;
330	struct listen_args /* {
331		int	s;
332		int	backlog;
333	} */ *uap;
334{
335	struct socket *so;
336	struct file *fp;
337	int error;
338
339	AUDIT_ARG_FD(uap->s);
340	error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
341	if (error == 0) {
342		so = fp->f_data;
343#ifdef MAC
344		error = mac_socket_check_listen(td->td_ucred, so);
345		if (error == 0)
346#endif
347			error = solisten(so, uap->backlog, td);
348		fdrop(fp, td);
349	}
350	return(error);
351}
352
353/*
354 * accept1()
355 */
356static int
357accept1(td, s, uname, anamelen, flags)
358	struct thread *td;
359	int s;
360	struct sockaddr *uname;
361	socklen_t *anamelen;
362	int flags;
363{
364	struct sockaddr *name;
365	socklen_t namelen;
366	struct file *fp;
367	int error;
368
369	if (uname == NULL)
370		return (kern_accept4(td, s, NULL, NULL, flags, NULL));
371
372	error = copyin(anamelen, &namelen, sizeof (namelen));
373	if (error)
374		return (error);
375
376	error = kern_accept4(td, s, &name, &namelen, flags, &fp);
377
378	/*
379	 * return a namelen of zero for older code which might
380	 * ignore the return value from accept.
381	 */
382	if (error) {
383		(void) copyout(&namelen, anamelen, sizeof(*anamelen));
384		return (error);
385	}
386
387	if (error == 0 && uname != NULL) {
388#ifdef COMPAT_OLDSOCK
389		if (flags & ACCEPT4_COMPAT)
390			((struct osockaddr *)name)->sa_family =
391			    name->sa_family;
392#endif
393		error = copyout(name, uname, namelen);
394	}
395	if (error == 0)
396		error = copyout(&namelen, anamelen,
397		    sizeof(namelen));
398	if (error)
399		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
400	fdrop(fp, td);
401	free(name, M_SONAME);
402	return (error);
403}
404
405int
406kern_accept(struct thread *td, int s, struct sockaddr **name,
407    socklen_t *namelen, struct file **fp)
408{
409	return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp));
410}
411
412int
413kern_accept4(struct thread *td, int s, struct sockaddr **name,
414    socklen_t *namelen, int flags, struct file **fp)
415{
416	struct filedesc *fdp;
417	struct file *headfp, *nfp = NULL;
418	struct sockaddr *sa = NULL;
419	int error;
420	struct socket *head, *so;
421	int fd;
422	u_int fflag;
423	pid_t pgid;
424	int tmp;
425
426	if (name)
427		*name = NULL;
428
429	AUDIT_ARG_FD(s);
430	fdp = td->td_proc->p_fd;
431	error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
432	if (error)
433		return (error);
434	head = headfp->f_data;
435	if ((head->so_options & SO_ACCEPTCONN) == 0) {
436		error = EINVAL;
437		goto done;
438	}
439#ifdef MAC
440	error = mac_socket_check_accept(td->td_ucred, head);
441	if (error != 0)
442		goto done;
443#endif
444	error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0);
445	if (error)
446		goto done;
447	ACCEPT_LOCK();
448	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
449		ACCEPT_UNLOCK();
450		error = EWOULDBLOCK;
451		goto noconnection;
452	}
453	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
454		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
455			head->so_error = ECONNABORTED;
456			break;
457		}
458		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
459		    "accept", 0);
460		if (error) {
461			ACCEPT_UNLOCK();
462			goto noconnection;
463		}
464	}
465	if (head->so_error) {
466		error = head->so_error;
467		head->so_error = 0;
468		ACCEPT_UNLOCK();
469		goto noconnection;
470	}
471	so = TAILQ_FIRST(&head->so_comp);
472	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
473	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
474
475	/*
476	 * Before changing the flags on the socket, we have to bump the
477	 * reference count.  Otherwise, if the protocol calls sofree(),
478	 * the socket will be released due to a zero refcount.
479	 */
480	SOCK_LOCK(so);			/* soref() and so_state update */
481	soref(so);			/* file descriptor reference */
482
483	TAILQ_REMOVE(&head->so_comp, so, so_list);
484	head->so_qlen--;
485	if (flags & ACCEPT4_INHERIT)
486		so->so_state |= (head->so_state & SS_NBIO);
487	else
488		so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
489	so->so_qstate &= ~SQ_COMP;
490	so->so_head = NULL;
491
492	SOCK_UNLOCK(so);
493	ACCEPT_UNLOCK();
494
495	/* An extra reference on `nfp' has been held for us by falloc(). */
496	td->td_retval[0] = fd;
497
498	/* connection has been removed from the listen queue */
499	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
500
501	if (flags & ACCEPT4_INHERIT) {
502		pgid = fgetown(&head->so_sigio);
503		if (pgid != 0)
504			fsetown(pgid, &so->so_sigio);
505	} else {
506		fflag &= ~(FNONBLOCK | FASYNC);
507		if (flags & SOCK_NONBLOCK)
508			fflag |= FNONBLOCK;
509	}
510
511	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
512	/* Sync socket nonblocking/async state with file flags */
513	tmp = fflag & FNONBLOCK;
514	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
515	tmp = fflag & FASYNC;
516	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
517	sa = 0;
518	error = soaccept(so, &sa);
519	if (error) {
520		/*
521		 * return a namelen of zero for older code which might
522		 * ignore the return value from accept.
523		 */
524		if (name)
525			*namelen = 0;
526		goto noconnection;
527	}
528	if (sa == NULL) {
529		if (name)
530			*namelen = 0;
531		goto done;
532	}
533	AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
534	if (name) {
535		/* check sa_len before it is destroyed */
536		if (*namelen > sa->sa_len)
537			*namelen = sa->sa_len;
538#ifdef KTRACE
539		if (KTRPOINT(td, KTR_STRUCT))
540			ktrsockaddr(sa);
541#endif
542		*name = sa;
543		sa = NULL;
544	}
545noconnection:
546	if (sa)
547		free(sa, M_SONAME);
548
549	/*
550	 * close the new descriptor, assuming someone hasn't ripped it
551	 * out from under us.
552	 */
553	if (error)
554		fdclose(fdp, nfp, fd, td);
555
556	/*
557	 * Release explicitly held references before returning.  We return
558	 * a reference on nfp to the caller on success if they request it.
559	 */
560done:
561	if (fp != NULL) {
562		if (error == 0) {
563			*fp = nfp;
564			nfp = NULL;
565		} else
566			*fp = NULL;
567	}
568	if (nfp != NULL)
569		fdrop(nfp, td);
570	fdrop(headfp, td);
571	return (error);
572}
573
574int
575sys_accept(td, uap)
576	struct thread *td;
577	struct accept_args *uap;
578{
579
580	return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
581}
582
583int
584sys_accept4(td, uap)
585	struct thread *td;
586	struct accept4_args *uap;
587{
588	if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
589		return (EINVAL);
590
591	return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
592}
593
594#ifdef COMPAT_OLDSOCK
595int
596oaccept(td, uap)
597	struct thread *td;
598	struct accept_args *uap;
599{
600
601	return (accept1(td, uap->s, uap->name, uap->anamelen,
602	    ACCEPT4_INHERIT | ACCEPT4_COMPAT));
603}
604#endif /* COMPAT_OLDSOCK */
605
606/* ARGSUSED */
607int
608sys_connect(td, uap)
609	struct thread *td;
610	struct connect_args /* {
611		int	s;
612		caddr_t	name;
613		int	namelen;
614	} */ *uap;
615{
616	struct sockaddr *sa;
617	int error;
618
619	error = getsockaddr(&sa, uap->name, uap->namelen);
620	if (error == 0) {
621		error = kern_connect(td, uap->s, sa);
622		free(sa, M_SONAME);
623	}
624	return (error);
625}
626
627static int
628kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
629{
630	struct socket *so;
631	struct file *fp;
632	int error;
633	int interrupted = 0;
634
635	AUDIT_ARG_FD(fd);
636	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
637	error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
638	if (error)
639		return (error);
640	so = fp->f_data;
641	if (so->so_state & SS_ISCONNECTING) {
642		error = EALREADY;
643		goto done1;
644	}
645#ifdef KTRACE
646	if (KTRPOINT(td, KTR_STRUCT))
647		ktrsockaddr(sa);
648#endif
649#ifdef MAC
650	error = mac_socket_check_connect(td->td_ucred, so, sa);
651	if (error)
652		goto bad;
653#endif
654	if (dirfd == AT_FDCWD)
655		error = soconnect(so, sa, td);
656	else
657		error = soconnectat(dirfd, so, sa, td);
658	if (error)
659		goto bad;
660	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
661		error = EINPROGRESS;
662		goto done1;
663	}
664	SOCK_LOCK(so);
665	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
666		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
667		    "connec", 0);
668		if (error) {
669			if (error == EINTR || error == ERESTART)
670				interrupted = 1;
671			break;
672		}
673	}
674	if (error == 0) {
675		error = so->so_error;
676		so->so_error = 0;
677	}
678	SOCK_UNLOCK(so);
679bad:
680	if (!interrupted)
681		so->so_state &= ~SS_ISCONNECTING;
682	if (error == ERESTART)
683		error = EINTR;
684done1:
685	fdrop(fp, td);
686	return (error);
687}
688
689int
690kern_connect(struct thread *td, int fd, struct sockaddr *sa)
691{
692
693	return (kern_connectat(td, AT_FDCWD, fd, sa));
694}
695
696/* ARGSUSED */
697int
698sys_connectat(td, uap)
699	struct thread *td;
700	struct connectat_args /* {
701		int	fd;
702		int	s;
703		caddr_t	name;
704		int	namelen;
705	} */ *uap;
706{
707	struct sockaddr *sa;
708	int error;
709
710	error = getsockaddr(&sa, uap->name, uap->namelen);
711	if (error == 0) {
712		error = kern_connectat(td, uap->fd, uap->s, sa);
713		free(sa, M_SONAME);
714	}
715	return (error);
716}
717
718int
719kern_socketpair(struct thread *td, int domain, int type, int protocol,
720    int *rsv)
721{
722	struct filedesc *fdp = td->td_proc->p_fd;
723	struct file *fp1, *fp2;
724	struct socket *so1, *so2;
725	int fd, error, oflag, fflag;
726
727	AUDIT_ARG_SOCKET(domain, type, protocol);
728
729	oflag = 0;
730	fflag = 0;
731	if ((type & SOCK_CLOEXEC) != 0) {
732		type &= ~SOCK_CLOEXEC;
733		oflag |= O_CLOEXEC;
734	}
735	if ((type & SOCK_NONBLOCK) != 0) {
736		type &= ~SOCK_NONBLOCK;
737		fflag |= FNONBLOCK;
738	}
739#ifdef MAC
740	/* We might want to have a separate check for socket pairs. */
741	error = mac_socket_check_create(td->td_ucred, domain, type,
742	    protocol);
743	if (error)
744		return (error);
745#endif
746	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
747	if (error)
748		return (error);
749	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
750	if (error)
751		goto free1;
752	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
753	error = falloc(td, &fp1, &fd, oflag);
754	if (error)
755		goto free2;
756	rsv[0] = fd;
757	fp1->f_data = so1;	/* so1 already has ref count */
758	error = falloc(td, &fp2, &fd, oflag);
759	if (error)
760		goto free3;
761	fp2->f_data = so2;	/* so2 already has ref count */
762	rsv[1] = fd;
763	error = soconnect2(so1, so2);
764	if (error)
765		goto free4;
766	if (type == SOCK_DGRAM) {
767		/*
768		 * Datagram socket connection is asymmetric.
769		 */
770		 error = soconnect2(so2, so1);
771		 if (error)
772			goto free4;
773	}
774	finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
775	    &socketops);
776	finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
777	    &socketops);
778	if ((fflag & FNONBLOCK) != 0) {
779		(void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
780		(void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
781	}
782	fdrop(fp1, td);
783	fdrop(fp2, td);
784	return (0);
785free4:
786	fdclose(fdp, fp2, rsv[1], td);
787	fdrop(fp2, td);
788free3:
789	fdclose(fdp, fp1, rsv[0], td);
790	fdrop(fp1, td);
791free2:
792	if (so2 != NULL)
793		(void)soclose(so2);
794free1:
795	if (so1 != NULL)
796		(void)soclose(so1);
797	return (error);
798}
799
800int
801sys_socketpair(struct thread *td, struct socketpair_args *uap)
802{
803	int error, sv[2];
804
805	error = kern_socketpair(td, uap->domain, uap->type,
806	    uap->protocol, sv);
807	if (error)
808		return (error);
809	error = copyout(sv, uap->rsv, 2 * sizeof(int));
810	if (error) {
811		(void)kern_close(td, sv[0]);
812		(void)kern_close(td, sv[1]);
813	}
814	return (error);
815}
816
817static int
818sendit(td, s, mp, flags)
819	struct thread *td;
820	int s;
821	struct msghdr *mp;
822	int flags;
823{
824	struct mbuf *control;
825	struct sockaddr *to;
826	int error;
827
828#ifdef CAPABILITY_MODE
829	if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
830		return (ECAPMODE);
831#endif
832
833	if (mp->msg_name != NULL) {
834		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
835		if (error) {
836			to = NULL;
837			goto bad;
838		}
839		mp->msg_name = to;
840	} else {
841		to = NULL;
842	}
843
844	if (mp->msg_control) {
845		if (mp->msg_controllen < sizeof(struct cmsghdr)
846#ifdef COMPAT_OLDSOCK
847		    && mp->msg_flags != MSG_COMPAT
848#endif
849		) {
850			error = EINVAL;
851			goto bad;
852		}
853		error = sockargs(&control, mp->msg_control,
854		    mp->msg_controllen, MT_CONTROL);
855		if (error)
856			goto bad;
857#ifdef COMPAT_OLDSOCK
858		if (mp->msg_flags == MSG_COMPAT) {
859			struct cmsghdr *cm;
860
861			M_PREPEND(control, sizeof(*cm), M_WAITOK);
862			cm = mtod(control, struct cmsghdr *);
863			cm->cmsg_len = control->m_len;
864			cm->cmsg_level = SOL_SOCKET;
865			cm->cmsg_type = SCM_RIGHTS;
866		}
867#endif
868	} else {
869		control = NULL;
870	}
871
872	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
873
874bad:
875	if (to)
876		free(to, M_SONAME);
877	return (error);
878}
879
880int
881kern_sendit(td, s, mp, flags, control, segflg)
882	struct thread *td;
883	int s;
884	struct msghdr *mp;
885	int flags;
886	struct mbuf *control;
887	enum uio_seg segflg;
888{
889	struct file *fp;
890	struct uio auio;
891	struct iovec *iov;
892	struct socket *so;
893	int i, error;
894	ssize_t len;
895	cap_rights_t rights;
896#ifdef KTRACE
897	struct uio *ktruio = NULL;
898#endif
899
900	AUDIT_ARG_FD(s);
901	rights = CAP_SEND;
902	if (mp->msg_name != NULL) {
903		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
904		rights |= CAP_CONNECT;
905	}
906	error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
907	if (error)
908		return (error);
909	so = (struct socket *)fp->f_data;
910
911#ifdef KTRACE
912	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
913		ktrsockaddr(mp->msg_name);
914#endif
915#ifdef MAC
916	if (mp->msg_name != NULL) {
917		error = mac_socket_check_connect(td->td_ucred, so,
918		    mp->msg_name);
919		if (error)
920			goto bad;
921	}
922	error = mac_socket_check_send(td->td_ucred, so);
923	if (error)
924		goto bad;
925#endif
926
927	auio.uio_iov = mp->msg_iov;
928	auio.uio_iovcnt = mp->msg_iovlen;
929	auio.uio_segflg = segflg;
930	auio.uio_rw = UIO_WRITE;
931	auio.uio_td = td;
932	auio.uio_offset = 0;			/* XXX */
933	auio.uio_resid = 0;
934	iov = mp->msg_iov;
935	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
936		if ((auio.uio_resid += iov->iov_len) < 0) {
937			error = EINVAL;
938			goto bad;
939		}
940	}
941#ifdef KTRACE
942	if (KTRPOINT(td, KTR_GENIO))
943		ktruio = cloneuio(&auio);
944#endif
945	len = auio.uio_resid;
946	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
947	if (error) {
948		if (auio.uio_resid != len && (error == ERESTART ||
949		    error == EINTR || error == EWOULDBLOCK))
950			error = 0;
951		/* Generation of SIGPIPE can be controlled per socket */
952		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
953		    !(flags & MSG_NOSIGNAL)) {
954			PROC_LOCK(td->td_proc);
955			tdsignal(td, SIGPIPE);
956			PROC_UNLOCK(td->td_proc);
957		}
958	}
959	if (error == 0)
960		td->td_retval[0] = len - auio.uio_resid;
961#ifdef KTRACE
962	if (ktruio != NULL) {
963		ktruio->uio_resid = td->td_retval[0];
964		ktrgenio(s, UIO_WRITE, ktruio, error);
965	}
966#endif
967bad:
968	fdrop(fp, td);
969	return (error);
970}
971
972int
973sys_sendto(td, uap)
974	struct thread *td;
975	struct sendto_args /* {
976		int	s;
977		caddr_t	buf;
978		size_t	len;
979		int	flags;
980		caddr_t	to;
981		int	tolen;
982	} */ *uap;
983{
984	struct msghdr msg;
985	struct iovec aiov;
986	int error;
987
988	msg.msg_name = uap->to;
989	msg.msg_namelen = uap->tolen;
990	msg.msg_iov = &aiov;
991	msg.msg_iovlen = 1;
992	msg.msg_control = 0;
993#ifdef COMPAT_OLDSOCK
994	msg.msg_flags = 0;
995#endif
996	aiov.iov_base = uap->buf;
997	aiov.iov_len = uap->len;
998	error = sendit(td, uap->s, &msg, uap->flags);
999	return (error);
1000}
1001
1002#ifdef COMPAT_OLDSOCK
1003int
1004osend(td, uap)
1005	struct thread *td;
1006	struct osend_args /* {
1007		int	s;
1008		caddr_t	buf;
1009		int	len;
1010		int	flags;
1011	} */ *uap;
1012{
1013	struct msghdr msg;
1014	struct iovec aiov;
1015	int error;
1016
1017	msg.msg_name = 0;
1018	msg.msg_namelen = 0;
1019	msg.msg_iov = &aiov;
1020	msg.msg_iovlen = 1;
1021	aiov.iov_base = uap->buf;
1022	aiov.iov_len = uap->len;
1023	msg.msg_control = 0;
1024	msg.msg_flags = 0;
1025	error = sendit(td, uap->s, &msg, uap->flags);
1026	return (error);
1027}
1028
1029int
1030osendmsg(td, uap)
1031	struct thread *td;
1032	struct osendmsg_args /* {
1033		int	s;
1034		caddr_t	msg;
1035		int	flags;
1036	} */ *uap;
1037{
1038	struct msghdr msg;
1039	struct iovec *iov;
1040	int error;
1041
1042	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1043	if (error)
1044		return (error);
1045	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1046	if (error)
1047		return (error);
1048	msg.msg_iov = iov;
1049	msg.msg_flags = MSG_COMPAT;
1050	error = sendit(td, uap->s, &msg, uap->flags);
1051	free(iov, M_IOV);
1052	return (error);
1053}
1054#endif
1055
1056int
1057sys_sendmsg(td, uap)
1058	struct thread *td;
1059	struct sendmsg_args /* {
1060		int	s;
1061		caddr_t	msg;
1062		int	flags;
1063	} */ *uap;
1064{
1065	struct msghdr msg;
1066	struct iovec *iov;
1067	int error;
1068
1069	error = copyin(uap->msg, &msg, sizeof (msg));
1070	if (error)
1071		return (error);
1072	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1073	if (error)
1074		return (error);
1075	msg.msg_iov = iov;
1076#ifdef COMPAT_OLDSOCK
1077	msg.msg_flags = 0;
1078#endif
1079	error = sendit(td, uap->s, &msg, uap->flags);
1080	free(iov, M_IOV);
1081	return (error);
1082}
1083
1084int
1085kern_recvit(td, s, mp, fromseg, controlp)
1086	struct thread *td;
1087	int s;
1088	struct msghdr *mp;
1089	enum uio_seg fromseg;
1090	struct mbuf **controlp;
1091{
1092	struct uio auio;
1093	struct iovec *iov;
1094	int i;
1095	ssize_t len;
1096	int error;
1097	struct mbuf *m, *control = NULL;
1098	caddr_t ctlbuf;
1099	struct file *fp;
1100	struct socket *so;
1101	struct sockaddr *fromsa = NULL;
1102#ifdef KTRACE
1103	struct uio *ktruio = NULL;
1104#endif
1105
1106	if (controlp != NULL)
1107		*controlp = NULL;
1108
1109	AUDIT_ARG_FD(s);
1110	error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL);
1111	if (error)
1112		return (error);
1113	so = fp->f_data;
1114
1115#ifdef MAC
1116	error = mac_socket_check_receive(td->td_ucred, so);
1117	if (error) {
1118		fdrop(fp, td);
1119		return (error);
1120	}
1121#endif
1122
1123	auio.uio_iov = mp->msg_iov;
1124	auio.uio_iovcnt = mp->msg_iovlen;
1125	auio.uio_segflg = UIO_USERSPACE;
1126	auio.uio_rw = UIO_READ;
1127	auio.uio_td = td;
1128	auio.uio_offset = 0;			/* XXX */
1129	auio.uio_resid = 0;
1130	iov = mp->msg_iov;
1131	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
1132		if ((auio.uio_resid += iov->iov_len) < 0) {
1133			fdrop(fp, td);
1134			return (EINVAL);
1135		}
1136	}
1137#ifdef KTRACE
1138	if (KTRPOINT(td, KTR_GENIO))
1139		ktruio = cloneuio(&auio);
1140#endif
1141	len = auio.uio_resid;
1142	error = soreceive(so, &fromsa, &auio, NULL,
1143	    (mp->msg_control || controlp) ? &control : NULL,
1144	    &mp->msg_flags);
1145	if (error) {
1146		if (auio.uio_resid != len && (error == ERESTART ||
1147		    error == EINTR || error == EWOULDBLOCK))
1148			error = 0;
1149	}
1150	if (fromsa != NULL)
1151		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
1152#ifdef KTRACE
1153	if (ktruio != NULL) {
1154		ktruio->uio_resid = len - auio.uio_resid;
1155		ktrgenio(s, UIO_READ, ktruio, error);
1156	}
1157#endif
1158	if (error)
1159		goto out;
1160	td->td_retval[0] = len - auio.uio_resid;
1161	if (mp->msg_name) {
1162		len = mp->msg_namelen;
1163		if (len <= 0 || fromsa == NULL)
1164			len = 0;
1165		else {
1166			/* save sa_len before it is destroyed by MSG_COMPAT */
1167			len = MIN(len, fromsa->sa_len);
1168#ifdef COMPAT_OLDSOCK
1169			if (mp->msg_flags & MSG_COMPAT)
1170				((struct osockaddr *)fromsa)->sa_family =
1171				    fromsa->sa_family;
1172#endif
1173			if (fromseg == UIO_USERSPACE) {
1174				error = copyout(fromsa, mp->msg_name,
1175				    (unsigned)len);
1176				if (error)
1177					goto out;
1178			} else
1179				bcopy(fromsa, mp->msg_name, len);
1180		}
1181		mp->msg_namelen = len;
1182	}
1183	if (mp->msg_control && controlp == NULL) {
1184#ifdef COMPAT_OLDSOCK
1185		/*
1186		 * We assume that old recvmsg calls won't receive access
1187		 * rights and other control info, esp. as control info
1188		 * is always optional and those options didn't exist in 4.3.
1189		 * If we receive rights, trim the cmsghdr; anything else
1190		 * is tossed.
1191		 */
1192		if (control && mp->msg_flags & MSG_COMPAT) {
1193			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1194			    SOL_SOCKET ||
1195			    mtod(control, struct cmsghdr *)->cmsg_type !=
1196			    SCM_RIGHTS) {
1197				mp->msg_controllen = 0;
1198				goto out;
1199			}
1200			control->m_len -= sizeof (struct cmsghdr);
1201			control->m_data += sizeof (struct cmsghdr);
1202		}
1203#endif
1204		len = mp->msg_controllen;
1205		m = control;
1206		mp->msg_controllen = 0;
1207		ctlbuf = mp->msg_control;
1208
1209		while (m && len > 0) {
1210			unsigned int tocopy;
1211
1212			if (len >= m->m_len)
1213				tocopy = m->m_len;
1214			else {
1215				mp->msg_flags |= MSG_CTRUNC;
1216				tocopy = len;
1217			}
1218
1219			if ((error = copyout(mtod(m, caddr_t),
1220					ctlbuf, tocopy)) != 0)
1221				goto out;
1222
1223			ctlbuf += tocopy;
1224			len -= tocopy;
1225			m = m->m_next;
1226		}
1227		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1228	}
1229out:
1230	fdrop(fp, td);
1231#ifdef KTRACE
1232	if (fromsa && KTRPOINT(td, KTR_STRUCT))
1233		ktrsockaddr(fromsa);
1234#endif
1235	if (fromsa)
1236		free(fromsa, M_SONAME);
1237
1238	if (error == 0 && controlp != NULL)
1239		*controlp = control;
1240	else  if (control)
1241		m_freem(control);
1242
1243	return (error);
1244}
1245
1246static int
1247recvit(td, s, mp, namelenp)
1248	struct thread *td;
1249	int s;
1250	struct msghdr *mp;
1251	void *namelenp;
1252{
1253	int error;
1254
1255	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1256	if (error)
1257		return (error);
1258	if (namelenp) {
1259		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1260#ifdef COMPAT_OLDSOCK
1261		if (mp->msg_flags & MSG_COMPAT)
1262			error = 0;	/* old recvfrom didn't check */
1263#endif
1264	}
1265	return (error);
1266}
1267
1268int
1269sys_recvfrom(td, uap)
1270	struct thread *td;
1271	struct recvfrom_args /* {
1272		int	s;
1273		caddr_t	buf;
1274		size_t	len;
1275		int	flags;
1276		struct sockaddr * __restrict	from;
1277		socklen_t * __restrict fromlenaddr;
1278	} */ *uap;
1279{
1280	struct msghdr msg;
1281	struct iovec aiov;
1282	int error;
1283
1284	if (uap->fromlenaddr) {
1285		error = copyin(uap->fromlenaddr,
1286		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1287		if (error)
1288			goto done2;
1289	} else {
1290		msg.msg_namelen = 0;
1291	}
1292	msg.msg_name = uap->from;
1293	msg.msg_iov = &aiov;
1294	msg.msg_iovlen = 1;
1295	aiov.iov_base = uap->buf;
1296	aiov.iov_len = uap->len;
1297	msg.msg_control = 0;
1298	msg.msg_flags = uap->flags;
1299	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1300done2:
1301	return(error);
1302}
1303
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom: identical to sys_recvfrom() except that MSG_COMPAT is
 * forced on in the caller's flags first.
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{
	int error;

	uap->flags |= MSG_COMPAT;
	error = sys_recvfrom(td, uap);
	return (error);
}
#endif
1315
1316#ifdef COMPAT_OLDSOCK
1317int
1318orecv(td, uap)
1319	struct thread *td;
1320	struct orecv_args /* {
1321		int	s;
1322		caddr_t	buf;
1323		int	len;
1324		int	flags;
1325	} */ *uap;
1326{
1327	struct msghdr msg;
1328	struct iovec aiov;
1329	int error;
1330
1331	msg.msg_name = 0;
1332	msg.msg_namelen = 0;
1333	msg.msg_iov = &aiov;
1334	msg.msg_iovlen = 1;
1335	aiov.iov_base = uap->buf;
1336	aiov.iov_len = uap->len;
1337	msg.msg_control = 0;
1338	msg.msg_flags = uap->flags;
1339	error = recvit(td, uap->s, &msg, NULL);
1340	return (error);
1341}
1342
1343/*
1344 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1345 * overlays the new one, missing only the flags, and with the (old) access
1346 * rights where the control fields are now.
1347 */
1348int
1349orecvmsg(td, uap)
1350	struct thread *td;
1351	struct orecvmsg_args /* {
1352		int	s;
1353		struct	omsghdr *msg;
1354		int	flags;
1355	} */ *uap;
1356{
1357	struct msghdr msg;
1358	struct iovec *iov;
1359	int error;
1360
1361	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1362	if (error)
1363		return (error);
1364	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1365	if (error)
1366		return (error);
1367	msg.msg_flags = uap->flags | MSG_COMPAT;
1368	msg.msg_iov = iov;
1369	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1370	if (msg.msg_controllen && error == 0)
1371		error = copyout(&msg.msg_controllen,
1372		    &uap->msg->msg_accrightslen, sizeof (int));
1373	free(iov, M_IOV);
1374	return (error);
1375}
1376#endif
1377
1378int
1379sys_recvmsg(td, uap)
1380	struct thread *td;
1381	struct recvmsg_args /* {
1382		int	s;
1383		struct	msghdr *msg;
1384		int	flags;
1385	} */ *uap;
1386{
1387	struct msghdr msg;
1388	struct iovec *uiov, *iov;
1389	int error;
1390
1391	error = copyin(uap->msg, &msg, sizeof (msg));
1392	if (error)
1393		return (error);
1394	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1395	if (error)
1396		return (error);
1397	msg.msg_flags = uap->flags;
1398#ifdef COMPAT_OLDSOCK
1399	msg.msg_flags &= ~MSG_COMPAT;
1400#endif
1401	uiov = msg.msg_iov;
1402	msg.msg_iov = iov;
1403	error = recvit(td, uap->s, &msg, NULL);
1404	if (error == 0) {
1405		msg.msg_iov = uiov;
1406		error = copyout(&msg, uap->msg, sizeof(msg));
1407	}
1408	free(iov, M_IOV);
1409	return (error);
1410}
1411
1412/* ARGSUSED */
1413int
1414sys_shutdown(td, uap)
1415	struct thread *td;
1416	struct shutdown_args /* {
1417		int	s;
1418		int	how;
1419	} */ *uap;
1420{
1421	struct socket *so;
1422	struct file *fp;
1423	int error;
1424
1425	AUDIT_ARG_FD(uap->s);
1426	error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
1427	    NULL);
1428	if (error == 0) {
1429		so = fp->f_data;
1430		error = soshutdown(so, uap->how);
1431		fdrop(fp, td);
1432	}
1433	return (error);
1434}
1435
1436/* ARGSUSED */
1437int
1438sys_setsockopt(td, uap)
1439	struct thread *td;
1440	struct setsockopt_args /* {
1441		int	s;
1442		int	level;
1443		int	name;
1444		caddr_t	val;
1445		int	valsize;
1446	} */ *uap;
1447{
1448
1449	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1450	    uap->val, UIO_USERSPACE, uap->valsize));
1451}
1452
1453int
1454kern_setsockopt(td, s, level, name, val, valseg, valsize)
1455	struct thread *td;
1456	int s;
1457	int level;
1458	int name;
1459	void *val;
1460	enum uio_seg valseg;
1461	socklen_t valsize;
1462{
1463	int error;
1464	struct socket *so;
1465	struct file *fp;
1466	struct sockopt sopt;
1467
1468	if (val == NULL && valsize != 0)
1469		return (EFAULT);
1470	if ((int)valsize < 0)
1471		return (EINVAL);
1472
1473	sopt.sopt_dir = SOPT_SET;
1474	sopt.sopt_level = level;
1475	sopt.sopt_name = name;
1476	sopt.sopt_val = val;
1477	sopt.sopt_valsize = valsize;
1478	switch (valseg) {
1479	case UIO_USERSPACE:
1480		sopt.sopt_td = td;
1481		break;
1482	case UIO_SYSSPACE:
1483		sopt.sopt_td = NULL;
1484		break;
1485	default:
1486		panic("kern_setsockopt called with bad valseg");
1487	}
1488
1489	AUDIT_ARG_FD(s);
1490	error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
1491	if (error == 0) {
1492		so = fp->f_data;
1493		error = sosetopt(so, &sopt);
1494		fdrop(fp, td);
1495	}
1496	return(error);
1497}
1498
1499/* ARGSUSED */
1500int
1501sys_getsockopt(td, uap)
1502	struct thread *td;
1503	struct getsockopt_args /* {
1504		int	s;
1505		int	level;
1506		int	name;
1507		void * __restrict	val;
1508		socklen_t * __restrict avalsize;
1509	} */ *uap;
1510{
1511	socklen_t valsize;
1512	int	error;
1513
1514	if (uap->val) {
1515		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1516		if (error)
1517			return (error);
1518	}
1519
1520	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1521	    uap->val, UIO_USERSPACE, &valsize);
1522
1523	if (error == 0)
1524		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1525	return (error);
1526}
1527
1528/*
1529 * Kernel version of getsockopt.
1530 * optval can be a userland or userspace. optlen is always a kernel pointer.
1531 */
1532int
1533kern_getsockopt(td, s, level, name, val, valseg, valsize)
1534	struct thread *td;
1535	int s;
1536	int level;
1537	int name;
1538	void *val;
1539	enum uio_seg valseg;
1540	socklen_t *valsize;
1541{
1542	int error;
1543	struct  socket *so;
1544	struct file *fp;
1545	struct	sockopt sopt;
1546
1547	if (val == NULL)
1548		*valsize = 0;
1549	if ((int)*valsize < 0)
1550		return (EINVAL);
1551
1552	sopt.sopt_dir = SOPT_GET;
1553	sopt.sopt_level = level;
1554	sopt.sopt_name = name;
1555	sopt.sopt_val = val;
1556	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1557	switch (valseg) {
1558	case UIO_USERSPACE:
1559		sopt.sopt_td = td;
1560		break;
1561	case UIO_SYSSPACE:
1562		sopt.sopt_td = NULL;
1563		break;
1564	default:
1565		panic("kern_getsockopt called with bad valseg");
1566	}
1567
1568	AUDIT_ARG_FD(s);
1569	error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
1570	if (error == 0) {
1571		so = fp->f_data;
1572		error = sogetopt(so, &sopt);
1573		*valsize = sopt.sopt_valsize;
1574		fdrop(fp, td);
1575	}
1576	return (error);
1577}
1578
1579/*
1580 * getsockname1() - Get socket name.
1581 */
1582/* ARGSUSED */
1583static int
1584getsockname1(td, uap, compat)
1585	struct thread *td;
1586	struct getsockname_args /* {
1587		int	fdes;
1588		struct sockaddr * __restrict asa;
1589		socklen_t * __restrict alen;
1590	} */ *uap;
1591	int compat;
1592{
1593	struct sockaddr *sa;
1594	socklen_t len;
1595	int error;
1596
1597	error = copyin(uap->alen, &len, sizeof(len));
1598	if (error)
1599		return (error);
1600
1601	error = kern_getsockname(td, uap->fdes, &sa, &len);
1602	if (error)
1603		return (error);
1604
1605	if (len != 0) {
1606#ifdef COMPAT_OLDSOCK
1607		if (compat)
1608			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1609#endif
1610		error = copyout(sa, uap->asa, (u_int)len);
1611	}
1612	free(sa, M_SONAME);
1613	if (error == 0)
1614		error = copyout(&len, uap->alen, sizeof(len));
1615	return (error);
1616}
1617
1618int
1619kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1620    socklen_t *alen)
1621{
1622	struct socket *so;
1623	struct file *fp;
1624	socklen_t len;
1625	int error;
1626
1627	AUDIT_ARG_FD(fd);
1628	error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
1629	if (error)
1630		return (error);
1631	so = fp->f_data;
1632	*sa = NULL;
1633	CURVNET_SET(so->so_vnet);
1634	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1635	CURVNET_RESTORE();
1636	if (error)
1637		goto bad;
1638	if (*sa == NULL)
1639		len = 0;
1640	else
1641		len = MIN(*alen, (*sa)->sa_len);
1642	*alen = len;
1643#ifdef KTRACE
1644	if (KTRPOINT(td, KTR_STRUCT))
1645		ktrsockaddr(*sa);
1646#endif
1647bad:
1648	fdrop(fp, td);
1649	if (error && *sa) {
1650		free(*sa, M_SONAME);
1651		*sa = NULL;
1652	}
1653	return (error);
1654}
1655
/*
 * getsockname(2) entry point: getsockname1() without the old-socket
 * compatibility handling.
 */
int
sys_getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1664
#ifdef COMPAT_OLDSOCK
/*
 * Old getsockname: getsockname1() with the old-socket compatibility
 * handling enabled.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1675
1676/*
1677 * getpeername1() - Get name of peer for connected socket.
1678 */
1679/* ARGSUSED */
1680static int
1681getpeername1(td, uap, compat)
1682	struct thread *td;
1683	struct getpeername_args /* {
1684		int	fdes;
1685		struct sockaddr * __restrict	asa;
1686		socklen_t * __restrict	alen;
1687	} */ *uap;
1688	int compat;
1689{
1690	struct sockaddr *sa;
1691	socklen_t len;
1692	int error;
1693
1694	error = copyin(uap->alen, &len, sizeof (len));
1695	if (error)
1696		return (error);
1697
1698	error = kern_getpeername(td, uap->fdes, &sa, &len);
1699	if (error)
1700		return (error);
1701
1702	if (len != 0) {
1703#ifdef COMPAT_OLDSOCK
1704		if (compat)
1705			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1706#endif
1707		error = copyout(sa, uap->asa, (u_int)len);
1708	}
1709	free(sa, M_SONAME);
1710	if (error == 0)
1711		error = copyout(&len, uap->alen, sizeof(len));
1712	return (error);
1713}
1714
1715int
1716kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1717    socklen_t *alen)
1718{
1719	struct socket *so;
1720	struct file *fp;
1721	socklen_t len;
1722	int error;
1723
1724	AUDIT_ARG_FD(fd);
1725	error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
1726	if (error)
1727		return (error);
1728	so = fp->f_data;
1729	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1730		error = ENOTCONN;
1731		goto done;
1732	}
1733	*sa = NULL;
1734	CURVNET_SET(so->so_vnet);
1735	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1736	CURVNET_RESTORE();
1737	if (error)
1738		goto bad;
1739	if (*sa == NULL)
1740		len = 0;
1741	else
1742		len = MIN(*alen, (*sa)->sa_len);
1743	*alen = len;
1744#ifdef KTRACE
1745	if (KTRPOINT(td, KTR_STRUCT))
1746		ktrsockaddr(*sa);
1747#endif
1748bad:
1749	if (error && *sa) {
1750		free(*sa, M_SONAME);
1751		*sa = NULL;
1752	}
1753done:
1754	fdrop(fp, td);
1755	return (error);
1756}
1757
/*
 * getpeername(2) entry point: getpeername1() without the old-socket
 * compatibility handling.
 */
int
sys_getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1766
#ifdef COMPAT_OLDSOCK
/*
 * Old getpeername: getpeername1() with the old-socket compatibility
 * handling enabled.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1778
1779int
1780sockargs(mp, buf, buflen, type)
1781	struct mbuf **mp;
1782	caddr_t buf;
1783	int buflen, type;
1784{
1785	struct sockaddr *sa;
1786	struct mbuf *m;
1787	int error;
1788
1789	if (buflen > MLEN) {
1790#ifdef COMPAT_OLDSOCK
1791		if (type == MT_SONAME && buflen <= 112)
1792			buflen = MLEN;		/* unix domain compat. hack */
1793		else
1794#endif
1795			if (buflen > MCLBYTES)
1796				return (EINVAL);
1797	}
1798	m = m_get2(buflen, M_WAITOK, type, 0);
1799	m->m_len = buflen;
1800	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1801	if (error)
1802		(void) m_free(m);
1803	else {
1804		*mp = m;
1805		if (type == MT_SONAME) {
1806			sa = mtod(m, struct sockaddr *);
1807
1808#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1809			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1810				sa->sa_family = sa->sa_len;
1811#endif
1812			sa->sa_len = buflen;
1813		}
1814	}
1815	return (error);
1816}
1817
1818int
1819getsockaddr(namp, uaddr, len)
1820	struct sockaddr **namp;
1821	caddr_t uaddr;
1822	size_t len;
1823{
1824	struct sockaddr *sa;
1825	int error;
1826
1827	if (len > SOCK_MAXADDRLEN)
1828		return (ENAMETOOLONG);
1829	if (len < offsetof(struct sockaddr, sa_data[0]))
1830		return (EINVAL);
1831	sa = malloc(len, M_SONAME, M_WAITOK);
1832	error = copyin(uaddr, sa, len);
1833	if (error) {
1834		free(sa, M_SONAME);
1835	} else {
1836#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1837		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1838			sa->sa_family = sa->sa_len;
1839#endif
1840		sa->sa_len = len;
1841		*namp = sa;
1842	}
1843	return (error);
1844}
1845
1846#include <sys/condvar.h>
1847
/*
 * Bookkeeping for sendfile(2) with SF_SYNC: vn_sendfile() bumps count for
 * every sf_buf-backed mbuf it queues, sf_buf_mext() drops it when such an
 * mbuf is freed, and vn_sendfile() sleeps on cv until count is back to 0.
 */
struct sendfile_sync {
	struct mtx	mtx;	/* held around every access to count */
	struct cv	cv;	/* signalled when count drops to 0 */
	unsigned	count;	/* sf_buf mbufs not yet freed */
};
1853
1854/*
1855 * Detach mapped page and release resources back to the system.
1856 */
1857int
1858sf_buf_mext(struct mbuf *mb, void *addr, void *args)
1859{
1860	vm_page_t m;
1861	struct sendfile_sync *sfs;
1862
1863	m = sf_buf_page(args);
1864	sf_buf_free(args);
1865	vm_page_lock(m);
1866	vm_page_unwire(m, 0);
1867	/*
1868	 * Check for the object going away on us. This can
1869	 * happen since we don't hold a reference to it.
1870	 * If so, we're responsible for freeing the page.
1871	 */
1872	if (m->wire_count == 0 && m->object == NULL)
1873		vm_page_free(m);
1874	vm_page_unlock(m);
1875	if (addr == NULL)
1876		return (EXT_FREE_OK);
1877	sfs = addr;
1878	mtx_lock(&sfs->mtx);
1879	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1880	if (--sfs->count == 0)
1881		cv_signal(&sfs->cv);
1882	mtx_unlock(&sfs->mtx);
1883	return (EXT_FREE_OK);
1884}
1885
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file referenced by 'fd', beginning at 'offset', over the
 * socket referenced by 's'.  'nbytes' limits how much of the file is sent;
 * a value of 0 means send up to EOF.  A header and/or trailer may be
 * supplied through 'hdtr'.  When 'sbytes' is given, the total number of
 * bytes sent is stored in *sbytes.
 */
int
sys_sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	/* Current ABI: no FreeBSD 4 compatibility semantics. */
	error = do_sendfile(td, uap, 0);
	return (error);
}
1903
1904static int
1905do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1906{
1907	struct sf_hdtr hdtr;
1908	struct uio *hdr_uio, *trl_uio;
1909	struct file *fp;
1910	int error;
1911
1912	if (uap->offset < 0)
1913		return (EINVAL);
1914
1915	hdr_uio = trl_uio = NULL;
1916
1917	if (uap->hdtr != NULL) {
1918		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1919		if (error)
1920			goto out;
1921		if (hdtr.headers != NULL) {
1922			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1923			if (error)
1924				goto out;
1925		}
1926		if (hdtr.trailers != NULL) {
1927			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1928			if (error)
1929				goto out;
1930
1931		}
1932	}
1933
1934	AUDIT_ARG_FD(uap->fd);
1935
1936	/*
1937	 * sendfile(2) can start at any offset within a file so we require
1938	 * CAP_READ+CAP_SEEK = CAP_PREAD.
1939	 */
1940	if ((error = fget_read(td, uap->fd, CAP_PREAD, &fp)) != 0)
1941		goto out;
1942
1943	error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
1944	    uap->nbytes, uap->sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
1945	fdrop(fp, td);
1946
1947out:
1948	if (hdr_uio)
1949		free(hdr_uio, M_IOV);
1950	if (trl_uio)
1951		free(trl_uio, M_IOV);
1952	return (error);
1953}
1954
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 sendfile entry point: repackage the arguments as a
 * struct sendfile_args and run do_sendfile() in compat mode.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args args;
	int error;

	/* Descriptors. */
	args.fd = uap->fd;
	args.s = uap->s;

	/* Range of the file to send. */
	args.offset = uap->offset;
	args.nbytes = uap->nbytes;

	/* Optional header/trailer, result pointer and flags. */
	args.hdtr = uap->hdtr;
	args.sbytes = uap->sbytes;
	args.flags = uap->flags;

	error = do_sendfile(td, &args, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1972
/*
 * Send the contents of the vnode behind fp over the socket sockfd.
 *
 * fp       file whose f_vnode supplies the data; it must be a regular
 *          file (VREG) with a live VM object, otherwise EINVAL.
 * sockfd   descriptor of the destination socket; it must be SOCK_STREAM
 *          (else EINVAL) and connected (else ENOTCONN).
 * hdr_uio  optional header data, copied into mbufs and sent ahead of the
 *          file data.
 * trl_uio  optional trailer data, written with kern_writev() after the
 *          file data.
 * offset   file offset at which to start.
 * nbytes   number of file bytes to send; 0 means up to the file size.
 * sent     if non-NULL, the address (it is handed to copyout()) that
 *          receives the total number of bytes sent.
 * flags    SF_MNOWAIT makes allocations non-sleeping, SF_SYNC waits for
 *          all sf_buf-backed mbufs to be freed before returning, and
 *          SF_NODISKIO fails with EBUSY instead of reading a page in.
 * kflags   SFK_COMPAT subtracts the header size from nbytes.
 *
 * Returns 0 or an errno value; ERESTART is reported as EINTR.
 */
int
vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
    int kflags, struct thread *td)
{
	struct vnode *vp = fp->f_vnode;
	struct file *sock_fp;
	struct vm_object *obj = NULL;
	struct socket *so = NULL;
	struct mbuf *m = NULL;
	struct sf_buf *sf;
	struct vm_page *pg;
	struct vattr va;
	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
	int error, hdrlen = 0, mnw = 0;
	int bsize;
	struct sendfile_sync *sfs = NULL;

	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (vp->v_type == VREG) {
		bsize = vp->v_mount->mnt_stat.f_iosize;
		if (nbytes == 0) {
			error = VOP_GETATTR(vp, &va, td->td_ucred);
			if (error != 0) {
				VOP_UNLOCK(vp, 0);
				obj = NULL;
				goto out;
			}
			rem = va.va_size;
		} else
			rem = nbytes;
		obj = vp->v_object;
		if (obj != NULL) {
			/*
			 * Temporarily increase the backing VM
			 * object's reference count so that a forced
			 * reclamation of its vnode does not
			 * immediately destroy it.
			 */
			VM_OBJECT_WLOCK(obj);
			if ((obj->flags & OBJ_DEAD) == 0) {
				vm_object_reference_locked(obj);
				VM_OBJECT_WUNLOCK(obj);
			} else {
				VM_OBJECT_WUNLOCK(obj);
				obj = NULL;
			}
		}
	} else
		bsize = 0;	/* silence gcc */
	VOP_UNLOCK(vp, 0);
	/* Not a regular file, or no usable VM object behind it. */
	if (obj == NULL) {
		error = EINVAL;
		goto out;
	}

	/*
	 * The socket must be a stream socket and connected.
	 * Remember if it a blocking or non-blocking socket.
	 */
	if ((error = getsock_cap(td->td_proc->p_fd, sockfd, CAP_SEND,
	    &sock_fp, NULL)) != 0)
		goto out;
	so = sock_fp->f_data;
	if (so->so_type != SOCK_STREAM) {
		error = EINVAL;
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto out;
	}
	/*
	 * Do not wait on memory allocations but return ENOMEM for
	 * caller to retry later.
	 * XXX: Experimental.
	 */
	if (flags & SF_MNOWAIT)
		mnw = 1;

	/*
	 * SF_SYNC: set up the counter/condvar pair used at 'out' to wait
	 * until every sf_buf-backed mbuf queued below has been freed.
	 */
	if (flags & SF_SYNC) {
		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
		cv_init(&sfs->cv, "sendfile");
	}

#ifdef MAC
	error = mac_socket_check_send(td->td_ucred, so);
	if (error)
		goto out;
#endif

	/* If headers are specified copy them into mbufs. */
	if (hdr_uio != NULL) {
		hdr_uio->uio_td = td;
		hdr_uio->uio_rw = UIO_WRITE;
		if (hdr_uio->uio_resid > 0) {
			/*
			 * In FBSD < 5.0 the nbytes to send also included
			 * the header.  If compat is specified subtract the
			 * header size from nbytes.
			 */
			if (kflags & SFK_COMPAT) {
				if (nbytes > hdr_uio->uio_resid)
					nbytes -= hdr_uio->uio_resid;
				else
					nbytes = 0;
			}
			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
			    0, 0, 0);
			if (m == NULL) {
				error = mnw ? EAGAIN : ENOBUFS;
				goto out;
			}
			hdrlen = m_length(m, NULL);
		}
	}

	/*
	 * Protect against multiple writers to the socket.
	 *
	 * XXXRW: Historically this has assumed non-interruptibility, so now
	 * we implement that, but possibly shouldn't.
	 */
	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);

	/*
	 * Loop through the pages of the file, starting with the requested
	 * offset. Get a file page (do I/O if necessary), map the file page
	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
	 * it on the socket.
	 * This is done in two loops.  The inner loop turns as many pages
	 * as it can, up to available socket buffer space, without blocking
	 * into mbufs to have it bulk delivered into the socket send buffer.
	 * The outer loop checks the state and available space of the socket
	 * and takes care of the overall progress.
	 */
	for (off = offset; ; ) {
		struct mbuf *mtail;
		int loopbytes;
		int space;
		int done;

		/*
		 * Finished once the requested byte count, or for nbytes == 0
		 * a count equal to the file size, has been sent from the
		 * file.
		 */
		if ((nbytes != 0 && nbytes == fsbytes) ||
		    (nbytes == 0 && va.va_size == fsbytes))
			break;

		mtail = NULL;
		loopbytes = 0;
		space = 0;
		done = 0;

		/*
		 * Check the socket state for ongoing connection,
		 * no errors and space in socket buffer.
		 * If space is low allow for the remainder of the
		 * file to be processed if it fits the socket buffer.
		 * Otherwise block in waiting for sufficient space
		 * to proceed, or if the socket is nonblocking, return
		 * to userland with EAGAIN while reporting how far
		 * we've come.
		 * We wait until the socket buffer has significant free
		 * space to do bulk sends.  This makes good use of file
		 * system read ahead and allows packet segmentation
		 * offloading hardware to take over lots of work.  If
		 * we were not careful here we would send off only one
		 * sfbuf at a time.
		 */
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
retry_space:
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			error = EPIPE;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto done;
		} else if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto done;
		}
		space = sbspace(&so->so_snd);
		if (space < rem &&
		    (space <= 0 ||
		     space < so->so_snd.sb_lowat)) {
			if (so->so_state & SS_NBIO) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EAGAIN;
				goto done;
			}
			/*
			 * sbwait drops the lock while sleeping.
			 * When we loop back to retry_space the
			 * state may have changed and we retest
			 * for it.
			 */
			error = sbwait(&so->so_snd);
			/*
			 * An error from sbwait usually indicates that we've
			 * been interrupted by a signal. If we've sent anything
			 * then return bytes sent, otherwise return the error.
			 */
			if (error) {
				SOCKBUF_UNLOCK(&so->so_snd);
				goto done;
			}
			goto retry_space;
		}
		SOCKBUF_UNLOCK(&so->so_snd);

		/*
		 * Reduce space in the socket buffer by the size of
		 * the header mbuf chain.
		 * hdrlen is set to 0 after the first loop.
		 */
		space -= hdrlen;

		/*
		 * Re-read the attributes on every pass so the current file
		 * size is used; stop once the offset is at or past it.
		 */
		error = vn_lock(vp, LK_SHARED);
		if (error != 0)
			goto done;
		error = VOP_GETATTR(vp, &va, td->td_ucred);
		if (error != 0 || off >= va.va_size) {
			VOP_UNLOCK(vp, 0);
			goto done;
		}

		/*
		 * Loop and construct maximum sized mbuf chain to be bulk
		 * dumped into socket buffer.
		 */
		while (space > loopbytes) {
			vm_pindex_t pindex;
			vm_offset_t pgoff;
			struct mbuf *m0;

			/*
			 * Calculate the amount to transfer.
			 * Not to exceed a page, the EOF,
			 * or the passed in nbytes.
			 */
			pgoff = (vm_offset_t)(off & PAGE_MASK);
			if (nbytes)
				rem = (nbytes - fsbytes - loopbytes);
			else
				rem = va.va_size -
				    offset - fsbytes - loopbytes;
			xfsize = omin(PAGE_SIZE - pgoff, rem);
			xfsize = omin(space - loopbytes, xfsize);
			if (xfsize <= 0) {
				done = 1;		/* all data sent */
				break;
			}

			/*
			 * Attempt to look up the page.  Allocate
			 * if not found or wait and loop if busy.
			 */
			pindex = OFF_TO_IDX(off);
			VM_OBJECT_WLOCK(obj);
			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
			    VM_ALLOC_IGN_SBUSY | VM_ALLOC_NORMAL |
			    VM_ALLOC_WIRED);

			/*
			 * Check if page is valid for what we need,
			 * otherwise initiate I/O.
			 * If we already turned some pages into mbufs,
			 * send them off before we come here again and
			 * block.
			 */
			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
				VM_OBJECT_WUNLOCK(obj);
			else if (m != NULL)
				error = EAGAIN;	/* send what we already got */
			else if (flags & SF_NODISKIO)
				error = EBUSY;
			else {
				ssize_t resid;
				int readahead = sfreadahead * MAXBSIZE;

				VM_OBJECT_WUNLOCK(obj);

				/*
				 * Get the page from backing store.
				 * XXXMAC: Because we don't have fp->f_cred
				 * here, we pass in NOCRED.  This is probably
				 * wrong, but is consistent with our original
				 * implementation.
				 */
				error = vn_rdwr(UIO_READ, vp, NULL, readahead,
				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
				    IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT),
				    td->td_ucred, NOCRED, &resid, td);
				SFSTAT_INC(sf_iocnt);
				/*
				 * Retake the object lock so that the error
				 * path below runs with it held, as it does
				 * for the EAGAIN and EBUSY cases above.
				 */
				if (error)
					VM_OBJECT_WLOCK(obj);
			}
			if (error) {
				vm_page_lock(pg);
				vm_page_unwire(pg, 0);
				/*
				 * See if anyone else might know about
				 * this page.  If not and it is not valid,
				 * then free it.
				 */
				if (pg->wire_count == 0 && pg->valid == 0 &&
				    !vm_page_busied(pg))
					vm_page_free(pg);
				vm_page_unlock(pg);
				VM_OBJECT_WUNLOCK(obj);
				if (error == EAGAIN)
					error = 0;	/* not a real error */
				break;
			}

			/*
			 * Get a sendfile buf.  When allocating the
			 * first buffer for mbuf chain, we usually
			 * wait as long as necessary, but this wait
			 * can be interrupted.  For consequent
			 * buffers, do not sleep, since several
			 * threads might exhaust the buffers and then
			 * deadlock.
			 */
			sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
			    SFB_CATCH);
			if (sf == NULL) {
				SFSTAT_INC(sf_allocfail);
				vm_page_lock(pg);
				vm_page_unwire(pg, 0);
				KASSERT(pg->object != NULL,
				    ("%s: object disappeared", __func__));
				vm_page_unlock(pg);
				if (m == NULL)
					error = (mnw ? EAGAIN : EINTR);
				break;
			}

			/*
			 * Get an mbuf and set it up as having
			 * external storage.
			 */
			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
			if (m0 == NULL) {
				error = (mnw ? EAGAIN : ENOBUFS);
				/* No sync object: count not bumped yet. */
				(void)sf_buf_mext(NULL, NULL, sf);
				break;
			}
			if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE,
			    sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF,
			    (mnw ? M_NOWAIT : M_WAITOK)) != 0) {
				error = (mnw ? EAGAIN : ENOBUFS);
				/* No sync object: count not bumped yet. */
				(void)sf_buf_mext(NULL, NULL, sf);
				m_freem(m0);
				break;
			}
			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
			m0->m_len = xfsize;

			/* Append to mbuf chain. */
			if (mtail != NULL)
				mtail->m_next = m0;
			else if (m != NULL)
				m_last(m)->m_next = m0;
			else
				m = m0;
			mtail = m0;

			/* Keep track of bits processed. */
			loopbytes += xfsize;
			off += xfsize;

			/* One more sf_buf mbuf for SF_SYNC to wait on. */
			if (sfs != NULL) {
				mtx_lock(&sfs->mtx);
				sfs->count++;
				mtx_unlock(&sfs->mtx);
			}
		}

		VOP_UNLOCK(vp, 0);

		/* Add the buffer chain to the socket buffer. */
		if (m != NULL) {
			int mlen, err;

			mlen = m_length(m, NULL);
			SOCKBUF_LOCK(&so->so_snd);
			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
				error = EPIPE;
				SOCKBUF_UNLOCK(&so->so_snd);
				goto done;
			}
			SOCKBUF_UNLOCK(&so->so_snd);
			CURVNET_SET(so->so_vnet);
			/* Avoid error aliasing. */
			err = (*so->so_proto->pr_usrreqs->pru_send)
				    (so, 0, m, NULL, NULL, td);
			CURVNET_RESTORE();
			if (err == 0) {
				/*
				 * We need two counters to get the
				 * file offset and nbytes to send
				 * right:
				 * - sbytes contains the total amount
				 *   of bytes sent, including headers.
				 * - fsbytes contains the total amount
				 *   of bytes sent from the file.
				 */
				sbytes += mlen;
				fsbytes += mlen;
				if (hdrlen) {
					fsbytes -= hdrlen;
					hdrlen = 0;
				}
			} else if (error == 0)
				error = err;
			m = NULL;	/* pru_send always consumes */
		}

		/* Quit outer loop on error or when we're done. */
		if (done)
			break;
		if (error)
			goto done;
	}

	/*
	 * Send trailers. Wimp out and use writev(2).
	 */
	if (trl_uio != NULL) {
		/* Release the send buffer lock before calling kern_writev(). */
		sbunlock(&so->so_snd);
		error = kern_writev(td, sockfd, trl_uio);
		if (error == 0)
			sbytes += td->td_retval[0];
		goto out;
	}

done:
	sbunlock(&so->so_snd);
out:
	/*
	 * If there was no error we have to clear td->td_retval[0]
	 * because it may have been set by writev.
	 */
	if (error == 0) {
		td->td_retval[0] = 0;
	}
	/*
	 * Report the byte count even on failure.
	 * NOTE(review): the copyout() result is ignored here.
	 */
	if (sent != NULL) {
		copyout(&sbytes, sent, sizeof(off_t));
	}
	if (obj != NULL)
		vm_object_deallocate(obj);
	if (so)
		fdrop(sock_fp, td);
	if (m)
		m_freem(m);

	/*
	 * SF_SYNC: sleep until sf_buf_mext() has dropped the count of
	 * outstanding sf_buf mbufs to zero, then tear the sync object down.
	 */
	if (sfs != NULL) {
		mtx_lock(&sfs->mtx);
		if (sfs->count != 0)
			cv_wait(&sfs->cv, &sfs->mtx);
		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
		cv_destroy(&sfs->cv);
		mtx_destroy(&sfs->mtx);
		free(sfs, M_TEMP);
	}

	/*
	 * NOTE(review): presumably so the call is not restarted after
	 * part of the data has already been sent -- confirm.
	 */
	if (error == ERESTART)
		error = EINTR;

	return (error);
}
2446
2447/*
2448 * SCTP syscalls.
2449 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2450 * otherwise all return EOPNOTSUPP.
2451 * XXX: We should make this loadable one day.
2452 */
/*
 * sctp_peeloff entry point: create a new socket and descriptor from the
 * SCTP socket uap->sd, using uap->name (cast to sctp_assoc_t) to select
 * what is handed to sctp_can_peel_off() and sctp_do_peeloff().  On success
 * the new descriptor is left in td->td_retval[0].  Returns EOPNOTSUPP when
 * the socket's protocol is not IPPROTO_SCTP or when SCTP support is not
 * compiled in.
 */
int
sys_sctp_peeloff(td, uap)
	struct thread *td;
	struct sctp_peeloff_args /* {
		int	sd;
		caddr_t	name;
	} */ *uap;
{
#if (defined(INET) || defined(INET6)) && defined(SCTP)
	struct file *nfp = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;

	AUDIT_ARG_FD(uap->sd);
	error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
	if (error)
		goto done2;
	if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
		error = EOPNOTSUPP;
		goto done;
	}
	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
	if (error)
		goto done;
	/*
	 * At this point we know we do have a assoc to pull
	 * we proceed to get the fd setup. This may block
	 * but that is ok.
	 */

	error = falloc(td, &nfp, &fd, 0);
	if (error)
		goto done;
	/* The new descriptor is the system call's return value. */
	td->td_retval[0] = fd;

	CURVNET_SET(head->so_vnet);
	so = sonewconn(head, SS_ISCONNECTED);
	if (so == NULL) {
		error = ENOMEM;
		goto noconnection;
	}
	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
        SOCK_LOCK(so);
        soref(so);                      /* file descriptor reference */
        SOCK_UNLOCK(so);

	ACCEPT_LOCK();

	/*
	 * Take the new socket off head's completed-connection queue and
	 * detach it from head; it inherits head's non-blocking setting.
	 */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_state &= ~SS_NOFDREF;
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;
	ACCEPT_UNLOCK();
	/* The new file gets the same flags as the original descriptor. */
	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
	if (error)
		goto noconnection;
	/* Carry head's SIGIO owner, if any, over to the new socket. */
	if (head->so_sigio != NULL)
		fsetown(fgetown(&head->so_sigio), &so->so_sigio);

noconnection:
	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(td->td_proc->p_fd, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.
	 */
	CURVNET_RESTORE();
done:
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	return (error);
#else  /* SCTP */
	return (EOPNOTSUPP);
#endif /* SCTP */
}
2543
2544int
2545sys_sctp_generic_sendmsg (td, uap)
2546	struct thread *td;
2547	struct sctp_generic_sendmsg_args /* {
2548		int sd,
2549		caddr_t msg,
2550		int mlen,
2551		caddr_t to,
2552		__socklen_t tolen,
2553		struct sctp_sndrcvinfo *sinfo,
2554		int flags
2555	} */ *uap;
2556{
2557#if (defined(INET) || defined(INET6)) && defined(SCTP)
2558	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2559	struct socket *so;
2560	struct file *fp = NULL;
2561	int error = 0, len;
2562	struct sockaddr *to = NULL;
2563#ifdef KTRACE
2564	struct uio *ktruio = NULL;
2565#endif
2566	struct uio auio;
2567	struct iovec iov[1];
2568	cap_rights_t rights;
2569
2570	if (uap->sinfo) {
2571		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2572		if (error)
2573			return (error);
2574		u_sinfo = &sinfo;
2575	}
2576
2577	rights = CAP_SEND;
2578	if (uap->tolen) {
2579		error = getsockaddr(&to, uap->to, uap->tolen);
2580		if (error) {
2581			to = NULL;
2582			goto sctp_bad2;
2583		}
2584		rights |= CAP_CONNECT;
2585	}
2586
2587	AUDIT_ARG_FD(uap->sd);
2588	error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
2589	if (error)
2590		goto sctp_bad;
2591#ifdef KTRACE
2592	if (to && (KTRPOINT(td, KTR_STRUCT)))
2593		ktrsockaddr(to);
2594#endif
2595
2596	iov[0].iov_base = uap->msg;
2597	iov[0].iov_len = uap->mlen;
2598
2599	so = (struct socket *)fp->f_data;
2600	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2601		error = EOPNOTSUPP;
2602		goto sctp_bad;
2603	}
2604#ifdef MAC
2605	error = mac_socket_check_send(td->td_ucred, so);
2606	if (error)
2607		goto sctp_bad;
2608#endif /* MAC */
2609
2610	auio.uio_iov =  iov;
2611	auio.uio_iovcnt = 1;
2612	auio.uio_segflg = UIO_USERSPACE;
2613	auio.uio_rw = UIO_WRITE;
2614	auio.uio_td = td;
2615	auio.uio_offset = 0;			/* XXX */
2616	auio.uio_resid = 0;
2617	len = auio.uio_resid = uap->mlen;
2618	CURVNET_SET(so->so_vnet);
2619	error = sctp_lower_sosend(so, to, &auio,
2620		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2621		    uap->flags, u_sinfo, td);
2622	CURVNET_RESTORE();
2623	if (error) {
2624		if (auio.uio_resid != len && (error == ERESTART ||
2625		    error == EINTR || error == EWOULDBLOCK))
2626			error = 0;
2627		/* Generation of SIGPIPE can be controlled per socket. */
2628		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2629		    !(uap->flags & MSG_NOSIGNAL)) {
2630			PROC_LOCK(td->td_proc);
2631			tdsignal(td, SIGPIPE);
2632			PROC_UNLOCK(td->td_proc);
2633		}
2634	}
2635	if (error == 0)
2636		td->td_retval[0] = len - auio.uio_resid;
2637#ifdef KTRACE
2638	if (ktruio != NULL) {
2639		ktruio->uio_resid = td->td_retval[0];
2640		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2641	}
2642#endif /* KTRACE */
2643sctp_bad:
2644	if (fp)
2645		fdrop(fp, td);
2646sctp_bad2:
2647	if (to)
2648		free(to, M_SONAME);
2649	return (error);
2650#else  /* SCTP */
2651	return (EOPNOTSUPP);
2652#endif /* SCTP */
2653}
2654
2655int
2656sys_sctp_generic_sendmsg_iov(td, uap)
2657	struct thread *td;
2658	struct sctp_generic_sendmsg_iov_args /* {
2659		int sd,
2660		struct iovec *iov,
2661		int iovlen,
2662		caddr_t to,
2663		__socklen_t tolen,
2664		struct sctp_sndrcvinfo *sinfo,
2665		int flags
2666	} */ *uap;
2667{
2668#if (defined(INET) || defined(INET6)) && defined(SCTP)
2669	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2670	struct socket *so;
2671	struct file *fp = NULL;
2672	int error=0, i;
2673	ssize_t len;
2674	struct sockaddr *to = NULL;
2675#ifdef KTRACE
2676	struct uio *ktruio = NULL;
2677#endif
2678	struct uio auio;
2679	struct iovec *iov, *tiov;
2680	cap_rights_t rights;
2681
2682	if (uap->sinfo) {
2683		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2684		if (error)
2685			return (error);
2686		u_sinfo = &sinfo;
2687	}
2688	rights = CAP_SEND;
2689	if (uap->tolen) {
2690		error = getsockaddr(&to, uap->to, uap->tolen);
2691		if (error) {
2692			to = NULL;
2693			goto sctp_bad2;
2694		}
2695		rights |= CAP_CONNECT;
2696	}
2697
2698	AUDIT_ARG_FD(uap->sd);
2699	error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
2700	if (error)
2701		goto sctp_bad1;
2702
2703#ifdef COMPAT_FREEBSD32
2704	if (SV_CURPROC_FLAG(SV_ILP32))
2705		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
2706		    uap->iovlen, &iov, EMSGSIZE);
2707	else
2708#endif
2709		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2710	if (error)
2711		goto sctp_bad1;
2712#ifdef KTRACE
2713	if (to && (KTRPOINT(td, KTR_STRUCT)))
2714		ktrsockaddr(to);
2715#endif
2716
2717	so = (struct socket *)fp->f_data;
2718	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2719		error = EOPNOTSUPP;
2720		goto sctp_bad;
2721	}
2722#ifdef MAC
2723	error = mac_socket_check_send(td->td_ucred, so);
2724	if (error)
2725		goto sctp_bad;
2726#endif /* MAC */
2727
2728	auio.uio_iov = iov;
2729	auio.uio_iovcnt = uap->iovlen;
2730	auio.uio_segflg = UIO_USERSPACE;
2731	auio.uio_rw = UIO_WRITE;
2732	auio.uio_td = td;
2733	auio.uio_offset = 0;			/* XXX */
2734	auio.uio_resid = 0;
2735	tiov = iov;
2736	for (i = 0; i <uap->iovlen; i++, tiov++) {
2737		if ((auio.uio_resid += tiov->iov_len) < 0) {
2738			error = EINVAL;
2739			goto sctp_bad;
2740		}
2741	}
2742	len = auio.uio_resid;
2743	CURVNET_SET(so->so_vnet);
2744	error = sctp_lower_sosend(so, to, &auio,
2745		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2746		    uap->flags, u_sinfo, td);
2747	CURVNET_RESTORE();
2748	if (error) {
2749		if (auio.uio_resid != len && (error == ERESTART ||
2750		    error == EINTR || error == EWOULDBLOCK))
2751			error = 0;
2752		/* Generation of SIGPIPE can be controlled per socket */
2753		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2754		    !(uap->flags & MSG_NOSIGNAL)) {
2755			PROC_LOCK(td->td_proc);
2756			tdsignal(td, SIGPIPE);
2757			PROC_UNLOCK(td->td_proc);
2758		}
2759	}
2760	if (error == 0)
2761		td->td_retval[0] = len - auio.uio_resid;
2762#ifdef KTRACE
2763	if (ktruio != NULL) {
2764		ktruio->uio_resid = td->td_retval[0];
2765		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2766	}
2767#endif /* KTRACE */
2768sctp_bad:
2769	free(iov, M_IOV);
2770sctp_bad1:
2771	if (fp)
2772		fdrop(fp, td);
2773sctp_bad2:
2774	if (to)
2775		free(to, M_SONAME);
2776	return (error);
2777#else  /* SCTP */
2778	return (EOPNOTSUPP);
2779#endif /* SCTP */
2780}
2781
2782int
2783sys_sctp_generic_recvmsg(td, uap)
2784	struct thread *td;
2785	struct sctp_generic_recvmsg_args /* {
2786		int sd,
2787		struct iovec *iov,
2788		int iovlen,
2789		struct sockaddr *from,
2790		__socklen_t *fromlenaddr,
2791		struct sctp_sndrcvinfo *sinfo,
2792		int *msg_flags
2793	} */ *uap;
2794{
2795#if (defined(INET) || defined(INET6)) && defined(SCTP)
2796	uint8_t sockbufstore[256];
2797	struct uio auio;
2798	struct iovec *iov, *tiov;
2799	struct sctp_sndrcvinfo sinfo;
2800	struct socket *so;
2801	struct file *fp = NULL;
2802	struct sockaddr *fromsa;
2803	int fromlen;
2804	ssize_t len;
2805	int i, msg_flags;
2806	int error = 0;
2807#ifdef KTRACE
2808	struct uio *ktruio = NULL;
2809#endif
2810
2811	AUDIT_ARG_FD(uap->sd);
2812	error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL);
2813	if (error) {
2814		return (error);
2815	}
2816#ifdef COMPAT_FREEBSD32
2817	if (SV_CURPROC_FLAG(SV_ILP32))
2818		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
2819		    uap->iovlen, &iov, EMSGSIZE);
2820	else
2821#endif
2822		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2823	if (error)
2824		goto out1;
2825
2826	so = fp->f_data;
2827	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2828		error = EOPNOTSUPP;
2829		goto out;
2830	}
2831#ifdef MAC
2832	error = mac_socket_check_receive(td->td_ucred, so);
2833	if (error) {
2834		goto out;
2835	}
2836#endif /* MAC */
2837
2838	if (uap->fromlenaddr) {
2839		error = copyin(uap->fromlenaddr,
2840		    &fromlen, sizeof (fromlen));
2841		if (error) {
2842			goto out;
2843		}
2844	} else {
2845		fromlen = 0;
2846	}
2847	if (uap->msg_flags) {
2848		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2849		if (error) {
2850			goto out;
2851		}
2852	} else {
2853		msg_flags = 0;
2854	}
2855	auio.uio_iov = iov;
2856	auio.uio_iovcnt = uap->iovlen;
2857	auio.uio_segflg = UIO_USERSPACE;
2858	auio.uio_rw = UIO_READ;
2859	auio.uio_td = td;
2860	auio.uio_offset = 0;			/* XXX */
2861	auio.uio_resid = 0;
2862	tiov = iov;
2863	for (i = 0; i <uap->iovlen; i++, tiov++) {
2864		if ((auio.uio_resid += tiov->iov_len) < 0) {
2865			error = EINVAL;
2866			goto out;
2867		}
2868	}
2869	len = auio.uio_resid;
2870	fromsa = (struct sockaddr *)sockbufstore;
2871
2872#ifdef KTRACE
2873	if (KTRPOINT(td, KTR_GENIO))
2874		ktruio = cloneuio(&auio);
2875#endif /* KTRACE */
2876	memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
2877	CURVNET_SET(so->so_vnet);
2878	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2879		    fromsa, fromlen, &msg_flags,
2880		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2881	CURVNET_RESTORE();
2882	if (error) {
2883		if (auio.uio_resid != len && (error == ERESTART ||
2884		    error == EINTR || error == EWOULDBLOCK))
2885			error = 0;
2886	} else {
2887		if (uap->sinfo)
2888			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2889	}
2890#ifdef KTRACE
2891	if (ktruio != NULL) {
2892		ktruio->uio_resid = len - auio.uio_resid;
2893		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2894	}
2895#endif /* KTRACE */
2896	if (error)
2897		goto out;
2898	td->td_retval[0] = len - auio.uio_resid;
2899
2900	if (fromlen && uap->from) {
2901		len = fromlen;
2902		if (len <= 0 || fromsa == 0)
2903			len = 0;
2904		else {
2905			len = MIN(len, fromsa->sa_len);
2906			error = copyout(fromsa, uap->from, (size_t)len);
2907			if (error)
2908				goto out;
2909		}
2910		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2911		if (error) {
2912			goto out;
2913		}
2914	}
2915#ifdef KTRACE
2916	if (KTRPOINT(td, KTR_STRUCT))
2917		ktrsockaddr(fromsa);
2918#endif
2919	if (uap->msg_flags) {
2920		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2921		if (error) {
2922			goto out;
2923		}
2924	}
2925out:
2926	free(iov, M_IOV);
2927out1:
2928	if (fp)
2929		fdrop(fp, td);
2930
2931	return (error);
2932#else  /* SCTP */
2933	return (EOPNOTSUPP);
2934#endif /* SCTP */
2935}
2936