kern_sendfile.c revision 193511
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 193511 2009-06-05 14:55:22Z rwatson $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mutex.h>
47#include <sys/sysproto.h>
48#include <sys/malloc.h>
49#include <sys/filedesc.h>
50#include <sys/event.h>
51#include <sys/proc.h>
52#include <sys/fcntl.h>
53#include <sys/file.h>
54#include <sys/filio.h>
55#include <sys/mount.h>
56#include <sys/mbuf.h>
57#include <sys/protosw.h>
58#include <sys/sf_buf.h>
59#include <sys/socket.h>
60#include <sys/socketvar.h>
61#include <sys/signalvar.h>
62#include <sys/syscallsubr.h>
63#include <sys/sysctl.h>
64#include <sys/uio.h>
65#include <sys/vimage.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
/*
 * Forward declarations of the file-local helpers used by the system call
 * entry points in this file.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
94
/*
 * NSFBUFS-related variables and associated sysctls.
 *
 * Each counter is exported under kern.ipc with the same name as the
 * variable that backs it.
 */
int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs in use at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry.  A reference on the
111 * file entry is held upon returning.  This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count.  If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120	struct file *fp;
121	int error;
122
123	fp = NULL;
124	if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
125		error = EBADF;
126	} else if (fp->f_type != DTYPE_SOCKET) {
127		fdrop(fp, curthread);
128		fp = NULL;
129		error = ENOTSOCK;
130	} else {
131		if (fflagp != NULL)
132			*fflagp = fp->f_flag;
133		error = 0;
134	}
135	*fpp = fp;
136	return (error);
137}
138
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_43 implies COMPAT_OLDSOCK, which guards the old-style entry points
 * in this file (oaccept, osend, osendmsg, orecv, orecvfrom, orecvmsg) and
 * the MSG_COMPAT handling in the shared helpers.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
145
146int
147socket(td, uap)
148	struct thread *td;
149	struct socket_args /* {
150		int	domain;
151		int	type;
152		int	protocol;
153	} */ *uap;
154{
155	struct filedesc *fdp;
156	struct socket *so;
157	struct file *fp;
158	int fd, error;
159
160#ifdef MAC
161	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
162	    uap->protocol);
163	if (error)
164		return (error);
165#endif
166	fdp = td->td_proc->p_fd;
167	error = falloc(td, &fp, &fd);
168	if (error)
169		return (error);
170	/* An extra reference on `fp' has been held for us by falloc(). */
171	error = socreate(uap->domain, &so, uap->type, uap->protocol,
172	    td->td_ucred, td);
173	if (error) {
174		fdclose(fdp, fp, fd, td);
175	} else {
176		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
177		td->td_retval[0] = fd;
178	}
179	fdrop(fp, td);
180	return (error);
181}
182
183/* ARGSUSED */
184int
185bind(td, uap)
186	struct thread *td;
187	struct bind_args /* {
188		int	s;
189		caddr_t	name;
190		int	namelen;
191	} */ *uap;
192{
193	struct sockaddr *sa;
194	int error;
195
196	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
197		return (error);
198
199	error = kern_bind(td, uap->s, sa);
200	free(sa, M_SONAME);
201	return (error);
202}
203
204int
205kern_bind(td, fd, sa)
206	struct thread *td;
207	int fd;
208	struct sockaddr *sa;
209{
210	struct socket *so;
211	struct file *fp;
212	int error;
213
214	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
215	if (error)
216		return (error);
217	so = fp->f_data;
218#ifdef KTRACE
219	if (KTRPOINT(td, KTR_STRUCT))
220		ktrsockaddr(sa);
221#endif
222#ifdef MAC
223	error = mac_socket_check_bind(td->td_ucred, so, sa);
224	if (error == 0)
225#endif
226		error = sobind(so, sa, td);
227	fdrop(fp, td);
228	return (error);
229}
230
231/* ARGSUSED */
232int
233listen(td, uap)
234	struct thread *td;
235	struct listen_args /* {
236		int	s;
237		int	backlog;
238	} */ *uap;
239{
240	struct socket *so;
241	struct file *fp;
242	int error;
243
244	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
245	if (error == 0) {
246		so = fp->f_data;
247#ifdef MAC
248		error = mac_socket_check_listen(td->td_ucred, so);
249		if (error == 0) {
250#endif
251			CURVNET_SET(so->so_vnet);
252			error = solisten(so, uap->backlog, td);
253			CURVNET_RESTORE();
254#ifdef MAC
255		}
256#endif
257		fdrop(fp, td);
258	}
259	return(error);
260}
261
262/*
263 * accept1()
264 */
265static int
266accept1(td, uap, compat)
267	struct thread *td;
268	struct accept_args /* {
269		int	s;
270		struct sockaddr	* __restrict name;
271		socklen_t	* __restrict anamelen;
272	} */ *uap;
273	int compat;
274{
275	struct sockaddr *name;
276	socklen_t namelen;
277	struct file *fp;
278	int error;
279
280	if (uap->name == NULL)
281		return (kern_accept(td, uap->s, NULL, NULL, NULL));
282
283	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
284	if (error)
285		return (error);
286
287	error = kern_accept(td, uap->s, &name, &namelen, &fp);
288
289	/*
290	 * return a namelen of zero for older code which might
291	 * ignore the return value from accept.
292	 */
293	if (error) {
294		(void) copyout(&namelen,
295		    uap->anamelen, sizeof(*uap->anamelen));
296		return (error);
297	}
298
299	if (error == 0 && name != NULL) {
300#ifdef COMPAT_OLDSOCK
301		if (compat)
302			((struct osockaddr *)name)->sa_family =
303			    name->sa_family;
304#endif
305		error = copyout(name, uap->name, namelen);
306	}
307	if (error == 0)
308		error = copyout(&namelen, uap->anamelen,
309		    sizeof(namelen));
310	if (error)
311		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
312	fdrop(fp, td);
313	free(name, M_SONAME);
314	return (error);
315}
316
/*
 * Accept a pending connection on the listening socket behind descriptor s.
 *
 * On success the new descriptor is stored in td->td_retval[0].
 *
 * name/namelen: if name is non-NULL, *name receives the peer address
 *	(allocated with M_SONAME; the caller frees it) or NULL when none is
 *	available, and *namelen is clamped to the address length or set to 0.
 * fp:	if non-NULL, *fp receives the new file with the reference taken by
 *	falloc() still held on success (the caller must fdrop() it), or NULL
 *	on failure.
 *
 * Returns 0 or an errno value: EINVAL if the socket is not listening,
 * EWOULDBLOCK for a non-blocking socket with an empty completed queue, any
 * pending so_error on the listening socket, or an error from getsock(),
 * falloc(), msleep() or soaccept().
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
	struct filedesc *fdp;
	struct file *headfp, *nfp = NULL;
	struct sockaddr *sa = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	if (name) {
		*name = NULL;
		/*
		 * NOTE(review): if socklen_t is an unsigned type this test
		 * can never be true -- confirm and consider removing it.
		 */
		if (*namelen < 0)
			return (EINVAL);
	}

	fdp = td->td_proc->p_fd;
	error = getsock(fdp, s, &headfp, &fflag);
	if (error)
		return (error);
	head = headfp->f_data;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	error = mac_socket_check_accept(td->td_ucred, head);
	if (error != 0)
		goto done;
#endif
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking listener with nothing queued: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a completed connection or an error shows up. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* Report and clear any pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	CURVNET_SET(so->so_vnet);
	error = soaccept(so, &sa);
	CURVNET_RESTORE();
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (name)
			*namelen = 0;
		goto noconnection;
	}
	/* No peer address available: success, with a zero length. */
	if (sa == NULL) {
		if (name)
			*namelen = 0;
		goto done;
	}
	if (name) {
		/* check sa_len before it is destroyed */
		if (*namelen > sa->sa_len)
			*namelen = sa->sa_len;
#ifdef KTRACE
		if (KTRPOINT(td, KTR_STRUCT))
			ktrsockaddr(sa);
#endif
		/* Ownership of sa passes to the caller. */
		*name = sa;
		sa = NULL;
	}
noconnection:
	if (sa)
		free(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.  We return
	 * a reference on nfp to the caller on success if they request it.
	 */
done:
	if (fp != NULL) {
		if (error == 0) {
			*fp = nfp;
			nfp = NULL;
		} else
			*fp = NULL;
	}
	/* nfp is still NULL if we bailed out before falloc() succeeded. */
	if (nfp != NULL)
		fdrop(nfp, td);
	fdrop(headfp, td);
	return (error);
}
472
/*
 * accept(2): accept a connection, returning the peer address in the
 * current sockaddr layout (compat == 0).
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	const int compat = 0;

	return (accept1(td, uap, compat));
}
481
#ifdef COMPAT_OLDSOCK
/*
 * Old accept: same as accept(2), but the peer address is returned in the
 * old osockaddr layout (compat == 1).
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	const int compat = 1;

	return (accept1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
492
493/* ARGSUSED */
494int
495connect(td, uap)
496	struct thread *td;
497	struct connect_args /* {
498		int	s;
499		caddr_t	name;
500		int	namelen;
501	} */ *uap;
502{
503	struct sockaddr *sa;
504	int error;
505
506	error = getsockaddr(&sa, uap->name, uap->namelen);
507	if (error)
508		return (error);
509
510	error = kern_connect(td, uap->s, sa);
511	free(sa, M_SONAME);
512	return (error);
513}
514
515
516int
517kern_connect(td, fd, sa)
518	struct thread *td;
519	int fd;
520	struct sockaddr *sa;
521{
522	struct socket *so;
523	struct file *fp;
524	int error;
525	int interrupted = 0;
526
527	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
528	if (error)
529		return (error);
530	so = fp->f_data;
531	if (so->so_state & SS_ISCONNECTING) {
532		error = EALREADY;
533		goto done1;
534	}
535#ifdef KTRACE
536	if (KTRPOINT(td, KTR_STRUCT))
537		ktrsockaddr(sa);
538#endif
539#ifdef MAC
540	error = mac_socket_check_connect(td->td_ucred, so, sa);
541	if (error)
542		goto bad;
543#endif
544	error = soconnect(so, sa, td);
545	if (error)
546		goto bad;
547	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
548		error = EINPROGRESS;
549		goto done1;
550	}
551	SOCK_LOCK(so);
552	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
553		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
554		    "connec", 0);
555		if (error) {
556			if (error == EINTR || error == ERESTART)
557				interrupted = 1;
558			break;
559		}
560	}
561	if (error == 0) {
562		error = so->so_error;
563		so->so_error = 0;
564	}
565	SOCK_UNLOCK(so);
566bad:
567	if (!interrupted)
568		so->so_state &= ~SS_ISCONNECTING;
569	if (error == ERESTART)
570		error = EINTR;
571done1:
572	fdrop(fp, td);
573	return (error);
574}
575
576int
577kern_socketpair(struct thread *td, int domain, int type, int protocol,
578    int *rsv)
579{
580	struct filedesc *fdp = td->td_proc->p_fd;
581	struct file *fp1, *fp2;
582	struct socket *so1, *so2;
583	int fd, error;
584
585#ifdef MAC
586	/* We might want to have a separate check for socket pairs. */
587	error = mac_socket_check_create(td->td_ucred, domain, type,
588	    protocol);
589	if (error)
590		return (error);
591#endif
592	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
593	if (error)
594		return (error);
595	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
596	if (error)
597		goto free1;
598	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
599	error = falloc(td, &fp1, &fd);
600	if (error)
601		goto free2;
602	rsv[0] = fd;
603	fp1->f_data = so1;	/* so1 already has ref count */
604	error = falloc(td, &fp2, &fd);
605	if (error)
606		goto free3;
607	fp2->f_data = so2;	/* so2 already has ref count */
608	rsv[1] = fd;
609	error = soconnect2(so1, so2);
610	if (error)
611		goto free4;
612	if (type == SOCK_DGRAM) {
613		/*
614		 * Datagram socket connection is asymmetric.
615		 */
616		 error = soconnect2(so2, so1);
617		 if (error)
618			goto free4;
619	}
620	finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
621	finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
622	fdrop(fp1, td);
623	fdrop(fp2, td);
624	return (0);
625free4:
626	fdclose(fdp, fp2, rsv[1], td);
627	fdrop(fp2, td);
628free3:
629	fdclose(fdp, fp1, rsv[0], td);
630	fdrop(fp1, td);
631free2:
632	if (so2 != NULL)
633		(void)soclose(so2);
634free1:
635	if (so1 != NULL)
636		(void)soclose(so1);
637	return (error);
638}
639
640int
641socketpair(struct thread *td, struct socketpair_args *uap)
642{
643	int error, sv[2];
644
645	error = kern_socketpair(td, uap->domain, uap->type,
646	    uap->protocol, sv);
647	if (error)
648		return (error);
649	error = copyout(sv, uap->rsv, 2 * sizeof(int));
650	if (error) {
651		(void)kern_close(td, sv[0]);
652		(void)kern_close(td, sv[1]);
653	}
654	return (error);
655}
656
657static int
658sendit(td, s, mp, flags)
659	struct thread *td;
660	int s;
661	struct msghdr *mp;
662	int flags;
663{
664	struct mbuf *control;
665	struct sockaddr *to;
666	int error;
667
668	if (mp->msg_name != NULL) {
669		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
670		if (error) {
671			to = NULL;
672			goto bad;
673		}
674		mp->msg_name = to;
675	} else {
676		to = NULL;
677	}
678
679	if (mp->msg_control) {
680		if (mp->msg_controllen < sizeof(struct cmsghdr)
681#ifdef COMPAT_OLDSOCK
682		    && mp->msg_flags != MSG_COMPAT
683#endif
684		) {
685			error = EINVAL;
686			goto bad;
687		}
688		error = sockargs(&control, mp->msg_control,
689		    mp->msg_controllen, MT_CONTROL);
690		if (error)
691			goto bad;
692#ifdef COMPAT_OLDSOCK
693		if (mp->msg_flags == MSG_COMPAT) {
694			struct cmsghdr *cm;
695
696			M_PREPEND(control, sizeof(*cm), M_WAIT);
697			cm = mtod(control, struct cmsghdr *);
698			cm->cmsg_len = control->m_len;
699			cm->cmsg_level = SOL_SOCKET;
700			cm->cmsg_type = SCM_RIGHTS;
701		}
702#endif
703	} else {
704		control = NULL;
705	}
706
707	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
708
709bad:
710	if (to)
711		free(to, M_SONAME);
712	return (error);
713}
714
715int
716kern_sendit(td, s, mp, flags, control, segflg)
717	struct thread *td;
718	int s;
719	struct msghdr *mp;
720	int flags;
721	struct mbuf *control;
722	enum uio_seg segflg;
723{
724	struct file *fp;
725	struct uio auio;
726	struct iovec *iov;
727	struct socket *so;
728	int i;
729	int len, error;
730#ifdef KTRACE
731	struct uio *ktruio = NULL;
732#endif
733
734	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
735	if (error)
736		return (error);
737	so = (struct socket *)fp->f_data;
738
739#ifdef MAC
740	if (mp->msg_name != NULL) {
741		error = mac_socket_check_connect(td->td_ucred, so,
742		    mp->msg_name);
743		if (error)
744			goto bad;
745	}
746	error = mac_socket_check_send(td->td_ucred, so);
747	if (error)
748		goto bad;
749#endif
750
751	auio.uio_iov = mp->msg_iov;
752	auio.uio_iovcnt = mp->msg_iovlen;
753	auio.uio_segflg = segflg;
754	auio.uio_rw = UIO_WRITE;
755	auio.uio_td = td;
756	auio.uio_offset = 0;			/* XXX */
757	auio.uio_resid = 0;
758	iov = mp->msg_iov;
759	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
760		if ((auio.uio_resid += iov->iov_len) < 0) {
761			error = EINVAL;
762			goto bad;
763		}
764	}
765#ifdef KTRACE
766	if (KTRPOINT(td, KTR_GENIO))
767		ktruio = cloneuio(&auio);
768#endif
769	len = auio.uio_resid;
770	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
771	if (error) {
772		if (auio.uio_resid != len && (error == ERESTART ||
773		    error == EINTR || error == EWOULDBLOCK))
774			error = 0;
775		/* Generation of SIGPIPE can be controlled per socket */
776		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
777		    !(flags & MSG_NOSIGNAL)) {
778			PROC_LOCK(td->td_proc);
779			psignal(td->td_proc, SIGPIPE);
780			PROC_UNLOCK(td->td_proc);
781		}
782	}
783	if (error == 0)
784		td->td_retval[0] = len - auio.uio_resid;
785#ifdef KTRACE
786	if (ktruio != NULL) {
787		ktruio->uio_resid = td->td_retval[0];
788		ktrgenio(s, UIO_WRITE, ktruio, error);
789	}
790#endif
791bad:
792	fdrop(fp, td);
793	return (error);
794}
795
796int
797sendto(td, uap)
798	struct thread *td;
799	struct sendto_args /* {
800		int	s;
801		caddr_t	buf;
802		size_t	len;
803		int	flags;
804		caddr_t	to;
805		int	tolen;
806	} */ *uap;
807{
808	struct msghdr msg;
809	struct iovec aiov;
810	int error;
811
812	msg.msg_name = uap->to;
813	msg.msg_namelen = uap->tolen;
814	msg.msg_iov = &aiov;
815	msg.msg_iovlen = 1;
816	msg.msg_control = 0;
817#ifdef COMPAT_OLDSOCK
818	msg.msg_flags = 0;
819#endif
820	aiov.iov_base = uap->buf;
821	aiov.iov_len = uap->len;
822	error = sendit(td, uap->s, &msg, uap->flags);
823	return (error);
824}
825
826#ifdef COMPAT_OLDSOCK
827int
828osend(td, uap)
829	struct thread *td;
830	struct osend_args /* {
831		int	s;
832		caddr_t	buf;
833		int	len;
834		int	flags;
835	} */ *uap;
836{
837	struct msghdr msg;
838	struct iovec aiov;
839	int error;
840
841	msg.msg_name = 0;
842	msg.msg_namelen = 0;
843	msg.msg_iov = &aiov;
844	msg.msg_iovlen = 1;
845	aiov.iov_base = uap->buf;
846	aiov.iov_len = uap->len;
847	msg.msg_control = 0;
848	msg.msg_flags = 0;
849	error = sendit(td, uap->s, &msg, uap->flags);
850	return (error);
851}
852
853int
854osendmsg(td, uap)
855	struct thread *td;
856	struct osendmsg_args /* {
857		int	s;
858		caddr_t	msg;
859		int	flags;
860	} */ *uap;
861{
862	struct msghdr msg;
863	struct iovec *iov;
864	int error;
865
866	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
867	if (error)
868		return (error);
869	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
870	if (error)
871		return (error);
872	msg.msg_iov = iov;
873	msg.msg_flags = MSG_COMPAT;
874	error = sendit(td, uap->s, &msg, uap->flags);
875	free(iov, M_IOV);
876	return (error);
877}
878#endif
879
880int
881sendmsg(td, uap)
882	struct thread *td;
883	struct sendmsg_args /* {
884		int	s;
885		caddr_t	msg;
886		int	flags;
887	} */ *uap;
888{
889	struct msghdr msg;
890	struct iovec *iov;
891	int error;
892
893	error = copyin(uap->msg, &msg, sizeof (msg));
894	if (error)
895		return (error);
896	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
897	if (error)
898		return (error);
899	msg.msg_iov = iov;
900#ifdef COMPAT_OLDSOCK
901	msg.msg_flags = 0;
902#endif
903	error = sendit(td, uap->s, &msg, uap->flags);
904	free(iov, M_IOV);
905	return (error);
906}
907
/*
 * Receive a message on the socket behind descriptor s into the buffers
 * described by mp.
 *
 * td:		calling thread; td_retval[0] receives the byte count read.
 * s:		socket descriptor.
 * mp:		message header.  msg_iov/msg_iovlen are always treated as
 *		user-space buffers.  On return msg_namelen, msg_controllen
 *		and msg_flags are updated to describe what was delivered.
 * fromseg:	address space of mp->msg_name; selects copyout() versus
 *		bcopy() when storing the source address.
 * controlp:	if non-NULL, the received control mbuf chain is returned
 *		through it on success (the caller then owns it) instead of
 *		being copied out to mp->msg_control.
 *
 * A partial transfer interrupted by ERESTART, EINTR or EWOULDBLOCK is
 * reported as success.  Returns 0 or an errno value.
 */
int
kern_recvit(td, s, mp, fromseg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	enum uio_seg fromseg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	if(controlp != NULL)
		*controlp = 0;

	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;

#ifdef MAC
	error = mac_socket_check_receive(td->td_ucred, so);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Sum the iovec lengths, rejecting a total that goes negative. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	CURVNET_SET(so->so_vnet);
	/* Only ask for control data if somebody is going to consume it. */
	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	CURVNET_RESTORE();
	if (error) {
		/* Some data arrived before the interruption: success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	/* Deliver the source address, truncated to the caller's buffer. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			if (fromseg == UIO_USERSPACE) {
				error = copyout(fromsa, mp->msg_name,
				    (unsigned)len);
				if (error)
					goto out;
			} else
				bcopy(fromsa, mp->msg_name, len);
		}
		mp->msg_namelen = len;
	}
	/* Copy control data out unless the caller takes the mbufs itself. */
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Walk the chain, copying until the user buffer is full. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				/* This mbuf does not fit: flag truncation. */
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually copied out. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
#ifdef KTRACE
	if (fromsa && KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(fromsa);
#endif
	if (fromsa)
		free(fromsa, M_SONAME);

	/* Hand the control chain to the caller on success, else free it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else  if (control)
		m_freem(control);

	return (error);
}
1068
1069static int
1070recvit(td, s, mp, namelenp)
1071	struct thread *td;
1072	int s;
1073	struct msghdr *mp;
1074	void *namelenp;
1075{
1076	int error;
1077
1078	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1079	if (error)
1080		return (error);
1081	if (namelenp) {
1082		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1083#ifdef COMPAT_OLDSOCK
1084		if (mp->msg_flags & MSG_COMPAT)
1085			error = 0;	/* old recvfrom didn't check */
1086#endif
1087	}
1088	return (error);
1089}
1090
1091int
1092recvfrom(td, uap)
1093	struct thread *td;
1094	struct recvfrom_args /* {
1095		int	s;
1096		caddr_t	buf;
1097		size_t	len;
1098		int	flags;
1099		struct sockaddr * __restrict	from;
1100		socklen_t * __restrict fromlenaddr;
1101	} */ *uap;
1102{
1103	struct msghdr msg;
1104	struct iovec aiov;
1105	int error;
1106
1107	if (uap->fromlenaddr) {
1108		error = copyin(uap->fromlenaddr,
1109		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1110		if (error)
1111			goto done2;
1112	} else {
1113		msg.msg_namelen = 0;
1114	}
1115	msg.msg_name = uap->from;
1116	msg.msg_iov = &aiov;
1117	msg.msg_iovlen = 1;
1118	aiov.iov_base = uap->buf;
1119	aiov.iov_len = uap->len;
1120	msg.msg_control = 0;
1121	msg.msg_flags = uap->flags;
1122	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1123done2:
1124	return(error);
1125}
1126
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom: identical to recvfrom(2) with MSG_COMPAT forced on in the
 * caller's flags.
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif
1138
1139#ifdef COMPAT_OLDSOCK
1140int
1141orecv(td, uap)
1142	struct thread *td;
1143	struct orecv_args /* {
1144		int	s;
1145		caddr_t	buf;
1146		int	len;
1147		int	flags;
1148	} */ *uap;
1149{
1150	struct msghdr msg;
1151	struct iovec aiov;
1152	int error;
1153
1154	msg.msg_name = 0;
1155	msg.msg_namelen = 0;
1156	msg.msg_iov = &aiov;
1157	msg.msg_iovlen = 1;
1158	aiov.iov_base = uap->buf;
1159	aiov.iov_len = uap->len;
1160	msg.msg_control = 0;
1161	msg.msg_flags = uap->flags;
1162	error = recvit(td, uap->s, &msg, NULL);
1163	return (error);
1164}
1165
1166/*
1167 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1168 * overlays the new one, missing only the flags, and with the (old) access
1169 * rights where the control fields are now.
1170 */
1171int
1172orecvmsg(td, uap)
1173	struct thread *td;
1174	struct orecvmsg_args /* {
1175		int	s;
1176		struct	omsghdr *msg;
1177		int	flags;
1178	} */ *uap;
1179{
1180	struct msghdr msg;
1181	struct iovec *iov;
1182	int error;
1183
1184	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1185	if (error)
1186		return (error);
1187	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1188	if (error)
1189		return (error);
1190	msg.msg_flags = uap->flags | MSG_COMPAT;
1191	msg.msg_iov = iov;
1192	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1193	if (msg.msg_controllen && error == 0)
1194		error = copyout(&msg.msg_controllen,
1195		    &uap->msg->msg_accrightslen, sizeof (int));
1196	free(iov, M_IOV);
1197	return (error);
1198}
1199#endif
1200
1201int
1202recvmsg(td, uap)
1203	struct thread *td;
1204	struct recvmsg_args /* {
1205		int	s;
1206		struct	msghdr *msg;
1207		int	flags;
1208	} */ *uap;
1209{
1210	struct msghdr msg;
1211	struct iovec *uiov, *iov;
1212	int error;
1213
1214	error = copyin(uap->msg, &msg, sizeof (msg));
1215	if (error)
1216		return (error);
1217	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1218	if (error)
1219		return (error);
1220	msg.msg_flags = uap->flags;
1221#ifdef COMPAT_OLDSOCK
1222	msg.msg_flags &= ~MSG_COMPAT;
1223#endif
1224	uiov = msg.msg_iov;
1225	msg.msg_iov = iov;
1226	error = recvit(td, uap->s, &msg, NULL);
1227	if (error == 0) {
1228		msg.msg_iov = uiov;
1229		error = copyout(&msg, uap->msg, sizeof(msg));
1230	}
1231	free(iov, M_IOV);
1232	return (error);
1233}
1234
1235/* ARGSUSED */
1236int
1237shutdown(td, uap)
1238	struct thread *td;
1239	struct shutdown_args /* {
1240		int	s;
1241		int	how;
1242	} */ *uap;
1243{
1244	struct socket *so;
1245	struct file *fp;
1246	int error;
1247
1248	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1249	if (error == 0) {
1250		so = fp->f_data;
1251		error = soshutdown(so, uap->how);
1252		fdrop(fp, td);
1253	}
1254	return (error);
1255}
1256
1257/* ARGSUSED */
1258int
1259setsockopt(td, uap)
1260	struct thread *td;
1261	struct setsockopt_args /* {
1262		int	s;
1263		int	level;
1264		int	name;
1265		caddr_t	val;
1266		int	valsize;
1267	} */ *uap;
1268{
1269
1270	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1271	    uap->val, UIO_USERSPACE, uap->valsize));
1272}
1273
1274int
1275kern_setsockopt(td, s, level, name, val, valseg, valsize)
1276	struct thread *td;
1277	int s;
1278	int level;
1279	int name;
1280	void *val;
1281	enum uio_seg valseg;
1282	socklen_t valsize;
1283{
1284	int error;
1285	struct socket *so;
1286	struct file *fp;
1287	struct sockopt sopt;
1288
1289	if (val == NULL && valsize != 0)
1290		return (EFAULT);
1291	if ((int)valsize < 0)
1292		return (EINVAL);
1293
1294	sopt.sopt_dir = SOPT_SET;
1295	sopt.sopt_level = level;
1296	sopt.sopt_name = name;
1297	sopt.sopt_val = val;
1298	sopt.sopt_valsize = valsize;
1299	switch (valseg) {
1300	case UIO_USERSPACE:
1301		sopt.sopt_td = td;
1302		break;
1303	case UIO_SYSSPACE:
1304		sopt.sopt_td = NULL;
1305		break;
1306	default:
1307		panic("kern_setsockopt called with bad valseg");
1308	}
1309
1310	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1311	if (error == 0) {
1312		so = fp->f_data;
1313		CURVNET_SET(so->so_vnet);
1314		error = sosetopt(so, &sopt);
1315		CURVNET_RESTORE();
1316		fdrop(fp, td);
1317	}
1318	return(error);
1319}
1320
1321/* ARGSUSED */
1322int
1323getsockopt(td, uap)
1324	struct thread *td;
1325	struct getsockopt_args /* {
1326		int	s;
1327		int	level;
1328		int	name;
1329		void * __restrict	val;
1330		socklen_t * __restrict avalsize;
1331	} */ *uap;
1332{
1333	socklen_t valsize;
1334	int	error;
1335
1336	if (uap->val) {
1337		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1338		if (error)
1339			return (error);
1340	}
1341
1342	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1343	    uap->val, UIO_USERSPACE, &valsize);
1344
1345	if (error == 0)
1346		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1347	return (error);
1348}
1349
1350/*
1351 * Kernel version of getsockopt.
1352 * optval can be a userland or userspace. optlen is always a kernel pointer.
1353 */
1354int
1355kern_getsockopt(td, s, level, name, val, valseg, valsize)
1356	struct thread *td;
1357	int s;
1358	int level;
1359	int name;
1360	void *val;
1361	enum uio_seg valseg;
1362	socklen_t *valsize;
1363{
1364	int error;
1365	struct  socket *so;
1366	struct file *fp;
1367	struct	sockopt sopt;
1368
1369	if (val == NULL)
1370		*valsize = 0;
1371	if ((int)*valsize < 0)
1372		return (EINVAL);
1373
1374	sopt.sopt_dir = SOPT_GET;
1375	sopt.sopt_level = level;
1376	sopt.sopt_name = name;
1377	sopt.sopt_val = val;
1378	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1379	switch (valseg) {
1380	case UIO_USERSPACE:
1381		sopt.sopt_td = td;
1382		break;
1383	case UIO_SYSSPACE:
1384		sopt.sopt_td = NULL;
1385		break;
1386	default:
1387		panic("kern_getsockopt called with bad valseg");
1388	}
1389
1390	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1391	if (error == 0) {
1392		so = fp->f_data;
1393		CURVNET_SET(so->so_vnet);
1394		error = sogetopt(so, &sopt);
1395		CURVNET_RESTORE();
1396		*valsize = sopt.sopt_valsize;
1397		fdrop(fp, td);
1398	}
1399	return (error);
1400}
1401
1402/*
1403 * getsockname1() - Get socket name.
1404 */
1405/* ARGSUSED */
1406static int
1407getsockname1(td, uap, compat)
1408	struct thread *td;
1409	struct getsockname_args /* {
1410		int	fdes;
1411		struct sockaddr * __restrict asa;
1412		socklen_t * __restrict alen;
1413	} */ *uap;
1414	int compat;
1415{
1416	struct sockaddr *sa;
1417	socklen_t len;
1418	int error;
1419
1420	error = copyin(uap->alen, &len, sizeof(len));
1421	if (error)
1422		return (error);
1423
1424	error = kern_getsockname(td, uap->fdes, &sa, &len);
1425	if (error)
1426		return (error);
1427
1428	if (len != 0) {
1429#ifdef COMPAT_OLDSOCK
1430		if (compat)
1431			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1432#endif
1433		error = copyout(sa, uap->asa, (u_int)len);
1434	}
1435	free(sa, M_SONAME);
1436	if (error == 0)
1437		error = copyout(&len, uap->alen, sizeof(len));
1438	return (error);
1439}
1440
1441int
1442kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1443    socklen_t *alen)
1444{
1445	struct socket *so;
1446	struct file *fp;
1447	socklen_t len;
1448	int error;
1449
1450	if (*alen < 0)
1451		return (EINVAL);
1452
1453	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1454	if (error)
1455		return (error);
1456	so = fp->f_data;
1457	*sa = NULL;
1458	CURVNET_SET(so->so_vnet);
1459	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1460	CURVNET_RESTORE();
1461	if (error)
1462		goto bad;
1463	if (*sa == NULL)
1464		len = 0;
1465	else
1466		len = MIN(*alen, (*sa)->sa_len);
1467	*alen = len;
1468#ifdef KTRACE
1469	if (KTRPOINT(td, KTR_STRUCT))
1470		ktrsockaddr(*sa);
1471#endif
1472bad:
1473	fdrop(fp, td);
1474	if (error && *sa) {
1475		free(*sa, M_SONAME);
1476		*sa = NULL;
1477	}
1478	return (error);
1479}
1480
/*
 * getsockname(2) system call: getsockname1() without the old-sockaddr
 * compatibility handling.
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1489
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getsockname system call: getsockname1() with the old-sockaddr
 * compatibility handling enabled.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1500
1501/*
1502 * getpeername1() - Get name of peer for connected socket.
1503 */
1504/* ARGSUSED */
1505static int
1506getpeername1(td, uap, compat)
1507	struct thread *td;
1508	struct getpeername_args /* {
1509		int	fdes;
1510		struct sockaddr * __restrict	asa;
1511		socklen_t * __restrict	alen;
1512	} */ *uap;
1513	int compat;
1514{
1515	struct sockaddr *sa;
1516	socklen_t len;
1517	int error;
1518
1519	error = copyin(uap->alen, &len, sizeof (len));
1520	if (error)
1521		return (error);
1522
1523	error = kern_getpeername(td, uap->fdes, &sa, &len);
1524	if (error)
1525		return (error);
1526
1527	if (len != 0) {
1528#ifdef COMPAT_OLDSOCK
1529		if (compat)
1530			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1531#endif
1532		error = copyout(sa, uap->asa, (u_int)len);
1533	}
1534	free(sa, M_SONAME);
1535	if (error == 0)
1536		error = copyout(&len, uap->alen, sizeof(len));
1537	return (error);
1538}
1539
1540int
1541kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1542    socklen_t *alen)
1543{
1544	struct socket *so;
1545	struct file *fp;
1546	socklen_t len;
1547	int error;
1548
1549	if (*alen < 0)
1550		return (EINVAL);
1551
1552	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1553	if (error)
1554		return (error);
1555	so = fp->f_data;
1556	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1557		error = ENOTCONN;
1558		goto done;
1559	}
1560	*sa = NULL;
1561	CURVNET_SET(so->so_vnet);
1562	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1563	CURVNET_RESTORE();
1564	if (error)
1565		goto bad;
1566	if (*sa == NULL)
1567		len = 0;
1568	else
1569		len = MIN(*alen, (*sa)->sa_len);
1570	*alen = len;
1571#ifdef KTRACE
1572	if (KTRPOINT(td, KTR_STRUCT))
1573		ktrsockaddr(*sa);
1574#endif
1575bad:
1576	if (error && *sa) {
1577		free(*sa, M_SONAME);
1578		*sa = NULL;
1579	}
1580done:
1581	fdrop(fp, td);
1582	return (error);
1583}
1584
/*
 * getpeername(2) system call: getpeername1() without the old-sockaddr
 * compatibility handling.
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1593
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getpeername system call: getpeername1() with the old-sockaddr
 * compatibility handling enabled.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *gpa;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	gpa = (struct getpeername_args *)uap;
	return (getpeername1(td, gpa, 1));
}
#endif /* COMPAT_OLDSOCK */
1605
1606int
1607sockargs(mp, buf, buflen, type)
1608	struct mbuf **mp;
1609	caddr_t buf;
1610	int buflen, type;
1611{
1612	struct sockaddr *sa;
1613	struct mbuf *m;
1614	int error;
1615
1616	if ((u_int)buflen > MLEN) {
1617#ifdef COMPAT_OLDSOCK
1618		if (type == MT_SONAME && (u_int)buflen <= 112)
1619			buflen = MLEN;		/* unix domain compat. hack */
1620		else
1621#endif
1622			if ((u_int)buflen > MCLBYTES)
1623				return (EINVAL);
1624	}
1625	m = m_get(M_WAIT, type);
1626	if ((u_int)buflen > MLEN)
1627		MCLGET(m, M_WAIT);
1628	m->m_len = buflen;
1629	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1630	if (error)
1631		(void) m_free(m);
1632	else {
1633		*mp = m;
1634		if (type == MT_SONAME) {
1635			sa = mtod(m, struct sockaddr *);
1636
1637#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1638			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1639				sa->sa_family = sa->sa_len;
1640#endif
1641			sa->sa_len = buflen;
1642		}
1643	}
1644	return (error);
1645}
1646
1647int
1648getsockaddr(namp, uaddr, len)
1649	struct sockaddr **namp;
1650	caddr_t uaddr;
1651	size_t len;
1652{
1653	struct sockaddr *sa;
1654	int error;
1655
1656	if (len > SOCK_MAXADDRLEN)
1657		return (ENAMETOOLONG);
1658	if (len < offsetof(struct sockaddr, sa_data[0]))
1659		return (EINVAL);
1660	sa = malloc(len, M_SONAME, M_WAITOK);
1661	error = copyin(uaddr, sa, len);
1662	if (error) {
1663		free(sa, M_SONAME);
1664	} else {
1665#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1666		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1667			sa->sa_family = sa->sa_len;
1668#endif
1669		sa->sa_len = len;
1670		*namp = sa;
1671	}
1672	return (error);
1673}
1674
1675#include <sys/condvar.h>
1676
1677struct sendfile_sync {
1678	struct mtx	mtx;
1679	struct cv	cv;
1680	unsigned 	count;
1681};
1682
1683/*
1684 * Detach mapped page and release resources back to the system.
1685 */
1686void
1687sf_buf_mext(void *addr, void *args)
1688{
1689	vm_page_t m;
1690	struct sendfile_sync *sfs;
1691
1692	m = sf_buf_page(args);
1693	sf_buf_free(args);
1694	vm_page_lock_queues();
1695	vm_page_unwire(m, 0);
1696	/*
1697	 * Check for the object going away on us. This can
1698	 * happen since we don't hold a reference to it.
1699	 * If so, we're responsible for freeing the page.
1700	 */
1701	if (m->wire_count == 0 && m->object == NULL)
1702		vm_page_free(m);
1703	vm_page_unlock_queues();
1704	if (addr == NULL)
1705		return;
1706	sfs = addr;
1707	mtx_lock(&sfs->mtx);
1708	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1709	if (--sfs->count == 0)
1710		cv_signal(&sfs->cv);
1711	mtx_unlock(&sfs->mtx);
1712}
1713
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file referred to by 'fd', beginning at 'offset', over the
 * socket 's'.  At most 'nbytes' of file data are sent; nbytes == 0 means
 * "up to EOF".  A header and/or trailer may optionally be added to the
 * socket output.  When 'sbytes' is given, the total number of bytes sent
 * is stored into *sbytes.
 *
 * This entry point runs do_sendfile() with the compatibility mode off.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1731
1732static int
1733do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1734{
1735	struct sf_hdtr hdtr;
1736	struct uio *hdr_uio, *trl_uio;
1737	int error;
1738
1739	hdr_uio = trl_uio = NULL;
1740
1741	if (uap->hdtr != NULL) {
1742		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1743		if (error)
1744			goto out;
1745		if (hdtr.headers != NULL) {
1746			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1747			if (error)
1748				goto out;
1749		}
1750		if (hdtr.trailers != NULL) {
1751			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1752			if (error)
1753				goto out;
1754
1755		}
1756	}
1757
1758	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1759out:
1760	if (hdr_uio)
1761		free(hdr_uio, M_IOV);
1762	if (trl_uio)
1763		free(trl_uio, M_IOV);
1764	return (error);
1765}
1766
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.
 *
 * Repackages the old argument structure field by field into a
 * struct sendfile_args and runs do_sendfile() with the compatibility mode
 * on.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args args;
	int error;

	/* Descriptors. */
	args.fd = uap->fd;
	args.s = uap->s;
	/* Range of the file to send. */
	args.offset = uap->offset;
	args.nbytes = uap->nbytes;
	/* Optional header/trailer description, result pointer and flags. */
	args.hdtr = uap->hdtr;
	args.sbytes = uap->sbytes;
	args.flags = uap->flags;

	error = do_sendfile(td, &args, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1784
1785int
1786kern_sendfile(struct thread *td, struct sendfile_args *uap,
1787    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1788{
1789	struct file *sock_fp;
1790	struct vnode *vp;
1791	struct vm_object *obj = NULL;
1792	struct socket *so = NULL;
1793	struct mbuf *m = NULL;
1794	struct sf_buf *sf;
1795	struct vm_page *pg;
1796	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1797	int error, hdrlen = 0, mnw = 0;
1798	int vfslocked;
1799	struct sendfile_sync *sfs = NULL;
1800
1801	/*
1802	 * The file descriptor must be a regular file and have a
1803	 * backing VM object.
1804	 * File offset must be positive.  If it goes beyond EOF
1805	 * we send only the header/trailer and no payload data.
1806	 */
1807	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1808		goto out;
1809	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1810	vn_lock(vp, LK_SHARED | LK_RETRY);
1811	if (vp->v_type == VREG) {
1812		obj = vp->v_object;
1813		if (obj != NULL) {
1814			/*
1815			 * Temporarily increase the backing VM
1816			 * object's reference count so that a forced
1817			 * reclamation of its vnode does not
1818			 * immediately destroy it.
1819			 */
1820			VM_OBJECT_LOCK(obj);
1821			if ((obj->flags & OBJ_DEAD) == 0) {
1822				vm_object_reference_locked(obj);
1823				VM_OBJECT_UNLOCK(obj);
1824			} else {
1825				VM_OBJECT_UNLOCK(obj);
1826				obj = NULL;
1827			}
1828		}
1829	}
1830	VOP_UNLOCK(vp, 0);
1831	VFS_UNLOCK_GIANT(vfslocked);
1832	if (obj == NULL) {
1833		error = EINVAL;
1834		goto out;
1835	}
1836	if (uap->offset < 0) {
1837		error = EINVAL;
1838		goto out;
1839	}
1840
1841	/*
1842	 * The socket must be a stream socket and connected.
1843	 * Remember if it a blocking or non-blocking socket.
1844	 */
1845	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1846	    NULL)) != 0)
1847		goto out;
1848	so = sock_fp->f_data;
1849	if (so->so_type != SOCK_STREAM) {
1850		error = EINVAL;
1851		goto out;
1852	}
1853	if ((so->so_state & SS_ISCONNECTED) == 0) {
1854		error = ENOTCONN;
1855		goto out;
1856	}
1857	/*
1858	 * Do not wait on memory allocations but return ENOMEM for
1859	 * caller to retry later.
1860	 * XXX: Experimental.
1861	 */
1862	if (uap->flags & SF_MNOWAIT)
1863		mnw = 1;
1864
1865	if (uap->flags & SF_SYNC) {
1866		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
1867		memset(sfs, 0, sizeof *sfs);
1868		mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
1869		cv_init(&sfs->cv, "sendfile");
1870	}
1871
1872#ifdef MAC
1873	error = mac_socket_check_send(td->td_ucred, so);
1874	if (error)
1875		goto out;
1876#endif
1877
1878	/* If headers are specified copy them into mbufs. */
1879	if (hdr_uio != NULL) {
1880		hdr_uio->uio_td = td;
1881		hdr_uio->uio_rw = UIO_WRITE;
1882		if (hdr_uio->uio_resid > 0) {
1883			/*
1884			 * In FBSD < 5.0 the nbytes to send also included
1885			 * the header.  If compat is specified subtract the
1886			 * header size from nbytes.
1887			 */
1888			if (compat) {
1889				if (uap->nbytes > hdr_uio->uio_resid)
1890					uap->nbytes -= hdr_uio->uio_resid;
1891				else
1892					uap->nbytes = 0;
1893			}
1894			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1895			    0, 0, 0);
1896			if (m == NULL) {
1897				error = mnw ? EAGAIN : ENOBUFS;
1898				goto out;
1899			}
1900			hdrlen = m_length(m, NULL);
1901		}
1902	}
1903
1904	/*
1905	 * Protect against multiple writers to the socket.
1906	 *
1907	 * XXXRW: Historically this has assumed non-interruptibility, so now
1908	 * we implement that, but possibly shouldn't.
1909	 */
1910	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1911
1912	/*
1913	 * Loop through the pages of the file, starting with the requested
1914	 * offset. Get a file page (do I/O if necessary), map the file page
1915	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1916	 * it on the socket.
1917	 * This is done in two loops.  The inner loop turns as many pages
1918	 * as it can, up to available socket buffer space, without blocking
1919	 * into mbufs to have it bulk delivered into the socket send buffer.
1920	 * The outer loop checks the state and available space of the socket
1921	 * and takes care of the overall progress.
1922	 */
1923	for (off = uap->offset, rem = uap->nbytes; ; ) {
1924		int loopbytes = 0;
1925		int space = 0;
1926		int done = 0;
1927
1928		/*
1929		 * Check the socket state for ongoing connection,
1930		 * no errors and space in socket buffer.
1931		 * If space is low allow for the remainder of the
1932		 * file to be processed if it fits the socket buffer.
1933		 * Otherwise block in waiting for sufficient space
1934		 * to proceed, or if the socket is nonblocking, return
1935		 * to userland with EAGAIN while reporting how far
1936		 * we've come.
1937		 * We wait until the socket buffer has significant free
1938		 * space to do bulk sends.  This makes good use of file
1939		 * system read ahead and allows packet segmentation
1940		 * offloading hardware to take over lots of work.  If
1941		 * we were not careful here we would send off only one
1942		 * sfbuf at a time.
1943		 */
1944		SOCKBUF_LOCK(&so->so_snd);
1945		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1946			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1947retry_space:
1948		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1949			error = EPIPE;
1950			SOCKBUF_UNLOCK(&so->so_snd);
1951			goto done;
1952		} else if (so->so_error) {
1953			error = so->so_error;
1954			so->so_error = 0;
1955			SOCKBUF_UNLOCK(&so->so_snd);
1956			goto done;
1957		}
1958		space = sbspace(&so->so_snd);
1959		if (space < rem &&
1960		    (space <= 0 ||
1961		     space < so->so_snd.sb_lowat)) {
1962			if (so->so_state & SS_NBIO) {
1963				SOCKBUF_UNLOCK(&so->so_snd);
1964				error = EAGAIN;
1965				goto done;
1966			}
1967			/*
1968			 * sbwait drops the lock while sleeping.
1969			 * When we loop back to retry_space the
1970			 * state may have changed and we retest
1971			 * for it.
1972			 */
1973			error = sbwait(&so->so_snd);
1974			/*
1975			 * An error from sbwait usually indicates that we've
1976			 * been interrupted by a signal. If we've sent anything
1977			 * then return bytes sent, otherwise return the error.
1978			 */
1979			if (error) {
1980				SOCKBUF_UNLOCK(&so->so_snd);
1981				goto done;
1982			}
1983			goto retry_space;
1984		}
1985		SOCKBUF_UNLOCK(&so->so_snd);
1986
1987		/*
1988		 * Reduce space in the socket buffer by the size of
1989		 * the header mbuf chain.
1990		 * hdrlen is set to 0 after the first loop.
1991		 */
1992		space -= hdrlen;
1993
1994		/*
1995		 * Loop and construct maximum sized mbuf chain to be bulk
1996		 * dumped into socket buffer.
1997		 */
1998		while(space > loopbytes) {
1999			vm_pindex_t pindex;
2000			vm_offset_t pgoff;
2001			struct mbuf *m0;
2002
2003			VM_OBJECT_LOCK(obj);
2004			/*
2005			 * Calculate the amount to transfer.
2006			 * Not to exceed a page, the EOF,
2007			 * or the passed in nbytes.
2008			 */
2009			pgoff = (vm_offset_t)(off & PAGE_MASK);
2010			xfsize = omin(PAGE_SIZE - pgoff,
2011			    obj->un_pager.vnp.vnp_size - uap->offset -
2012			    fsbytes - loopbytes);
2013			if (uap->nbytes)
2014				rem = (uap->nbytes - fsbytes - loopbytes);
2015			else
2016				rem = obj->un_pager.vnp.vnp_size -
2017				    uap->offset - fsbytes - loopbytes;
2018			xfsize = omin(rem, xfsize);
2019			if (xfsize <= 0) {
2020				VM_OBJECT_UNLOCK(obj);
2021				done = 1;		/* all data sent */
2022				break;
2023			}
2024			/*
2025			 * Don't overflow the send buffer.
2026			 * Stop here and send out what we've
2027			 * already got.
2028			 */
2029			if (space < loopbytes + xfsize) {
2030				VM_OBJECT_UNLOCK(obj);
2031				break;
2032			}
2033
2034			/*
2035			 * Attempt to look up the page.  Allocate
2036			 * if not found or wait and loop if busy.
2037			 */
2038			pindex = OFF_TO_IDX(off);
2039			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2040			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2041
2042			/*
2043			 * Check if page is valid for what we need,
2044			 * otherwise initiate I/O.
2045			 * If we already turned some pages into mbufs,
2046			 * send them off before we come here again and
2047			 * block.
2048			 */
2049			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2050				VM_OBJECT_UNLOCK(obj);
2051			else if (m != NULL)
2052				error = EAGAIN;	/* send what we already got */
2053			else if (uap->flags & SF_NODISKIO)
2054				error = EBUSY;
2055			else {
2056				int bsize, resid;
2057
2058				/*
2059				 * Ensure that our page is still around
2060				 * when the I/O completes.
2061				 */
2062				vm_page_io_start(pg);
2063				VM_OBJECT_UNLOCK(obj);
2064
2065				/*
2066				 * Get the page from backing store.
2067				 */
2068				bsize = vp->v_mount->mnt_stat.f_iosize;
2069				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2070				vn_lock(vp, LK_SHARED | LK_RETRY);
2071
2072				/*
2073				 * XXXMAC: Because we don't have fp->f_cred
2074				 * here, we pass in NOCRED.  This is probably
2075				 * wrong, but is consistent with our original
2076				 * implementation.
2077				 */
2078				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2079				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2080				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2081				    td->td_ucred, NOCRED, &resid, td);
2082				VOP_UNLOCK(vp, 0);
2083				VFS_UNLOCK_GIANT(vfslocked);
2084				VM_OBJECT_LOCK(obj);
2085				vm_page_io_finish(pg);
2086				if (!error)
2087					VM_OBJECT_UNLOCK(obj);
2088				mbstat.sf_iocnt++;
2089			}
2090			if (error) {
2091				vm_page_lock_queues();
2092				vm_page_unwire(pg, 0);
2093				/*
2094				 * See if anyone else might know about
2095				 * this page.  If not and it is not valid,
2096				 * then free it.
2097				 */
2098				if (pg->wire_count == 0 && pg->valid == 0 &&
2099				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2100				    pg->hold_count == 0) {
2101					vm_page_free(pg);
2102				}
2103				vm_page_unlock_queues();
2104				VM_OBJECT_UNLOCK(obj);
2105				if (error == EAGAIN)
2106					error = 0;	/* not a real error */
2107				break;
2108			}
2109
2110			/*
2111			 * Get a sendfile buf.  We usually wait as long
2112			 * as necessary, but this wait can be interrupted.
2113			 */
2114			if ((sf = sf_buf_alloc(pg,
2115			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2116				mbstat.sf_allocfail++;
2117				vm_page_lock_queues();
2118				vm_page_unwire(pg, 0);
2119				/*
2120				 * XXX: Not same check as above!?
2121				 */
2122				if (pg->wire_count == 0 && pg->object == NULL)
2123					vm_page_free(pg);
2124				vm_page_unlock_queues();
2125				error = (mnw ? EAGAIN : EINTR);
2126				break;
2127			}
2128
2129			/*
2130			 * Get an mbuf and set it up as having
2131			 * external storage.
2132			 */
2133			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2134			if (m0 == NULL) {
2135				error = (mnw ? EAGAIN : ENOBUFS);
2136				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2137				break;
2138			}
2139			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2140			    sfs, sf, M_RDONLY, EXT_SFBUF);
2141			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2142			m0->m_len = xfsize;
2143
2144			/* Append to mbuf chain. */
2145			if (m != NULL)
2146				m_cat(m, m0);
2147			else
2148				m = m0;
2149
2150			/* Keep track of bits processed. */
2151			loopbytes += xfsize;
2152			off += xfsize;
2153
2154			if (sfs != NULL) {
2155				mtx_lock(&sfs->mtx);
2156				sfs->count++;
2157				mtx_unlock(&sfs->mtx);
2158			}
2159		}
2160
2161		/* Add the buffer chain to the socket buffer. */
2162		if (m != NULL) {
2163			int mlen, err;
2164
2165			mlen = m_length(m, NULL);
2166			SOCKBUF_LOCK(&so->so_snd);
2167			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2168				error = EPIPE;
2169				SOCKBUF_UNLOCK(&so->so_snd);
2170				goto done;
2171			}
2172			SOCKBUF_UNLOCK(&so->so_snd);
2173			CURVNET_SET(so->so_vnet);
2174			/* Avoid error aliasing. */
2175			err = (*so->so_proto->pr_usrreqs->pru_send)
2176				    (so, 0, m, NULL, NULL, td);
2177			CURVNET_RESTORE();
2178			if (err == 0) {
2179				/*
2180				 * We need two counters to get the
2181				 * file offset and nbytes to send
2182				 * right:
2183				 * - sbytes contains the total amount
2184				 *   of bytes sent, including headers.
2185				 * - fsbytes contains the total amount
2186				 *   of bytes sent from the file.
2187				 */
2188				sbytes += mlen;
2189				fsbytes += mlen;
2190				if (hdrlen) {
2191					fsbytes -= hdrlen;
2192					hdrlen = 0;
2193				}
2194			} else if (error == 0)
2195				error = err;
2196			m = NULL;	/* pru_send always consumes */
2197		}
2198
2199		/* Quit outer loop on error or when we're done. */
2200		if (done)
2201			break;
2202		if (error)
2203			goto done;
2204	}
2205
2206	/*
2207	 * Send trailers. Wimp out and use writev(2).
2208	 */
2209	if (trl_uio != NULL) {
2210		sbunlock(&so->so_snd);
2211		error = kern_writev(td, uap->s, trl_uio);
2212		if (error == 0)
2213			sbytes += td->td_retval[0];
2214		goto out;
2215	}
2216
2217done:
2218	sbunlock(&so->so_snd);
2219out:
2220	/*
2221	 * If there was no error we have to clear td->td_retval[0]
2222	 * because it may have been set by writev.
2223	 */
2224	if (error == 0) {
2225		td->td_retval[0] = 0;
2226	}
2227	if (uap->sbytes != NULL) {
2228		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2229	}
2230	if (obj != NULL)
2231		vm_object_deallocate(obj);
2232	if (vp != NULL) {
2233		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2234		vrele(vp);
2235		VFS_UNLOCK_GIANT(vfslocked);
2236	}
2237	if (so)
2238		fdrop(sock_fp, td);
2239	if (m)
2240		m_freem(m);
2241
2242	if (sfs != NULL) {
2243		mtx_lock(&sfs->mtx);
2244		if (sfs->count != 0)
2245			cv_wait(&sfs->cv, &sfs->mtx);
2246		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2247		cv_destroy(&sfs->cv);
2248		mtx_destroy(&sfs->mtx);
2249		free(sfs, M_TEMP);
2250	}
2251
2252	if (error == ERESTART)
2253		error = EINTR;
2254
2255	return (error);
2256}
2257
2258/*
2259 * SCTP syscalls.
2260 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2261 * otherwise all return EOPNOTSUPP.
2262 * XXX: We should make this loadable one day.
2263 */
2264int
2265sctp_peeloff(td, uap)
2266	struct thread *td;
2267	struct sctp_peeloff_args /* {
2268		int	sd;
2269		caddr_t	name;
2270	} */ *uap;
2271{
2272#ifdef SCTP
2273	struct filedesc *fdp;
2274	struct file *nfp = NULL;
2275	int error;
2276	struct socket *head, *so;
2277	int fd;
2278	u_int fflag;
2279
2280	fdp = td->td_proc->p_fd;
2281	error = fgetsock(td, uap->sd, &head, &fflag);
2282	if (error)
2283		goto done2;
2284	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2285	if (error)
2286		goto done2;
2287	/*
2288	 * At this point we know we do have a assoc to pull
2289	 * we proceed to get the fd setup. This may block
2290	 * but that is ok.
2291	 */
2292
2293	error = falloc(td, &nfp, &fd);
2294	if (error)
2295		goto done;
2296	td->td_retval[0] = fd;
2297
2298	so = sonewconn(head, SS_ISCONNECTED);
2299	if (so == NULL)
2300		goto noconnection;
2301	/*
2302	 * Before changing the flags on the socket, we have to bump the
2303	 * reference count.  Otherwise, if the protocol calls sofree(),
2304	 * the socket will be released due to a zero refcount.
2305	 */
2306        SOCK_LOCK(so);
2307        soref(so);                      /* file descriptor reference */
2308        SOCK_UNLOCK(so);
2309
2310	ACCEPT_LOCK();
2311
2312	TAILQ_REMOVE(&head->so_comp, so, so_list);
2313	head->so_qlen--;
2314	so->so_state |= (head->so_state & SS_NBIO);
2315	so->so_state &= ~SS_NOFDREF;
2316	so->so_qstate &= ~SQ_COMP;
2317	so->so_head = NULL;
2318	ACCEPT_UNLOCK();
2319	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2320	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2321	if (error)
2322		goto noconnection;
2323	if (head->so_sigio != NULL)
2324		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2325
2326noconnection:
2327	/*
2328	 * close the new descriptor, assuming someone hasn't ripped it
2329	 * out from under us.
2330	 */
2331	if (error)
2332		fdclose(fdp, nfp, fd, td);
2333
2334	/*
2335	 * Release explicitly held references before returning.
2336	 */
2337done:
2338	if (nfp != NULL)
2339		fdrop(nfp, td);
2340	fputsock(head);
2341done2:
2342	return (error);
2343#else  /* SCTP */
2344	return (EOPNOTSUPP);
2345#endif /* SCTP */
2346}
2347
2348int
2349sctp_generic_sendmsg (td, uap)
2350	struct thread *td;
2351	struct sctp_generic_sendmsg_args /* {
2352		int sd,
2353		caddr_t msg,
2354		int mlen,
2355		caddr_t to,
2356		__socklen_t tolen,
2357		struct sctp_sndrcvinfo *sinfo,
2358		int flags
2359	} */ *uap;
2360{
2361#ifdef SCTP
2362	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2363	struct socket *so;
2364	struct file *fp = NULL;
2365	int use_rcvinfo = 1;
2366	int error = 0, len;
2367	struct sockaddr *to = NULL;
2368#ifdef KTRACE
2369	struct uio *ktruio = NULL;
2370#endif
2371	struct uio auio;
2372	struct iovec iov[1];
2373
2374	if (uap->sinfo) {
2375		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2376		if (error)
2377			return (error);
2378		u_sinfo = &sinfo;
2379	}
2380	if (uap->tolen) {
2381		error = getsockaddr(&to, uap->to, uap->tolen);
2382		if (error) {
2383			to = NULL;
2384			goto sctp_bad2;
2385		}
2386	}
2387
2388	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2389	if (error)
2390		goto sctp_bad;
2391#ifdef KTRACE
2392	if (KTRPOINT(td, KTR_STRUCT))
2393		ktrsockaddr(to);
2394#endif
2395
2396	iov[0].iov_base = uap->msg;
2397	iov[0].iov_len = uap->mlen;
2398
2399	so = (struct socket *)fp->f_data;
2400#ifdef MAC
2401	error = mac_socket_check_send(td->td_ucred, so);
2402	if (error)
2403		goto sctp_bad;
2404#endif /* MAC */
2405
2406	auio.uio_iov =  iov;
2407	auio.uio_iovcnt = 1;
2408	auio.uio_segflg = UIO_USERSPACE;
2409	auio.uio_rw = UIO_WRITE;
2410	auio.uio_td = td;
2411	auio.uio_offset = 0;			/* XXX */
2412	auio.uio_resid = 0;
2413	len = auio.uio_resid = uap->mlen;
2414	error = sctp_lower_sosend(so, to, &auio,
2415		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2416		    uap->flags, use_rcvinfo, u_sinfo, td);
2417	if (error) {
2418		if (auio.uio_resid != len && (error == ERESTART ||
2419		    error == EINTR || error == EWOULDBLOCK))
2420			error = 0;
2421		/* Generation of SIGPIPE can be controlled per socket. */
2422		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2423		    !(uap->flags & MSG_NOSIGNAL)) {
2424			PROC_LOCK(td->td_proc);
2425			psignal(td->td_proc, SIGPIPE);
2426			PROC_UNLOCK(td->td_proc);
2427		}
2428	}
2429	if (error == 0)
2430		td->td_retval[0] = len - auio.uio_resid;
2431#ifdef KTRACE
2432	if (ktruio != NULL) {
2433		ktruio->uio_resid = td->td_retval[0];
2434		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2435	}
2436#endif /* KTRACE */
2437sctp_bad:
2438	if (fp)
2439		fdrop(fp, td);
2440sctp_bad2:
2441	if (to)
2442		free(to, M_SONAME);
2443	return (error);
2444#else  /* SCTP */
2445	return (EOPNOTSUPP);
2446#endif /* SCTP */
2447}
2448
2449int
2450sctp_generic_sendmsg_iov(td, uap)
2451	struct thread *td;
2452	struct sctp_generic_sendmsg_iov_args /* {
2453		int sd,
2454		struct iovec *iov,
2455		int iovlen,
2456		caddr_t to,
2457		__socklen_t tolen,
2458		struct sctp_sndrcvinfo *sinfo,
2459		int flags
2460	} */ *uap;
2461{
2462#ifdef SCTP
2463	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2464	struct socket *so;
2465	struct file *fp = NULL;
2466	int use_rcvinfo = 1;
2467	int error=0, len, i;
2468	struct sockaddr *to = NULL;
2469#ifdef KTRACE
2470	struct uio *ktruio = NULL;
2471#endif
2472	struct uio auio;
2473	struct iovec *iov, *tiov;
2474
2475	if (uap->sinfo) {
2476		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2477		if (error)
2478			return (error);
2479		u_sinfo = &sinfo;
2480	}
2481	if (uap->tolen) {
2482		error = getsockaddr(&to, uap->to, uap->tolen);
2483		if (error) {
2484			to = NULL;
2485			goto sctp_bad2;
2486		}
2487	}
2488
2489	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2490	if (error)
2491		goto sctp_bad1;
2492
2493	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2494	if (error)
2495		goto sctp_bad1;
2496#ifdef KTRACE
2497	if (KTRPOINT(td, KTR_STRUCT))
2498		ktrsockaddr(to);
2499#endif
2500
2501	so = (struct socket *)fp->f_data;
2502#ifdef MAC
2503	error = mac_socket_check_send(td->td_ucred, so);
2504	if (error)
2505		goto sctp_bad;
2506#endif /* MAC */
2507
2508	auio.uio_iov =  iov;
2509	auio.uio_iovcnt = uap->iovlen;
2510	auio.uio_segflg = UIO_USERSPACE;
2511	auio.uio_rw = UIO_WRITE;
2512	auio.uio_td = td;
2513	auio.uio_offset = 0;			/* XXX */
2514	auio.uio_resid = 0;
2515	tiov = iov;
2516	for (i = 0; i <uap->iovlen; i++, tiov++) {
2517		if ((auio.uio_resid += tiov->iov_len) < 0) {
2518			error = EINVAL;
2519			goto sctp_bad;
2520		}
2521	}
2522	len = auio.uio_resid;
2523	error = sctp_lower_sosend(so, to, &auio,
2524		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2525		    uap->flags, use_rcvinfo, u_sinfo, td);
2526	if (error) {
2527		if (auio.uio_resid != len && (error == ERESTART ||
2528		    error == EINTR || error == EWOULDBLOCK))
2529			error = 0;
2530		/* Generation of SIGPIPE can be controlled per socket */
2531		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2532		    !(uap->flags & MSG_NOSIGNAL)) {
2533			PROC_LOCK(td->td_proc);
2534			psignal(td->td_proc, SIGPIPE);
2535			PROC_UNLOCK(td->td_proc);
2536		}
2537	}
2538	if (error == 0)
2539		td->td_retval[0] = len - auio.uio_resid;
2540#ifdef KTRACE
2541	if (ktruio != NULL) {
2542		ktruio->uio_resid = td->td_retval[0];
2543		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2544	}
2545#endif /* KTRACE */
2546sctp_bad:
2547	free(iov, M_IOV);
2548sctp_bad1:
2549	if (fp)
2550		fdrop(fp, td);
2551sctp_bad2:
2552	if (to)
2553		free(to, M_SONAME);
2554	return (error);
2555#else  /* SCTP */
2556	return (EOPNOTSUPP);
2557#endif /* SCTP */
2558}
2559
2560int
2561sctp_generic_recvmsg(td, uap)
2562	struct thread *td;
2563	struct sctp_generic_recvmsg_args /* {
2564		int sd,
2565		struct iovec *iov,
2566		int iovlen,
2567		struct sockaddr *from,
2568		__socklen_t *fromlenaddr,
2569		struct sctp_sndrcvinfo *sinfo,
2570		int *msg_flags
2571	} */ *uap;
2572{
2573#ifdef SCTP
2574	u_int8_t sockbufstore[256];
2575	struct uio auio;
2576	struct iovec *iov, *tiov;
2577	struct sctp_sndrcvinfo sinfo;
2578	struct socket *so;
2579	struct file *fp = NULL;
2580	struct sockaddr *fromsa;
2581	int fromlen;
2582	int len, i, msg_flags;
2583	int error = 0;
2584#ifdef KTRACE
2585	struct uio *ktruio = NULL;
2586#endif
2587	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2588	if (error) {
2589		return (error);
2590	}
2591	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2592	if (error) {
2593		goto out1;
2594	}
2595
2596	so = fp->f_data;
2597#ifdef MAC
2598	error = mac_socket_check_receive(td->td_ucred, so);
2599	if (error) {
2600		goto out;
2601		return (error);
2602	}
2603#endif /* MAC */
2604
2605	if (uap->fromlenaddr) {
2606		error = copyin(uap->fromlenaddr,
2607		    &fromlen, sizeof (fromlen));
2608		if (error) {
2609			goto out;
2610		}
2611	} else {
2612		fromlen = 0;
2613	}
2614	if(uap->msg_flags) {
2615		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2616		if (error) {
2617			goto out;
2618		}
2619	} else {
2620		msg_flags = 0;
2621	}
2622	auio.uio_iov = iov;
2623	auio.uio_iovcnt = uap->iovlen;
2624  	auio.uio_segflg = UIO_USERSPACE;
2625	auio.uio_rw = UIO_READ;
2626	auio.uio_td = td;
2627	auio.uio_offset = 0;			/* XXX */
2628	auio.uio_resid = 0;
2629	tiov = iov;
2630	for (i = 0; i <uap->iovlen; i++, tiov++) {
2631		if ((auio.uio_resid += tiov->iov_len) < 0) {
2632			error = EINVAL;
2633			goto out;
2634		}
2635	}
2636	len = auio.uio_resid;
2637	fromsa = (struct sockaddr *)sockbufstore;
2638
2639#ifdef KTRACE
2640	if (KTRPOINT(td, KTR_GENIO))
2641		ktruio = cloneuio(&auio);
2642#endif /* KTRACE */
2643	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2644		    fromsa, fromlen, &msg_flags,
2645		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2646	if (error) {
2647		if (auio.uio_resid != (int)len && (error == ERESTART ||
2648		    error == EINTR || error == EWOULDBLOCK))
2649			error = 0;
2650	} else {
2651		if (uap->sinfo)
2652			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2653	}
2654#ifdef KTRACE
2655	if (ktruio != NULL) {
2656		ktruio->uio_resid = (int)len - auio.uio_resid;
2657		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2658	}
2659#endif /* KTRACE */
2660	if (error)
2661		goto out;
2662	td->td_retval[0] = (int)len - auio.uio_resid;
2663
2664	if (fromlen && uap->from) {
2665		len = fromlen;
2666		if (len <= 0 || fromsa == 0)
2667			len = 0;
2668		else {
2669			len = MIN(len, fromsa->sa_len);
2670			error = copyout(fromsa, uap->from, (unsigned)len);
2671			if (error)
2672				goto out;
2673		}
2674		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2675		if (error) {
2676			goto out;
2677		}
2678	}
2679#ifdef KTRACE
2680	if (KTRPOINT(td, KTR_STRUCT))
2681		ktrsockaddr(fromsa);
2682#endif
2683	if (uap->msg_flags) {
2684		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2685		if (error) {
2686			goto out;
2687		}
2688	}
2689out:
2690	free(iov, M_IOV);
2691out1:
2692	if (fp)
2693		fdrop(fp, td);
2694
2695	return (error);
2696#else  /* SCTP */
2697	return (EOPNOTSUPP);
2698#endif /* SCTP */
2699}
2700