uipc_syscalls.c revision 192080
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 192080 2009-05-14 03:24:22Z jeff $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vimage.h>
67#include <sys/vnode.h>
68#ifdef KTRACE
69#include <sys/ktrace.h>
70#endif
71
72#include <security/mac/mac_framework.h>
73
74#include <vm/vm.h>
75#include <vm/vm_object.h>
76#include <vm/vm_page.h>
77#include <vm/vm_pageout.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80
81#ifdef SCTP
82#include <netinet/sctp.h>
83#include <netinet/sctp_peeloff.h>
84#endif /* SCTP */
85
/*
 * Prototypes for file-local helpers.
 *
 * sendit() and recvit() are the common back ends used by the send*() and
 * recv*() system calls in this file.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * accept1() is shared by accept() and oaccept(); a non-zero `compat' selects
 * the old sockaddr layout.  The remaining helpers are defined outside this
 * part of the file; presumably their `compat' argument follows the same
 * convention -- confirm against their definitions.
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
/*
 * NSFBUFS-related variables and associated sysctls.
 *
 * The three counters below are exported under kern.ipc; their meanings are
 * given by the sysctl descriptions that follow.
 */
int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
109
110/*
111 * Convert a user file descriptor to a kernel file entry.  A reference on the
112 * file entry is held upon returning.  This is lighter weight than
113 * fgetsock(), which bumps the socket reference drops the file reference
114 * count instead, as this approach avoids several additional mutex operations
115 * associated with the additional reference count.  If requested, return the
116 * open file flags.
117 */
118static int
119getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
120{
121	struct file *fp;
122	int error;
123
124	fp = NULL;
125	if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
126		error = EBADF;
127	} else if (fp->f_type != DTYPE_SOCKET) {
128		fdrop(fp, curthread);
129		fp = NULL;
130		error = ENOTSOCK;
131	} else {
132		if (fflagp != NULL)
133			*fflagp = fp->f_flag;
134		error = 0;
135	}
136	*fpp = fp;
137	return (error);
138}
139
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK is enabled whenever COMPAT_43 is configured.  It gates the
 * old-style entry points in this file (oaccept(), osend(), osendmsg(),
 * orecv(), orecvfrom(), orecvmsg()) and the MSG_COMPAT handling in the
 * common send/receive paths.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
146
147int
148socket(td, uap)
149	struct thread *td;
150	struct socket_args /* {
151		int	domain;
152		int	type;
153		int	protocol;
154	} */ *uap;
155{
156	struct filedesc *fdp;
157	struct socket *so;
158	struct file *fp;
159	int fd, error;
160
161#ifdef MAC
162	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
163	    uap->protocol);
164	if (error)
165		return (error);
166#endif
167	fdp = td->td_proc->p_fd;
168	error = falloc(td, &fp, &fd);
169	if (error)
170		return (error);
171	/* An extra reference on `fp' has been held for us by falloc(). */
172	error = socreate(uap->domain, &so, uap->type, uap->protocol,
173	    td->td_ucred, td);
174	if (error) {
175		fdclose(fdp, fp, fd, td);
176	} else {
177		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
178		td->td_retval[0] = fd;
179	}
180	fdrop(fp, td);
181	return (error);
182}
183
184/* ARGSUSED */
185int
186bind(td, uap)
187	struct thread *td;
188	struct bind_args /* {
189		int	s;
190		caddr_t	name;
191		int	namelen;
192	} */ *uap;
193{
194	struct sockaddr *sa;
195	int error;
196
197	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
198		return (error);
199
200	error = kern_bind(td, uap->s, sa);
201	free(sa, M_SONAME);
202	return (error);
203}
204
205int
206kern_bind(td, fd, sa)
207	struct thread *td;
208	int fd;
209	struct sockaddr *sa;
210{
211	struct socket *so;
212	struct file *fp;
213	int error;
214
215	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
216	if (error)
217		return (error);
218	so = fp->f_data;
219#ifdef KTRACE
220	if (KTRPOINT(td, KTR_STRUCT))
221		ktrsockaddr(sa);
222#endif
223#ifdef MAC
224	SOCK_LOCK(so);
225	error = mac_socket_check_bind(td->td_ucred, so, sa);
226	SOCK_UNLOCK(so);
227	if (error)
228		goto done;
229#endif
230	error = sobind(so, sa, td);
231#ifdef MAC
232done:
233#endif
234	fdrop(fp, td);
235	return (error);
236}
237
238/* ARGSUSED */
239int
240listen(td, uap)
241	struct thread *td;
242	struct listen_args /* {
243		int	s;
244		int	backlog;
245	} */ *uap;
246{
247	struct socket *so;
248	struct file *fp;
249	int error;
250
251	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
252	if (error == 0) {
253		so = fp->f_data;
254#ifdef MAC
255		SOCK_LOCK(so);
256		error = mac_socket_check_listen(td->td_ucred, so);
257		SOCK_UNLOCK(so);
258		if (error)
259			goto done;
260#endif
261		CURVNET_SET(so->so_vnet);
262		error = solisten(so, uap->backlog, td);
263		CURVNET_RESTORE();
264#ifdef MAC
265done:
266#endif
267		fdrop(fp, td);
268	}
269	return(error);
270}
271
272/*
273 * accept1()
274 */
275static int
276accept1(td, uap, compat)
277	struct thread *td;
278	struct accept_args /* {
279		int	s;
280		struct sockaddr	* __restrict name;
281		socklen_t	* __restrict anamelen;
282	} */ *uap;
283	int compat;
284{
285	struct sockaddr *name;
286	socklen_t namelen;
287	struct file *fp;
288	int error;
289
290	if (uap->name == NULL)
291		return (kern_accept(td, uap->s, NULL, NULL, NULL));
292
293	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
294	if (error)
295		return (error);
296
297	error = kern_accept(td, uap->s, &name, &namelen, &fp);
298
299	/*
300	 * return a namelen of zero for older code which might
301	 * ignore the return value from accept.
302	 */
303	if (error) {
304		(void) copyout(&namelen,
305		    uap->anamelen, sizeof(*uap->anamelen));
306		return (error);
307	}
308
309	if (error == 0 && name != NULL) {
310#ifdef COMPAT_OLDSOCK
311		if (compat)
312			((struct osockaddr *)name)->sa_family =
313			    name->sa_family;
314#endif
315		error = copyout(name, uap->name, namelen);
316	}
317	if (error == 0)
318		error = copyout(&namelen, uap->anamelen,
319		    sizeof(namelen));
320	if (error)
321		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
322	fdrop(fp, td);
323	free(name, M_SONAME);
324	return (error);
325}
326
/*
 * Accept one pending connection from the listening socket behind descriptor
 * `s'.
 *
 * A new descriptor is allocated for the accepted socket and stored in
 * td->td_retval[0].  The new file inherits the listening file's flags, and
 * its non-blocking/async state is synchronised with those flags.
 *
 * name		if non-NULL, *name receives the peer address returned by
 *		soaccept() (NULL if there was none); the caller frees it with
 *		free(..., M_SONAME).
 * namelen	used only when `name' is non-NULL: clamped to the address's
 *		sa_len, or set to 0 if soaccept() failed or produced no
 *		address.
 * fp		if non-NULL, on success *fp receives the new file with the
 *		falloc() reference still held (the caller must fdrop() it); on
 *		error paths that reach `done' *fp is set to NULL.  The two
 *		early returns below do not touch *fp.
 *
 * Returns 0 or an errno value; EWOULDBLOCK if the listening socket is
 * non-blocking and no connection is ready.
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
	struct filedesc *fdp;
	struct file *headfp, *nfp = NULL;
	struct sockaddr *sa = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	if (name) {
		*name = NULL;
		/*
		 * NOTE(review): if socklen_t is an unsigned type this test
		 * can never be true -- confirm.
		 */
		if (*namelen < 0)
			return (EINVAL);
	}

	fdp = td->td_proc->p_fd;
	error = getsock(fdp, s, &headfp, &fflag);
	if (error)
		return (error);
	head = headfp->f_data;
	/* Only a socket that has been put into listening state may accept. */
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_socket_check_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	/* Reserve the descriptor before waiting for a connection. */
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a completed connection or a socket error shows up. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* A pending socket error is reported once and then cleared. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listen queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* The new socket inherits the listener's SIGIO owner, if any. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	CURVNET_SET(so->so_vnet);
	error = soaccept(so, &sa);
	CURVNET_RESTORE();
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (name)
			*namelen = 0;
		goto noconnection;
	}
	if (sa == NULL) {
		if (name)
			*namelen = 0;
		goto done;
	}
	if (name) {
		/* check sa_len before it is destroyed */
		if (*namelen > sa->sa_len)
			*namelen = sa->sa_len;
#ifdef KTRACE
		if (KTRPOINT(td, KTR_STRUCT))
			ktrsockaddr(sa);
#endif
		/* Ownership of the address passes to the caller. */
		*name = sa;
		sa = NULL;
	}
noconnection:
	if (sa)
		free(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.  We return
	 * a reference on nfp to the caller on success if they request it.
	 */
done:
	if (fp != NULL) {
		if (error == 0) {
			*fp = nfp;
			nfp = NULL;
		} else
			*fp = NULL;
	}
	if (nfp != NULL)
		fdrop(nfp, td);
	fdrop(headfp, td);
	return (error);
}
484
/*
 * accept(2): native entry point.  Calls accept1() with `compat' clear, so
 * the peer address is returned in the current sockaddr layout.
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
493
494#ifdef COMPAT_OLDSOCK
/*
 * Old accept: calls accept1() with `compat' set, so the peer address is
 * rewritten into the osockaddr layout before being copied out.
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
503#endif /* COMPAT_OLDSOCK */
504
505/* ARGSUSED */
506int
507connect(td, uap)
508	struct thread *td;
509	struct connect_args /* {
510		int	s;
511		caddr_t	name;
512		int	namelen;
513	} */ *uap;
514{
515	struct sockaddr *sa;
516	int error;
517
518	error = getsockaddr(&sa, uap->name, uap->namelen);
519	if (error)
520		return (error);
521
522	error = kern_connect(td, uap->s, sa);
523	free(sa, M_SONAME);
524	return (error);
525}
526
527
/*
 * Connect the socket behind descriptor `fd' to the kernel-space address
 * `sa', which remains owned by the caller.
 *
 * Returns 0 or an errno value:
 *   EALREADY     a connect is already in progress on the socket;
 *   EINPROGRESS  the socket is non-blocking and the connect has not
 *                completed yet;
 *   EINTR        the wait was interrupted (ERESTART is mapped to EINTR);
 *   otherwise an error from getsock(), the MAC check, soconnect(), or the
 *   socket's so_error.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;

	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;
	if (so->so_state & SS_ISCONNECTING) {
		error = EALREADY;
		goto done1;
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(sa);
#endif
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_socket_check_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	/* Non-blocking socket: report progress instead of waiting. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		error = EINPROGRESS;
		goto done1;
	}
	/* Sleep until the connect finishes, fails, or we are interrupted. */
	SOCK_LOCK(so);
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	/* Collect and clear the socket's pending error, if the wait ended. */
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
bad:
	/*
	 * SS_ISCONNECTING is left set when the sleep was interrupted by
	 * EINTR/ERESTART, and cleared in every other case that reaches here.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
	return (error);
}
589
/*
 * socketpair(2): create two sockets of the requested domain/type/protocol,
 * connect them to each other, install each in a new file descriptor and
 * copy the two descriptor numbers out to uap->rsv.
 *
 * Failure handling unwinds through the free4..free1 labels in reverse order
 * of acquisition.  Returns 0 or an errno value.
 */
int
socketpair(td, uap)
	struct thread *td;
	struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];

#ifdef MAC
	/* We might want to have a separate check for socket pairs. */
	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
	    uap->protocol);
	if (error)
		return (error);
#endif

	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		return (error);
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
	finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
	/*
	 * The sockets are now attached to the files via finit(); clearing
	 * the local pointers keeps the free2/free1 cleanup below from
	 * calling soclose() on them if the copyout fails.
	 */
	so1 = so2 = NULL;
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	if (error)
		goto free4;
	fdrop(fp1, td);
	fdrop(fp2, td);
	return (0);
free4:
	/* Undo the second descriptor. */
	fdclose(fdp, fp2, sv[1], td);
	fdrop(fp2, td);
free3:
	/* Undo the first descriptor. */
	fdclose(fdp, fp1, sv[0], td);
	fdrop(fp1, td);
free2:
	/* Close whichever sockets are still held through so1/so2. */
	if (so2 != NULL)
		(void)soclose(so2);
free1:
	if (so1 != NULL)
		(void)soclose(so1);
	return (error);
}
666
667static int
668sendit(td, s, mp, flags)
669	struct thread *td;
670	int s;
671	struct msghdr *mp;
672	int flags;
673{
674	struct mbuf *control;
675	struct sockaddr *to;
676	int error;
677
678	if (mp->msg_name != NULL) {
679		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
680		if (error) {
681			to = NULL;
682			goto bad;
683		}
684		mp->msg_name = to;
685	} else {
686		to = NULL;
687	}
688
689	if (mp->msg_control) {
690		if (mp->msg_controllen < sizeof(struct cmsghdr)
691#ifdef COMPAT_OLDSOCK
692		    && mp->msg_flags != MSG_COMPAT
693#endif
694		) {
695			error = EINVAL;
696			goto bad;
697		}
698		error = sockargs(&control, mp->msg_control,
699		    mp->msg_controllen, MT_CONTROL);
700		if (error)
701			goto bad;
702#ifdef COMPAT_OLDSOCK
703		if (mp->msg_flags == MSG_COMPAT) {
704			struct cmsghdr *cm;
705
706			M_PREPEND(control, sizeof(*cm), M_WAIT);
707			cm = mtod(control, struct cmsghdr *);
708			cm->cmsg_len = control->m_len;
709			cm->cmsg_level = SOL_SOCKET;
710			cm->cmsg_type = SCM_RIGHTS;
711		}
712#endif
713	} else {
714		control = NULL;
715	}
716
717	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
718
719bad:
720	if (to)
721		free(to, M_SONAME);
722	return (error);
723}
724
725int
726kern_sendit(td, s, mp, flags, control, segflg)
727	struct thread *td;
728	int s;
729	struct msghdr *mp;
730	int flags;
731	struct mbuf *control;
732	enum uio_seg segflg;
733{
734	struct file *fp;
735	struct uio auio;
736	struct iovec *iov;
737	struct socket *so;
738	int i;
739	int len, error;
740#ifdef KTRACE
741	struct uio *ktruio = NULL;
742#endif
743
744	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
745	if (error)
746		return (error);
747	so = (struct socket *)fp->f_data;
748
749#ifdef MAC
750	SOCK_LOCK(so);
751	if (mp->msg_name != NULL)
752		error = mac_socket_check_connect(td->td_ucred, so,
753		    mp->msg_name);
754	if (error == 0)
755		error = mac_socket_check_send(td->td_ucred, so);
756	SOCK_UNLOCK(so);
757	if (error)
758		goto bad;
759#endif
760
761	auio.uio_iov = mp->msg_iov;
762	auio.uio_iovcnt = mp->msg_iovlen;
763	auio.uio_segflg = segflg;
764	auio.uio_rw = UIO_WRITE;
765	auio.uio_td = td;
766	auio.uio_offset = 0;			/* XXX */
767	auio.uio_resid = 0;
768	iov = mp->msg_iov;
769	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
770		if ((auio.uio_resid += iov->iov_len) < 0) {
771			error = EINVAL;
772			goto bad;
773		}
774	}
775#ifdef KTRACE
776	if (KTRPOINT(td, KTR_GENIO))
777		ktruio = cloneuio(&auio);
778#endif
779	len = auio.uio_resid;
780	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
781	if (error) {
782		if (auio.uio_resid != len && (error == ERESTART ||
783		    error == EINTR || error == EWOULDBLOCK))
784			error = 0;
785		/* Generation of SIGPIPE can be controlled per socket */
786		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
787		    !(flags & MSG_NOSIGNAL)) {
788			PROC_LOCK(td->td_proc);
789			psignal(td->td_proc, SIGPIPE);
790			PROC_UNLOCK(td->td_proc);
791		}
792	}
793	if (error == 0)
794		td->td_retval[0] = len - auio.uio_resid;
795#ifdef KTRACE
796	if (ktruio != NULL) {
797		ktruio->uio_resid = td->td_retval[0];
798		ktrgenio(s, UIO_WRITE, ktruio, error);
799	}
800#endif
801bad:
802	fdrop(fp, td);
803	return (error);
804}
805
806int
807sendto(td, uap)
808	struct thread *td;
809	struct sendto_args /* {
810		int	s;
811		caddr_t	buf;
812		size_t	len;
813		int	flags;
814		caddr_t	to;
815		int	tolen;
816	} */ *uap;
817{
818	struct msghdr msg;
819	struct iovec aiov;
820	int error;
821
822	msg.msg_name = uap->to;
823	msg.msg_namelen = uap->tolen;
824	msg.msg_iov = &aiov;
825	msg.msg_iovlen = 1;
826	msg.msg_control = 0;
827#ifdef COMPAT_OLDSOCK
828	msg.msg_flags = 0;
829#endif
830	aiov.iov_base = uap->buf;
831	aiov.iov_len = uap->len;
832	error = sendit(td, uap->s, &msg, uap->flags);
833	return (error);
834}
835
836#ifdef COMPAT_OLDSOCK
837int
838osend(td, uap)
839	struct thread *td;
840	struct osend_args /* {
841		int	s;
842		caddr_t	buf;
843		int	len;
844		int	flags;
845	} */ *uap;
846{
847	struct msghdr msg;
848	struct iovec aiov;
849	int error;
850
851	msg.msg_name = 0;
852	msg.msg_namelen = 0;
853	msg.msg_iov = &aiov;
854	msg.msg_iovlen = 1;
855	aiov.iov_base = uap->buf;
856	aiov.iov_len = uap->len;
857	msg.msg_control = 0;
858	msg.msg_flags = 0;
859	error = sendit(td, uap->s, &msg, uap->flags);
860	return (error);
861}
862
863int
864osendmsg(td, uap)
865	struct thread *td;
866	struct osendmsg_args /* {
867		int	s;
868		caddr_t	msg;
869		int	flags;
870	} */ *uap;
871{
872	struct msghdr msg;
873	struct iovec *iov;
874	int error;
875
876	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
877	if (error)
878		return (error);
879	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
880	if (error)
881		return (error);
882	msg.msg_iov = iov;
883	msg.msg_flags = MSG_COMPAT;
884	error = sendit(td, uap->s, &msg, uap->flags);
885	free(iov, M_IOV);
886	return (error);
887}
888#endif
889
890int
891sendmsg(td, uap)
892	struct thread *td;
893	struct sendmsg_args /* {
894		int	s;
895		caddr_t	msg;
896		int	flags;
897	} */ *uap;
898{
899	struct msghdr msg;
900	struct iovec *iov;
901	int error;
902
903	error = copyin(uap->msg, &msg, sizeof (msg));
904	if (error)
905		return (error);
906	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
907	if (error)
908		return (error);
909	msg.msg_iov = iov;
910#ifdef COMPAT_OLDSOCK
911	msg.msg_flags = 0;
912#endif
913	error = sendit(td, uap->s, &msg, uap->flags);
914	free(iov, M_IOV);
915	return (error);
916}
917
/*
 * Receive a message from the socket behind descriptor `s' into the buffers
 * described by mp->msg_iov/msg_iovlen.
 *
 * mp		on entry msg_flags holds the receive flags and, if msg_name
 *		is set, msg_namelen is the size of the address buffer.  On
 *		success msg_namelen and msg_controllen are updated to the
 *		amounts actually stored, and msg_flags is updated by
 *		soreceive() (MSG_CTRUNC is added here if control data did not
 *		fit).
 * fromseg	selects how the source address is stored into msg_name:
 *		copyout() for UIO_USERSPACE, bcopy() otherwise.  Note that
 *		the data buffers themselves are always treated as
 *		UIO_USERSPACE.
 * controlp	if non-NULL, receives the control mbuf chain on success (the
 *		caller then owns it) and is set to NULL otherwise; if NULL,
 *		control data is copied out to mp->msg_control when that is
 *		set, and the chain is freed here.
 *
 * On success the number of bytes received is stored in td->td_retval[0].
 * A partial transfer that ended with ERESTART, EINTR or EWOULDBLOCK is
 * reported as success.  Returns 0 or an errno value; EINVAL if the iovec
 * lengths overflow.
 */
int
kern_recvit(td, s, mp, fromseg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	enum uio_seg fromseg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	if(controlp != NULL)
		*controlp = 0;

	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_socket_check_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total up the request, rejecting lengths that overflow. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	CURVNET_SET(so->so_vnet);
	/* Control data is requested only if someone will consume it. */
	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	CURVNET_RESTORE();
	if (error) {
		/* A partial transfer that was cut short counts as success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	/* Return the source address, truncated to the caller's buffer. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			if (fromseg == UIO_USERSPACE) {
				error = copyout(fromsa, mp->msg_name,
				    (unsigned)len);
				if (error)
					goto out;
			} else
				bcopy(fromsa, mp->msg_name, len);
		}
		mp->msg_namelen = len;
	}
	/* Copy control data out only when the caller did not ask for the chain. */
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Walk the chain, copying as much as the user buffer holds. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually stored. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
#ifdef KTRACE
	if (fromsa && KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(fromsa);
#endif
	if (fromsa)
		free(fromsa, M_SONAME);

	/* Hand the control chain to the caller on success, else free it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else  if (control)
		m_freem(control);

	return (error);
}
1080
1081static int
1082recvit(td, s, mp, namelenp)
1083	struct thread *td;
1084	int s;
1085	struct msghdr *mp;
1086	void *namelenp;
1087{
1088	int error;
1089
1090	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1091	if (error)
1092		return (error);
1093	if (namelenp) {
1094		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1095#ifdef COMPAT_OLDSOCK
1096		if (mp->msg_flags & MSG_COMPAT)
1097			error = 0;	/* old recvfrom didn't check */
1098#endif
1099	}
1100	return (error);
1101}
1102
1103int
1104recvfrom(td, uap)
1105	struct thread *td;
1106	struct recvfrom_args /* {
1107		int	s;
1108		caddr_t	buf;
1109		size_t	len;
1110		int	flags;
1111		struct sockaddr * __restrict	from;
1112		socklen_t * __restrict fromlenaddr;
1113	} */ *uap;
1114{
1115	struct msghdr msg;
1116	struct iovec aiov;
1117	int error;
1118
1119	if (uap->fromlenaddr) {
1120		error = copyin(uap->fromlenaddr,
1121		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1122		if (error)
1123			goto done2;
1124	} else {
1125		msg.msg_namelen = 0;
1126	}
1127	msg.msg_name = uap->from;
1128	msg.msg_iov = &aiov;
1129	msg.msg_iovlen = 1;
1130	aiov.iov_base = uap->buf;
1131	aiov.iov_len = uap->len;
1132	msg.msg_control = 0;
1133	msg.msg_flags = uap->flags;
1134	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1135done2:
1136	return(error);
1137}
1138
1139#ifdef COMPAT_OLDSOCK
1140int
1141orecvfrom(td, uap)
1142	struct thread *td;
1143	struct recvfrom_args *uap;
1144{
1145
1146	uap->flags |= MSG_COMPAT;
1147	return (recvfrom(td, uap));
1148}
1149#endif
1150
1151#ifdef COMPAT_OLDSOCK
1152int
1153orecv(td, uap)
1154	struct thread *td;
1155	struct orecv_args /* {
1156		int	s;
1157		caddr_t	buf;
1158		int	len;
1159		int	flags;
1160	} */ *uap;
1161{
1162	struct msghdr msg;
1163	struct iovec aiov;
1164	int error;
1165
1166	msg.msg_name = 0;
1167	msg.msg_namelen = 0;
1168	msg.msg_iov = &aiov;
1169	msg.msg_iovlen = 1;
1170	aiov.iov_base = uap->buf;
1171	aiov.iov_len = uap->len;
1172	msg.msg_control = 0;
1173	msg.msg_flags = uap->flags;
1174	error = recvit(td, uap->s, &msg, NULL);
1175	return (error);
1176}
1177
1178/*
1179 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1180 * overlays the new one, missing only the flags, and with the (old) access
1181 * rights where the control fields are now.
1182 */
1183int
1184orecvmsg(td, uap)
1185	struct thread *td;
1186	struct orecvmsg_args /* {
1187		int	s;
1188		struct	omsghdr *msg;
1189		int	flags;
1190	} */ *uap;
1191{
1192	struct msghdr msg;
1193	struct iovec *iov;
1194	int error;
1195
1196	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1197	if (error)
1198		return (error);
1199	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1200	if (error)
1201		return (error);
1202	msg.msg_flags = uap->flags | MSG_COMPAT;
1203	msg.msg_iov = iov;
1204	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1205	if (msg.msg_controllen && error == 0)
1206		error = copyout(&msg.msg_controllen,
1207		    &uap->msg->msg_accrightslen, sizeof (int));
1208	free(iov, M_IOV);
1209	return (error);
1210}
1211#endif
1212
1213int
1214recvmsg(td, uap)
1215	struct thread *td;
1216	struct recvmsg_args /* {
1217		int	s;
1218		struct	msghdr *msg;
1219		int	flags;
1220	} */ *uap;
1221{
1222	struct msghdr msg;
1223	struct iovec *uiov, *iov;
1224	int error;
1225
1226	error = copyin(uap->msg, &msg, sizeof (msg));
1227	if (error)
1228		return (error);
1229	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1230	if (error)
1231		return (error);
1232	msg.msg_flags = uap->flags;
1233#ifdef COMPAT_OLDSOCK
1234	msg.msg_flags &= ~MSG_COMPAT;
1235#endif
1236	uiov = msg.msg_iov;
1237	msg.msg_iov = iov;
1238	error = recvit(td, uap->s, &msg, NULL);
1239	if (error == 0) {
1240		msg.msg_iov = uiov;
1241		error = copyout(&msg, uap->msg, sizeof(msg));
1242	}
1243	free(iov, M_IOV);
1244	return (error);
1245}
1246
1247/* ARGSUSED */
1248int
1249shutdown(td, uap)
1250	struct thread *td;
1251	struct shutdown_args /* {
1252		int	s;
1253		int	how;
1254	} */ *uap;
1255{
1256	struct socket *so;
1257	struct file *fp;
1258	int error;
1259
1260	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1261	if (error == 0) {
1262		so = fp->f_data;
1263		error = soshutdown(so, uap->how);
1264		fdrop(fp, td);
1265	}
1266	return (error);
1267}
1268
1269/* ARGSUSED */
1270int
1271setsockopt(td, uap)
1272	struct thread *td;
1273	struct setsockopt_args /* {
1274		int	s;
1275		int	level;
1276		int	name;
1277		caddr_t	val;
1278		int	valsize;
1279	} */ *uap;
1280{
1281
1282	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1283	    uap->val, UIO_USERSPACE, uap->valsize));
1284}
1285
1286int
1287kern_setsockopt(td, s, level, name, val, valseg, valsize)
1288	struct thread *td;
1289	int s;
1290	int level;
1291	int name;
1292	void *val;
1293	enum uio_seg valseg;
1294	socklen_t valsize;
1295{
1296	int error;
1297	struct socket *so;
1298	struct file *fp;
1299	struct sockopt sopt;
1300
1301	if (val == NULL && valsize != 0)
1302		return (EFAULT);
1303	if ((int)valsize < 0)
1304		return (EINVAL);
1305
1306	sopt.sopt_dir = SOPT_SET;
1307	sopt.sopt_level = level;
1308	sopt.sopt_name = name;
1309	sopt.sopt_val = val;
1310	sopt.sopt_valsize = valsize;
1311	switch (valseg) {
1312	case UIO_USERSPACE:
1313		sopt.sopt_td = td;
1314		break;
1315	case UIO_SYSSPACE:
1316		sopt.sopt_td = NULL;
1317		break;
1318	default:
1319		panic("kern_setsockopt called with bad valseg");
1320	}
1321
1322	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1323	if (error == 0) {
1324		so = fp->f_data;
1325		CURVNET_SET(so->so_vnet);
1326		error = sosetopt(so, &sopt);
1327		CURVNET_RESTORE();
1328		fdrop(fp, td);
1329	}
1330	return(error);
1331}
1332
1333/* ARGSUSED */
1334int
1335getsockopt(td, uap)
1336	struct thread *td;
1337	struct getsockopt_args /* {
1338		int	s;
1339		int	level;
1340		int	name;
1341		void * __restrict	val;
1342		socklen_t * __restrict avalsize;
1343	} */ *uap;
1344{
1345	socklen_t valsize;
1346	int	error;
1347
1348	if (uap->val) {
1349		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1350		if (error)
1351			return (error);
1352	}
1353
1354	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1355	    uap->val, UIO_USERSPACE, &valsize);
1356
1357	if (error == 0)
1358		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1359	return (error);
1360}
1361
1362/*
1363 * Kernel version of getsockopt.
1364 * optval can be a userland or userspace. optlen is always a kernel pointer.
1365 */
1366int
1367kern_getsockopt(td, s, level, name, val, valseg, valsize)
1368	struct thread *td;
1369	int s;
1370	int level;
1371	int name;
1372	void *val;
1373	enum uio_seg valseg;
1374	socklen_t *valsize;
1375{
1376	int error;
1377	struct  socket *so;
1378	struct file *fp;
1379	struct	sockopt sopt;
1380
1381	if (val == NULL)
1382		*valsize = 0;
1383	if ((int)*valsize < 0)
1384		return (EINVAL);
1385
1386	sopt.sopt_dir = SOPT_GET;
1387	sopt.sopt_level = level;
1388	sopt.sopt_name = name;
1389	sopt.sopt_val = val;
1390	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1391	switch (valseg) {
1392	case UIO_USERSPACE:
1393		sopt.sopt_td = td;
1394		break;
1395	case UIO_SYSSPACE:
1396		sopt.sopt_td = NULL;
1397		break;
1398	default:
1399		panic("kern_getsockopt called with bad valseg");
1400	}
1401
1402	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1403	if (error == 0) {
1404		so = fp->f_data;
1405		CURVNET_SET(so->so_vnet);
1406		error = sogetopt(so, &sopt);
1407		CURVNET_RESTORE();
1408		*valsize = sopt.sopt_valsize;
1409		fdrop(fp, td);
1410	}
1411	return (error);
1412}
1413
1414/*
1415 * getsockname1() - Get socket name.
1416 */
1417/* ARGSUSED */
1418static int
1419getsockname1(td, uap, compat)
1420	struct thread *td;
1421	struct getsockname_args /* {
1422		int	fdes;
1423		struct sockaddr * __restrict asa;
1424		socklen_t * __restrict alen;
1425	} */ *uap;
1426	int compat;
1427{
1428	struct sockaddr *sa;
1429	socklen_t len;
1430	int error;
1431
1432	error = copyin(uap->alen, &len, sizeof(len));
1433	if (error)
1434		return (error);
1435
1436	error = kern_getsockname(td, uap->fdes, &sa, &len);
1437	if (error)
1438		return (error);
1439
1440	if (len != 0) {
1441#ifdef COMPAT_OLDSOCK
1442		if (compat)
1443			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1444#endif
1445		error = copyout(sa, uap->asa, (u_int)len);
1446	}
1447	free(sa, M_SONAME);
1448	if (error == 0)
1449		error = copyout(&len, uap->alen, sizeof(len));
1450	return (error);
1451}
1452
1453int
1454kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1455    socklen_t *alen)
1456{
1457	struct socket *so;
1458	struct file *fp;
1459	socklen_t len;
1460	int error;
1461
1462	if (*alen < 0)
1463		return (EINVAL);
1464
1465	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1466	if (error)
1467		return (error);
1468	so = fp->f_data;
1469	*sa = NULL;
1470	CURVNET_SET(so->so_vnet);
1471	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1472	CURVNET_RESTORE();
1473	if (error)
1474		goto bad;
1475	if (*sa == NULL)
1476		len = 0;
1477	else
1478		len = MIN(*alen, (*sa)->sa_len);
1479	*alen = len;
1480#ifdef KTRACE
1481	if (KTRPOINT(td, KTR_STRUCT))
1482		ktrsockaddr(*sa);
1483#endif
1484bad:
1485	fdrop(fp, td);
1486	if (error && *sa) {
1487		free(*sa, M_SONAME);
1488		*sa = NULL;
1489	}
1490	return (error);
1491}
1492
/*
 * getsockname system call: getsockname1() with the compat flag clear.
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1501
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API flavour of getsockname: getsockname1() with the compat
 * flag set.
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1512
1513/*
1514 * getpeername1() - Get name of peer for connected socket.
1515 */
1516/* ARGSUSED */
1517static int
1518getpeername1(td, uap, compat)
1519	struct thread *td;
1520	struct getpeername_args /* {
1521		int	fdes;
1522		struct sockaddr * __restrict	asa;
1523		socklen_t * __restrict	alen;
1524	} */ *uap;
1525	int compat;
1526{
1527	struct sockaddr *sa;
1528	socklen_t len;
1529	int error;
1530
1531	error = copyin(uap->alen, &len, sizeof (len));
1532	if (error)
1533		return (error);
1534
1535	error = kern_getpeername(td, uap->fdes, &sa, &len);
1536	if (error)
1537		return (error);
1538
1539	if (len != 0) {
1540#ifdef COMPAT_OLDSOCK
1541		if (compat)
1542			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1543#endif
1544		error = copyout(sa, uap->asa, (u_int)len);
1545	}
1546	free(sa, M_SONAME);
1547	if (error == 0)
1548		error = copyout(&len, uap->alen, sizeof(len));
1549	return (error);
1550}
1551
1552int
1553kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1554    socklen_t *alen)
1555{
1556	struct socket *so;
1557	struct file *fp;
1558	socklen_t len;
1559	int error;
1560
1561	if (*alen < 0)
1562		return (EINVAL);
1563
1564	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1565	if (error)
1566		return (error);
1567	so = fp->f_data;
1568	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1569		error = ENOTCONN;
1570		goto done;
1571	}
1572	*sa = NULL;
1573	CURVNET_SET(so->so_vnet);
1574	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1575	CURVNET_RESTORE();
1576	if (error)
1577		goto bad;
1578	if (*sa == NULL)
1579		len = 0;
1580	else
1581		len = MIN(*alen, (*sa)->sa_len);
1582	*alen = len;
1583#ifdef KTRACE
1584	if (KTRPOINT(td, KTR_STRUCT))
1585		ktrsockaddr(*sa);
1586#endif
1587bad:
1588	if (error && *sa) {
1589		free(*sa, M_SONAME);
1590		*sa = NULL;
1591	}
1592done:
1593	fdrop(fp, td);
1594	return (error);
1595}
1596
/*
 * getpeername system call: getpeername1() with the compat flag clear.
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1605
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API flavour of getpeername: getpeername1() with the compat
 * flag set.
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1617
1618int
1619sockargs(mp, buf, buflen, type)
1620	struct mbuf **mp;
1621	caddr_t buf;
1622	int buflen, type;
1623{
1624	struct sockaddr *sa;
1625	struct mbuf *m;
1626	int error;
1627
1628	if ((u_int)buflen > MLEN) {
1629#ifdef COMPAT_OLDSOCK
1630		if (type == MT_SONAME && (u_int)buflen <= 112)
1631			buflen = MLEN;		/* unix domain compat. hack */
1632		else
1633#endif
1634			if ((u_int)buflen > MCLBYTES)
1635				return (EINVAL);
1636	}
1637	m = m_get(M_WAIT, type);
1638	if ((u_int)buflen > MLEN)
1639		MCLGET(m, M_WAIT);
1640	m->m_len = buflen;
1641	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1642	if (error)
1643		(void) m_free(m);
1644	else {
1645		*mp = m;
1646		if (type == MT_SONAME) {
1647			sa = mtod(m, struct sockaddr *);
1648
1649#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1650			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1651				sa->sa_family = sa->sa_len;
1652#endif
1653			sa->sa_len = buflen;
1654		}
1655	}
1656	return (error);
1657}
1658
1659int
1660getsockaddr(namp, uaddr, len)
1661	struct sockaddr **namp;
1662	caddr_t uaddr;
1663	size_t len;
1664{
1665	struct sockaddr *sa;
1666	int error;
1667
1668	if (len > SOCK_MAXADDRLEN)
1669		return (ENAMETOOLONG);
1670	if (len < offsetof(struct sockaddr, sa_data[0]))
1671		return (EINVAL);
1672	sa = malloc(len, M_SONAME, M_WAITOK);
1673	error = copyin(uaddr, sa, len);
1674	if (error) {
1675		free(sa, M_SONAME);
1676	} else {
1677#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1678		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1679			sa->sa_family = sa->sa_len;
1680#endif
1681		sa->sa_len = len;
1682		*namp = sa;
1683	}
1684	return (error);
1685}
1686
1687#include <sys/condvar.h>
1688
1689struct sendfile_sync {
1690	struct mtx	mtx;
1691	struct cv	cv;
1692	unsigned 	count;
1693};
1694
1695/*
1696 * Detach mapped page and release resources back to the system.
1697 */
1698void
1699sf_buf_mext(void *addr, void *args)
1700{
1701	vm_page_t m;
1702	struct sendfile_sync *sfs;
1703
1704	m = sf_buf_page(args);
1705	sf_buf_free(args);
1706	vm_page_lock_queues();
1707	vm_page_unwire(m, 0);
1708	/*
1709	 * Check for the object going away on us. This can
1710	 * happen since we don't hold a reference to it.
1711	 * If so, we're responsible for freeing the page.
1712	 */
1713	if (m->wire_count == 0 && m->object == NULL)
1714		vm_page_free(m);
1715	vm_page_unlock_queues();
1716	if (addr == NULL)
1717		return;
1718	sfs = addr;
1719	mtx_lock(&sfs->mtx);
1720	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1721	if (--sfs->count == 0)
1722		cv_signal(&sfs->cv);
1723	mtx_unlock(&sfs->mtx);
1724}
1725
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file open on 'fd', beginning at 'offset', over the socket
 * 's'.  'nbytes' limits how much of the file is sent; 0 means "through
 * EOF".  A header and/or trailer may optionally be sent around the file
 * data.  If 'sbytes' is given, the total number of bytes sent is stored
 * there.
 *
 * This entry point simply runs do_sendfile() with the compat flag clear.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1743
1744static int
1745do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1746{
1747	struct sf_hdtr hdtr;
1748	struct uio *hdr_uio, *trl_uio;
1749	int error;
1750
1751	hdr_uio = trl_uio = NULL;
1752
1753	if (uap->hdtr != NULL) {
1754		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1755		if (error)
1756			goto out;
1757		if (hdtr.headers != NULL) {
1758			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1759			if (error)
1760				goto out;
1761		}
1762		if (hdtr.trailers != NULL) {
1763			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1764			if (error)
1765				goto out;
1766
1767		}
1768	}
1769
1770	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1771out:
1772	if (hdr_uio)
1773		free(hdr_uio, M_IOV);
1774	if (trl_uio)
1775		free(trl_uio, M_IOV);
1776	return (error);
1777}
1778
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.  Repackages the old argument
 * structure field by field as a struct sendfile_args and runs
 * do_sendfile() with the compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args nargs;

	nargs.fd = uap->fd;
	nargs.s = uap->s;
	nargs.offset = uap->offset;
	nargs.nbytes = uap->nbytes;
	nargs.hdtr = uap->hdtr;
	nargs.sbytes = uap->sbytes;
	nargs.flags = uap->flags;

	return (do_sendfile(td, &nargs, 1));
}
#endif /* COMPAT_FREEBSD4 */
1796
1797int
1798kern_sendfile(struct thread *td, struct sendfile_args *uap,
1799    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1800{
1801	struct file *sock_fp;
1802	struct vnode *vp;
1803	struct vm_object *obj = NULL;
1804	struct socket *so = NULL;
1805	struct mbuf *m = NULL;
1806	struct sf_buf *sf;
1807	struct vm_page *pg;
1808	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1809	int error, hdrlen = 0, mnw = 0;
1810	int vfslocked;
1811	struct sendfile_sync *sfs = NULL;
1812
1813	/*
1814	 * The file descriptor must be a regular file and have a
1815	 * backing VM object.
1816	 * File offset must be positive.  If it goes beyond EOF
1817	 * we send only the header/trailer and no payload data.
1818	 */
1819	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1820		goto out;
1821	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1822	vn_lock(vp, LK_SHARED | LK_RETRY);
1823	if (vp->v_type == VREG) {
1824		obj = vp->v_object;
1825		if (obj != NULL) {
1826			/*
1827			 * Temporarily increase the backing VM
1828			 * object's reference count so that a forced
1829			 * reclamation of its vnode does not
1830			 * immediately destroy it.
1831			 */
1832			VM_OBJECT_LOCK(obj);
1833			if ((obj->flags & OBJ_DEAD) == 0) {
1834				vm_object_reference_locked(obj);
1835				VM_OBJECT_UNLOCK(obj);
1836			} else {
1837				VM_OBJECT_UNLOCK(obj);
1838				obj = NULL;
1839			}
1840		}
1841	}
1842	VOP_UNLOCK(vp, 0);
1843	VFS_UNLOCK_GIANT(vfslocked);
1844	if (obj == NULL) {
1845		error = EINVAL;
1846		goto out;
1847	}
1848	if (uap->offset < 0) {
1849		error = EINVAL;
1850		goto out;
1851	}
1852
1853	/*
1854	 * The socket must be a stream socket and connected.
1855	 * Remember if it a blocking or non-blocking socket.
1856	 */
1857	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1858	    NULL)) != 0)
1859		goto out;
1860	so = sock_fp->f_data;
1861	if (so->so_type != SOCK_STREAM) {
1862		error = EINVAL;
1863		goto out;
1864	}
1865	if ((so->so_state & SS_ISCONNECTED) == 0) {
1866		error = ENOTCONN;
1867		goto out;
1868	}
1869	/*
1870	 * Do not wait on memory allocations but return ENOMEM for
1871	 * caller to retry later.
1872	 * XXX: Experimental.
1873	 */
1874	if (uap->flags & SF_MNOWAIT)
1875		mnw = 1;
1876
1877	if (uap->flags & SF_SYNC) {
1878		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
1879		memset(sfs, 0, sizeof *sfs);
1880		mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
1881		cv_init(&sfs->cv, "sendfile");
1882	}
1883
1884#ifdef MAC
1885	SOCK_LOCK(so);
1886	error = mac_socket_check_send(td->td_ucred, so);
1887	SOCK_UNLOCK(so);
1888	if (error)
1889		goto out;
1890#endif
1891
1892	/* If headers are specified copy them into mbufs. */
1893	if (hdr_uio != NULL) {
1894		hdr_uio->uio_td = td;
1895		hdr_uio->uio_rw = UIO_WRITE;
1896		if (hdr_uio->uio_resid > 0) {
1897			/*
1898			 * In FBSD < 5.0 the nbytes to send also included
1899			 * the header.  If compat is specified subtract the
1900			 * header size from nbytes.
1901			 */
1902			if (compat) {
1903				if (uap->nbytes > hdr_uio->uio_resid)
1904					uap->nbytes -= hdr_uio->uio_resid;
1905				else
1906					uap->nbytes = 0;
1907			}
1908			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1909			    0, 0, 0);
1910			if (m == NULL) {
1911				error = mnw ? EAGAIN : ENOBUFS;
1912				goto out;
1913			}
1914			hdrlen = m_length(m, NULL);
1915		}
1916	}
1917
1918	/*
1919	 * Protect against multiple writers to the socket.
1920	 *
1921	 * XXXRW: Historically this has assumed non-interruptibility, so now
1922	 * we implement that, but possibly shouldn't.
1923	 */
1924	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1925
1926	/*
1927	 * Loop through the pages of the file, starting with the requested
1928	 * offset. Get a file page (do I/O if necessary), map the file page
1929	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1930	 * it on the socket.
1931	 * This is done in two loops.  The inner loop turns as many pages
1932	 * as it can, up to available socket buffer space, without blocking
1933	 * into mbufs to have it bulk delivered into the socket send buffer.
1934	 * The outer loop checks the state and available space of the socket
1935	 * and takes care of the overall progress.
1936	 */
1937	for (off = uap->offset, rem = uap->nbytes; ; ) {
1938		int loopbytes = 0;
1939		int space = 0;
1940		int done = 0;
1941
1942		/*
1943		 * Check the socket state for ongoing connection,
1944		 * no errors and space in socket buffer.
1945		 * If space is low allow for the remainder of the
1946		 * file to be processed if it fits the socket buffer.
1947		 * Otherwise block in waiting for sufficient space
1948		 * to proceed, or if the socket is nonblocking, return
1949		 * to userland with EAGAIN while reporting how far
1950		 * we've come.
1951		 * We wait until the socket buffer has significant free
1952		 * space to do bulk sends.  This makes good use of file
1953		 * system read ahead and allows packet segmentation
1954		 * offloading hardware to take over lots of work.  If
1955		 * we were not careful here we would send off only one
1956		 * sfbuf at a time.
1957		 */
1958		SOCKBUF_LOCK(&so->so_snd);
1959		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1960			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1961retry_space:
1962		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1963			error = EPIPE;
1964			SOCKBUF_UNLOCK(&so->so_snd);
1965			goto done;
1966		} else if (so->so_error) {
1967			error = so->so_error;
1968			so->so_error = 0;
1969			SOCKBUF_UNLOCK(&so->so_snd);
1970			goto done;
1971		}
1972		space = sbspace(&so->so_snd);
1973		if (space < rem &&
1974		    (space <= 0 ||
1975		     space < so->so_snd.sb_lowat)) {
1976			if (so->so_state & SS_NBIO) {
1977				SOCKBUF_UNLOCK(&so->so_snd);
1978				error = EAGAIN;
1979				goto done;
1980			}
1981			/*
1982			 * sbwait drops the lock while sleeping.
1983			 * When we loop back to retry_space the
1984			 * state may have changed and we retest
1985			 * for it.
1986			 */
1987			error = sbwait(&so->so_snd);
1988			/*
1989			 * An error from sbwait usually indicates that we've
1990			 * been interrupted by a signal. If we've sent anything
1991			 * then return bytes sent, otherwise return the error.
1992			 */
1993			if (error) {
1994				SOCKBUF_UNLOCK(&so->so_snd);
1995				goto done;
1996			}
1997			goto retry_space;
1998		}
1999		SOCKBUF_UNLOCK(&so->so_snd);
2000
2001		/*
2002		 * Reduce space in the socket buffer by the size of
2003		 * the header mbuf chain.
2004		 * hdrlen is set to 0 after the first loop.
2005		 */
2006		space -= hdrlen;
2007
2008		/*
2009		 * Loop and construct maximum sized mbuf chain to be bulk
2010		 * dumped into socket buffer.
2011		 */
2012		while(space > loopbytes) {
2013			vm_pindex_t pindex;
2014			vm_offset_t pgoff;
2015			struct mbuf *m0;
2016
2017			VM_OBJECT_LOCK(obj);
2018			/*
2019			 * Calculate the amount to transfer.
2020			 * Not to exceed a page, the EOF,
2021			 * or the passed in nbytes.
2022			 */
2023			pgoff = (vm_offset_t)(off & PAGE_MASK);
2024			xfsize = omin(PAGE_SIZE - pgoff,
2025			    obj->un_pager.vnp.vnp_size - uap->offset -
2026			    fsbytes - loopbytes);
2027			if (uap->nbytes)
2028				rem = (uap->nbytes - fsbytes - loopbytes);
2029			else
2030				rem = obj->un_pager.vnp.vnp_size -
2031				    uap->offset - fsbytes - loopbytes;
2032			xfsize = omin(rem, xfsize);
2033			if (xfsize <= 0) {
2034				VM_OBJECT_UNLOCK(obj);
2035				done = 1;		/* all data sent */
2036				break;
2037			}
2038			/*
2039			 * Don't overflow the send buffer.
2040			 * Stop here and send out what we've
2041			 * already got.
2042			 */
2043			if (space < loopbytes + xfsize) {
2044				VM_OBJECT_UNLOCK(obj);
2045				break;
2046			}
2047
2048			/*
2049			 * Attempt to look up the page.  Allocate
2050			 * if not found or wait and loop if busy.
2051			 */
2052			pindex = OFF_TO_IDX(off);
2053			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2054			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2055
2056			/*
2057			 * Check if page is valid for what we need,
2058			 * otherwise initiate I/O.
2059			 * If we already turned some pages into mbufs,
2060			 * send them off before we come here again and
2061			 * block.
2062			 */
2063			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2064				VM_OBJECT_UNLOCK(obj);
2065			else if (m != NULL)
2066				error = EAGAIN;	/* send what we already got */
2067			else if (uap->flags & SF_NODISKIO)
2068				error = EBUSY;
2069			else {
2070				int bsize, resid;
2071
2072				/*
2073				 * Ensure that our page is still around
2074				 * when the I/O completes.
2075				 */
2076				vm_page_io_start(pg);
2077				VM_OBJECT_UNLOCK(obj);
2078
2079				/*
2080				 * Get the page from backing store.
2081				 */
2082				bsize = vp->v_mount->mnt_stat.f_iosize;
2083				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2084				vn_lock(vp, LK_SHARED | LK_RETRY);
2085
2086				/*
2087				 * XXXMAC: Because we don't have fp->f_cred
2088				 * here, we pass in NOCRED.  This is probably
2089				 * wrong, but is consistent with our original
2090				 * implementation.
2091				 */
2092				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2093				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2094				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2095				    td->td_ucred, NOCRED, &resid, td);
2096				VOP_UNLOCK(vp, 0);
2097				VFS_UNLOCK_GIANT(vfslocked);
2098				VM_OBJECT_LOCK(obj);
2099				vm_page_io_finish(pg);
2100				if (!error)
2101					VM_OBJECT_UNLOCK(obj);
2102				mbstat.sf_iocnt++;
2103			}
2104			if (error) {
2105				vm_page_lock_queues();
2106				vm_page_unwire(pg, 0);
2107				/*
2108				 * See if anyone else might know about
2109				 * this page.  If not and it is not valid,
2110				 * then free it.
2111				 */
2112				if (pg->wire_count == 0 && pg->valid == 0 &&
2113				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2114				    pg->hold_count == 0) {
2115					vm_page_free(pg);
2116				}
2117				vm_page_unlock_queues();
2118				VM_OBJECT_UNLOCK(obj);
2119				if (error == EAGAIN)
2120					error = 0;	/* not a real error */
2121				break;
2122			}
2123
2124			/*
2125			 * Get a sendfile buf.  We usually wait as long
2126			 * as necessary, but this wait can be interrupted.
2127			 */
2128			if ((sf = sf_buf_alloc(pg,
2129			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2130				mbstat.sf_allocfail++;
2131				vm_page_lock_queues();
2132				vm_page_unwire(pg, 0);
2133				/*
2134				 * XXX: Not same check as above!?
2135				 */
2136				if (pg->wire_count == 0 && pg->object == NULL)
2137					vm_page_free(pg);
2138				vm_page_unlock_queues();
2139				error = (mnw ? EAGAIN : EINTR);
2140				break;
2141			}
2142
2143			/*
2144			 * Get an mbuf and set it up as having
2145			 * external storage.
2146			 */
2147			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2148			if (m0 == NULL) {
2149				error = (mnw ? EAGAIN : ENOBUFS);
2150				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2151				break;
2152			}
2153			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2154			    sfs, sf, M_RDONLY, EXT_SFBUF);
2155			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2156			m0->m_len = xfsize;
2157
2158			/* Append to mbuf chain. */
2159			if (m != NULL)
2160				m_cat(m, m0);
2161			else
2162				m = m0;
2163
2164			/* Keep track of bits processed. */
2165			loopbytes += xfsize;
2166			off += xfsize;
2167
2168			if (sfs != NULL) {
2169				mtx_lock(&sfs->mtx);
2170				sfs->count++;
2171				mtx_unlock(&sfs->mtx);
2172			}
2173		}
2174
2175		/* Add the buffer chain to the socket buffer. */
2176		if (m != NULL) {
2177			int mlen, err;
2178
2179			mlen = m_length(m, NULL);
2180			SOCKBUF_LOCK(&so->so_snd);
2181			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2182				error = EPIPE;
2183				SOCKBUF_UNLOCK(&so->so_snd);
2184				goto done;
2185			}
2186			SOCKBUF_UNLOCK(&so->so_snd);
2187			CURVNET_SET(so->so_vnet);
2188			/* Avoid error aliasing. */
2189			err = (*so->so_proto->pr_usrreqs->pru_send)
2190				    (so, 0, m, NULL, NULL, td);
2191			CURVNET_RESTORE();
2192			if (err == 0) {
2193				/*
2194				 * We need two counters to get the
2195				 * file offset and nbytes to send
2196				 * right:
2197				 * - sbytes contains the total amount
2198				 *   of bytes sent, including headers.
2199				 * - fsbytes contains the total amount
2200				 *   of bytes sent from the file.
2201				 */
2202				sbytes += mlen;
2203				fsbytes += mlen;
2204				if (hdrlen) {
2205					fsbytes -= hdrlen;
2206					hdrlen = 0;
2207				}
2208			} else if (error == 0)
2209				error = err;
2210			m = NULL;	/* pru_send always consumes */
2211		}
2212
2213		/* Quit outer loop on error or when we're done. */
2214		if (done)
2215			break;
2216		if (error)
2217			goto done;
2218	}
2219
2220	/*
2221	 * Send trailers. Wimp out and use writev(2).
2222	 */
2223	if (trl_uio != NULL) {
2224		sbunlock(&so->so_snd);
2225		error = kern_writev(td, uap->s, trl_uio);
2226		if (error == 0)
2227			sbytes += td->td_retval[0];
2228		goto out;
2229	}
2230
2231done:
2232	sbunlock(&so->so_snd);
2233out:
2234	/*
2235	 * If there was no error we have to clear td->td_retval[0]
2236	 * because it may have been set by writev.
2237	 */
2238	if (error == 0) {
2239		td->td_retval[0] = 0;
2240	}
2241	if (uap->sbytes != NULL) {
2242		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2243	}
2244	if (obj != NULL)
2245		vm_object_deallocate(obj);
2246	if (vp != NULL) {
2247		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2248		vrele(vp);
2249		VFS_UNLOCK_GIANT(vfslocked);
2250	}
2251	if (so)
2252		fdrop(sock_fp, td);
2253	if (m)
2254		m_freem(m);
2255
2256	if (sfs != NULL) {
2257		mtx_lock(&sfs->mtx);
2258		if (sfs->count != 0)
2259			cv_wait(&sfs->cv, &sfs->mtx);
2260		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2261		cv_destroy(&sfs->cv);
2262		mtx_destroy(&sfs->mtx);
2263		free(sfs, M_TEMP);
2264	}
2265
2266	if (error == ERESTART)
2267		error = EINTR;
2268
2269	return (error);
2270}
2271
2272/*
2273 * SCTP syscalls.
2274 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2275 * otherwise all return EOPNOTSUPP.
2276 * XXX: We should make this loadable one day.
2277 */
2278int
2279sctp_peeloff(td, uap)
2280	struct thread *td;
2281	struct sctp_peeloff_args /* {
2282		int	sd;
2283		caddr_t	name;
2284	} */ *uap;
2285{
2286#ifdef SCTP
2287	struct filedesc *fdp;
2288	struct file *nfp = NULL;
2289	int error;
2290	struct socket *head, *so;
2291	int fd;
2292	u_int fflag;
2293
2294	fdp = td->td_proc->p_fd;
2295	error = fgetsock(td, uap->sd, &head, &fflag);
2296	if (error)
2297		goto done2;
2298	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2299	if (error)
2300		goto done2;
2301	/*
2302	 * At this point we know we do have a assoc to pull
2303	 * we proceed to get the fd setup. This may block
2304	 * but that is ok.
2305	 */
2306
2307	error = falloc(td, &nfp, &fd);
2308	if (error)
2309		goto done;
2310	td->td_retval[0] = fd;
2311
2312	so = sonewconn(head, SS_ISCONNECTED);
2313	if (so == NULL)
2314		goto noconnection;
2315	/*
2316	 * Before changing the flags on the socket, we have to bump the
2317	 * reference count.  Otherwise, if the protocol calls sofree(),
2318	 * the socket will be released due to a zero refcount.
2319	 */
2320        SOCK_LOCK(so);
2321        soref(so);                      /* file descriptor reference */
2322        SOCK_UNLOCK(so);
2323
2324	ACCEPT_LOCK();
2325
2326	TAILQ_REMOVE(&head->so_comp, so, so_list);
2327	head->so_qlen--;
2328	so->so_state |= (head->so_state & SS_NBIO);
2329	so->so_state &= ~SS_NOFDREF;
2330	so->so_qstate &= ~SQ_COMP;
2331	so->so_head = NULL;
2332	ACCEPT_UNLOCK();
2333	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2334	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2335	if (error)
2336		goto noconnection;
2337	if (head->so_sigio != NULL)
2338		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2339
2340noconnection:
2341	/*
2342	 * close the new descriptor, assuming someone hasn't ripped it
2343	 * out from under us.
2344	 */
2345	if (error)
2346		fdclose(fdp, nfp, fd, td);
2347
2348	/*
2349	 * Release explicitly held references before returning.
2350	 */
2351done:
2352	if (nfp != NULL)
2353		fdrop(nfp, td);
2354	fputsock(head);
2355done2:
2356	return (error);
2357#else  /* SCTP */
2358	return (EOPNOTSUPP);
2359#endif /* SCTP */
2360}
2361
2362int
2363sctp_generic_sendmsg (td, uap)
2364	struct thread *td;
2365	struct sctp_generic_sendmsg_args /* {
2366		int sd,
2367		caddr_t msg,
2368		int mlen,
2369		caddr_t to,
2370		__socklen_t tolen,
2371		struct sctp_sndrcvinfo *sinfo,
2372		int flags
2373	} */ *uap;
2374{
2375#ifdef SCTP
2376	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2377	struct socket *so;
2378	struct file *fp = NULL;
2379	int use_rcvinfo = 1;
2380	int error = 0, len;
2381	struct sockaddr *to = NULL;
2382#ifdef KTRACE
2383	struct uio *ktruio = NULL;
2384#endif
2385	struct uio auio;
2386	struct iovec iov[1];
2387
2388	if (uap->sinfo) {
2389		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2390		if (error)
2391			return (error);
2392		u_sinfo = &sinfo;
2393	}
2394	if (uap->tolen) {
2395		error = getsockaddr(&to, uap->to, uap->tolen);
2396		if (error) {
2397			to = NULL;
2398			goto sctp_bad2;
2399		}
2400	}
2401
2402	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2403	if (error)
2404		goto sctp_bad;
2405#ifdef KTRACE
2406	if (KTRPOINT(td, KTR_STRUCT))
2407		ktrsockaddr(to);
2408#endif
2409
2410	iov[0].iov_base = uap->msg;
2411	iov[0].iov_len = uap->mlen;
2412
2413	so = (struct socket *)fp->f_data;
2414#ifdef MAC
2415	SOCK_LOCK(so);
2416	error = mac_socket_check_send(td->td_ucred, so);
2417	SOCK_UNLOCK(so);
2418	if (error)
2419		goto sctp_bad;
2420#endif /* MAC */
2421
2422	auio.uio_iov =  iov;
2423	auio.uio_iovcnt = 1;
2424	auio.uio_segflg = UIO_USERSPACE;
2425	auio.uio_rw = UIO_WRITE;
2426	auio.uio_td = td;
2427	auio.uio_offset = 0;			/* XXX */
2428	auio.uio_resid = 0;
2429	len = auio.uio_resid = uap->mlen;
2430	error = sctp_lower_sosend(so, to, &auio,
2431		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2432		    uap->flags, use_rcvinfo, u_sinfo, td);
2433	if (error) {
2434		if (auio.uio_resid != len && (error == ERESTART ||
2435		    error == EINTR || error == EWOULDBLOCK))
2436			error = 0;
2437		/* Generation of SIGPIPE can be controlled per socket. */
2438		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2439		    !(uap->flags & MSG_NOSIGNAL)) {
2440			PROC_LOCK(td->td_proc);
2441			psignal(td->td_proc, SIGPIPE);
2442			PROC_UNLOCK(td->td_proc);
2443		}
2444	}
2445	if (error == 0)
2446		td->td_retval[0] = len - auio.uio_resid;
2447#ifdef KTRACE
2448	if (ktruio != NULL) {
2449		ktruio->uio_resid = td->td_retval[0];
2450		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2451	}
2452#endif /* KTRACE */
2453sctp_bad:
2454	if (fp)
2455		fdrop(fp, td);
2456sctp_bad2:
2457	if (to)
2458		free(to, M_SONAME);
2459	return (error);
2460#else  /* SCTP */
2461	return (EOPNOTSUPP);
2462#endif /* SCTP */
2463}
2464
/*
 * Send a message on a socket through sctp_lower_sosend(), taking the
 * payload from a user-supplied iovec array.
 *
 * Arguments (see the uap layout below):
 *	sd	socket descriptor
 *	iov	user iovec array describing the payload, iovlen entries
 *	to	optional destination address, used only when tolen != 0
 *	sinfo	optional struct sctp_sndrcvinfo copied in from userland
 *	flags	send flags; MSG_NOSIGNAL suppresses SIGPIPE
 *
 * On success td->td_retval[0] holds the number of bytes consumed from
 * the iovec array and 0 is returned.  A transfer that moved some data
 * before failing with ERESTART, EINTR or EWOULDBLOCK is reported as
 * success.  EINVAL is returned when the summed iovec lengths overflow.
 * Kernels built without SCTP always return EOPNOTSUPP.
 *
 * NOTE(review): nothing visible here checks that the descriptor refers
 * to an SCTP socket before sctp_lower_sosend() is called -- confirm
 * whether a protocol check is needed.
 */
int
sctp_generic_sendmsg_iov(td, uap)
	struct thread *td;
	struct sctp_generic_sendmsg_iov_args /* {
		int sd,
		struct iovec *iov,
		int iovlen,
		caddr_t to,
		__socklen_t tolen,
		struct sctp_sndrcvinfo *sinfo,
		int flags
	} */ *uap;
{
#ifdef SCTP
	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
	struct socket *so;
	struct file *fp = NULL;
	int use_rcvinfo = 1;
	int error = 0, len, i;
	struct sockaddr *to = NULL;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif
	struct uio auio;
	struct iovec *iov, *tiov;

	/* Nothing has been acquired yet, so a plain return is safe here. */
	if (uap->sinfo) {
		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
		if (error)
			return (error);
		u_sinfo = &sinfo;
	}
	if (uap->tolen) {
		error = getsockaddr(&to, uap->to, uap->tolen);
		if (error) {
			to = NULL;
			goto sctp_bad2;
		}
	}

	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
	if (error)
		goto sctp_bad1;

	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
	if (error)
		goto sctp_bad1;
#ifdef KTRACE
	/*
	 * "to" stays NULL when the caller passed tolen == 0; only trace
	 * an address that actually exists.
	 */
	if (to != NULL && KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(to);
#endif

	so = (struct socket *)fp->f_data;
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_socket_check_send(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error)
		goto sctp_bad;
#endif /* MAC */

	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	tiov = iov;
	/* Sum the segment lengths, rejecting a total that goes negative. */
	for (i = 0; i < uap->iovlen; i++, tiov++) {
		if ((auio.uio_resid += tiov->iov_len) < 0) {
			error = EINVAL;
			goto sctp_bad;
		}
	}
#ifdef KTRACE
	/*
	 * Snapshot the request before the send consumes it, so that the
	 * ktrgenio() record below is actually produced.
	 * NOTE(review): ktrgenio() is assumed to take ownership of the
	 * clone, as in the receive path -- confirm.
	 */
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif /* KTRACE */
	len = auio.uio_resid;
	error = sctp_lower_sosend(so, to, &auio,
		    (struct mbuf *)NULL, (struct mbuf *)NULL,
		    uap->flags, use_rcvinfo, u_sinfo, td);
	if (error) {
		/* A partial transfer that was interrupted counts as success. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
		    !(uap->flags & MSG_NOSIGNAL)) {
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGPIPE);
			PROC_UNLOCK(td->td_proc);
		}
	}
	if (error == 0)
		td->td_retval[0] = len - auio.uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = td->td_retval[0];
		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
	}
#endif /* KTRACE */
	/* Cleanup labels release resources in reverse order of acquisition. */
sctp_bad:
	free(iov, M_IOV);
sctp_bad1:
	if (fp)
		fdrop(fp, td);
sctp_bad2:
	if (to)
		free(to, M_SONAME);
	return (error);
#else  /* SCTP */
	return (EOPNOTSUPP);
#endif /* SCTP */
}
2577
/*
 * Receive a message from a socket through sctp_sorecvmsg() into a
 * user-supplied iovec array.
 *
 * Arguments (see the uap layout below):
 *	sd		socket descriptor
 *	iov		user iovec array receiving the payload, iovlen entries
 *	from		optional user buffer for the peer address
 *	fromlenaddr	optional in/out length of "from"
 *	sinfo		optional user buffer for struct sctp_sndrcvinfo
 *	msg_flags	optional in/out message flags
 *
 * On success td->td_retval[0] holds the number of bytes stored through
 * the iovec array and 0 is returned.  A transfer that moved some data
 * before failing with ERESTART, EINTR or EWOULDBLOCK is reported as
 * success, but sinfo is not copied out in that case.  EINVAL is
 * returned when the summed iovec lengths overflow.  Kernels built
 * without SCTP always return EOPNOTSUPP.
 */
int
sctp_generic_recvmsg(td, uap)
	struct thread *td;
	struct sctp_generic_recvmsg_args /* {
		int sd,
		struct iovec *iov,
		int iovlen,
		struct sockaddr *from,
		__socklen_t *fromlenaddr,
		struct sctp_sndrcvinfo *sinfo,
		int *msg_flags
	} */ *uap;
{
#ifdef SCTP
	u_int8_t sockbufstore[256];
	struct uio auio;
	struct iovec *iov, *tiov;
	struct sctp_sndrcvinfo sinfo;
	struct socket *so;
	struct file *fp = NULL;
	struct sockaddr *fromsa;
	int fromlen, fromcap;
	int len, i, msg_flags;
	int error = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
	if (error)
		return (error);
	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
	if (error)
		goto out1;

	so = fp->f_data;
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_socket_check_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error)
		goto out;
#endif /* MAC */

	if (uap->fromlenaddr) {
		error = copyin(uap->fromlenaddr,
		    &fromlen, sizeof (fromlen));
		if (error)
			goto out;
	} else {
		fromlen = 0;
	}
	if (uap->msg_flags) {
		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
		if (error)
			goto out;
	} else {
		msg_flags = 0;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	tiov = iov;
	/* Sum the segment lengths, rejecting a total that goes negative. */
	for (i = 0; i < uap->iovlen; i++, tiov++) {
		if ((auio.uio_resid += tiov->iov_len) < 0) {
			error = EINVAL;
			goto out;
		}
	}
	len = auio.uio_resid;

	/*
	 * Start from zeroed storage: sinfo is copied out to userland and
	 * fromsa->sa_len is read below even when no address was requested,
	 * so neither may carry stale stack contents.
	 */
	bzero(sockbufstore, sizeof (sockbufstore));
	bzero(&sinfo, sizeof (sinfo));
	fromsa = (struct sockaddr *)sockbufstore;

	/*
	 * fromlen comes from userland but describes the fixed-size kernel
	 * buffer above; never advertise more room than really exists, and
	 * treat a negative length as "no address wanted".
	 */
	if (fromlen < 0)
		fromcap = 0;
	else if (fromlen > (int)sizeof (sockbufstore))
		fromcap = (int)sizeof (sockbufstore);
	else
		fromcap = fromlen;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif /* KTRACE */
	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
		    fromsa, fromcap, &msg_flags,
		    (struct sctp_sndrcvinfo *)&sinfo, 1);
	if (error) {
		/* A partial transfer that was interrupted counts as success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	} else {
		if (uap->sinfo)
			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(uap->sd, UIO_READ, ktruio, error);
	}
#endif /* KTRACE */
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;

	/* From here on "len" is reused as the address length to report. */
	if (fromlen && uap->from) {
		len = fromlen;
		if (len <= 0)
			len = 0;
		else {
			len = MIN(len, fromsa->sa_len);
			error = copyout(fromsa, uap->from, (unsigned)len);
			if (error)
				goto out;
		}
		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
		if (error)
			goto out;
	}
#ifdef KTRACE
	/* Only trace an address that was actually filled in. */
	if (fromsa->sa_len != 0 && KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(fromsa);
#endif
	if (uap->msg_flags) {
		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
		if (error)
			goto out;
	}
	/* Cleanup labels release resources in reverse order of acquisition. */
out:
	free(iov, M_IOV);
out1:
	if (fp)
		fdrop(fp, td);

	return (error);
#else  /* SCTP */
	return (EOPNOTSUPP);
#endif /* SCTP */
}
2720