kern_sendfile.c revision 190958
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 190958 2009-04-12 05:19:35Z kmacy $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
/*
 * Prototypes for the file-local helpers that implement the shared part of
 * several system calls.  The `compat' argument of the *1()/do_*() helpers
 * is passed by the per-syscall wrappers that call them.
 */
static int	accept1(struct thread *td, struct accept_args *uap, int compat);
static int	do_sendfile(struct thread *td, struct sendfile_args *uap,
		    int compat);
static int	getpeername1(struct thread *td, struct getpeername_args *uap,
		    int compat);
static int	getsockname1(struct thread *td, struct getsockname_args *uap,
		    int compat);
static int	recvit(struct thread *td, int s, struct msghdr *mp,
		    void *namelenp);
static int	sendit(struct thread *td, int s, struct msghdr *mp, int flags);
94
95/*
96 * NSFBUFS-related variables and associated sysctls
97 */
98int nsfbufs;
99int nsfbufspeak;
100int nsfbufsused;
101
102SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
103    "Maximum number of sendfile(2) sf_bufs available");
104SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
105    "Number of sendfile(2) sf_bufs at peak usage");
106SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
107    "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry.  A reference on the
111 * file entry is held upon returning.  This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count.  If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120	struct file *fp;
121	int error;
122
123	fp = NULL;
124	if (fdp == NULL)
125		error = EBADF;
126	else {
127		FILEDESC_SLOCK(fdp);
128		fp = fget_locked(fdp, fd);
129		if (fp == NULL)
130			error = EBADF;
131		else if (fp->f_type != DTYPE_SOCKET) {
132			fp = NULL;
133			error = ENOTSOCK;
134		} else {
135			fhold(fp);
136			if (fflagp != NULL)
137				*fflagp = fp->f_flag;
138			error = 0;
139		}
140		FILEDESC_SUNLOCK(fdp);
141	}
142	*fpp = fp;
143	return (error);
144}
145
146/*
147 * System call interface to the socket abstraction.
148 */
/*
 * Building with COMPAT_43 turns on COMPAT_OLDSOCK, which gates the o*()
 * entry points and the MSG_COMPAT handling further down in this file.
 */
#ifdef COMPAT_43
#define COMPAT_OLDSOCK
#endif
152
153int
154socket(td, uap)
155	struct thread *td;
156	struct socket_args /* {
157		int	domain;
158		int	type;
159		int	protocol;
160	} */ *uap;
161{
162	struct filedesc *fdp;
163	struct socket *so;
164	struct file *fp;
165	int fd, error;
166
167#ifdef MAC
168	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
169	    uap->protocol);
170	if (error)
171		return (error);
172#endif
173	fdp = td->td_proc->p_fd;
174	error = falloc(td, &fp, &fd);
175	if (error)
176		return (error);
177	/* An extra reference on `fp' has been held for us by falloc(). */
178	error = socreate(uap->domain, &so, uap->type, uap->protocol,
179	    td->td_ucred, td);
180	if (error) {
181		fdclose(fdp, fp, fd, td);
182	} else {
183		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
184		td->td_retval[0] = fd;
185	}
186	fdrop(fp, td);
187	return (error);
188}
189
190/* ARGSUSED */
191int
192bind(td, uap)
193	struct thread *td;
194	struct bind_args /* {
195		int	s;
196		caddr_t	name;
197		int	namelen;
198	} */ *uap;
199{
200	struct sockaddr *sa;
201	int error;
202
203	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
204		return (error);
205
206	error = kern_bind(td, uap->s, sa);
207	free(sa, M_SONAME);
208	return (error);
209}
210
211int
212kern_bind(td, fd, sa)
213	struct thread *td;
214	int fd;
215	struct sockaddr *sa;
216{
217	struct socket *so;
218	struct file *fp;
219	int error;
220
221	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
222	if (error)
223		return (error);
224	so = fp->f_data;
225#ifdef KTRACE
226	if (KTRPOINT(td, KTR_STRUCT))
227		ktrsockaddr(sa);
228#endif
229#ifdef MAC
230	SOCK_LOCK(so);
231	error = mac_socket_check_bind(td->td_ucred, so, sa);
232	SOCK_UNLOCK(so);
233	if (error)
234		goto done;
235#endif
236	error = sobind(so, sa, td);
237#ifdef MAC
238done:
239#endif
240	fdrop(fp, td);
241	return (error);
242}
243
244/* ARGSUSED */
245int
246listen(td, uap)
247	struct thread *td;
248	struct listen_args /* {
249		int	s;
250		int	backlog;
251	} */ *uap;
252{
253	struct socket *so;
254	struct file *fp;
255	int error;
256
257	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
258	if (error == 0) {
259		so = fp->f_data;
260#ifdef MAC
261		SOCK_LOCK(so);
262		error = mac_socket_check_listen(td->td_ucred, so);
263		SOCK_UNLOCK(so);
264		if (error)
265			goto done;
266#endif
267		error = solisten(so, uap->backlog, td);
268#ifdef MAC
269done:
270#endif
271		fdrop(fp, td);
272	}
273	return(error);
274}
275
276/*
277 * accept1()
278 */
279static int
280accept1(td, uap, compat)
281	struct thread *td;
282	struct accept_args /* {
283		int	s;
284		struct sockaddr	* __restrict name;
285		socklen_t	* __restrict anamelen;
286	} */ *uap;
287	int compat;
288{
289	struct sockaddr *name;
290	socklen_t namelen;
291	struct file *fp;
292	int error;
293
294	if (uap->name == NULL)
295		return (kern_accept(td, uap->s, NULL, NULL, NULL));
296
297	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
298	if (error)
299		return (error);
300
301	error = kern_accept(td, uap->s, &name, &namelen, &fp);
302
303	/*
304	 * return a namelen of zero for older code which might
305	 * ignore the return value from accept.
306	 */
307	if (error) {
308		(void) copyout(&namelen,
309		    uap->anamelen, sizeof(*uap->anamelen));
310		return (error);
311	}
312
313	if (error == 0 && name != NULL) {
314#ifdef COMPAT_OLDSOCK
315		if (compat)
316			((struct osockaddr *)name)->sa_family =
317			    name->sa_family;
318#endif
319		error = copyout(name, uap->name, namelen);
320	}
321	if (error == 0)
322		error = copyout(&namelen, uap->anamelen,
323		    sizeof(namelen));
324	if (error)
325		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
326	fdrop(fp, td);
327	free(name, M_SONAME);
328	return (error);
329}
330
331int
332kern_accept(struct thread *td, int s, struct sockaddr **name,
333    socklen_t *namelen, struct file **fp)
334{
335	struct filedesc *fdp;
336	struct file *headfp, *nfp = NULL;
337	struct sockaddr *sa = NULL;
338	int error;
339	struct socket *head, *so;
340	int fd;
341	u_int fflag;
342	pid_t pgid;
343	int tmp;
344
345	if (name) {
346		*name = NULL;
347		if (*namelen < 0)
348			return (EINVAL);
349	}
350
351	fdp = td->td_proc->p_fd;
352	error = getsock(fdp, s, &headfp, &fflag);
353	if (error)
354		return (error);
355	head = headfp->f_data;
356	if ((head->so_options & SO_ACCEPTCONN) == 0) {
357		error = EINVAL;
358		goto done;
359	}
360#ifdef MAC
361	SOCK_LOCK(head);
362	error = mac_socket_check_accept(td->td_ucred, head);
363	SOCK_UNLOCK(head);
364	if (error != 0)
365		goto done;
366#endif
367	error = falloc(td, &nfp, &fd);
368	if (error)
369		goto done;
370	ACCEPT_LOCK();
371	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
372		ACCEPT_UNLOCK();
373		error = EWOULDBLOCK;
374		goto noconnection;
375	}
376	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
377		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
378			head->so_error = ECONNABORTED;
379			break;
380		}
381		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
382		    "accept", 0);
383		if (error) {
384			ACCEPT_UNLOCK();
385			goto noconnection;
386		}
387	}
388	if (head->so_error) {
389		error = head->so_error;
390		head->so_error = 0;
391		ACCEPT_UNLOCK();
392		goto noconnection;
393	}
394	so = TAILQ_FIRST(&head->so_comp);
395	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
396	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
397
398	/*
399	 * Before changing the flags on the socket, we have to bump the
400	 * reference count.  Otherwise, if the protocol calls sofree(),
401	 * the socket will be released due to a zero refcount.
402	 */
403	SOCK_LOCK(so);			/* soref() and so_state update */
404	soref(so);			/* file descriptor reference */
405
406	TAILQ_REMOVE(&head->so_comp, so, so_list);
407	head->so_qlen--;
408	so->so_state |= (head->so_state & SS_NBIO);
409	so->so_qstate &= ~SQ_COMP;
410	so->so_head = NULL;
411
412	SOCK_UNLOCK(so);
413	ACCEPT_UNLOCK();
414
415	/* An extra reference on `nfp' has been held for us by falloc(). */
416	td->td_retval[0] = fd;
417
418	/* connection has been removed from the listen queue */
419	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
420
421	pgid = fgetown(&head->so_sigio);
422	if (pgid != 0)
423		fsetown(pgid, &so->so_sigio);
424
425	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
426	/* Sync socket nonblocking/async state with file flags */
427	tmp = fflag & FNONBLOCK;
428	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
429	tmp = fflag & FASYNC;
430	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
431	sa = 0;
432	error = soaccept(so, &sa);
433	if (error) {
434		/*
435		 * return a namelen of zero for older code which might
436		 * ignore the return value from accept.
437		 */
438		if (name)
439			*namelen = 0;
440		goto noconnection;
441	}
442	if (sa == NULL) {
443		if (name)
444			*namelen = 0;
445		goto done;
446	}
447	if (name) {
448		/* check sa_len before it is destroyed */
449		if (*namelen > sa->sa_len)
450			*namelen = sa->sa_len;
451#ifdef KTRACE
452		if (KTRPOINT(td, KTR_STRUCT))
453			ktrsockaddr(sa);
454#endif
455		*name = sa;
456		sa = NULL;
457	}
458noconnection:
459	if (sa)
460		free(sa, M_SONAME);
461
462	/*
463	 * close the new descriptor, assuming someone hasn't ripped it
464	 * out from under us.
465	 */
466	if (error)
467		fdclose(fdp, nfp, fd, td);
468
469	/*
470	 * Release explicitly held references before returning.  We return
471	 * a reference on nfp to the caller on success if they request it.
472	 */
473done:
474	if (fp != NULL) {
475		if (error == 0) {
476			*fp = nfp;
477			nfp = NULL;
478		} else
479			*fp = NULL;
480	}
481	if (nfp != NULL)
482		fdrop(nfp, td);
483	fdrop(headfp, td);
484	return (error);
485}
486
/*
 * accept() system call: accept1() with the compat flag clear.
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
495
496#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept entry point: accept1() with the compat flag set, so
 * the returned address is rewritten through struct osockaddr.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
505#endif /* COMPAT_OLDSOCK */
506
507/* ARGSUSED */
508int
509connect(td, uap)
510	struct thread *td;
511	struct connect_args /* {
512		int	s;
513		caddr_t	name;
514		int	namelen;
515	} */ *uap;
516{
517	struct sockaddr *sa;
518	int error;
519
520	error = getsockaddr(&sa, uap->name, uap->namelen);
521	if (error)
522		return (error);
523
524	error = kern_connect(td, uap->s, sa);
525	free(sa, M_SONAME);
526	return (error);
527}
528
529
530int
531kern_connect(td, fd, sa)
532	struct thread *td;
533	int fd;
534	struct sockaddr *sa;
535{
536	struct socket *so;
537	struct file *fp;
538	int error;
539	int interrupted = 0;
540
541	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
542	if (error)
543		return (error);
544	so = fp->f_data;
545	if (so->so_state & SS_ISCONNECTING) {
546		error = EALREADY;
547		goto done1;
548	}
549#ifdef KTRACE
550	if (KTRPOINT(td, KTR_STRUCT))
551		ktrsockaddr(sa);
552#endif
553#ifdef MAC
554	SOCK_LOCK(so);
555	error = mac_socket_check_connect(td->td_ucred, so, sa);
556	SOCK_UNLOCK(so);
557	if (error)
558		goto bad;
559#endif
560	error = soconnect(so, sa, td);
561	if (error)
562		goto bad;
563	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
564		error = EINPROGRESS;
565		goto done1;
566	}
567	SOCK_LOCK(so);
568	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
569		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
570		    "connec", 0);
571		if (error) {
572			if (error == EINTR || error == ERESTART)
573				interrupted = 1;
574			break;
575		}
576	}
577	if (error == 0) {
578		error = so->so_error;
579		so->so_error = 0;
580	}
581	SOCK_UNLOCK(so);
582bad:
583	if (!interrupted)
584		so->so_state &= ~SS_ISCONNECTING;
585	if (error == ERESTART)
586		error = EINTR;
587done1:
588	fdrop(fp, td);
589	return (error);
590}
591
592int
593socketpair(td, uap)
594	struct thread *td;
595	struct socketpair_args /* {
596		int	domain;
597		int	type;
598		int	protocol;
599		int	*rsv;
600	} */ *uap;
601{
602	struct filedesc *fdp = td->td_proc->p_fd;
603	struct file *fp1, *fp2;
604	struct socket *so1, *so2;
605	int fd, error, sv[2];
606
607#ifdef MAC
608	/* We might want to have a separate check for socket pairs. */
609	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
610	    uap->protocol);
611	if (error)
612		return (error);
613#endif
614
615	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
616	    td->td_ucred, td);
617	if (error)
618		return (error);
619	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
620	    td->td_ucred, td);
621	if (error)
622		goto free1;
623	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
624	error = falloc(td, &fp1, &fd);
625	if (error)
626		goto free2;
627	sv[0] = fd;
628	fp1->f_data = so1;	/* so1 already has ref count */
629	error = falloc(td, &fp2, &fd);
630	if (error)
631		goto free3;
632	fp2->f_data = so2;	/* so2 already has ref count */
633	sv[1] = fd;
634	error = soconnect2(so1, so2);
635	if (error)
636		goto free4;
637	if (uap->type == SOCK_DGRAM) {
638		/*
639		 * Datagram socket connection is asymmetric.
640		 */
641		 error = soconnect2(so2, so1);
642		 if (error)
643			goto free4;
644	}
645	finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
646	finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
647	so1 = so2 = NULL;
648	error = copyout(sv, uap->rsv, 2 * sizeof (int));
649	if (error)
650		goto free4;
651	fdrop(fp1, td);
652	fdrop(fp2, td);
653	return (0);
654free4:
655	fdclose(fdp, fp2, sv[1], td);
656	fdrop(fp2, td);
657free3:
658	fdclose(fdp, fp1, sv[0], td);
659	fdrop(fp1, td);
660free2:
661	if (so2 != NULL)
662		(void)soclose(so2);
663free1:
664	if (so1 != NULL)
665		(void)soclose(so1);
666	return (error);
667}
668
669static int
670sendit(td, s, mp, flags)
671	struct thread *td;
672	int s;
673	struct msghdr *mp;
674	int flags;
675{
676	struct mbuf *control;
677	struct sockaddr *to;
678	int error;
679
680	if (mp->msg_name != NULL) {
681		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
682		if (error) {
683			to = NULL;
684			goto bad;
685		}
686		mp->msg_name = to;
687	} else {
688		to = NULL;
689	}
690
691	if (mp->msg_control) {
692		if (mp->msg_controllen < sizeof(struct cmsghdr)
693#ifdef COMPAT_OLDSOCK
694		    && mp->msg_flags != MSG_COMPAT
695#endif
696		) {
697			error = EINVAL;
698			goto bad;
699		}
700		error = sockargs(&control, mp->msg_control,
701		    mp->msg_controllen, MT_CONTROL);
702		if (error)
703			goto bad;
704#ifdef COMPAT_OLDSOCK
705		if (mp->msg_flags == MSG_COMPAT) {
706			struct cmsghdr *cm;
707
708			M_PREPEND(control, sizeof(*cm), M_WAIT);
709			cm = mtod(control, struct cmsghdr *);
710			cm->cmsg_len = control->m_len;
711			cm->cmsg_level = SOL_SOCKET;
712			cm->cmsg_type = SCM_RIGHTS;
713		}
714#endif
715	} else {
716		control = NULL;
717	}
718
719	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
720
721bad:
722	if (to)
723		free(to, M_SONAME);
724	return (error);
725}
726
727int
728kern_sendit(td, s, mp, flags, control, segflg)
729	struct thread *td;
730	int s;
731	struct msghdr *mp;
732	int flags;
733	struct mbuf *control;
734	enum uio_seg segflg;
735{
736	struct file *fp;
737	struct uio auio;
738	struct iovec *iov;
739	struct socket *so;
740	int i;
741	int len, error;
742#ifdef KTRACE
743	struct uio *ktruio = NULL;
744#endif
745
746	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
747	if (error)
748		return (error);
749	so = (struct socket *)fp->f_data;
750
751#ifdef MAC
752	SOCK_LOCK(so);
753	if (mp->msg_name != NULL)
754		error = mac_socket_check_connect(td->td_ucred, so,
755		    mp->msg_name);
756	if (error == 0)
757		error = mac_socket_check_send(td->td_ucred, so);
758	SOCK_UNLOCK(so);
759	if (error)
760		goto bad;
761#endif
762
763	auio.uio_iov = mp->msg_iov;
764	auio.uio_iovcnt = mp->msg_iovlen;
765	auio.uio_segflg = segflg;
766	auio.uio_rw = UIO_WRITE;
767	auio.uio_td = td;
768	auio.uio_offset = 0;			/* XXX */
769	auio.uio_resid = 0;
770	iov = mp->msg_iov;
771	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
772		if ((auio.uio_resid += iov->iov_len) < 0) {
773			error = EINVAL;
774			goto bad;
775		}
776	}
777#ifdef KTRACE
778	if (KTRPOINT(td, KTR_GENIO))
779		ktruio = cloneuio(&auio);
780#endif
781	len = auio.uio_resid;
782	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
783	if (error) {
784		if (auio.uio_resid != len && (error == ERESTART ||
785		    error == EINTR || error == EWOULDBLOCK))
786			error = 0;
787		/* Generation of SIGPIPE can be controlled per socket */
788		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
789		    !(flags & MSG_NOSIGNAL)) {
790			PROC_LOCK(td->td_proc);
791			psignal(td->td_proc, SIGPIPE);
792			PROC_UNLOCK(td->td_proc);
793		}
794	}
795	if (error == 0)
796		td->td_retval[0] = len - auio.uio_resid;
797#ifdef KTRACE
798	if (ktruio != NULL) {
799		ktruio->uio_resid = td->td_retval[0];
800		ktrgenio(s, UIO_WRITE, ktruio, error);
801	}
802#endif
803bad:
804	fdrop(fp, td);
805	return (error);
806}
807
808int
809sendto(td, uap)
810	struct thread *td;
811	struct sendto_args /* {
812		int	s;
813		caddr_t	buf;
814		size_t	len;
815		int	flags;
816		caddr_t	to;
817		int	tolen;
818	} */ *uap;
819{
820	struct msghdr msg;
821	struct iovec aiov;
822	int error;
823
824	msg.msg_name = uap->to;
825	msg.msg_namelen = uap->tolen;
826	msg.msg_iov = &aiov;
827	msg.msg_iovlen = 1;
828	msg.msg_control = 0;
829#ifdef COMPAT_OLDSOCK
830	msg.msg_flags = 0;
831#endif
832	aiov.iov_base = uap->buf;
833	aiov.iov_len = uap->len;
834	error = sendit(td, uap->s, &msg, uap->flags);
835	return (error);
836}
837
838#ifdef COMPAT_OLDSOCK
839int
840osend(td, uap)
841	struct thread *td;
842	struct osend_args /* {
843		int	s;
844		caddr_t	buf;
845		int	len;
846		int	flags;
847	} */ *uap;
848{
849	struct msghdr msg;
850	struct iovec aiov;
851	int error;
852
853	msg.msg_name = 0;
854	msg.msg_namelen = 0;
855	msg.msg_iov = &aiov;
856	msg.msg_iovlen = 1;
857	aiov.iov_base = uap->buf;
858	aiov.iov_len = uap->len;
859	msg.msg_control = 0;
860	msg.msg_flags = 0;
861	error = sendit(td, uap->s, &msg, uap->flags);
862	return (error);
863}
864
865int
866osendmsg(td, uap)
867	struct thread *td;
868	struct osendmsg_args /* {
869		int	s;
870		caddr_t	msg;
871		int	flags;
872	} */ *uap;
873{
874	struct msghdr msg;
875	struct iovec *iov;
876	int error;
877
878	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
879	if (error)
880		return (error);
881	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
882	if (error)
883		return (error);
884	msg.msg_iov = iov;
885	msg.msg_flags = MSG_COMPAT;
886	error = sendit(td, uap->s, &msg, uap->flags);
887	free(iov, M_IOV);
888	return (error);
889}
890#endif
891
892int
893sendmsg(td, uap)
894	struct thread *td;
895	struct sendmsg_args /* {
896		int	s;
897		caddr_t	msg;
898		int	flags;
899	} */ *uap;
900{
901	struct msghdr msg;
902	struct iovec *iov;
903	int error;
904
905	error = copyin(uap->msg, &msg, sizeof (msg));
906	if (error)
907		return (error);
908	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
909	if (error)
910		return (error);
911	msg.msg_iov = iov;
912#ifdef COMPAT_OLDSOCK
913	msg.msg_flags = 0;
914#endif
915	error = sendit(td, uap->s, &msg, uap->flags);
916	free(iov, M_IOV);
917	return (error);
918}
919
920int
921kern_recvit(td, s, mp, fromseg, controlp)
922	struct thread *td;
923	int s;
924	struct msghdr *mp;
925	enum uio_seg fromseg;
926	struct mbuf **controlp;
927{
928	struct uio auio;
929	struct iovec *iov;
930	int i;
931	socklen_t len;
932	int error;
933	struct mbuf *m, *control = 0;
934	caddr_t ctlbuf;
935	struct file *fp;
936	struct socket *so;
937	struct sockaddr *fromsa = 0;
938#ifdef KTRACE
939	struct uio *ktruio = NULL;
940#endif
941
942	if(controlp != NULL)
943		*controlp = 0;
944
945	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
946	if (error)
947		return (error);
948	so = fp->f_data;
949
950#ifdef MAC
951	SOCK_LOCK(so);
952	error = mac_socket_check_receive(td->td_ucred, so);
953	SOCK_UNLOCK(so);
954	if (error) {
955		fdrop(fp, td);
956		return (error);
957	}
958#endif
959
960	auio.uio_iov = mp->msg_iov;
961	auio.uio_iovcnt = mp->msg_iovlen;
962	auio.uio_segflg = UIO_USERSPACE;
963	auio.uio_rw = UIO_READ;
964	auio.uio_td = td;
965	auio.uio_offset = 0;			/* XXX */
966	auio.uio_resid = 0;
967	iov = mp->msg_iov;
968	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
969		if ((auio.uio_resid += iov->iov_len) < 0) {
970			fdrop(fp, td);
971			return (EINVAL);
972		}
973	}
974#ifdef KTRACE
975	if (KTRPOINT(td, KTR_GENIO))
976		ktruio = cloneuio(&auio);
977#endif
978	len = auio.uio_resid;
979	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
980	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
981	    &mp->msg_flags);
982	if (error) {
983		if (auio.uio_resid != (int)len && (error == ERESTART ||
984		    error == EINTR || error == EWOULDBLOCK))
985			error = 0;
986	}
987#ifdef KTRACE
988	if (ktruio != NULL) {
989		ktruio->uio_resid = (int)len - auio.uio_resid;
990		ktrgenio(s, UIO_READ, ktruio, error);
991	}
992#endif
993	if (error)
994		goto out;
995	td->td_retval[0] = (int)len - auio.uio_resid;
996	if (mp->msg_name) {
997		len = mp->msg_namelen;
998		if (len <= 0 || fromsa == 0)
999			len = 0;
1000		else {
1001			/* save sa_len before it is destroyed by MSG_COMPAT */
1002			len = MIN(len, fromsa->sa_len);
1003#ifdef COMPAT_OLDSOCK
1004			if (mp->msg_flags & MSG_COMPAT)
1005				((struct osockaddr *)fromsa)->sa_family =
1006				    fromsa->sa_family;
1007#endif
1008			if (fromseg == UIO_USERSPACE) {
1009				error = copyout(fromsa, mp->msg_name,
1010				    (unsigned)len);
1011				if (error)
1012					goto out;
1013			} else
1014				bcopy(fromsa, mp->msg_name, len);
1015		}
1016		mp->msg_namelen = len;
1017	}
1018	if (mp->msg_control && controlp == NULL) {
1019#ifdef COMPAT_OLDSOCK
1020		/*
1021		 * We assume that old recvmsg calls won't receive access
1022		 * rights and other control info, esp. as control info
1023		 * is always optional and those options didn't exist in 4.3.
1024		 * If we receive rights, trim the cmsghdr; anything else
1025		 * is tossed.
1026		 */
1027		if (control && mp->msg_flags & MSG_COMPAT) {
1028			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1029			    SOL_SOCKET ||
1030			    mtod(control, struct cmsghdr *)->cmsg_type !=
1031			    SCM_RIGHTS) {
1032				mp->msg_controllen = 0;
1033				goto out;
1034			}
1035			control->m_len -= sizeof (struct cmsghdr);
1036			control->m_data += sizeof (struct cmsghdr);
1037		}
1038#endif
1039		len = mp->msg_controllen;
1040		m = control;
1041		mp->msg_controllen = 0;
1042		ctlbuf = mp->msg_control;
1043
1044		while (m && len > 0) {
1045			unsigned int tocopy;
1046
1047			if (len >= m->m_len)
1048				tocopy = m->m_len;
1049			else {
1050				mp->msg_flags |= MSG_CTRUNC;
1051				tocopy = len;
1052			}
1053
1054			if ((error = copyout(mtod(m, caddr_t),
1055					ctlbuf, tocopy)) != 0)
1056				goto out;
1057
1058			ctlbuf += tocopy;
1059			len -= tocopy;
1060			m = m->m_next;
1061		}
1062		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1063	}
1064out:
1065	fdrop(fp, td);
1066#ifdef KTRACE
1067	if (fromsa && KTRPOINT(td, KTR_STRUCT))
1068		ktrsockaddr(fromsa);
1069#endif
1070	if (fromsa)
1071		free(fromsa, M_SONAME);
1072
1073	if (error == 0 && controlp != NULL)
1074		*controlp = control;
1075	else  if (control)
1076		m_freem(control);
1077
1078	return (error);
1079}
1080
1081static int
1082recvit(td, s, mp, namelenp)
1083	struct thread *td;
1084	int s;
1085	struct msghdr *mp;
1086	void *namelenp;
1087{
1088	int error;
1089
1090	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1091	if (error)
1092		return (error);
1093	if (namelenp) {
1094		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1095#ifdef COMPAT_OLDSOCK
1096		if (mp->msg_flags & MSG_COMPAT)
1097			error = 0;	/* old recvfrom didn't check */
1098#endif
1099	}
1100	return (error);
1101}
1102
1103int
1104recvfrom(td, uap)
1105	struct thread *td;
1106	struct recvfrom_args /* {
1107		int	s;
1108		caddr_t	buf;
1109		size_t	len;
1110		int	flags;
1111		struct sockaddr * __restrict	from;
1112		socklen_t * __restrict fromlenaddr;
1113	} */ *uap;
1114{
1115	struct msghdr msg;
1116	struct iovec aiov;
1117	int error;
1118
1119	if (uap->fromlenaddr) {
1120		error = copyin(uap->fromlenaddr,
1121		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1122		if (error)
1123			goto done2;
1124	} else {
1125		msg.msg_namelen = 0;
1126	}
1127	msg.msg_name = uap->from;
1128	msg.msg_iov = &aiov;
1129	msg.msg_iovlen = 1;
1130	aiov.iov_base = uap->buf;
1131	aiov.iov_len = uap->len;
1132	msg.msg_control = 0;
1133	msg.msg_flags = uap->flags;
1134	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1135done2:
1136	return(error);
1137}
1138
1139#ifdef COMPAT_OLDSOCK
1140int
1141orecvfrom(td, uap)
1142	struct thread *td;
1143	struct recvfrom_args *uap;
1144{
1145
1146	uap->flags |= MSG_COMPAT;
1147	return (recvfrom(td, uap));
1148}
1149#endif
1150
1151#ifdef COMPAT_OLDSOCK
1152int
1153orecv(td, uap)
1154	struct thread *td;
1155	struct orecv_args /* {
1156		int	s;
1157		caddr_t	buf;
1158		int	len;
1159		int	flags;
1160	} */ *uap;
1161{
1162	struct msghdr msg;
1163	struct iovec aiov;
1164	int error;
1165
1166	msg.msg_name = 0;
1167	msg.msg_namelen = 0;
1168	msg.msg_iov = &aiov;
1169	msg.msg_iovlen = 1;
1170	aiov.iov_base = uap->buf;
1171	aiov.iov_len = uap->len;
1172	msg.msg_control = 0;
1173	msg.msg_flags = uap->flags;
1174	error = recvit(td, uap->s, &msg, NULL);
1175	return (error);
1176}
1177
1178/*
1179 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1180 * overlays the new one, missing only the flags, and with the (old) access
1181 * rights where the control fields are now.
1182 */
1183int
1184orecvmsg(td, uap)
1185	struct thread *td;
1186	struct orecvmsg_args /* {
1187		int	s;
1188		struct	omsghdr *msg;
1189		int	flags;
1190	} */ *uap;
1191{
1192	struct msghdr msg;
1193	struct iovec *iov;
1194	int error;
1195
1196	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1197	if (error)
1198		return (error);
1199	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1200	if (error)
1201		return (error);
1202	msg.msg_flags = uap->flags | MSG_COMPAT;
1203	msg.msg_iov = iov;
1204	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1205	if (msg.msg_controllen && error == 0)
1206		error = copyout(&msg.msg_controllen,
1207		    &uap->msg->msg_accrightslen, sizeof (int));
1208	free(iov, M_IOV);
1209	return (error);
1210}
1211#endif
1212
1213int
1214recvmsg(td, uap)
1215	struct thread *td;
1216	struct recvmsg_args /* {
1217		int	s;
1218		struct	msghdr *msg;
1219		int	flags;
1220	} */ *uap;
1221{
1222	struct msghdr msg;
1223	struct iovec *uiov, *iov;
1224	int error;
1225
1226	error = copyin(uap->msg, &msg, sizeof (msg));
1227	if (error)
1228		return (error);
1229	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1230	if (error)
1231		return (error);
1232	msg.msg_flags = uap->flags;
1233#ifdef COMPAT_OLDSOCK
1234	msg.msg_flags &= ~MSG_COMPAT;
1235#endif
1236	uiov = msg.msg_iov;
1237	msg.msg_iov = iov;
1238	error = recvit(td, uap->s, &msg, NULL);
1239	if (error == 0) {
1240		msg.msg_iov = uiov;
1241		error = copyout(&msg, uap->msg, sizeof(msg));
1242	}
1243	free(iov, M_IOV);
1244	return (error);
1245}
1246
1247/* ARGSUSED */
1248int
1249shutdown(td, uap)
1250	struct thread *td;
1251	struct shutdown_args /* {
1252		int	s;
1253		int	how;
1254	} */ *uap;
1255{
1256	struct socket *so;
1257	struct file *fp;
1258	int error;
1259
1260	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1261	if (error == 0) {
1262		so = fp->f_data;
1263		error = soshutdown(so, uap->how);
1264		fdrop(fp, td);
1265	}
1266	return (error);
1267}
1268
1269/* ARGSUSED */
1270int
1271setsockopt(td, uap)
1272	struct thread *td;
1273	struct setsockopt_args /* {
1274		int	s;
1275		int	level;
1276		int	name;
1277		caddr_t	val;
1278		int	valsize;
1279	} */ *uap;
1280{
1281
1282	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1283	    uap->val, UIO_USERSPACE, uap->valsize));
1284}
1285
1286int
1287kern_setsockopt(td, s, level, name, val, valseg, valsize)
1288	struct thread *td;
1289	int s;
1290	int level;
1291	int name;
1292	void *val;
1293	enum uio_seg valseg;
1294	socklen_t valsize;
1295{
1296	int error;
1297	struct socket *so;
1298	struct file *fp;
1299	struct sockopt sopt;
1300
1301	if (val == NULL && valsize != 0)
1302		return (EFAULT);
1303	if ((int)valsize < 0)
1304		return (EINVAL);
1305
1306	sopt.sopt_dir = SOPT_SET;
1307	sopt.sopt_level = level;
1308	sopt.sopt_name = name;
1309	sopt.sopt_val = val;
1310	sopt.sopt_valsize = valsize;
1311	switch (valseg) {
1312	case UIO_USERSPACE:
1313		sopt.sopt_td = td;
1314		break;
1315	case UIO_SYSSPACE:
1316		sopt.sopt_td = NULL;
1317		break;
1318	default:
1319		panic("kern_setsockopt called with bad valseg");
1320	}
1321
1322	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1323	if (error == 0) {
1324		so = fp->f_data;
1325		error = sosetopt(so, &sopt);
1326		fdrop(fp, td);
1327	}
1328	return(error);
1329}
1330
1331/* ARGSUSED */
1332int
1333getsockopt(td, uap)
1334	struct thread *td;
1335	struct getsockopt_args /* {
1336		int	s;
1337		int	level;
1338		int	name;
1339		void * __restrict	val;
1340		socklen_t * __restrict avalsize;
1341	} */ *uap;
1342{
1343	socklen_t valsize;
1344	int	error;
1345
1346	if (uap->val) {
1347		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1348		if (error)
1349			return (error);
1350	}
1351
1352	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1353	    uap->val, UIO_USERSPACE, &valsize);
1354
1355	if (error == 0)
1356		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1357	return (error);
1358}
1359
1360/*
1361 * Kernel version of getsockopt.
1362 * optval can be a userland or userspace. optlen is always a kernel pointer.
1363 */
1364int
1365kern_getsockopt(td, s, level, name, val, valseg, valsize)
1366	struct thread *td;
1367	int s;
1368	int level;
1369	int name;
1370	void *val;
1371	enum uio_seg valseg;
1372	socklen_t *valsize;
1373{
1374	int error;
1375	struct  socket *so;
1376	struct file *fp;
1377	struct	sockopt sopt;
1378
1379	if (val == NULL)
1380		*valsize = 0;
1381	if ((int)*valsize < 0)
1382		return (EINVAL);
1383
1384	sopt.sopt_dir = SOPT_GET;
1385	sopt.sopt_level = level;
1386	sopt.sopt_name = name;
1387	sopt.sopt_val = val;
1388	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1389	switch (valseg) {
1390	case UIO_USERSPACE:
1391		sopt.sopt_td = td;
1392		break;
1393	case UIO_SYSSPACE:
1394		sopt.sopt_td = NULL;
1395		break;
1396	default:
1397		panic("kern_getsockopt called with bad valseg");
1398	}
1399
1400	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1401	if (error == 0) {
1402		so = fp->f_data;
1403		error = sogetopt(so, &sopt);
1404		*valsize = sopt.sopt_valsize;
1405		fdrop(fp, td);
1406	}
1407	return (error);
1408}
1409
1410/*
1411 * getsockname1() - Get socket name.
1412 */
1413/* ARGSUSED */
1414static int
1415getsockname1(td, uap, compat)
1416	struct thread *td;
1417	struct getsockname_args /* {
1418		int	fdes;
1419		struct sockaddr * __restrict asa;
1420		socklen_t * __restrict alen;
1421	} */ *uap;
1422	int compat;
1423{
1424	struct sockaddr *sa;
1425	socklen_t len;
1426	int error;
1427
1428	error = copyin(uap->alen, &len, sizeof(len));
1429	if (error)
1430		return (error);
1431
1432	error = kern_getsockname(td, uap->fdes, &sa, &len);
1433	if (error)
1434		return (error);
1435
1436	if (len != 0) {
1437#ifdef COMPAT_OLDSOCK
1438		if (compat)
1439			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1440#endif
1441		error = copyout(sa, uap->asa, (u_int)len);
1442	}
1443	free(sa, M_SONAME);
1444	if (error == 0)
1445		error = copyout(&len, uap->alen, sizeof(len));
1446	return (error);
1447}
1448
1449int
1450kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1451    socklen_t *alen)
1452{
1453	struct socket *so;
1454	struct file *fp;
1455	socklen_t len;
1456	int error;
1457
1458	if (*alen < 0)
1459		return (EINVAL);
1460
1461	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1462	if (error)
1463		return (error);
1464	so = fp->f_data;
1465	*sa = NULL;
1466	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1467	if (error)
1468		goto bad;
1469	if (*sa == NULL)
1470		len = 0;
1471	else
1472		len = MIN(*alen, (*sa)->sa_len);
1473	*alen = len;
1474#ifdef KTRACE
1475	if (KTRPOINT(td, KTR_STRUCT))
1476		ktrsockaddr(*sa);
1477#endif
1478bad:
1479	fdrop(fp, td);
1480	if (error && *sa) {
1481		free(*sa, M_SONAME);
1482		*sa = NULL;
1483	}
1484	return (error);
1485}
1486
/*
 * getsockname system call: getsockname1() without the old-socket
 * compatibility handling.
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	const int compat = 0;

	return (getsockname1(td, uap, compat));
}
1495
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getsockname system call: getsockname1() with the old-socket
 * compatibility handling switched on.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	const int compat = 1;

	return (getsockname1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
1506
1507/*
1508 * getpeername1() - Get name of peer for connected socket.
1509 */
1510/* ARGSUSED */
1511static int
1512getpeername1(td, uap, compat)
1513	struct thread *td;
1514	struct getpeername_args /* {
1515		int	fdes;
1516		struct sockaddr * __restrict	asa;
1517		socklen_t * __restrict	alen;
1518	} */ *uap;
1519	int compat;
1520{
1521	struct sockaddr *sa;
1522	socklen_t len;
1523	int error;
1524
1525	error = copyin(uap->alen, &len, sizeof (len));
1526	if (error)
1527		return (error);
1528
1529	error = kern_getpeername(td, uap->fdes, &sa, &len);
1530	if (error)
1531		return (error);
1532
1533	if (len != 0) {
1534#ifdef COMPAT_OLDSOCK
1535		if (compat)
1536			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1537#endif
1538		error = copyout(sa, uap->asa, (u_int)len);
1539	}
1540	free(sa, M_SONAME);
1541	if (error == 0)
1542		error = copyout(&len, uap->alen, sizeof(len));
1543	return (error);
1544}
1545
1546int
1547kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1548    socklen_t *alen)
1549{
1550	struct socket *so;
1551	struct file *fp;
1552	socklen_t len;
1553	int error;
1554
1555	if (*alen < 0)
1556		return (EINVAL);
1557
1558	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1559	if (error)
1560		return (error);
1561	so = fp->f_data;
1562	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1563		error = ENOTCONN;
1564		goto done;
1565	}
1566	*sa = NULL;
1567	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1568	if (error)
1569		goto bad;
1570	if (*sa == NULL)
1571		len = 0;
1572	else
1573		len = MIN(*alen, (*sa)->sa_len);
1574	*alen = len;
1575#ifdef KTRACE
1576	if (KTRPOINT(td, KTR_STRUCT))
1577		ktrsockaddr(*sa);
1578#endif
1579bad:
1580	if (error && *sa) {
1581		free(*sa, M_SONAME);
1582		*sa = NULL;
1583	}
1584done:
1585	fdrop(fp, td);
1586	return (error);
1587}
1588
/*
 * getpeername system call: getpeername1() without the old-socket
 * compatibility handling.
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	const int compat = 0;

	return (getpeername1(td, uap, compat));
}
1597
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getpeername system call: getpeername1() with the old-socket
 * compatibility handling switched on.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1609
1610int
1611sockargs(mp, buf, buflen, type)
1612	struct mbuf **mp;
1613	caddr_t buf;
1614	int buflen, type;
1615{
1616	struct sockaddr *sa;
1617	struct mbuf *m;
1618	int error;
1619
1620	if ((u_int)buflen > MLEN) {
1621#ifdef COMPAT_OLDSOCK
1622		if (type == MT_SONAME && (u_int)buflen <= 112)
1623			buflen = MLEN;		/* unix domain compat. hack */
1624		else
1625#endif
1626			if ((u_int)buflen > MCLBYTES)
1627				return (EINVAL);
1628	}
1629	m = m_get(M_WAIT, type);
1630	if ((u_int)buflen > MLEN)
1631		MCLGET(m, M_WAIT);
1632	m->m_len = buflen;
1633	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1634	if (error)
1635		(void) m_free(m);
1636	else {
1637		*mp = m;
1638		if (type == MT_SONAME) {
1639			sa = mtod(m, struct sockaddr *);
1640
1641#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1642			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1643				sa->sa_family = sa->sa_len;
1644#endif
1645			sa->sa_len = buflen;
1646		}
1647	}
1648	return (error);
1649}
1650
1651int
1652getsockaddr(namp, uaddr, len)
1653	struct sockaddr **namp;
1654	caddr_t uaddr;
1655	size_t len;
1656{
1657	struct sockaddr *sa;
1658	int error;
1659
1660	if (len > SOCK_MAXADDRLEN)
1661		return (ENAMETOOLONG);
1662	if (len < offsetof(struct sockaddr, sa_data[0]))
1663		return (EINVAL);
1664	sa = malloc(len, M_SONAME, M_WAITOK);
1665	error = copyin(uaddr, sa, len);
1666	if (error) {
1667		free(sa, M_SONAME);
1668	} else {
1669#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1670		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1671			sa->sa_family = sa->sa_len;
1672#endif
1673		sa->sa_len = len;
1674		*namp = sa;
1675	}
1676	return (error);
1677}
1678
1679#include <sys/condvar.h>
1680
1681struct sendfile_sync {
1682	struct mtx	mtx;
1683	struct cv	cv;
1684	unsigned 	count;
1685};
1686
1687/*
1688 * Detach mapped page and release resources back to the system.
1689 */
1690void
1691sf_buf_mext(void *addr, void *args)
1692{
1693	vm_page_t m;
1694	struct sendfile_sync *sfs;
1695
1696	m = sf_buf_page(args);
1697	sf_buf_free(args);
1698	vm_page_lock_queues();
1699	vm_page_unwire(m, 0);
1700	/*
1701	 * Check for the object going away on us. This can
1702	 * happen since we don't hold a reference to it.
1703	 * If so, we're responsible for freeing the page.
1704	 */
1705	if (m->wire_count == 0 && m->object == NULL)
1706		vm_page_free(m);
1707	vm_page_unlock_queues();
1708	if (addr == NULL)
1709		return;
1710	sfs = addr;
1711	mtx_lock(&sfs->mtx);
1712	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1713	if (--sfs->count == 0)
1714		cv_signal(&sfs->cv);
1715	mtx_unlock(&sfs->mtx);
1716}
1717
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the socket
 * 's'.  At most 'nbytes' of file data are sent; nbytes == 0 means "until
 * EOF".  A header and/or trailer may optionally be added to the socket
 * output.  If 'sbytes' is given, the total number of bytes sent is stored
 * in *sbytes.
 *
 * This entry point runs do_sendfile() with the compat flag cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	const int compat = 0;

	return (do_sendfile(td, uap, compat));
}
1735
1736static int
1737do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1738{
1739	struct sf_hdtr hdtr;
1740	struct uio *hdr_uio, *trl_uio;
1741	int error;
1742
1743	hdr_uio = trl_uio = NULL;
1744
1745	if (uap->hdtr != NULL) {
1746		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1747		if (error)
1748			goto out;
1749		if (hdtr.headers != NULL) {
1750			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1751			if (error)
1752				goto out;
1753		}
1754		if (hdtr.trailers != NULL) {
1755			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1756			if (error)
1757				goto out;
1758
1759		}
1760	}
1761
1762	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1763out:
1764	if (hdr_uio)
1765		free(hdr_uio, M_IOV);
1766	if (trl_uio)
1767		free(trl_uio, M_IOV);
1768	return (error);
1769}
1770
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point: repackage the arguments as a
 * struct sendfile_args and run do_sendfile() with the compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args sfargs;

	sfargs.fd = uap->fd;
	sfargs.s = uap->s;
	sfargs.offset = uap->offset;
	sfargs.nbytes = uap->nbytes;
	sfargs.hdtr = uap->hdtr;
	sfargs.sbytes = uap->sbytes;
	sfargs.flags = uap->flags;

	return (do_sendfile(td, &sfargs, 1));
}
#endif /* COMPAT_FREEBSD4 */
1788
1789int
1790kern_sendfile(struct thread *td, struct sendfile_args *uap,
1791    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1792{
1793	struct file *sock_fp;
1794	struct vnode *vp;
1795	struct vm_object *obj = NULL;
1796	struct socket *so = NULL;
1797	struct mbuf *m = NULL;
1798	struct sf_buf *sf;
1799	struct vm_page *pg;
1800	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1801	int error, hdrlen = 0, mnw = 0;
1802	int vfslocked;
1803	struct sendfile_sync *sfs = NULL;
1804
1805	/*
1806	 * The file descriptor must be a regular file and have a
1807	 * backing VM object.
1808	 * File offset must be positive.  If it goes beyond EOF
1809	 * we send only the header/trailer and no payload data.
1810	 */
1811	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1812		goto out;
1813	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1814	vn_lock(vp, LK_SHARED | LK_RETRY);
1815	if (vp->v_type == VREG) {
1816		obj = vp->v_object;
1817		if (obj != NULL) {
1818			/*
1819			 * Temporarily increase the backing VM
1820			 * object's reference count so that a forced
1821			 * reclamation of its vnode does not
1822			 * immediately destroy it.
1823			 */
1824			VM_OBJECT_LOCK(obj);
1825			if ((obj->flags & OBJ_DEAD) == 0) {
1826				vm_object_reference_locked(obj);
1827				VM_OBJECT_UNLOCK(obj);
1828			} else {
1829				VM_OBJECT_UNLOCK(obj);
1830				obj = NULL;
1831			}
1832		}
1833	}
1834	VOP_UNLOCK(vp, 0);
1835	VFS_UNLOCK_GIANT(vfslocked);
1836	if (obj == NULL) {
1837		error = EINVAL;
1838		goto out;
1839	}
1840	if (uap->offset < 0) {
1841		error = EINVAL;
1842		goto out;
1843	}
1844
1845	/*
1846	 * The socket must be a stream socket and connected.
1847	 * Remember if it a blocking or non-blocking socket.
1848	 */
1849	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1850	    NULL)) != 0)
1851		goto out;
1852	so = sock_fp->f_data;
1853	if (so->so_type != SOCK_STREAM) {
1854		error = EINVAL;
1855		goto out;
1856	}
1857	if ((so->so_state & SS_ISCONNECTED) == 0) {
1858		error = ENOTCONN;
1859		goto out;
1860	}
1861	/*
1862	 * Do not wait on memory allocations but return ENOMEM for
1863	 * caller to retry later.
1864	 * XXX: Experimental.
1865	 */
1866	if (uap->flags & SF_MNOWAIT)
1867		mnw = 1;
1868
1869	if (uap->flags & SF_SYNC) {
1870		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
1871		memset(sfs, 0, sizeof *sfs);
1872		mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
1873		cv_init(&sfs->cv, "sendfile");
1874	}
1875
1876#ifdef MAC
1877	SOCK_LOCK(so);
1878	error = mac_socket_check_send(td->td_ucred, so);
1879	SOCK_UNLOCK(so);
1880	if (error)
1881		goto out;
1882#endif
1883
1884	/* If headers are specified copy them into mbufs. */
1885	if (hdr_uio != NULL) {
1886		hdr_uio->uio_td = td;
1887		hdr_uio->uio_rw = UIO_WRITE;
1888		if (hdr_uio->uio_resid > 0) {
1889			/*
1890			 * In FBSD < 5.0 the nbytes to send also included
1891			 * the header.  If compat is specified subtract the
1892			 * header size from nbytes.
1893			 */
1894			if (compat) {
1895				if (uap->nbytes > hdr_uio->uio_resid)
1896					uap->nbytes -= hdr_uio->uio_resid;
1897				else
1898					uap->nbytes = 0;
1899			}
1900			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1901			    0, 0, 0);
1902			if (m == NULL) {
1903				error = mnw ? EAGAIN : ENOBUFS;
1904				goto out;
1905			}
1906			hdrlen = m_length(m, NULL);
1907		}
1908	}
1909
1910	/*
1911	 * Protect against multiple writers to the socket.
1912	 *
1913	 * XXXRW: Historically this has assumed non-interruptibility, so now
1914	 * we implement that, but possibly shouldn't.
1915	 */
1916	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1917
1918	/*
1919	 * Loop through the pages of the file, starting with the requested
1920	 * offset. Get a file page (do I/O if necessary), map the file page
1921	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1922	 * it on the socket.
1923	 * This is done in two loops.  The inner loop turns as many pages
1924	 * as it can, up to available socket buffer space, without blocking
1925	 * into mbufs to have it bulk delivered into the socket send buffer.
1926	 * The outer loop checks the state and available space of the socket
1927	 * and takes care of the overall progress.
1928	 */
1929	for (off = uap->offset, rem = uap->nbytes; ; ) {
1930		int loopbytes = 0;
1931		int space = 0;
1932		int done = 0;
1933
1934		/*
1935		 * Check the socket state for ongoing connection,
1936		 * no errors and space in socket buffer.
1937		 * If space is low allow for the remainder of the
1938		 * file to be processed if it fits the socket buffer.
1939		 * Otherwise block in waiting for sufficient space
1940		 * to proceed, or if the socket is nonblocking, return
1941		 * to userland with EAGAIN while reporting how far
1942		 * we've come.
1943		 * We wait until the socket buffer has significant free
1944		 * space to do bulk sends.  This makes good use of file
1945		 * system read ahead and allows packet segmentation
1946		 * offloading hardware to take over lots of work.  If
1947		 * we were not careful here we would send off only one
1948		 * sfbuf at a time.
1949		 */
1950		SOCKBUF_LOCK(&so->so_snd);
1951		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1952			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1953retry_space:
1954		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1955			error = EPIPE;
1956			SOCKBUF_UNLOCK(&so->so_snd);
1957			goto done;
1958		} else if (so->so_error) {
1959			error = so->so_error;
1960			so->so_error = 0;
1961			SOCKBUF_UNLOCK(&so->so_snd);
1962			goto done;
1963		}
1964		space = sbspace(&so->so_snd);
1965		if (space < rem &&
1966		    (space <= 0 ||
1967		     space < so->so_snd.sb_lowat)) {
1968			if (so->so_state & SS_NBIO) {
1969				SOCKBUF_UNLOCK(&so->so_snd);
1970				error = EAGAIN;
1971				goto done;
1972			}
1973			/*
1974			 * sbwait drops the lock while sleeping.
1975			 * When we loop back to retry_space the
1976			 * state may have changed and we retest
1977			 * for it.
1978			 */
1979			error = sbwait(&so->so_snd);
1980			/*
1981			 * An error from sbwait usually indicates that we've
1982			 * been interrupted by a signal. If we've sent anything
1983			 * then return bytes sent, otherwise return the error.
1984			 */
1985			if (error) {
1986				SOCKBUF_UNLOCK(&so->so_snd);
1987				goto done;
1988			}
1989			goto retry_space;
1990		}
1991		SOCKBUF_UNLOCK(&so->so_snd);
1992
1993		/*
1994		 * Reduce space in the socket buffer by the size of
1995		 * the header mbuf chain.
1996		 * hdrlen is set to 0 after the first loop.
1997		 */
1998		space -= hdrlen;
1999
2000		/*
2001		 * Loop and construct maximum sized mbuf chain to be bulk
2002		 * dumped into socket buffer.
2003		 */
2004		while(space > loopbytes) {
2005			vm_pindex_t pindex;
2006			vm_offset_t pgoff;
2007			struct mbuf *m0;
2008
2009			VM_OBJECT_LOCK(obj);
2010			/*
2011			 * Calculate the amount to transfer.
2012			 * Not to exceed a page, the EOF,
2013			 * or the passed in nbytes.
2014			 */
2015			pgoff = (vm_offset_t)(off & PAGE_MASK);
2016			xfsize = omin(PAGE_SIZE - pgoff,
2017			    obj->un_pager.vnp.vnp_size - uap->offset -
2018			    fsbytes - loopbytes);
2019			if (uap->nbytes)
2020				rem = (uap->nbytes - fsbytes - loopbytes);
2021			else
2022				rem = obj->un_pager.vnp.vnp_size -
2023				    uap->offset - fsbytes - loopbytes;
2024			xfsize = omin(rem, xfsize);
2025			if (xfsize <= 0) {
2026				VM_OBJECT_UNLOCK(obj);
2027				done = 1;		/* all data sent */
2028				break;
2029			}
2030			/*
2031			 * Don't overflow the send buffer.
2032			 * Stop here and send out what we've
2033			 * already got.
2034			 */
2035			if (space < loopbytes + xfsize) {
2036				VM_OBJECT_UNLOCK(obj);
2037				break;
2038			}
2039
2040			/*
2041			 * Attempt to look up the page.  Allocate
2042			 * if not found or wait and loop if busy.
2043			 */
2044			pindex = OFF_TO_IDX(off);
2045			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2046			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2047
2048			/*
2049			 * Check if page is valid for what we need,
2050			 * otherwise initiate I/O.
2051			 * If we already turned some pages into mbufs,
2052			 * send them off before we come here again and
2053			 * block.
2054			 */
2055			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2056				VM_OBJECT_UNLOCK(obj);
2057			else if (m != NULL)
2058				error = EAGAIN;	/* send what we already got */
2059			else if (uap->flags & SF_NODISKIO)
2060				error = EBUSY;
2061			else {
2062				int bsize, resid;
2063
2064				/*
2065				 * Ensure that our page is still around
2066				 * when the I/O completes.
2067				 */
2068				vm_page_io_start(pg);
2069				VM_OBJECT_UNLOCK(obj);
2070
2071				/*
2072				 * Get the page from backing store.
2073				 */
2074				bsize = vp->v_mount->mnt_stat.f_iosize;
2075				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2076				vn_lock(vp, LK_SHARED | LK_RETRY);
2077
2078				/*
2079				 * XXXMAC: Because we don't have fp->f_cred
2080				 * here, we pass in NOCRED.  This is probably
2081				 * wrong, but is consistent with our original
2082				 * implementation.
2083				 */
2084				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2085				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2086				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2087				    td->td_ucred, NOCRED, &resid, td);
2088				VOP_UNLOCK(vp, 0);
2089				VFS_UNLOCK_GIANT(vfslocked);
2090				VM_OBJECT_LOCK(obj);
2091				vm_page_io_finish(pg);
2092				if (!error)
2093					VM_OBJECT_UNLOCK(obj);
2094				mbstat.sf_iocnt++;
2095			}
2096			if (error) {
2097				vm_page_lock_queues();
2098				vm_page_unwire(pg, 0);
2099				/*
2100				 * See if anyone else might know about
2101				 * this page.  If not and it is not valid,
2102				 * then free it.
2103				 */
2104				if (pg->wire_count == 0 && pg->valid == 0 &&
2105				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2106				    pg->hold_count == 0) {
2107					vm_page_free(pg);
2108				}
2109				vm_page_unlock_queues();
2110				VM_OBJECT_UNLOCK(obj);
2111				if (error == EAGAIN)
2112					error = 0;	/* not a real error */
2113				break;
2114			}
2115
2116			/*
2117			 * Get a sendfile buf.  We usually wait as long
2118			 * as necessary, but this wait can be interrupted.
2119			 */
2120			if ((sf = sf_buf_alloc(pg,
2121			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2122				mbstat.sf_allocfail++;
2123				vm_page_lock_queues();
2124				vm_page_unwire(pg, 0);
2125				/*
2126				 * XXX: Not same check as above!?
2127				 */
2128				if (pg->wire_count == 0 && pg->object == NULL)
2129					vm_page_free(pg);
2130				vm_page_unlock_queues();
2131				error = (mnw ? EAGAIN : EINTR);
2132				break;
2133			}
2134
2135			/*
2136			 * Get an mbuf and set it up as having
2137			 * external storage.
2138			 */
2139			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2140			if (m0 == NULL) {
2141				error = (mnw ? EAGAIN : ENOBUFS);
2142				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2143				break;
2144			}
2145			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2146			    sfs, sf, M_RDONLY, EXT_SFBUF);
2147			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2148			m0->m_len = xfsize;
2149
2150			/* Append to mbuf chain. */
2151			if (m != NULL)
2152				m_cat(m, m0);
2153			else
2154				m = m0;
2155
2156			/* Keep track of bits processed. */
2157			loopbytes += xfsize;
2158			off += xfsize;
2159
2160			if (sfs != NULL) {
2161				mtx_lock(&sfs->mtx);
2162				sfs->count++;
2163				mtx_unlock(&sfs->mtx);
2164			}
2165		}
2166
2167		/* Add the buffer chain to the socket buffer. */
2168		if (m != NULL) {
2169			int mlen, err;
2170
2171			mlen = m_length(m, NULL);
2172			SOCKBUF_LOCK(&so->so_snd);
2173			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2174				error = EPIPE;
2175				SOCKBUF_UNLOCK(&so->so_snd);
2176				goto done;
2177			}
2178			SOCKBUF_UNLOCK(&so->so_snd);
2179			/* Avoid error aliasing. */
2180			err = (*so->so_proto->pr_usrreqs->pru_send)
2181				    (so, 0, m, NULL, NULL, td);
2182			if (err == 0) {
2183				/*
2184				 * We need two counters to get the
2185				 * file offset and nbytes to send
2186				 * right:
2187				 * - sbytes contains the total amount
2188				 *   of bytes sent, including headers.
2189				 * - fsbytes contains the total amount
2190				 *   of bytes sent from the file.
2191				 */
2192				sbytes += mlen;
2193				fsbytes += mlen;
2194				if (hdrlen) {
2195					fsbytes -= hdrlen;
2196					hdrlen = 0;
2197				}
2198			} else if (error == 0)
2199				error = err;
2200			m = NULL;	/* pru_send always consumes */
2201		}
2202
2203		/* Quit outer loop on error or when we're done. */
2204		if (done)
2205			break;
2206		if (error)
2207			goto done;
2208	}
2209
2210	/*
2211	 * Send trailers. Wimp out and use writev(2).
2212	 */
2213	if (trl_uio != NULL) {
2214		sbunlock(&so->so_snd);
2215		error = kern_writev(td, uap->s, trl_uio);
2216		if (error == 0)
2217			sbytes += td->td_retval[0];
2218		goto out;
2219	}
2220
2221done:
2222	sbunlock(&so->so_snd);
2223out:
2224	/*
2225	 * If there was no error we have to clear td->td_retval[0]
2226	 * because it may have been set by writev.
2227	 */
2228	if (error == 0) {
2229		td->td_retval[0] = 0;
2230	}
2231	if (uap->sbytes != NULL) {
2232		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2233	}
2234	if (obj != NULL)
2235		vm_object_deallocate(obj);
2236	if (vp != NULL) {
2237		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2238		vrele(vp);
2239		VFS_UNLOCK_GIANT(vfslocked);
2240	}
2241	if (so)
2242		fdrop(sock_fp, td);
2243	if (m)
2244		m_freem(m);
2245
2246	if (sfs != NULL) {
2247		mtx_lock(&sfs->mtx);
2248		if (sfs->count != 0)
2249			cv_wait(&sfs->cv, &sfs->mtx);
2250		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2251		cv_destroy(&sfs->cv);
2252		mtx_destroy(&sfs->mtx);
2253		free(sfs, M_TEMP);
2254	}
2255
2256	if (error == ERESTART)
2257		error = EINTR;
2258
2259	return (error);
2260}
2261
2262/*
2263 * SCTP syscalls.
2264 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2265 * otherwise all return EOPNOTSUPP.
2266 * XXX: We should make this loadable one day.
2267 */
2268int
2269sctp_peeloff(td, uap)
2270	struct thread *td;
2271	struct sctp_peeloff_args /* {
2272		int	sd;
2273		caddr_t	name;
2274	} */ *uap;
2275{
2276#ifdef SCTP
2277	struct filedesc *fdp;
2278	struct file *nfp = NULL;
2279	int error;
2280	struct socket *head, *so;
2281	int fd;
2282	u_int fflag;
2283
2284	fdp = td->td_proc->p_fd;
2285	error = fgetsock(td, uap->sd, &head, &fflag);
2286	if (error)
2287		goto done2;
2288	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2289	if (error)
2290		goto done2;
2291	/*
2292	 * At this point we know we do have a assoc to pull
2293	 * we proceed to get the fd setup. This may block
2294	 * but that is ok.
2295	 */
2296
2297	error = falloc(td, &nfp, &fd);
2298	if (error)
2299		goto done;
2300	td->td_retval[0] = fd;
2301
2302	so = sonewconn(head, SS_ISCONNECTED);
2303	if (so == NULL)
2304		goto noconnection;
2305	/*
2306	 * Before changing the flags on the socket, we have to bump the
2307	 * reference count.  Otherwise, if the protocol calls sofree(),
2308	 * the socket will be released due to a zero refcount.
2309	 */
2310        SOCK_LOCK(so);
2311        soref(so);                      /* file descriptor reference */
2312        SOCK_UNLOCK(so);
2313
2314	ACCEPT_LOCK();
2315
2316	TAILQ_REMOVE(&head->so_comp, so, so_list);
2317	head->so_qlen--;
2318	so->so_state |= (head->so_state & SS_NBIO);
2319	so->so_state &= ~SS_NOFDREF;
2320	so->so_qstate &= ~SQ_COMP;
2321	so->so_head = NULL;
2322	ACCEPT_UNLOCK();
2323	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2324	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2325	if (error)
2326		goto noconnection;
2327	if (head->so_sigio != NULL)
2328		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2329
2330noconnection:
2331	/*
2332	 * close the new descriptor, assuming someone hasn't ripped it
2333	 * out from under us.
2334	 */
2335	if (error)
2336		fdclose(fdp, nfp, fd, td);
2337
2338	/*
2339	 * Release explicitly held references before returning.
2340	 */
2341done:
2342	if (nfp != NULL)
2343		fdrop(nfp, td);
2344	fputsock(head);
2345done2:
2346	return (error);
2347#else  /* SCTP */
2348	return (EOPNOTSUPP);
2349#endif /* SCTP */
2350}
2351
2352int
2353sctp_generic_sendmsg (td, uap)
2354	struct thread *td;
2355	struct sctp_generic_sendmsg_args /* {
2356		int sd,
2357		caddr_t msg,
2358		int mlen,
2359		caddr_t to,
2360		__socklen_t tolen,
2361		struct sctp_sndrcvinfo *sinfo,
2362		int flags
2363	} */ *uap;
2364{
2365#ifdef SCTP
2366	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2367	struct socket *so;
2368	struct file *fp = NULL;
2369	int use_rcvinfo = 1;
2370	int error = 0, len;
2371	struct sockaddr *to = NULL;
2372#ifdef KTRACE
2373	struct uio *ktruio = NULL;
2374#endif
2375	struct uio auio;
2376	struct iovec iov[1];
2377
2378	if (uap->sinfo) {
2379		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2380		if (error)
2381			return (error);
2382		u_sinfo = &sinfo;
2383	}
2384	if (uap->tolen) {
2385		error = getsockaddr(&to, uap->to, uap->tolen);
2386		if (error) {
2387			to = NULL;
2388			goto sctp_bad2;
2389		}
2390	}
2391
2392	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2393	if (error)
2394		goto sctp_bad;
2395#ifdef KTRACE
2396	if (KTRPOINT(td, KTR_STRUCT))
2397		ktrsockaddr(to);
2398#endif
2399
2400	iov[0].iov_base = uap->msg;
2401	iov[0].iov_len = uap->mlen;
2402
2403	so = (struct socket *)fp->f_data;
2404#ifdef MAC
2405	SOCK_LOCK(so);
2406	error = mac_socket_check_send(td->td_ucred, so);
2407	SOCK_UNLOCK(so);
2408	if (error)
2409		goto sctp_bad;
2410#endif /* MAC */
2411
2412	auio.uio_iov =  iov;
2413	auio.uio_iovcnt = 1;
2414	auio.uio_segflg = UIO_USERSPACE;
2415	auio.uio_rw = UIO_WRITE;
2416	auio.uio_td = td;
2417	auio.uio_offset = 0;			/* XXX */
2418	auio.uio_resid = 0;
2419	len = auio.uio_resid = uap->mlen;
2420	error = sctp_lower_sosend(so, to, &auio,
2421		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2422		    uap->flags, use_rcvinfo, u_sinfo, td);
2423	if (error) {
2424		if (auio.uio_resid != len && (error == ERESTART ||
2425		    error == EINTR || error == EWOULDBLOCK))
2426			error = 0;
2427		/* Generation of SIGPIPE can be controlled per socket. */
2428		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2429		    !(uap->flags & MSG_NOSIGNAL)) {
2430			PROC_LOCK(td->td_proc);
2431			psignal(td->td_proc, SIGPIPE);
2432			PROC_UNLOCK(td->td_proc);
2433		}
2434	}
2435	if (error == 0)
2436		td->td_retval[0] = len - auio.uio_resid;
2437#ifdef KTRACE
2438	if (ktruio != NULL) {
2439		ktruio->uio_resid = td->td_retval[0];
2440		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2441	}
2442#endif /* KTRACE */
2443sctp_bad:
2444	if (fp)
2445		fdrop(fp, td);
2446sctp_bad2:
2447	if (to)
2448		free(to, M_SONAME);
2449	return (error);
2450#else  /* SCTP */
2451	return (EOPNOTSUPP);
2452#endif /* SCTP */
2453}
2454
2455int
2456sctp_generic_sendmsg_iov(td, uap)
2457	struct thread *td;
2458	struct sctp_generic_sendmsg_iov_args /* {
2459		int sd,
2460		struct iovec *iov,
2461		int iovlen,
2462		caddr_t to,
2463		__socklen_t tolen,
2464		struct sctp_sndrcvinfo *sinfo,
2465		int flags
2466	} */ *uap;
2467{
2468#ifdef SCTP
2469	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2470	struct socket *so;
2471	struct file *fp = NULL;
2472	int use_rcvinfo = 1;
2473	int error=0, len, i;
2474	struct sockaddr *to = NULL;
2475#ifdef KTRACE
2476	struct uio *ktruio = NULL;
2477#endif
2478	struct uio auio;
2479	struct iovec *iov, *tiov;
2480
2481	if (uap->sinfo) {
2482		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2483		if (error)
2484			return (error);
2485		u_sinfo = &sinfo;
2486	}
2487	if (uap->tolen) {
2488		error = getsockaddr(&to, uap->to, uap->tolen);
2489		if (error) {
2490			to = NULL;
2491			goto sctp_bad2;
2492		}
2493	}
2494
2495	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2496	if (error)
2497		goto sctp_bad1;
2498
2499	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2500	if (error)
2501		goto sctp_bad1;
2502#ifdef KTRACE
2503	if (KTRPOINT(td, KTR_STRUCT))
2504		ktrsockaddr(to);
2505#endif
2506
2507	so = (struct socket *)fp->f_data;
2508#ifdef MAC
2509	SOCK_LOCK(so);
2510	error = mac_socket_check_send(td->td_ucred, so);
2511	SOCK_UNLOCK(so);
2512	if (error)
2513		goto sctp_bad;
2514#endif /* MAC */
2515
2516	auio.uio_iov =  iov;
2517	auio.uio_iovcnt = uap->iovlen;
2518	auio.uio_segflg = UIO_USERSPACE;
2519	auio.uio_rw = UIO_WRITE;
2520	auio.uio_td = td;
2521	auio.uio_offset = 0;			/* XXX */
2522	auio.uio_resid = 0;
2523	tiov = iov;
2524	for (i = 0; i <uap->iovlen; i++, tiov++) {
2525		if ((auio.uio_resid += tiov->iov_len) < 0) {
2526			error = EINVAL;
2527			goto sctp_bad;
2528		}
2529	}
2530	len = auio.uio_resid;
2531	error = sctp_lower_sosend(so, to, &auio,
2532		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2533		    uap->flags, use_rcvinfo, u_sinfo, td);
2534	if (error) {
2535		if (auio.uio_resid != len && (error == ERESTART ||
2536		    error == EINTR || error == EWOULDBLOCK))
2537			error = 0;
2538		/* Generation of SIGPIPE can be controlled per socket */
2539		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2540		    !(uap->flags & MSG_NOSIGNAL)) {
2541			PROC_LOCK(td->td_proc);
2542			psignal(td->td_proc, SIGPIPE);
2543			PROC_UNLOCK(td->td_proc);
2544		}
2545	}
2546	if (error == 0)
2547		td->td_retval[0] = len - auio.uio_resid;
2548#ifdef KTRACE
2549	if (ktruio != NULL) {
2550		ktruio->uio_resid = td->td_retval[0];
2551		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2552	}
2553#endif /* KTRACE */
2554sctp_bad:
2555	free(iov, M_IOV);
2556sctp_bad1:
2557	if (fp)
2558		fdrop(fp, td);
2559sctp_bad2:
2560	if (to)
2561		free(to, M_SONAME);
2562	return (error);
2563#else  /* SCTP */
2564	return (EOPNOTSUPP);
2565#endif /* SCTP */
2566}
2567
2568int
2569sctp_generic_recvmsg(td, uap)
2570	struct thread *td;
2571	struct sctp_generic_recvmsg_args /* {
2572		int sd,
2573		struct iovec *iov,
2574		int iovlen,
2575		struct sockaddr *from,
2576		__socklen_t *fromlenaddr,
2577		struct sctp_sndrcvinfo *sinfo,
2578		int *msg_flags
2579	} */ *uap;
2580{
2581#ifdef SCTP
2582	u_int8_t sockbufstore[256];
2583	struct uio auio;
2584	struct iovec *iov, *tiov;
2585	struct sctp_sndrcvinfo sinfo;
2586	struct socket *so;
2587	struct file *fp = NULL;
2588	struct sockaddr *fromsa;
2589	int fromlen;
2590	int len, i, msg_flags;
2591	int error = 0;
2592#ifdef KTRACE
2593	struct uio *ktruio = NULL;
2594#endif
2595	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2596	if (error) {
2597		return (error);
2598	}
2599	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2600	if (error) {
2601		goto out1;
2602	}
2603
2604	so = fp->f_data;
2605#ifdef MAC
2606	SOCK_LOCK(so);
2607	error = mac_socket_check_receive(td->td_ucred, so);
2608	SOCK_UNLOCK(so);
2609	if (error) {
2610		goto out;
2611		return (error);
2612	}
2613#endif /* MAC */
2614
2615	if (uap->fromlenaddr) {
2616		error = copyin(uap->fromlenaddr,
2617		    &fromlen, sizeof (fromlen));
2618		if (error) {
2619			goto out;
2620		}
2621	} else {
2622		fromlen = 0;
2623	}
2624	if(uap->msg_flags) {
2625		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2626		if (error) {
2627			goto out;
2628		}
2629	} else {
2630		msg_flags = 0;
2631	}
2632	auio.uio_iov = iov;
2633	auio.uio_iovcnt = uap->iovlen;
2634  	auio.uio_segflg = UIO_USERSPACE;
2635	auio.uio_rw = UIO_READ;
2636	auio.uio_td = td;
2637	auio.uio_offset = 0;			/* XXX */
2638	auio.uio_resid = 0;
2639	tiov = iov;
2640	for (i = 0; i <uap->iovlen; i++, tiov++) {
2641		if ((auio.uio_resid += tiov->iov_len) < 0) {
2642			error = EINVAL;
2643			goto out;
2644		}
2645	}
2646	len = auio.uio_resid;
2647	fromsa = (struct sockaddr *)sockbufstore;
2648
2649#ifdef KTRACE
2650	if (KTRPOINT(td, KTR_GENIO))
2651		ktruio = cloneuio(&auio);
2652#endif /* KTRACE */
2653	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2654		    fromsa, fromlen, &msg_flags,
2655		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2656	if (error) {
2657		if (auio.uio_resid != (int)len && (error == ERESTART ||
2658		    error == EINTR || error == EWOULDBLOCK))
2659			error = 0;
2660	} else {
2661		if (uap->sinfo)
2662			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2663	}
2664#ifdef KTRACE
2665	if (ktruio != NULL) {
2666		ktruio->uio_resid = (int)len - auio.uio_resid;
2667		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2668	}
2669#endif /* KTRACE */
2670	if (error)
2671		goto out;
2672	td->td_retval[0] = (int)len - auio.uio_resid;
2673
2674	if (fromlen && uap->from) {
2675		len = fromlen;
2676		if (len <= 0 || fromsa == 0)
2677			len = 0;
2678		else {
2679			len = MIN(len, fromsa->sa_len);
2680			error = copyout(fromsa, uap->from, (unsigned)len);
2681			if (error)
2682				goto out;
2683		}
2684		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2685		if (error) {
2686			goto out;
2687		}
2688	}
2689#ifdef KTRACE
2690	if (KTRPOINT(td, KTR_STRUCT))
2691		ktrsockaddr(fromsa);
2692#endif
2693	if (uap->msg_flags) {
2694		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2695		if (error) {
2696			goto out;
2697		}
2698	}
2699out:
2700	free(iov, M_IOV);
2701out1:
2702	if (fp)
2703		fdrop(fp, td);
2704
2705	return (error);
2706#else  /* SCTP */
2707	return (EOPNOTSUPP);
2708#endif /* SCTP */
2709}
2710