kern_sendfile.c revision 177599
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 177599 2008-03-25 09:39:02Z ru $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
/* Forward declarations for the static helpers used by this file. */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap,
    int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
    int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
    int compat);
static int recvit(struct thread *td, int s, struct msghdr *mp,
    void *namelenp);
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
94
95/*
96 * NSFBUFS-related variables and associated sysctls
97 */
98int nsfbufs;
99int nsfbufspeak;
100int nsfbufsused;
101
102SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
103    "Maximum number of sendfile(2) sf_bufs available");
104SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
105    "Number of sendfile(2) sf_bufs at peak usage");
106SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
107    "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry.  A reference on the
111 * file entry is held upon returning.  This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count.  If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120	struct file *fp;
121	int error;
122
123	fp = NULL;
124	if (fdp == NULL)
125		error = EBADF;
126	else {
127		FILEDESC_SLOCK(fdp);
128		fp = fget_locked(fdp, fd);
129		if (fp == NULL)
130			error = EBADF;
131		else if (fp->f_type != DTYPE_SOCKET) {
132			fp = NULL;
133			error = ENOTSOCK;
134		} else {
135			fhold(fp);
136			if (fflagp != NULL)
137				*fflagp = fp->f_flag;
138			error = 0;
139		}
140		FILEDESC_SUNLOCK(fdp);
141	}
142	*fpp = fp;
143	return (error);
144}
145
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK, which gates the old-style entry points and sockaddr
 * handling in this file, is enabled whenever COMPAT_43 is configured.
 */
#ifdef COMPAT_43
#define COMPAT_OLDSOCK
#endif
152
153int
154socket(td, uap)
155	struct thread *td;
156	struct socket_args /* {
157		int	domain;
158		int	type;
159		int	protocol;
160	} */ *uap;
161{
162	struct filedesc *fdp;
163	struct socket *so;
164	struct file *fp;
165	int fd, error;
166
167#ifdef MAC
168	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
169	    uap->protocol);
170	if (error)
171		return (error);
172#endif
173	fdp = td->td_proc->p_fd;
174	error = falloc(td, &fp, &fd);
175	if (error)
176		return (error);
177	/* An extra reference on `fp' has been held for us by falloc(). */
178	error = socreate(uap->domain, &so, uap->type, uap->protocol,
179	    td->td_ucred, td);
180	if (error) {
181		fdclose(fdp, fp, fd, td);
182	} else {
183		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
184		td->td_retval[0] = fd;
185	}
186	fdrop(fp, td);
187	return (error);
188}
189
190/* ARGSUSED */
191int
192bind(td, uap)
193	struct thread *td;
194	struct bind_args /* {
195		int	s;
196		caddr_t	name;
197		int	namelen;
198	} */ *uap;
199{
200	struct sockaddr *sa;
201	int error;
202
203	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
204		return (error);
205
206	error = kern_bind(td, uap->s, sa);
207	free(sa, M_SONAME);
208	return (error);
209}
210
211int
212kern_bind(td, fd, sa)
213	struct thread *td;
214	int fd;
215	struct sockaddr *sa;
216{
217	struct socket *so;
218	struct file *fp;
219	int error;
220
221	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
222	if (error)
223		return (error);
224	so = fp->f_data;
225#ifdef KTRACE
226	if (KTRPOINT(td, KTR_STRUCT))
227		ktrsockaddr(sa);
228#endif
229#ifdef MAC
230	SOCK_LOCK(so);
231	error = mac_socket_check_bind(td->td_ucred, so, sa);
232	SOCK_UNLOCK(so);
233	if (error)
234		goto done;
235#endif
236	error = sobind(so, sa, td);
237#ifdef MAC
238done:
239#endif
240	fdrop(fp, td);
241	return (error);
242}
243
244/* ARGSUSED */
245int
246listen(td, uap)
247	struct thread *td;
248	struct listen_args /* {
249		int	s;
250		int	backlog;
251	} */ *uap;
252{
253	struct socket *so;
254	struct file *fp;
255	int error;
256
257	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
258	if (error == 0) {
259		so = fp->f_data;
260#ifdef MAC
261		SOCK_LOCK(so);
262		error = mac_socket_check_listen(td->td_ucred, so);
263		SOCK_UNLOCK(so);
264		if (error)
265			goto done;
266#endif
267		error = solisten(so, uap->backlog, td);
268#ifdef MAC
269done:
270#endif
271		fdrop(fp, td);
272	}
273	return(error);
274}
275
276/*
277 * accept1()
278 */
279static int
280accept1(td, uap, compat)
281	struct thread *td;
282	struct accept_args /* {
283		int	s;
284		struct sockaddr	* __restrict name;
285		socklen_t	* __restrict anamelen;
286	} */ *uap;
287	int compat;
288{
289	struct sockaddr *name;
290	socklen_t namelen;
291	struct file *fp;
292	int error;
293
294	if (uap->name == NULL)
295		return (kern_accept(td, uap->s, NULL, NULL, NULL));
296
297	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
298	if (error)
299		return (error);
300
301	error = kern_accept(td, uap->s, &name, &namelen, &fp);
302
303	/*
304	 * return a namelen of zero for older code which might
305	 * ignore the return value from accept.
306	 */
307	if (error) {
308		(void) copyout(&namelen,
309		    uap->anamelen, sizeof(*uap->anamelen));
310		return (error);
311	}
312
313	if (error == 0 && name != NULL) {
314#ifdef COMPAT_OLDSOCK
315		if (compat)
316			((struct osockaddr *)name)->sa_family =
317			    name->sa_family;
318#endif
319		error = copyout(name, uap->name, namelen);
320	}
321	if (error == 0)
322		error = copyout(&namelen, uap->anamelen,
323		    sizeof(namelen));
324	if (error)
325		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
326	fdrop(fp, td);
327	free(name, M_SONAME);
328	return (error);
329}
330
331int
332kern_accept(struct thread *td, int s, struct sockaddr **name,
333    socklen_t *namelen, struct file **fp)
334{
335	struct filedesc *fdp;
336	struct file *headfp, *nfp = NULL;
337	struct sockaddr *sa = NULL;
338	int error;
339	struct socket *head, *so;
340	int fd;
341	u_int fflag;
342	pid_t pgid;
343	int tmp;
344
345	if (name) {
346		*name = NULL;
347		if (*namelen < 0)
348			return (EINVAL);
349	}
350
351	fdp = td->td_proc->p_fd;
352	error = getsock(fdp, s, &headfp, &fflag);
353	if (error)
354		return (error);
355	head = headfp->f_data;
356	if ((head->so_options & SO_ACCEPTCONN) == 0) {
357		error = EINVAL;
358		goto done;
359	}
360#ifdef MAC
361	SOCK_LOCK(head);
362	error = mac_socket_check_accept(td->td_ucred, head);
363	SOCK_UNLOCK(head);
364	if (error != 0)
365		goto done;
366#endif
367	error = falloc(td, &nfp, &fd);
368	if (error)
369		goto done;
370	ACCEPT_LOCK();
371	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
372		ACCEPT_UNLOCK();
373		error = EWOULDBLOCK;
374		goto noconnection;
375	}
376	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
377		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
378			head->so_error = ECONNABORTED;
379			break;
380		}
381		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
382		    "accept", 0);
383		if (error) {
384			ACCEPT_UNLOCK();
385			goto noconnection;
386		}
387	}
388	if (head->so_error) {
389		error = head->so_error;
390		head->so_error = 0;
391		ACCEPT_UNLOCK();
392		goto noconnection;
393	}
394	so = TAILQ_FIRST(&head->so_comp);
395	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
396	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
397
398	/*
399	 * Before changing the flags on the socket, we have to bump the
400	 * reference count.  Otherwise, if the protocol calls sofree(),
401	 * the socket will be released due to a zero refcount.
402	 */
403	SOCK_LOCK(so);			/* soref() and so_state update */
404	soref(so);			/* file descriptor reference */
405
406	TAILQ_REMOVE(&head->so_comp, so, so_list);
407	head->so_qlen--;
408	so->so_state |= (head->so_state & SS_NBIO);
409	so->so_qstate &= ~SQ_COMP;
410	so->so_head = NULL;
411
412	SOCK_UNLOCK(so);
413	ACCEPT_UNLOCK();
414
415	/* An extra reference on `nfp' has been held for us by falloc(). */
416	td->td_retval[0] = fd;
417
418	/* connection has been removed from the listen queue */
419	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
420
421	pgid = fgetown(&head->so_sigio);
422	if (pgid != 0)
423		fsetown(pgid, &so->so_sigio);
424
425	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
426	/* Sync socket nonblocking/async state with file flags */
427	tmp = fflag & FNONBLOCK;
428	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
429	tmp = fflag & FASYNC;
430	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
431	sa = 0;
432	error = soaccept(so, &sa);
433	if (error) {
434		/*
435		 * return a namelen of zero for older code which might
436		 * ignore the return value from accept.
437		 */
438		if (name)
439			*namelen = 0;
440		goto noconnection;
441	}
442	if (sa == NULL) {
443		if (name)
444			*namelen = 0;
445		goto done;
446	}
447	if (name) {
448		/* check sa_len before it is destroyed */
449		if (*namelen > sa->sa_len)
450			*namelen = sa->sa_len;
451#ifdef KTRACE
452		if (KTRPOINT(td, KTR_STRUCT))
453			ktrsockaddr(sa);
454#endif
455		*name = sa;
456		sa = NULL;
457	}
458noconnection:
459	if (sa)
460		FREE(sa, M_SONAME);
461
462	/*
463	 * close the new descriptor, assuming someone hasn't ripped it
464	 * out from under us.
465	 */
466	if (error)
467		fdclose(fdp, nfp, fd, td);
468
469	/*
470	 * Release explicitly held references before returning.  We return
471	 * a reference on nfp to the caller on success if they request it.
472	 */
473done:
474	if (fp != NULL) {
475		if (error == 0) {
476			*fp = nfp;
477			nfp = NULL;
478		} else
479			*fp = NULL;
480	}
481	if (nfp != NULL)
482		fdrop(nfp, td);
483	fdrop(headfp, td);
484	return (error);
485}
486
/*
 * accept(2): accept1() with compat == 0.
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int rv;

	rv = accept1(td, uap, 0);
	return (rv);
}
495
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: accept1() with compat == 1, which makes it rewrite
 * the returned address to the osockaddr layout.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int rv;

	rv = accept1(td, uap, 1);
	return (rv);
}
#endif /* COMPAT_OLDSOCK */
506
507/* ARGSUSED */
508int
509connect(td, uap)
510	struct thread *td;
511	struct connect_args /* {
512		int	s;
513		caddr_t	name;
514		int	namelen;
515	} */ *uap;
516{
517	struct sockaddr *sa;
518	int error;
519
520	error = getsockaddr(&sa, uap->name, uap->namelen);
521	if (error)
522		return (error);
523
524	error = kern_connect(td, uap->s, sa);
525	free(sa, M_SONAME);
526	return (error);
527}
528
529
530int
531kern_connect(td, fd, sa)
532	struct thread *td;
533	int fd;
534	struct sockaddr *sa;
535{
536	struct socket *so;
537	struct file *fp;
538	int error;
539	int interrupted = 0;
540
541	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
542	if (error)
543		return (error);
544	so = fp->f_data;
545	if (so->so_state & SS_ISCONNECTING) {
546		error = EALREADY;
547		goto done1;
548	}
549#ifdef KTRACE
550	if (KTRPOINT(td, KTR_STRUCT))
551		ktrsockaddr(sa);
552#endif
553#ifdef MAC
554	SOCK_LOCK(so);
555	error = mac_socket_check_connect(td->td_ucred, so, sa);
556	SOCK_UNLOCK(so);
557	if (error)
558		goto bad;
559#endif
560	error = soconnect(so, sa, td);
561	if (error)
562		goto bad;
563	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
564		error = EINPROGRESS;
565		goto done1;
566	}
567	SOCK_LOCK(so);
568	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
569		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
570		    "connec", 0);
571		if (error) {
572			if (error == EINTR || error == ERESTART)
573				interrupted = 1;
574			break;
575		}
576	}
577	if (error == 0) {
578		error = so->so_error;
579		so->so_error = 0;
580	}
581	SOCK_UNLOCK(so);
582bad:
583	if (!interrupted)
584		so->so_state &= ~SS_ISCONNECTING;
585	if (error == ERESTART)
586		error = EINTR;
587done1:
588	fdrop(fp, td);
589	return (error);
590}
591
592int
593socketpair(td, uap)
594	struct thread *td;
595	struct socketpair_args /* {
596		int	domain;
597		int	type;
598		int	protocol;
599		int	*rsv;
600	} */ *uap;
601{
602	struct filedesc *fdp = td->td_proc->p_fd;
603	struct file *fp1, *fp2;
604	struct socket *so1, *so2;
605	int fd, error, sv[2];
606
607#ifdef MAC
608	/* We might want to have a separate check for socket pairs. */
609	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
610	    uap->protocol);
611	if (error)
612		return (error);
613#endif
614
615	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
616	    td->td_ucred, td);
617	if (error)
618		return (error);
619	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
620	    td->td_ucred, td);
621	if (error)
622		goto free1;
623	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
624	error = falloc(td, &fp1, &fd);
625	if (error)
626		goto free2;
627	sv[0] = fd;
628	fp1->f_data = so1;	/* so1 already has ref count */
629	error = falloc(td, &fp2, &fd);
630	if (error)
631		goto free3;
632	fp2->f_data = so2;	/* so2 already has ref count */
633	sv[1] = fd;
634	error = soconnect2(so1, so2);
635	if (error)
636		goto free4;
637	if (uap->type == SOCK_DGRAM) {
638		/*
639		 * Datagram socket connection is asymmetric.
640		 */
641		 error = soconnect2(so2, so1);
642		 if (error)
643			goto free4;
644	}
645	finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
646	finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
647	so1 = so2 = NULL;
648	error = copyout(sv, uap->rsv, 2 * sizeof (int));
649	if (error)
650		goto free4;
651	fdrop(fp1, td);
652	fdrop(fp2, td);
653	return (0);
654free4:
655	fdclose(fdp, fp2, sv[1], td);
656	fdrop(fp2, td);
657free3:
658	fdclose(fdp, fp1, sv[0], td);
659	fdrop(fp1, td);
660free2:
661	if (so2 != NULL)
662		(void)soclose(so2);
663free1:
664	if (so1 != NULL)
665		(void)soclose(so1);
666	return (error);
667}
668
669static int
670sendit(td, s, mp, flags)
671	struct thread *td;
672	int s;
673	struct msghdr *mp;
674	int flags;
675{
676	struct mbuf *control;
677	struct sockaddr *to;
678	int error;
679
680	if (mp->msg_name != NULL) {
681		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
682		if (error) {
683			to = NULL;
684			goto bad;
685		}
686		mp->msg_name = to;
687	} else {
688		to = NULL;
689	}
690
691	if (mp->msg_control) {
692		if (mp->msg_controllen < sizeof(struct cmsghdr)
693#ifdef COMPAT_OLDSOCK
694		    && mp->msg_flags != MSG_COMPAT
695#endif
696		) {
697			error = EINVAL;
698			goto bad;
699		}
700		error = sockargs(&control, mp->msg_control,
701		    mp->msg_controllen, MT_CONTROL);
702		if (error)
703			goto bad;
704#ifdef COMPAT_OLDSOCK
705		if (mp->msg_flags == MSG_COMPAT) {
706			struct cmsghdr *cm;
707
708			M_PREPEND(control, sizeof(*cm), M_WAIT);
709			cm = mtod(control, struct cmsghdr *);
710			cm->cmsg_len = control->m_len;
711			cm->cmsg_level = SOL_SOCKET;
712			cm->cmsg_type = SCM_RIGHTS;
713		}
714#endif
715	} else {
716		control = NULL;
717	}
718
719	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
720
721bad:
722	if (to)
723		FREE(to, M_SONAME);
724	return (error);
725}
726
727int
728kern_sendit(td, s, mp, flags, control, segflg)
729	struct thread *td;
730	int s;
731	struct msghdr *mp;
732	int flags;
733	struct mbuf *control;
734	enum uio_seg segflg;
735{
736	struct file *fp;
737	struct uio auio;
738	struct iovec *iov;
739	struct socket *so;
740	int i;
741	int len, error;
742#ifdef KTRACE
743	struct uio *ktruio = NULL;
744#endif
745
746	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
747	if (error)
748		return (error);
749	so = (struct socket *)fp->f_data;
750
751#ifdef MAC
752	SOCK_LOCK(so);
753	error = mac_socket_check_send(td->td_ucred, so);
754	SOCK_UNLOCK(so);
755	if (error)
756		goto bad;
757#endif
758
759	auio.uio_iov = mp->msg_iov;
760	auio.uio_iovcnt = mp->msg_iovlen;
761	auio.uio_segflg = segflg;
762	auio.uio_rw = UIO_WRITE;
763	auio.uio_td = td;
764	auio.uio_offset = 0;			/* XXX */
765	auio.uio_resid = 0;
766	iov = mp->msg_iov;
767	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
768		if ((auio.uio_resid += iov->iov_len) < 0) {
769			error = EINVAL;
770			goto bad;
771		}
772	}
773#ifdef KTRACE
774	if (KTRPOINT(td, KTR_GENIO))
775		ktruio = cloneuio(&auio);
776#endif
777	len = auio.uio_resid;
778	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
779	if (error) {
780		if (auio.uio_resid != len && (error == ERESTART ||
781		    error == EINTR || error == EWOULDBLOCK))
782			error = 0;
783		/* Generation of SIGPIPE can be controlled per socket */
784		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
785		    !(flags & MSG_NOSIGNAL)) {
786			PROC_LOCK(td->td_proc);
787			psignal(td->td_proc, SIGPIPE);
788			PROC_UNLOCK(td->td_proc);
789		}
790	}
791	if (error == 0)
792		td->td_retval[0] = len - auio.uio_resid;
793#ifdef KTRACE
794	if (ktruio != NULL) {
795		ktruio->uio_resid = td->td_retval[0];
796		ktrgenio(s, UIO_WRITE, ktruio, error);
797	}
798#endif
799bad:
800	fdrop(fp, td);
801	return (error);
802}
803
804int
805sendto(td, uap)
806	struct thread *td;
807	struct sendto_args /* {
808		int	s;
809		caddr_t	buf;
810		size_t	len;
811		int	flags;
812		caddr_t	to;
813		int	tolen;
814	} */ *uap;
815{
816	struct msghdr msg;
817	struct iovec aiov;
818	int error;
819
820	msg.msg_name = uap->to;
821	msg.msg_namelen = uap->tolen;
822	msg.msg_iov = &aiov;
823	msg.msg_iovlen = 1;
824	msg.msg_control = 0;
825#ifdef COMPAT_OLDSOCK
826	msg.msg_flags = 0;
827#endif
828	aiov.iov_base = uap->buf;
829	aiov.iov_len = uap->len;
830	error = sendit(td, uap->s, &msg, uap->flags);
831	return (error);
832}
833
834#ifdef COMPAT_OLDSOCK
835int
836osend(td, uap)
837	struct thread *td;
838	struct osend_args /* {
839		int	s;
840		caddr_t	buf;
841		int	len;
842		int	flags;
843	} */ *uap;
844{
845	struct msghdr msg;
846	struct iovec aiov;
847	int error;
848
849	msg.msg_name = 0;
850	msg.msg_namelen = 0;
851	msg.msg_iov = &aiov;
852	msg.msg_iovlen = 1;
853	aiov.iov_base = uap->buf;
854	aiov.iov_len = uap->len;
855	msg.msg_control = 0;
856	msg.msg_flags = 0;
857	error = sendit(td, uap->s, &msg, uap->flags);
858	return (error);
859}
860
861int
862osendmsg(td, uap)
863	struct thread *td;
864	struct osendmsg_args /* {
865		int	s;
866		caddr_t	msg;
867		int	flags;
868	} */ *uap;
869{
870	struct msghdr msg;
871	struct iovec *iov;
872	int error;
873
874	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
875	if (error)
876		return (error);
877	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
878	if (error)
879		return (error);
880	msg.msg_iov = iov;
881	msg.msg_flags = MSG_COMPAT;
882	error = sendit(td, uap->s, &msg, uap->flags);
883	free(iov, M_IOV);
884	return (error);
885}
886#endif
887
888int
889sendmsg(td, uap)
890	struct thread *td;
891	struct sendmsg_args /* {
892		int	s;
893		caddr_t	msg;
894		int	flags;
895	} */ *uap;
896{
897	struct msghdr msg;
898	struct iovec *iov;
899	int error;
900
901	error = copyin(uap->msg, &msg, sizeof (msg));
902	if (error)
903		return (error);
904	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
905	if (error)
906		return (error);
907	msg.msg_iov = iov;
908#ifdef COMPAT_OLDSOCK
909	msg.msg_flags = 0;
910#endif
911	error = sendit(td, uap->s, &msg, uap->flags);
912	free(iov, M_IOV);
913	return (error);
914}
915
916int
917kern_recvit(td, s, mp, fromseg, controlp)
918	struct thread *td;
919	int s;
920	struct msghdr *mp;
921	enum uio_seg fromseg;
922	struct mbuf **controlp;
923{
924	struct uio auio;
925	struct iovec *iov;
926	int i;
927	socklen_t len;
928	int error;
929	struct mbuf *m, *control = 0;
930	caddr_t ctlbuf;
931	struct file *fp;
932	struct socket *so;
933	struct sockaddr *fromsa = 0;
934#ifdef KTRACE
935	struct uio *ktruio = NULL;
936#endif
937
938	if(controlp != NULL)
939		*controlp = 0;
940
941	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
942	if (error)
943		return (error);
944	so = fp->f_data;
945
946#ifdef MAC
947	SOCK_LOCK(so);
948	error = mac_socket_check_receive(td->td_ucred, so);
949	SOCK_UNLOCK(so);
950	if (error) {
951		fdrop(fp, td);
952		return (error);
953	}
954#endif
955
956	auio.uio_iov = mp->msg_iov;
957	auio.uio_iovcnt = mp->msg_iovlen;
958	auio.uio_segflg = UIO_USERSPACE;
959	auio.uio_rw = UIO_READ;
960	auio.uio_td = td;
961	auio.uio_offset = 0;			/* XXX */
962	auio.uio_resid = 0;
963	iov = mp->msg_iov;
964	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
965		if ((auio.uio_resid += iov->iov_len) < 0) {
966			fdrop(fp, td);
967			return (EINVAL);
968		}
969	}
970#ifdef KTRACE
971	if (KTRPOINT(td, KTR_GENIO))
972		ktruio = cloneuio(&auio);
973#endif
974	len = auio.uio_resid;
975	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
976	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
977	    &mp->msg_flags);
978	if (error) {
979		if (auio.uio_resid != (int)len && (error == ERESTART ||
980		    error == EINTR || error == EWOULDBLOCK))
981			error = 0;
982	}
983#ifdef KTRACE
984	if (ktruio != NULL) {
985		ktruio->uio_resid = (int)len - auio.uio_resid;
986		ktrgenio(s, UIO_READ, ktruio, error);
987	}
988#endif
989	if (error)
990		goto out;
991	td->td_retval[0] = (int)len - auio.uio_resid;
992	if (mp->msg_name) {
993		len = mp->msg_namelen;
994		if (len <= 0 || fromsa == 0)
995			len = 0;
996		else {
997			/* save sa_len before it is destroyed by MSG_COMPAT */
998			len = MIN(len, fromsa->sa_len);
999#ifdef COMPAT_OLDSOCK
1000			if (mp->msg_flags & MSG_COMPAT)
1001				((struct osockaddr *)fromsa)->sa_family =
1002				    fromsa->sa_family;
1003#endif
1004			if (fromseg == UIO_USERSPACE) {
1005				error = copyout(fromsa, mp->msg_name,
1006				    (unsigned)len);
1007				if (error)
1008					goto out;
1009			} else
1010				bcopy(fromsa, mp->msg_name, len);
1011		}
1012		mp->msg_namelen = len;
1013	}
1014	if (mp->msg_control && controlp == NULL) {
1015#ifdef COMPAT_OLDSOCK
1016		/*
1017		 * We assume that old recvmsg calls won't receive access
1018		 * rights and other control info, esp. as control info
1019		 * is always optional and those options didn't exist in 4.3.
1020		 * If we receive rights, trim the cmsghdr; anything else
1021		 * is tossed.
1022		 */
1023		if (control && mp->msg_flags & MSG_COMPAT) {
1024			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1025			    SOL_SOCKET ||
1026			    mtod(control, struct cmsghdr *)->cmsg_type !=
1027			    SCM_RIGHTS) {
1028				mp->msg_controllen = 0;
1029				goto out;
1030			}
1031			control->m_len -= sizeof (struct cmsghdr);
1032			control->m_data += sizeof (struct cmsghdr);
1033		}
1034#endif
1035		len = mp->msg_controllen;
1036		m = control;
1037		mp->msg_controllen = 0;
1038		ctlbuf = mp->msg_control;
1039
1040		while (m && len > 0) {
1041			unsigned int tocopy;
1042
1043			if (len >= m->m_len)
1044				tocopy = m->m_len;
1045			else {
1046				mp->msg_flags |= MSG_CTRUNC;
1047				tocopy = len;
1048			}
1049
1050			if ((error = copyout(mtod(m, caddr_t),
1051					ctlbuf, tocopy)) != 0)
1052				goto out;
1053
1054			ctlbuf += tocopy;
1055			len -= tocopy;
1056			m = m->m_next;
1057		}
1058		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1059	}
1060out:
1061	fdrop(fp, td);
1062#ifdef KTRACE
1063	if (fromsa && KTRPOINT(td, KTR_STRUCT))
1064		ktrsockaddr(fromsa);
1065#endif
1066	if (fromsa)
1067		FREE(fromsa, M_SONAME);
1068
1069	if (error == 0 && controlp != NULL)
1070		*controlp = control;
1071	else  if (control)
1072		m_freem(control);
1073
1074	return (error);
1075}
1076
1077static int
1078recvit(td, s, mp, namelenp)
1079	struct thread *td;
1080	int s;
1081	struct msghdr *mp;
1082	void *namelenp;
1083{
1084	int error;
1085
1086	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1087	if (error)
1088		return (error);
1089	if (namelenp) {
1090		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1091#ifdef COMPAT_OLDSOCK
1092		if (mp->msg_flags & MSG_COMPAT)
1093			error = 0;	/* old recvfrom didn't check */
1094#endif
1095	}
1096	return (error);
1097}
1098
1099int
1100recvfrom(td, uap)
1101	struct thread *td;
1102	struct recvfrom_args /* {
1103		int	s;
1104		caddr_t	buf;
1105		size_t	len;
1106		int	flags;
1107		struct sockaddr * __restrict	from;
1108		socklen_t * __restrict fromlenaddr;
1109	} */ *uap;
1110{
1111	struct msghdr msg;
1112	struct iovec aiov;
1113	int error;
1114
1115	if (uap->fromlenaddr) {
1116		error = copyin(uap->fromlenaddr,
1117		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1118		if (error)
1119			goto done2;
1120	} else {
1121		msg.msg_namelen = 0;
1122	}
1123	msg.msg_name = uap->from;
1124	msg.msg_iov = &aiov;
1125	msg.msg_iovlen = 1;
1126	aiov.iov_base = uap->buf;
1127	aiov.iov_len = uap->len;
1128	msg.msg_control = 0;
1129	msg.msg_flags = uap->flags;
1130	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1131done2:
1132	return(error);
1133}
1134
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom(): recvfrom() with MSG_COMPAT added to the caller's flags.
 * Note that uap->flags is modified in place.
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{
	int rv;

	uap->flags |= MSG_COMPAT;
	rv = recvfrom(td, uap);
	return (rv);
}
#endif
1146
1147#ifdef COMPAT_OLDSOCK
1148int
1149orecv(td, uap)
1150	struct thread *td;
1151	struct orecv_args /* {
1152		int	s;
1153		caddr_t	buf;
1154		int	len;
1155		int	flags;
1156	} */ *uap;
1157{
1158	struct msghdr msg;
1159	struct iovec aiov;
1160	int error;
1161
1162	msg.msg_name = 0;
1163	msg.msg_namelen = 0;
1164	msg.msg_iov = &aiov;
1165	msg.msg_iovlen = 1;
1166	aiov.iov_base = uap->buf;
1167	aiov.iov_len = uap->len;
1168	msg.msg_control = 0;
1169	msg.msg_flags = uap->flags;
1170	error = recvit(td, uap->s, &msg, NULL);
1171	return (error);
1172}
1173
1174/*
1175 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1176 * overlays the new one, missing only the flags, and with the (old) access
1177 * rights where the control fields are now.
1178 */
1179int
1180orecvmsg(td, uap)
1181	struct thread *td;
1182	struct orecvmsg_args /* {
1183		int	s;
1184		struct	omsghdr *msg;
1185		int	flags;
1186	} */ *uap;
1187{
1188	struct msghdr msg;
1189	struct iovec *iov;
1190	int error;
1191
1192	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1193	if (error)
1194		return (error);
1195	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1196	if (error)
1197		return (error);
1198	msg.msg_flags = uap->flags | MSG_COMPAT;
1199	msg.msg_iov = iov;
1200	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1201	if (msg.msg_controllen && error == 0)
1202		error = copyout(&msg.msg_controllen,
1203		    &uap->msg->msg_accrightslen, sizeof (int));
1204	free(iov, M_IOV);
1205	return (error);
1206}
1207#endif
1208
1209int
1210recvmsg(td, uap)
1211	struct thread *td;
1212	struct recvmsg_args /* {
1213		int	s;
1214		struct	msghdr *msg;
1215		int	flags;
1216	} */ *uap;
1217{
1218	struct msghdr msg;
1219	struct iovec *uiov, *iov;
1220	int error;
1221
1222	error = copyin(uap->msg, &msg, sizeof (msg));
1223	if (error)
1224		return (error);
1225	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1226	if (error)
1227		return (error);
1228	msg.msg_flags = uap->flags;
1229#ifdef COMPAT_OLDSOCK
1230	msg.msg_flags &= ~MSG_COMPAT;
1231#endif
1232	uiov = msg.msg_iov;
1233	msg.msg_iov = iov;
1234	error = recvit(td, uap->s, &msg, NULL);
1235	if (error == 0) {
1236		msg.msg_iov = uiov;
1237		error = copyout(&msg, uap->msg, sizeof(msg));
1238	}
1239	free(iov, M_IOV);
1240	return (error);
1241}
1242
1243/* ARGSUSED */
1244int
1245shutdown(td, uap)
1246	struct thread *td;
1247	struct shutdown_args /* {
1248		int	s;
1249		int	how;
1250	} */ *uap;
1251{
1252	struct socket *so;
1253	struct file *fp;
1254	int error;
1255
1256	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1257	if (error == 0) {
1258		so = fp->f_data;
1259		error = soshutdown(so, uap->how);
1260		fdrop(fp, td);
1261	}
1262	return (error);
1263}
1264
1265/* ARGSUSED */
1266int
1267setsockopt(td, uap)
1268	struct thread *td;
1269	struct setsockopt_args /* {
1270		int	s;
1271		int	level;
1272		int	name;
1273		caddr_t	val;
1274		int	valsize;
1275	} */ *uap;
1276{
1277
1278	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1279	    uap->val, UIO_USERSPACE, uap->valsize));
1280}
1281
1282int
1283kern_setsockopt(td, s, level, name, val, valseg, valsize)
1284	struct thread *td;
1285	int s;
1286	int level;
1287	int name;
1288	void *val;
1289	enum uio_seg valseg;
1290	socklen_t valsize;
1291{
1292	int error;
1293	struct socket *so;
1294	struct file *fp;
1295	struct sockopt sopt;
1296
1297	if (val == NULL && valsize != 0)
1298		return (EFAULT);
1299	if ((int)valsize < 0)
1300		return (EINVAL);
1301
1302	sopt.sopt_dir = SOPT_SET;
1303	sopt.sopt_level = level;
1304	sopt.sopt_name = name;
1305	sopt.sopt_val = val;
1306	sopt.sopt_valsize = valsize;
1307	switch (valseg) {
1308	case UIO_USERSPACE:
1309		sopt.sopt_td = td;
1310		break;
1311	case UIO_SYSSPACE:
1312		sopt.sopt_td = NULL;
1313		break;
1314	default:
1315		panic("kern_setsockopt called with bad valseg");
1316	}
1317
1318	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1319	if (error == 0) {
1320		so = fp->f_data;
1321		error = sosetopt(so, &sopt);
1322		fdrop(fp, td);
1323	}
1324	return(error);
1325}
1326
1327/* ARGSUSED */
1328int
1329getsockopt(td, uap)
1330	struct thread *td;
1331	struct getsockopt_args /* {
1332		int	s;
1333		int	level;
1334		int	name;
1335		void * __restrict	val;
1336		socklen_t * __restrict avalsize;
1337	} */ *uap;
1338{
1339	socklen_t valsize;
1340	int	error;
1341
1342	if (uap->val) {
1343		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1344		if (error)
1345			return (error);
1346	}
1347
1348	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1349	    uap->val, UIO_USERSPACE, &valsize);
1350
1351	if (error == 0)
1352		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1353	return (error);
1354}
1355
1356/*
1357 * Kernel version of getsockopt.
1358 * optval can be a userland or userspace. optlen is always a kernel pointer.
1359 */
1360int
1361kern_getsockopt(td, s, level, name, val, valseg, valsize)
1362	struct thread *td;
1363	int s;
1364	int level;
1365	int name;
1366	void *val;
1367	enum uio_seg valseg;
1368	socklen_t *valsize;
1369{
1370	int error;
1371	struct  socket *so;
1372	struct file *fp;
1373	struct	sockopt sopt;
1374
1375	if (val == NULL)
1376		*valsize = 0;
1377	if ((int)*valsize < 0)
1378		return (EINVAL);
1379
1380	sopt.sopt_dir = SOPT_GET;
1381	sopt.sopt_level = level;
1382	sopt.sopt_name = name;
1383	sopt.sopt_val = val;
1384	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1385	switch (valseg) {
1386	case UIO_USERSPACE:
1387		sopt.sopt_td = td;
1388		break;
1389	case UIO_SYSSPACE:
1390		sopt.sopt_td = NULL;
1391		break;
1392	default:
1393		panic("kern_getsockopt called with bad valseg");
1394	}
1395
1396	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1397	if (error == 0) {
1398		so = fp->f_data;
1399		error = sogetopt(so, &sopt);
1400		*valsize = sopt.sopt_valsize;
1401		fdrop(fp, td);
1402	}
1403	return (error);
1404}
1405
1406/*
1407 * getsockname1() - Get socket name.
1408 */
1409/* ARGSUSED */
1410static int
1411getsockname1(td, uap, compat)
1412	struct thread *td;
1413	struct getsockname_args /* {
1414		int	fdes;
1415		struct sockaddr * __restrict asa;
1416		socklen_t * __restrict alen;
1417	} */ *uap;
1418	int compat;
1419{
1420	struct sockaddr *sa;
1421	socklen_t len;
1422	int error;
1423
1424	error = copyin(uap->alen, &len, sizeof(len));
1425	if (error)
1426		return (error);
1427
1428	error = kern_getsockname(td, uap->fdes, &sa, &len);
1429	if (error)
1430		return (error);
1431
1432	if (len != 0) {
1433#ifdef COMPAT_OLDSOCK
1434		if (compat)
1435			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1436#endif
1437		error = copyout(sa, uap->asa, (u_int)len);
1438	}
1439	free(sa, M_SONAME);
1440	if (error == 0)
1441		error = copyout(&len, uap->alen, sizeof(len));
1442	return (error);
1443}
1444
1445int
1446kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1447    socklen_t *alen)
1448{
1449	struct socket *so;
1450	struct file *fp;
1451	socklen_t len;
1452	int error;
1453
1454	if (*alen < 0)
1455		return (EINVAL);
1456
1457	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1458	if (error)
1459		return (error);
1460	so = fp->f_data;
1461	*sa = NULL;
1462	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1463	if (error)
1464		goto bad;
1465	if (*sa == NULL)
1466		len = 0;
1467	else
1468		len = MIN(*alen, (*sa)->sa_len);
1469	*alen = len;
1470#ifdef KTRACE
1471	if (KTRPOINT(td, KTR_STRUCT))
1472		ktrsockaddr(*sa);
1473#endif
1474bad:
1475	fdrop(fp, td);
1476	if (error && *sa) {
1477		free(*sa, M_SONAME);
1478		*sa = NULL;
1479	}
1480	return (error);
1481}
1482
/*
 * getsockname() system call: getsockname1() with compat set to 0.
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1491
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API getsockname: getsockname1() with compat set to 1.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1502
1503/*
1504 * getpeername1() - Get name of peer for connected socket.
1505 */
1506/* ARGSUSED */
1507static int
1508getpeername1(td, uap, compat)
1509	struct thread *td;
1510	struct getpeername_args /* {
1511		int	fdes;
1512		struct sockaddr * __restrict	asa;
1513		socklen_t * __restrict	alen;
1514	} */ *uap;
1515	int compat;
1516{
1517	struct sockaddr *sa;
1518	socklen_t len;
1519	int error;
1520
1521	error = copyin(uap->alen, &len, sizeof (len));
1522	if (error)
1523		return (error);
1524
1525	error = kern_getpeername(td, uap->fdes, &sa, &len);
1526	if (error)
1527		return (error);
1528
1529	if (len != 0) {
1530#ifdef COMPAT_OLDSOCK
1531		if (compat)
1532			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1533#endif
1534		error = copyout(sa, uap->asa, (u_int)len);
1535	}
1536	free(sa, M_SONAME);
1537	if (error == 0)
1538		error = copyout(&len, uap->alen, sizeof(len));
1539	return (error);
1540}
1541
1542int
1543kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1544    socklen_t *alen)
1545{
1546	struct socket *so;
1547	struct file *fp;
1548	socklen_t len;
1549	int error;
1550
1551	if (*alen < 0)
1552		return (EINVAL);
1553
1554	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1555	if (error)
1556		return (error);
1557	so = fp->f_data;
1558	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1559		error = ENOTCONN;
1560		goto done;
1561	}
1562	*sa = NULL;
1563	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1564	if (error)
1565		goto bad;
1566	if (*sa == NULL)
1567		len = 0;
1568	else
1569		len = MIN(*alen, (*sa)->sa_len);
1570	*alen = len;
1571#ifdef KTRACE
1572	if (KTRPOINT(td, KTR_STRUCT))
1573		ktrsockaddr(*sa);
1574#endif
1575bad:
1576	if (error && *sa) {
1577		free(*sa, M_SONAME);
1578		*sa = NULL;
1579	}
1580done:
1581	fdrop(fp, td);
1582	return (error);
1583}
1584
/*
 * getpeername() system call: getpeername1() with compat set to 0.
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1593
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API getpeername: getpeername1() with compat set to 1.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *gpa;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	gpa = (struct getpeername_args *)uap;
	return (getpeername1(td, gpa, 1));
}
#endif /* COMPAT_OLDSOCK */
1605
1606int
1607sockargs(mp, buf, buflen, type)
1608	struct mbuf **mp;
1609	caddr_t buf;
1610	int buflen, type;
1611{
1612	struct sockaddr *sa;
1613	struct mbuf *m;
1614	int error;
1615
1616	if ((u_int)buflen > MLEN) {
1617#ifdef COMPAT_OLDSOCK
1618		if (type == MT_SONAME && (u_int)buflen <= 112)
1619			buflen = MLEN;		/* unix domain compat. hack */
1620		else
1621#endif
1622			if ((u_int)buflen > MCLBYTES)
1623				return (EINVAL);
1624	}
1625	m = m_get(M_WAIT, type);
1626	if ((u_int)buflen > MLEN)
1627		MCLGET(m, M_WAIT);
1628	m->m_len = buflen;
1629	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1630	if (error)
1631		(void) m_free(m);
1632	else {
1633		*mp = m;
1634		if (type == MT_SONAME) {
1635			sa = mtod(m, struct sockaddr *);
1636
1637#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1638			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1639				sa->sa_family = sa->sa_len;
1640#endif
1641			sa->sa_len = buflen;
1642		}
1643	}
1644	return (error);
1645}
1646
1647int
1648getsockaddr(namp, uaddr, len)
1649	struct sockaddr **namp;
1650	caddr_t uaddr;
1651	size_t len;
1652{
1653	struct sockaddr *sa;
1654	int error;
1655
1656	if (len > SOCK_MAXADDRLEN)
1657		return (ENAMETOOLONG);
1658	if (len < offsetof(struct sockaddr, sa_data[0]))
1659		return (EINVAL);
1660	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1661	error = copyin(uaddr, sa, len);
1662	if (error) {
1663		FREE(sa, M_SONAME);
1664	} else {
1665#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1666		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1667			sa->sa_family = sa->sa_len;
1668#endif
1669		sa->sa_len = len;
1670		*namp = sa;
1671	}
1672	return (error);
1673}
1674
1675#include <sys/condvar.h>
1676
1677struct sendfile_sync {
1678	struct mtx	mtx;
1679	struct cv	cv;
1680	unsigned 	count;
1681};
1682
1683/*
1684 * Detach mapped page and release resources back to the system.
1685 */
1686void
1687sf_buf_mext(void *addr, void *args)
1688{
1689	vm_page_t m;
1690	struct sendfile_sync *sfs;
1691
1692	m = sf_buf_page(args);
1693	sf_buf_free(args);
1694	vm_page_lock_queues();
1695	vm_page_unwire(m, 0);
1696	/*
1697	 * Check for the object going away on us. This can
1698	 * happen since we don't hold a reference to it.
1699	 * If so, we're responsible for freeing the page.
1700	 */
1701	if (m->wire_count == 0 && m->object == NULL)
1702		vm_page_free(m);
1703	vm_page_unlock_queues();
1704	if (addr == NULL)
1705		return;
1706	sfs = addr;
1707	mtx_lock(&sfs->mtx);
1708	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1709	if (--sfs->count == 0)
1710		cv_signal(&sfs->cv);
1711	mtx_unlock(&sfs->mtx);
1712}
1713
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 * 0.  Optionally add a header and/or trailer to the socket output.  If
 * specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point is do_sendfile() with compat set to 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1731
1732static int
1733do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1734{
1735	struct sf_hdtr hdtr;
1736	struct uio *hdr_uio, *trl_uio;
1737	int error;
1738
1739	hdr_uio = trl_uio = NULL;
1740
1741	if (uap->hdtr != NULL) {
1742		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1743		if (error)
1744			goto out;
1745		if (hdtr.headers != NULL) {
1746			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1747			if (error)
1748				goto out;
1749		}
1750		if (hdtr.trailers != NULL) {
1751			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1752			if (error)
1753				goto out;
1754
1755		}
1756	}
1757
1758	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1759out:
1760	if (hdr_uio)
1761		free(hdr_uio, M_IOV);
1762	if (trl_uio)
1763		free(trl_uio, M_IOV);
1764	return (error);
1765}
1766
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile: repackage the arguments field by field
 * as a struct sendfile_args and run do_sendfile() with compat set to 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args sfa;

	sfa.fd = uap->fd;
	sfa.s = uap->s;
	sfa.offset = uap->offset;
	sfa.nbytes = uap->nbytes;
	sfa.hdtr = uap->hdtr;
	sfa.sbytes = uap->sbytes;
	sfa.flags = uap->flags;

	return (do_sendfile(td, &sfa, 1));
}
#endif /* COMPAT_FREEBSD4 */
1784
1785int
1786kern_sendfile(struct thread *td, struct sendfile_args *uap,
1787    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1788{
1789	struct file *sock_fp;
1790	struct vnode *vp;
1791	struct vm_object *obj = NULL;
1792	struct socket *so = NULL;
1793	struct mbuf *m = NULL;
1794	struct sf_buf *sf;
1795	struct vm_page *pg;
1796	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1797	int error, hdrlen = 0, mnw = 0;
1798	int vfslocked;
1799	struct sendfile_sync *sfs = NULL;
1800
1801	/*
1802	 * The file descriptor must be a regular file and have a
1803	 * backing VM object.
1804	 * File offset must be positive.  If it goes beyond EOF
1805	 * we send only the header/trailer and no payload data.
1806	 */
1807	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1808		goto out;
1809	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1810	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1811	if (vp->v_type == VREG) {
1812		obj = vp->v_object;
1813		if (obj != NULL) {
1814			/*
1815			 * Temporarily increase the backing VM
1816			 * object's reference count so that a forced
1817			 * reclamation of its vnode does not
1818			 * immediately destroy it.
1819			 */
1820			VM_OBJECT_LOCK(obj);
1821			if ((obj->flags & OBJ_DEAD) == 0) {
1822				vm_object_reference_locked(obj);
1823				VM_OBJECT_UNLOCK(obj);
1824			} else {
1825				VM_OBJECT_UNLOCK(obj);
1826				obj = NULL;
1827			}
1828		}
1829	}
1830	VOP_UNLOCK(vp, 0);
1831	VFS_UNLOCK_GIANT(vfslocked);
1832	if (obj == NULL) {
1833		error = EINVAL;
1834		goto out;
1835	}
1836	if (uap->offset < 0) {
1837		error = EINVAL;
1838		goto out;
1839	}
1840
1841	/*
1842	 * The socket must be a stream socket and connected.
1843	 * Remember if it a blocking or non-blocking socket.
1844	 */
1845	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1846	    NULL)) != 0)
1847		goto out;
1848	so = sock_fp->f_data;
1849	if (so->so_type != SOCK_STREAM) {
1850		error = EINVAL;
1851		goto out;
1852	}
1853	if ((so->so_state & SS_ISCONNECTED) == 0) {
1854		error = ENOTCONN;
1855		goto out;
1856	}
1857	/*
1858	 * Do not wait on memory allocations but return ENOMEM for
1859	 * caller to retry later.
1860	 * XXX: Experimental.
1861	 */
1862	if (uap->flags & SF_MNOWAIT)
1863		mnw = 1;
1864
1865	if (uap->flags & SF_SYNC) {
1866		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
1867		memset(sfs, 0, sizeof *sfs);
1868		mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
1869		cv_init(&sfs->cv, "sendfile");
1870	}
1871
1872#ifdef MAC
1873	SOCK_LOCK(so);
1874	error = mac_socket_check_send(td->td_ucred, so);
1875	SOCK_UNLOCK(so);
1876	if (error)
1877		goto out;
1878#endif
1879
1880	/* If headers are specified copy them into mbufs. */
1881	if (hdr_uio != NULL) {
1882		hdr_uio->uio_td = td;
1883		hdr_uio->uio_rw = UIO_WRITE;
1884		if (hdr_uio->uio_resid > 0) {
1885			/*
1886			 * In FBSD < 5.0 the nbytes to send also included
1887			 * the header.  If compat is specified subtract the
1888			 * header size from nbytes.
1889			 */
1890			if (compat) {
1891				if (uap->nbytes > hdr_uio->uio_resid)
1892					uap->nbytes -= hdr_uio->uio_resid;
1893				else
1894					uap->nbytes = 0;
1895			}
1896			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1897			    0, 0, 0);
1898			if (m == NULL) {
1899				error = mnw ? EAGAIN : ENOBUFS;
1900				goto out;
1901			}
1902			hdrlen = m_length(m, NULL);
1903		}
1904	}
1905
1906	/*
1907	 * Protect against multiple writers to the socket.
1908	 *
1909	 * XXXRW: Historically this has assumed non-interruptibility, so now
1910	 * we implement that, but possibly shouldn't.
1911	 */
1912	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1913
1914	/*
1915	 * Loop through the pages of the file, starting with the requested
1916	 * offset. Get a file page (do I/O if necessary), map the file page
1917	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1918	 * it on the socket.
1919	 * This is done in two loops.  The inner loop turns as many pages
1920	 * as it can, up to available socket buffer space, without blocking
1921	 * into mbufs to have it bulk delivered into the socket send buffer.
1922	 * The outer loop checks the state and available space of the socket
1923	 * and takes care of the overall progress.
1924	 */
1925	for (off = uap->offset, rem = uap->nbytes; ; ) {
1926		int loopbytes = 0;
1927		int space = 0;
1928		int done = 0;
1929
1930		/*
1931		 * Check the socket state for ongoing connection,
1932		 * no errors and space in socket buffer.
1933		 * If space is low allow for the remainder of the
1934		 * file to be processed if it fits the socket buffer.
1935		 * Otherwise block in waiting for sufficient space
1936		 * to proceed, or if the socket is nonblocking, return
1937		 * to userland with EAGAIN while reporting how far
1938		 * we've come.
1939		 * We wait until the socket buffer has significant free
1940		 * space to do bulk sends.  This makes good use of file
1941		 * system read ahead and allows packet segmentation
1942		 * offloading hardware to take over lots of work.  If
1943		 * we were not careful here we would send off only one
1944		 * sfbuf at a time.
1945		 */
1946		SOCKBUF_LOCK(&so->so_snd);
1947		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1948			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1949retry_space:
1950		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1951			error = EPIPE;
1952			SOCKBUF_UNLOCK(&so->so_snd);
1953			goto done;
1954		} else if (so->so_error) {
1955			error = so->so_error;
1956			so->so_error = 0;
1957			SOCKBUF_UNLOCK(&so->so_snd);
1958			goto done;
1959		}
1960		space = sbspace(&so->so_snd);
1961		if (space < rem &&
1962		    (space <= 0 ||
1963		     space < so->so_snd.sb_lowat)) {
1964			if (so->so_state & SS_NBIO) {
1965				SOCKBUF_UNLOCK(&so->so_snd);
1966				error = EAGAIN;
1967				goto done;
1968			}
1969			/*
1970			 * sbwait drops the lock while sleeping.
1971			 * When we loop back to retry_space the
1972			 * state may have changed and we retest
1973			 * for it.
1974			 */
1975			error = sbwait(&so->so_snd);
1976			/*
1977			 * An error from sbwait usually indicates that we've
1978			 * been interrupted by a signal. If we've sent anything
1979			 * then return bytes sent, otherwise return the error.
1980			 */
1981			if (error) {
1982				SOCKBUF_UNLOCK(&so->so_snd);
1983				goto done;
1984			}
1985			goto retry_space;
1986		}
1987		SOCKBUF_UNLOCK(&so->so_snd);
1988
1989		/*
1990		 * Reduce space in the socket buffer by the size of
1991		 * the header mbuf chain.
1992		 * hdrlen is set to 0 after the first loop.
1993		 */
1994		space -= hdrlen;
1995
1996		/*
1997		 * Loop and construct maximum sized mbuf chain to be bulk
1998		 * dumped into socket buffer.
1999		 */
2000		while(space > loopbytes) {
2001			vm_pindex_t pindex;
2002			vm_offset_t pgoff;
2003			struct mbuf *m0;
2004
2005			VM_OBJECT_LOCK(obj);
2006			/*
2007			 * Calculate the amount to transfer.
2008			 * Not to exceed a page, the EOF,
2009			 * or the passed in nbytes.
2010			 */
2011			pgoff = (vm_offset_t)(off & PAGE_MASK);
2012			xfsize = omin(PAGE_SIZE - pgoff,
2013			    obj->un_pager.vnp.vnp_size - uap->offset -
2014			    fsbytes - loopbytes);
2015			if (uap->nbytes)
2016				rem = (uap->nbytes - fsbytes - loopbytes);
2017			else
2018				rem = obj->un_pager.vnp.vnp_size -
2019				    uap->offset - fsbytes - loopbytes;
2020			xfsize = omin(rem, xfsize);
2021			if (xfsize <= 0) {
2022				VM_OBJECT_UNLOCK(obj);
2023				done = 1;		/* all data sent */
2024				break;
2025			}
2026			/*
2027			 * Don't overflow the send buffer.
2028			 * Stop here and send out what we've
2029			 * already got.
2030			 */
2031			if (space < loopbytes + xfsize) {
2032				VM_OBJECT_UNLOCK(obj);
2033				break;
2034			}
2035
2036			/*
2037			 * Attempt to look up the page.  Allocate
2038			 * if not found or wait and loop if busy.
2039			 */
2040			pindex = OFF_TO_IDX(off);
2041			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2042			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2043
2044			/*
2045			 * Check if page is valid for what we need,
2046			 * otherwise initiate I/O.
2047			 * If we already turned some pages into mbufs,
2048			 * send them off before we come here again and
2049			 * block.
2050			 */
2051			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2052				VM_OBJECT_UNLOCK(obj);
2053			else if (m != NULL)
2054				error = EAGAIN;	/* send what we already got */
2055			else if (uap->flags & SF_NODISKIO)
2056				error = EBUSY;
2057			else {
2058				int bsize, resid;
2059
2060				/*
2061				 * Ensure that our page is still around
2062				 * when the I/O completes.
2063				 */
2064				vm_page_io_start(pg);
2065				VM_OBJECT_UNLOCK(obj);
2066
2067				/*
2068				 * Get the page from backing store.
2069				 */
2070				bsize = vp->v_mount->mnt_stat.f_iosize;
2071				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2072				vn_lock(vp, LK_SHARED | LK_RETRY);
2073
2074				/*
2075				 * XXXMAC: Because we don't have fp->f_cred
2076				 * here, we pass in NOCRED.  This is probably
2077				 * wrong, but is consistent with our original
2078				 * implementation.
2079				 */
2080				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2081				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2082				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2083				    td->td_ucred, NOCRED, &resid, td);
2084				VOP_UNLOCK(vp, 0);
2085				VFS_UNLOCK_GIANT(vfslocked);
2086				VM_OBJECT_LOCK(obj);
2087				vm_page_io_finish(pg);
2088				if (!error)
2089					VM_OBJECT_UNLOCK(obj);
2090				mbstat.sf_iocnt++;
2091			}
2092			if (error) {
2093				vm_page_lock_queues();
2094				vm_page_unwire(pg, 0);
2095				/*
2096				 * See if anyone else might know about
2097				 * this page.  If not and it is not valid,
2098				 * then free it.
2099				 */
2100				if (pg->wire_count == 0 && pg->valid == 0 &&
2101				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2102				    pg->hold_count == 0) {
2103					vm_page_free(pg);
2104				}
2105				vm_page_unlock_queues();
2106				VM_OBJECT_UNLOCK(obj);
2107				if (error == EAGAIN)
2108					error = 0;	/* not a real error */
2109				break;
2110			}
2111
2112			/*
2113			 * Get a sendfile buf.  We usually wait as long
2114			 * as necessary, but this wait can be interrupted.
2115			 */
2116			if ((sf = sf_buf_alloc(pg,
2117			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2118				mbstat.sf_allocfail++;
2119				vm_page_lock_queues();
2120				vm_page_unwire(pg, 0);
2121				/*
2122				 * XXX: Not same check as above!?
2123				 */
2124				if (pg->wire_count == 0 && pg->object == NULL)
2125					vm_page_free(pg);
2126				vm_page_unlock_queues();
2127				error = (mnw ? EAGAIN : EINTR);
2128				break;
2129			}
2130
2131			/*
2132			 * Get an mbuf and set it up as having
2133			 * external storage.
2134			 */
2135			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2136			if (m0 == NULL) {
2137				error = (mnw ? EAGAIN : ENOBUFS);
2138				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2139				break;
2140			}
2141			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2142			    sfs, sf, M_RDONLY, EXT_SFBUF);
2143			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2144			m0->m_len = xfsize;
2145
2146			/* Append to mbuf chain. */
2147			if (m != NULL)
2148				m_cat(m, m0);
2149			else
2150				m = m0;
2151
2152			/* Keep track of bits processed. */
2153			loopbytes += xfsize;
2154			off += xfsize;
2155
2156			if (sfs != NULL) {
2157				mtx_lock(&sfs->mtx);
2158				sfs->count++;
2159				mtx_unlock(&sfs->mtx);
2160			}
2161		}
2162
2163		/* Add the buffer chain to the socket buffer. */
2164		if (m != NULL) {
2165			int mlen, err;
2166
2167			mlen = m_length(m, NULL);
2168			SOCKBUF_LOCK(&so->so_snd);
2169			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2170				error = EPIPE;
2171				SOCKBUF_UNLOCK(&so->so_snd);
2172				goto done;
2173			}
2174			SOCKBUF_UNLOCK(&so->so_snd);
2175			/* Avoid error aliasing. */
2176			err = (*so->so_proto->pr_usrreqs->pru_send)
2177				    (so, 0, m, NULL, NULL, td);
2178			if (err == 0) {
2179				/*
2180				 * We need two counters to get the
2181				 * file offset and nbytes to send
2182				 * right:
2183				 * - sbytes contains the total amount
2184				 *   of bytes sent, including headers.
2185				 * - fsbytes contains the total amount
2186				 *   of bytes sent from the file.
2187				 */
2188				sbytes += mlen;
2189				fsbytes += mlen;
2190				if (hdrlen) {
2191					fsbytes -= hdrlen;
2192					hdrlen = 0;
2193				}
2194			} else if (error == 0)
2195				error = err;
2196			m = NULL;	/* pru_send always consumes */
2197		}
2198
2199		/* Quit outer loop on error or when we're done. */
2200		if (done)
2201			break;
2202		if (error)
2203			goto done;
2204	}
2205
2206	/*
2207	 * Send trailers. Wimp out and use writev(2).
2208	 */
2209	if (trl_uio != NULL) {
2210		error = kern_writev(td, uap->s, trl_uio);
2211		if (error)
2212			goto done;
2213		sbytes += td->td_retval[0];
2214	}
2215
2216done:
2217	sbunlock(&so->so_snd);
2218out:
2219	/*
2220	 * If there was no error we have to clear td->td_retval[0]
2221	 * because it may have been set by writev.
2222	 */
2223	if (error == 0) {
2224		td->td_retval[0] = 0;
2225	}
2226	if (uap->sbytes != NULL) {
2227		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2228	}
2229	if (obj != NULL)
2230		vm_object_deallocate(obj);
2231	if (vp != NULL) {
2232		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2233		vrele(vp);
2234		VFS_UNLOCK_GIANT(vfslocked);
2235	}
2236	if (so)
2237		fdrop(sock_fp, td);
2238	if (m)
2239		m_freem(m);
2240
2241	if (sfs != NULL) {
2242		mtx_lock(&sfs->mtx);
2243		if (sfs->count != 0)
2244			cv_wait(&sfs->cv, &sfs->mtx);
2245		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2246		cv_destroy(&sfs->cv);
2247		mtx_destroy(&sfs->mtx);
2248		free(sfs, M_TEMP);
2249	}
2250
2251	if (error == ERESTART)
2252		error = EINTR;
2253
2254	return (error);
2255}
2256
2257/*
2258 * SCTP syscalls.
2259 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2260 * otherwise all return EOPNOTSUPP.
2261 * XXX: We should make this loadable one day.
2262 */
2263int
2264sctp_peeloff(td, uap)
2265	struct thread *td;
2266	struct sctp_peeloff_args /* {
2267		int	sd;
2268		caddr_t	name;
2269	} */ *uap;
2270{
2271#ifdef SCTP
2272	struct filedesc *fdp;
2273	struct file *nfp = NULL;
2274	int error;
2275	struct socket *head, *so;
2276	int fd;
2277	u_int fflag;
2278
2279	fdp = td->td_proc->p_fd;
2280	error = fgetsock(td, uap->sd, &head, &fflag);
2281	if (error)
2282		goto done2;
2283	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2284	if (error)
2285		goto done2;
2286	/*
2287	 * At this point we know we do have a assoc to pull
2288	 * we proceed to get the fd setup. This may block
2289	 * but that is ok.
2290	 */
2291
2292	error = falloc(td, &nfp, &fd);
2293	if (error)
2294		goto done;
2295	td->td_retval[0] = fd;
2296
2297	so = sonewconn(head, SS_ISCONNECTED);
2298	if (so == NULL)
2299		goto noconnection;
2300	/*
2301	 * Before changing the flags on the socket, we have to bump the
2302	 * reference count.  Otherwise, if the protocol calls sofree(),
2303	 * the socket will be released due to a zero refcount.
2304	 */
2305        SOCK_LOCK(so);
2306        soref(so);                      /* file descriptor reference */
2307        SOCK_UNLOCK(so);
2308
2309	ACCEPT_LOCK();
2310
2311	TAILQ_REMOVE(&head->so_comp, so, so_list);
2312	head->so_qlen--;
2313	so->so_state |= (head->so_state & SS_NBIO);
2314	so->so_state &= ~SS_NOFDREF;
2315	so->so_qstate &= ~SQ_COMP;
2316	so->so_head = NULL;
2317	ACCEPT_UNLOCK();
2318	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2319	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2320	if (error)
2321		goto noconnection;
2322	if (head->so_sigio != NULL)
2323		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2324
2325noconnection:
2326	/*
2327	 * close the new descriptor, assuming someone hasn't ripped it
2328	 * out from under us.
2329	 */
2330	if (error)
2331		fdclose(fdp, nfp, fd, td);
2332
2333	/*
2334	 * Release explicitly held references before returning.
2335	 */
2336done:
2337	if (nfp != NULL)
2338		fdrop(nfp, td);
2339	fputsock(head);
2340done2:
2341	return (error);
2342#else  /* SCTP */
2343	return (EOPNOTSUPP);
2344#endif /* SCTP */
2345}
2346
2347int
2348sctp_generic_sendmsg (td, uap)
2349	struct thread *td;
2350	struct sctp_generic_sendmsg_args /* {
2351		int sd,
2352		caddr_t msg,
2353		int mlen,
2354		caddr_t to,
2355		__socklen_t tolen,
2356		struct sctp_sndrcvinfo *sinfo,
2357		int flags
2358	} */ *uap;
2359{
2360#ifdef SCTP
2361	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2362	struct socket *so;
2363	struct file *fp = NULL;
2364	int use_rcvinfo = 1;
2365	int error = 0, len;
2366	struct sockaddr *to = NULL;
2367#ifdef KTRACE
2368	struct uio *ktruio = NULL;
2369#endif
2370	struct uio auio;
2371	struct iovec iov[1];
2372
2373	if (uap->sinfo) {
2374		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2375		if (error)
2376			return (error);
2377		u_sinfo = &sinfo;
2378	}
2379	if (uap->tolen) {
2380		error = getsockaddr(&to, uap->to, uap->tolen);
2381		if (error) {
2382			to = NULL;
2383			goto sctp_bad2;
2384		}
2385	}
2386
2387	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2388	if (error)
2389		goto sctp_bad;
2390#ifdef KTRACE
2391	if (KTRPOINT(td, KTR_STRUCT))
2392		ktrsockaddr(to);
2393#endif
2394
2395	iov[0].iov_base = uap->msg;
2396	iov[0].iov_len = uap->mlen;
2397
2398	so = (struct socket *)fp->f_data;
2399#ifdef MAC
2400	SOCK_LOCK(so);
2401	error = mac_socket_check_send(td->td_ucred, so);
2402	SOCK_UNLOCK(so);
2403	if (error)
2404		goto sctp_bad;
2405#endif /* MAC */
2406
2407	auio.uio_iov =  iov;
2408	auio.uio_iovcnt = 1;
2409	auio.uio_segflg = UIO_USERSPACE;
2410	auio.uio_rw = UIO_WRITE;
2411	auio.uio_td = td;
2412	auio.uio_offset = 0;			/* XXX */
2413	auio.uio_resid = 0;
2414	len = auio.uio_resid = uap->mlen;
2415	error = sctp_lower_sosend(so, to, &auio,
2416		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2417		    uap->flags, use_rcvinfo, u_sinfo, td);
2418	if (error) {
2419		if (auio.uio_resid != len && (error == ERESTART ||
2420		    error == EINTR || error == EWOULDBLOCK))
2421			error = 0;
2422		/* Generation of SIGPIPE can be controlled per socket. */
2423		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2424		    !(uap->flags & MSG_NOSIGNAL)) {
2425			PROC_LOCK(td->td_proc);
2426			psignal(td->td_proc, SIGPIPE);
2427			PROC_UNLOCK(td->td_proc);
2428		}
2429	}
2430	if (error == 0)
2431		td->td_retval[0] = len - auio.uio_resid;
2432#ifdef KTRACE
2433	if (ktruio != NULL) {
2434		ktruio->uio_resid = td->td_retval[0];
2435		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2436	}
2437#endif /* KTRACE */
2438sctp_bad:
2439	if (fp)
2440		fdrop(fp, td);
2441sctp_bad2:
2442	if (to)
2443		free(to, M_SONAME);
2444	return (error);
2445#else  /* SCTP */
2446	return (EOPNOTSUPP);
2447#endif /* SCTP */
2448}
2449
2450int
2451sctp_generic_sendmsg_iov(td, uap)
2452	struct thread *td;
2453	struct sctp_generic_sendmsg_iov_args /* {
2454		int sd,
2455		struct iovec *iov,
2456		int iovlen,
2457		caddr_t to,
2458		__socklen_t tolen,
2459		struct sctp_sndrcvinfo *sinfo,
2460		int flags
2461	} */ *uap;
2462{
2463#ifdef SCTP
2464	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2465	struct socket *so;
2466	struct file *fp = NULL;
2467	int use_rcvinfo = 1;
2468	int error=0, len, i;
2469	struct sockaddr *to = NULL;
2470#ifdef KTRACE
2471	struct uio *ktruio = NULL;
2472#endif
2473	struct uio auio;
2474	struct iovec *iov, *tiov;
2475
2476	if (uap->sinfo) {
2477		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2478		if (error)
2479			return (error);
2480		u_sinfo = &sinfo;
2481	}
2482	if (uap->tolen) {
2483		error = getsockaddr(&to, uap->to, uap->tolen);
2484		if (error) {
2485			to = NULL;
2486			goto sctp_bad2;
2487		}
2488	}
2489
2490	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2491	if (error)
2492		goto sctp_bad1;
2493
2494	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2495	if (error)
2496		goto sctp_bad1;
2497#ifdef KTRACE
2498	if (KTRPOINT(td, KTR_STRUCT))
2499		ktrsockaddr(to);
2500#endif
2501
2502	so = (struct socket *)fp->f_data;
2503#ifdef MAC
2504	SOCK_LOCK(so);
2505	error = mac_socket_check_send(td->td_ucred, so);
2506	SOCK_UNLOCK(so);
2507	if (error)
2508		goto sctp_bad;
2509#endif /* MAC */
2510
2511	auio.uio_iov =  iov;
2512	auio.uio_iovcnt = uap->iovlen;
2513	auio.uio_segflg = UIO_USERSPACE;
2514	auio.uio_rw = UIO_WRITE;
2515	auio.uio_td = td;
2516	auio.uio_offset = 0;			/* XXX */
2517	auio.uio_resid = 0;
2518	tiov = iov;
2519	for (i = 0; i <uap->iovlen; i++, tiov++) {
2520		if ((auio.uio_resid += tiov->iov_len) < 0) {
2521			error = EINVAL;
2522			goto sctp_bad;
2523		}
2524	}
2525	len = auio.uio_resid;
2526	error = sctp_lower_sosend(so, to, &auio,
2527		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2528		    uap->flags, use_rcvinfo, u_sinfo, td);
2529	if (error) {
2530		if (auio.uio_resid != len && (error == ERESTART ||
2531		    error == EINTR || error == EWOULDBLOCK))
2532			error = 0;
2533		/* Generation of SIGPIPE can be controlled per socket */
2534		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2535		    !(uap->flags & MSG_NOSIGNAL)) {
2536			PROC_LOCK(td->td_proc);
2537			psignal(td->td_proc, SIGPIPE);
2538			PROC_UNLOCK(td->td_proc);
2539		}
2540	}
2541	if (error == 0)
2542		td->td_retval[0] = len - auio.uio_resid;
2543#ifdef KTRACE
2544	if (ktruio != NULL) {
2545		ktruio->uio_resid = td->td_retval[0];
2546		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2547	}
2548#endif /* KTRACE */
2549sctp_bad:
2550	free(iov, M_IOV);
2551sctp_bad1:
2552	if (fp)
2553		fdrop(fp, td);
2554sctp_bad2:
2555	if (to)
2556		free(to, M_SONAME);
2557	return (error);
2558#else  /* SCTP */
2559	return (EOPNOTSUPP);
2560#endif /* SCTP */
2561}
2562
2563int
2564sctp_generic_recvmsg(td, uap)
2565	struct thread *td;
2566	struct sctp_generic_recvmsg_args /* {
2567		int sd,
2568		struct iovec *iov,
2569		int iovlen,
2570		struct sockaddr *from,
2571		__socklen_t *fromlenaddr,
2572		struct sctp_sndrcvinfo *sinfo,
2573		int *msg_flags
2574	} */ *uap;
2575{
2576#ifdef SCTP
2577	u_int8_t sockbufstore[256];
2578	struct uio auio;
2579	struct iovec *iov, *tiov;
2580	struct sctp_sndrcvinfo sinfo;
2581	struct socket *so;
2582	struct file *fp = NULL;
2583	struct sockaddr *fromsa;
2584	int fromlen;
2585	int len, i, msg_flags;
2586	int error = 0;
2587#ifdef KTRACE
2588	struct uio *ktruio = NULL;
2589#endif
2590	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2591	if (error) {
2592		return (error);
2593	}
2594	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2595	if (error) {
2596		goto out1;
2597	}
2598
2599	so = fp->f_data;
2600#ifdef MAC
2601	SOCK_LOCK(so);
2602	error = mac_socket_check_receive(td->td_ucred, so);
2603	SOCK_UNLOCK(so);
2604	if (error) {
2605		goto out;
2606		return (error);
2607	}
2608#endif /* MAC */
2609
2610	if (uap->fromlenaddr) {
2611		error = copyin(uap->fromlenaddr,
2612		    &fromlen, sizeof (fromlen));
2613		if (error) {
2614			goto out;
2615		}
2616	} else {
2617		fromlen = 0;
2618	}
2619	if(uap->msg_flags) {
2620		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2621		if (error) {
2622			goto out;
2623		}
2624	} else {
2625		msg_flags = 0;
2626	}
2627	auio.uio_iov = iov;
2628	auio.uio_iovcnt = uap->iovlen;
2629  	auio.uio_segflg = UIO_USERSPACE;
2630	auio.uio_rw = UIO_READ;
2631	auio.uio_td = td;
2632	auio.uio_offset = 0;			/* XXX */
2633	auio.uio_resid = 0;
2634	tiov = iov;
2635	for (i = 0; i <uap->iovlen; i++, tiov++) {
2636		if ((auio.uio_resid += tiov->iov_len) < 0) {
2637			error = EINVAL;
2638			goto out;
2639		}
2640	}
2641	len = auio.uio_resid;
2642	fromsa = (struct sockaddr *)sockbufstore;
2643
2644#ifdef KTRACE
2645	if (KTRPOINT(td, KTR_GENIO))
2646		ktruio = cloneuio(&auio);
2647#endif /* KTRACE */
2648	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2649		    fromsa, fromlen, &msg_flags,
2650		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2651	if (error) {
2652		if (auio.uio_resid != (int)len && (error == ERESTART ||
2653		    error == EINTR || error == EWOULDBLOCK))
2654			error = 0;
2655	} else {
2656		if (uap->sinfo)
2657			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2658	}
2659#ifdef KTRACE
2660	if (ktruio != NULL) {
2661		ktruio->uio_resid = (int)len - auio.uio_resid;
2662		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2663	}
2664#endif /* KTRACE */
2665	if (error)
2666		goto out;
2667	td->td_retval[0] = (int)len - auio.uio_resid;
2668
2669	if (fromlen && uap->from) {
2670		len = fromlen;
2671		if (len <= 0 || fromsa == 0)
2672			len = 0;
2673		else {
2674			len = MIN(len, fromsa->sa_len);
2675			error = copyout(fromsa, uap->from, (unsigned)len);
2676			if (error)
2677				goto out;
2678		}
2679		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2680		if (error) {
2681			goto out;
2682		}
2683	}
2684#ifdef KTRACE
2685	if (KTRPOINT(td, KTR_STRUCT))
2686		ktrsockaddr(fromsa);
2687#endif
2688	if (uap->msg_flags) {
2689		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2690		if (error) {
2691			goto out;
2692		}
2693	}
2694out:
2695	free(iov, M_IOV);
2696out1:
2697	if (fp)
2698		fdrop(fp, td);
2699
2700	return (error);
2701#else  /* SCTP */
2702	return (EOPNOTSUPP);
2703#endif /* SCTP */
2704}
2705