kern_sendfile.c revision 167211
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 167211 2007-03-04 22:36:48Z rwatson $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
/*
 * Forward declarations for the file-local helpers behind the socket
 * system calls.  The `compat' argument selects the old (4.3BSD) sockaddr
 * layout where COMPAT_OLDSOCK is enabled.
 */
static int	sendit(struct thread *td, int s, struct msghdr *mp,
		    int flags);
static int	recvit(struct thread *td, int s, struct msghdr *mp,
		    void *namelenp);

static int	accept1(struct thread *td, struct accept_args *uap,
		    int compat);
static int	do_sendfile(struct thread *td, struct sendfile_args *uap,
		    int compat);
static int	getsockname1(struct thread *td,
		    struct getsockname_args *uap, int compat);
static int	getpeername1(struct thread *td,
		    struct getpeername_args *uap, int compat);
94
95/*
96 * NSFBUFS-related variables and associated sysctls
97 */
98int nsfbufs;
99int nsfbufspeak;
100int nsfbufsused;
101
102SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
103    "Maximum number of sendfile(2) sf_bufs available");
104SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
105    "Number of sendfile(2) sf_bufs at peak usage");
106SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
107    "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry.  A reference on the
111 * file entry is held upon returning.  This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count.  If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120	struct file *fp;
121	int error;
122
123	fp = NULL;
124	if (fdp == NULL)
125		error = EBADF;
126	else {
127		FILEDESC_LOCK_FAST(fdp);
128		fp = fget_locked(fdp, fd);
129		if (fp == NULL)
130			error = EBADF;
131		else if (fp->f_type != DTYPE_SOCKET) {
132			fp = NULL;
133			error = ENOTSOCK;
134		} else {
135			fhold(fp);
136			if (fflagp != NULL)
137				*fflagp = fp->f_flag;
138			error = 0;
139		}
140		FILEDESC_UNLOCK_FAST(fdp);
141	}
142	*fpp = fp;
143	return (error);
144}
145
/*
 * System call interface to the socket abstraction.
 *
 * The old-style socket entry points (osend, orecv, oaccept, ...) are
 * compiled in whenever 4.3BSD compatibility is configured.
 */
#ifdef COMPAT_43
#define COMPAT_OLDSOCK
#endif
152
153int
154socket(td, uap)
155	struct thread *td;
156	register struct socket_args /* {
157		int	domain;
158		int	type;
159		int	protocol;
160	} */ *uap;
161{
162	struct filedesc *fdp;
163	struct socket *so;
164	struct file *fp;
165	int fd, error;
166
167#ifdef MAC
168	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
169	    uap->protocol);
170	if (error)
171		return (error);
172#endif
173	fdp = td->td_proc->p_fd;
174	error = falloc(td, &fp, &fd);
175	if (error)
176		return (error);
177	/* An extra reference on `fp' has been held for us by falloc(). */
178	NET_LOCK_GIANT();
179	error = socreate(uap->domain, &so, uap->type, uap->protocol,
180	    td->td_ucred, td);
181	NET_UNLOCK_GIANT();
182	if (error) {
183		fdclose(fdp, fp, fd, td);
184	} else {
185		FILEDESC_LOCK_FAST(fdp);
186		fp->f_data = so;	/* already has ref count */
187		fp->f_flag = FREAD|FWRITE;
188		fp->f_ops = &socketops;
189		fp->f_type = DTYPE_SOCKET;
190		FILEDESC_UNLOCK_FAST(fdp);
191		td->td_retval[0] = fd;
192	}
193	fdrop(fp, td);
194	return (error);
195}
196
197/* ARGSUSED */
198int
199bind(td, uap)
200	struct thread *td;
201	register struct bind_args /* {
202		int	s;
203		caddr_t	name;
204		int	namelen;
205	} */ *uap;
206{
207	struct sockaddr *sa;
208	int error;
209
210	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
211		return (error);
212
213	error = kern_bind(td, uap->s, sa);
214	free(sa, M_SONAME);
215	return (error);
216}
217
218int
219kern_bind(td, fd, sa)
220	struct thread *td;
221	int fd;
222	struct sockaddr *sa;
223{
224	struct socket *so;
225	struct file *fp;
226	int error;
227
228	NET_LOCK_GIANT();
229	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
230	if (error)
231		goto done2;
232	so = fp->f_data;
233#ifdef MAC
234	SOCK_LOCK(so);
235	error = mac_check_socket_bind(td->td_ucred, so, sa);
236	SOCK_UNLOCK(so);
237	if (error)
238		goto done1;
239#endif
240	error = sobind(so, sa, td);
241#ifdef MAC
242done1:
243#endif
244	fdrop(fp, td);
245done2:
246	NET_UNLOCK_GIANT();
247	return (error);
248}
249
250/* ARGSUSED */
251int
252listen(td, uap)
253	struct thread *td;
254	register struct listen_args /* {
255		int	s;
256		int	backlog;
257	} */ *uap;
258{
259	struct socket *so;
260	struct file *fp;
261	int error;
262
263	NET_LOCK_GIANT();
264	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
265	if (error == 0) {
266		so = fp->f_data;
267#ifdef MAC
268		SOCK_LOCK(so);
269		error = mac_check_socket_listen(td->td_ucred, so);
270		SOCK_UNLOCK(so);
271		if (error)
272			goto done;
273#endif
274		error = solisten(so, uap->backlog, td);
275#ifdef MAC
276done:
277#endif
278		fdrop(fp, td);
279	}
280	NET_UNLOCK_GIANT();
281	return(error);
282}
283
284/*
285 * accept1()
286 */
287static int
288accept1(td, uap, compat)
289	struct thread *td;
290	register struct accept_args /* {
291		int	s;
292		struct sockaddr	* __restrict name;
293		socklen_t	* __restrict anamelen;
294	} */ *uap;
295	int compat;
296{
297	struct sockaddr *name;
298	socklen_t namelen;
299	struct file *fp;
300	int error;
301
302	if (uap->name == NULL)
303		return (kern_accept(td, uap->s, NULL, NULL, NULL));
304
305	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
306	if (error)
307		return (error);
308
309	error = kern_accept(td, uap->s, &name, &namelen, &fp);
310
311	/*
312	 * return a namelen of zero for older code which might
313	 * ignore the return value from accept.
314	 */
315	if (error) {
316		(void) copyout(&namelen,
317		    uap->anamelen, sizeof(*uap->anamelen));
318		return (error);
319	}
320
321	if (error == 0 && name != NULL) {
322#ifdef COMPAT_OLDSOCK
323		if (compat)
324			((struct osockaddr *)name)->sa_family =
325			    name->sa_family;
326#endif
327		error = copyout(name, uap->name, namelen);
328	}
329	if (error == 0)
330		error = copyout(&namelen, uap->anamelen,
331		    sizeof(namelen));
332	if (error)
333		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
334	fdrop(fp, td);
335	free(name, M_SONAME);
336	return (error);
337}
338
339int
340kern_accept(struct thread *td, int s, struct sockaddr **name,
341    socklen_t *namelen, struct file **fp)
342{
343	struct filedesc *fdp;
344	struct file *headfp, *nfp = NULL;
345	struct sockaddr *sa = NULL;
346	int error;
347	struct socket *head, *so;
348	int fd;
349	u_int fflag;
350	pid_t pgid;
351	int tmp;
352
353	if (name) {
354		*name = NULL;
355		if (*namelen < 0)
356			return (EINVAL);
357	}
358
359	fdp = td->td_proc->p_fd;
360	NET_LOCK_GIANT();
361	error = getsock(fdp, s, &headfp, &fflag);
362	if (error)
363		goto done2;
364	head = headfp->f_data;
365	if ((head->so_options & SO_ACCEPTCONN) == 0) {
366		error = EINVAL;
367		goto done;
368	}
369#ifdef MAC
370	SOCK_LOCK(head);
371	error = mac_check_socket_accept(td->td_ucred, head);
372	SOCK_UNLOCK(head);
373	if (error != 0)
374		goto done;
375#endif
376	error = falloc(td, &nfp, &fd);
377	if (error)
378		goto done;
379	ACCEPT_LOCK();
380	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
381		ACCEPT_UNLOCK();
382		error = EWOULDBLOCK;
383		goto noconnection;
384	}
385	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
386		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
387			head->so_error = ECONNABORTED;
388			break;
389		}
390		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
391		    "accept", 0);
392		if (error) {
393			ACCEPT_UNLOCK();
394			goto noconnection;
395		}
396	}
397	if (head->so_error) {
398		error = head->so_error;
399		head->so_error = 0;
400		ACCEPT_UNLOCK();
401		goto noconnection;
402	}
403	so = TAILQ_FIRST(&head->so_comp);
404	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
405	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
406
407	/*
408	 * Before changing the flags on the socket, we have to bump the
409	 * reference count.  Otherwise, if the protocol calls sofree(),
410	 * the socket will be released due to a zero refcount.
411	 */
412	SOCK_LOCK(so);			/* soref() and so_state update */
413	soref(so);			/* file descriptor reference */
414
415	TAILQ_REMOVE(&head->so_comp, so, so_list);
416	head->so_qlen--;
417	so->so_state |= (head->so_state & SS_NBIO);
418	so->so_qstate &= ~SQ_COMP;
419	so->so_head = NULL;
420
421	SOCK_UNLOCK(so);
422	ACCEPT_UNLOCK();
423
424	/* An extra reference on `nfp' has been held for us by falloc(). */
425	td->td_retval[0] = fd;
426
427	/* connection has been removed from the listen queue */
428	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
429
430	pgid = fgetown(&head->so_sigio);
431	if (pgid != 0)
432		fsetown(pgid, &so->so_sigio);
433
434	FILE_LOCK(nfp);
435	nfp->f_data = so;	/* nfp has ref count from falloc */
436	nfp->f_flag = fflag;
437	nfp->f_ops = &socketops;
438	nfp->f_type = DTYPE_SOCKET;
439	FILE_UNLOCK(nfp);
440	/* Sync socket nonblocking/async state with file flags */
441	tmp = fflag & FNONBLOCK;
442	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
443	tmp = fflag & FASYNC;
444	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
445	sa = 0;
446	error = soaccept(so, &sa);
447	if (error) {
448		/*
449		 * return a namelen of zero for older code which might
450		 * ignore the return value from accept.
451		 */
452		if (name)
453			*namelen = 0;
454		goto noconnection;
455	}
456	if (sa == NULL) {
457		if (name)
458			*namelen = 0;
459		goto done;
460	}
461	if (name) {
462		/* check sa_len before it is destroyed */
463		if (*namelen > sa->sa_len)
464			*namelen = sa->sa_len;
465		*name = sa;
466		sa = NULL;
467	}
468noconnection:
469	if (sa)
470		FREE(sa, M_SONAME);
471
472	/*
473	 * close the new descriptor, assuming someone hasn't ripped it
474	 * out from under us.
475	 */
476	if (error)
477		fdclose(fdp, nfp, fd, td);
478
479	/*
480	 * Release explicitly held references before returning.  We return
481	 * a reference on nfp to the caller on success if they request it.
482	 */
483done:
484	if (fp != NULL) {
485		if (error == 0) {
486			*fp = nfp;
487			nfp = NULL;
488		} else
489			*fp = NULL;
490	}
491	if (nfp != NULL)
492		fdrop(nfp, td);
493	fdrop(headfp, td);
494done2:
495	NET_UNLOCK_GIANT();
496	return (error);
497}
498
/*
 * accept(2): accept1() without the old-sockaddr compatibility layout.
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	const int compat = 0;

	return (accept1(td, uap, compat));
}
507
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: accept1() with the old sockaddr layout requested.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	const int compat = 1;

	return (accept1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
518
519/* ARGSUSED */
520int
521connect(td, uap)
522	struct thread *td;
523	register struct connect_args /* {
524		int	s;
525		caddr_t	name;
526		int	namelen;
527	} */ *uap;
528{
529	struct sockaddr *sa;
530	int error;
531
532	error = getsockaddr(&sa, uap->name, uap->namelen);
533	if (error)
534		return (error);
535
536	error = kern_connect(td, uap->s, sa);
537	free(sa, M_SONAME);
538	return (error);
539}
540
541
542int
543kern_connect(td, fd, sa)
544	struct thread *td;
545	int fd;
546	struct sockaddr *sa;
547{
548	struct socket *so;
549	struct file *fp;
550	int error;
551	int interrupted = 0;
552
553	NET_LOCK_GIANT();
554	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
555	if (error)
556		goto done2;
557	so = fp->f_data;
558	if (so->so_state & SS_ISCONNECTING) {
559		error = EALREADY;
560		goto done1;
561	}
562#ifdef MAC
563	SOCK_LOCK(so);
564	error = mac_check_socket_connect(td->td_ucred, so, sa);
565	SOCK_UNLOCK(so);
566	if (error)
567		goto bad;
568#endif
569	error = soconnect(so, sa, td);
570	if (error)
571		goto bad;
572	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
573		error = EINPROGRESS;
574		goto done1;
575	}
576	SOCK_LOCK(so);
577	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
578		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
579		    "connec", 0);
580		if (error) {
581			if (error == EINTR || error == ERESTART)
582				interrupted = 1;
583			break;
584		}
585	}
586	if (error == 0) {
587		error = so->so_error;
588		so->so_error = 0;
589	}
590	SOCK_UNLOCK(so);
591bad:
592	if (!interrupted)
593		so->so_state &= ~SS_ISCONNECTING;
594	if (error == ERESTART)
595		error = EINTR;
596done1:
597	fdrop(fp, td);
598done2:
599	NET_UNLOCK_GIANT();
600	return (error);
601}
602
603int
604socketpair(td, uap)
605	struct thread *td;
606	register struct socketpair_args /* {
607		int	domain;
608		int	type;
609		int	protocol;
610		int	*rsv;
611	} */ *uap;
612{
613	register struct filedesc *fdp = td->td_proc->p_fd;
614	struct file *fp1, *fp2;
615	struct socket *so1, *so2;
616	int fd, error, sv[2];
617
618#ifdef MAC
619	/* We might want to have a separate check for socket pairs. */
620	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
621	    uap->protocol);
622	if (error)
623		return (error);
624#endif
625
626	NET_LOCK_GIANT();
627	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
628	    td->td_ucred, td);
629	if (error)
630		goto done2;
631	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
632	    td->td_ucred, td);
633	if (error)
634		goto free1;
635	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
636	error = falloc(td, &fp1, &fd);
637	if (error)
638		goto free2;
639	sv[0] = fd;
640	fp1->f_data = so1;	/* so1 already has ref count */
641	error = falloc(td, &fp2, &fd);
642	if (error)
643		goto free3;
644	fp2->f_data = so2;	/* so2 already has ref count */
645	sv[1] = fd;
646	error = soconnect2(so1, so2);
647	if (error)
648		goto free4;
649	if (uap->type == SOCK_DGRAM) {
650		/*
651		 * Datagram socket connection is asymmetric.
652		 */
653		 error = soconnect2(so2, so1);
654		 if (error)
655			goto free4;
656	}
657	FILE_LOCK(fp1);
658	fp1->f_flag = FREAD|FWRITE;
659	fp1->f_ops = &socketops;
660	fp1->f_type = DTYPE_SOCKET;
661	FILE_UNLOCK(fp1);
662	FILE_LOCK(fp2);
663	fp2->f_flag = FREAD|FWRITE;
664	fp2->f_ops = &socketops;
665	fp2->f_type = DTYPE_SOCKET;
666	FILE_UNLOCK(fp2);
667	error = copyout(sv, uap->rsv, 2 * sizeof (int));
668	fdrop(fp1, td);
669	fdrop(fp2, td);
670	goto done2;
671free4:
672	fdclose(fdp, fp2, sv[1], td);
673	fdrop(fp2, td);
674free3:
675	fdclose(fdp, fp1, sv[0], td);
676	fdrop(fp1, td);
677free2:
678	(void)soclose(so2);
679free1:
680	(void)soclose(so1);
681done2:
682	NET_UNLOCK_GIANT();
683	return (error);
684}
685
686static int
687sendit(td, s, mp, flags)
688	register struct thread *td;
689	int s;
690	register struct msghdr *mp;
691	int flags;
692{
693	struct mbuf *control;
694	struct sockaddr *to;
695	int error;
696
697	if (mp->msg_name != NULL) {
698		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
699		if (error) {
700			to = NULL;
701			goto bad;
702		}
703		mp->msg_name = to;
704	} else {
705		to = NULL;
706	}
707
708	if (mp->msg_control) {
709		if (mp->msg_controllen < sizeof(struct cmsghdr)
710#ifdef COMPAT_OLDSOCK
711		    && mp->msg_flags != MSG_COMPAT
712#endif
713		) {
714			error = EINVAL;
715			goto bad;
716		}
717		error = sockargs(&control, mp->msg_control,
718		    mp->msg_controllen, MT_CONTROL);
719		if (error)
720			goto bad;
721#ifdef COMPAT_OLDSOCK
722		if (mp->msg_flags == MSG_COMPAT) {
723			register struct cmsghdr *cm;
724
725			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
726			if (control == 0) {
727				error = ENOBUFS;
728				goto bad;
729			} else {
730				cm = mtod(control, struct cmsghdr *);
731				cm->cmsg_len = control->m_len;
732				cm->cmsg_level = SOL_SOCKET;
733				cm->cmsg_type = SCM_RIGHTS;
734			}
735		}
736#endif
737	} else {
738		control = NULL;
739	}
740
741	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
742
743bad:
744	if (to)
745		FREE(to, M_SONAME);
746	return (error);
747}
748
749int
750kern_sendit(td, s, mp, flags, control, segflg)
751	struct thread *td;
752	int s;
753	struct msghdr *mp;
754	int flags;
755	struct mbuf *control;
756	enum uio_seg segflg;
757{
758	struct file *fp;
759	struct uio auio;
760	struct iovec *iov;
761	struct socket *so;
762	int i;
763	int len, error;
764#ifdef KTRACE
765	struct uio *ktruio = NULL;
766#endif
767
768	NET_LOCK_GIANT();
769	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
770	if (error)
771		goto bad2;
772	so = (struct socket *)fp->f_data;
773
774#ifdef MAC
775	SOCK_LOCK(so);
776	error = mac_check_socket_send(td->td_ucred, so);
777	SOCK_UNLOCK(so);
778	if (error)
779		goto bad;
780#endif
781
782	auio.uio_iov = mp->msg_iov;
783	auio.uio_iovcnt = mp->msg_iovlen;
784	auio.uio_segflg = segflg;
785	auio.uio_rw = UIO_WRITE;
786	auio.uio_td = td;
787	auio.uio_offset = 0;			/* XXX */
788	auio.uio_resid = 0;
789	iov = mp->msg_iov;
790	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
791		if ((auio.uio_resid += iov->iov_len) < 0) {
792			error = EINVAL;
793			goto bad;
794		}
795	}
796#ifdef KTRACE
797	if (KTRPOINT(td, KTR_GENIO))
798		ktruio = cloneuio(&auio);
799#endif
800	len = auio.uio_resid;
801	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
802	if (error) {
803		if (auio.uio_resid != len && (error == ERESTART ||
804		    error == EINTR || error == EWOULDBLOCK))
805			error = 0;
806		/* Generation of SIGPIPE can be controlled per socket */
807		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
808		    !(flags & MSG_NOSIGNAL)) {
809			PROC_LOCK(td->td_proc);
810			psignal(td->td_proc, SIGPIPE);
811			PROC_UNLOCK(td->td_proc);
812		}
813	}
814	if (error == 0)
815		td->td_retval[0] = len - auio.uio_resid;
816#ifdef KTRACE
817	if (ktruio != NULL) {
818		ktruio->uio_resid = td->td_retval[0];
819		ktrgenio(s, UIO_WRITE, ktruio, error);
820	}
821#endif
822bad:
823	fdrop(fp, td);
824bad2:
825	NET_UNLOCK_GIANT();
826	return (error);
827}
828
829int
830sendto(td, uap)
831	struct thread *td;
832	register struct sendto_args /* {
833		int	s;
834		caddr_t	buf;
835		size_t	len;
836		int	flags;
837		caddr_t	to;
838		int	tolen;
839	} */ *uap;
840{
841	struct msghdr msg;
842	struct iovec aiov;
843	int error;
844
845	msg.msg_name = uap->to;
846	msg.msg_namelen = uap->tolen;
847	msg.msg_iov = &aiov;
848	msg.msg_iovlen = 1;
849	msg.msg_control = 0;
850#ifdef COMPAT_OLDSOCK
851	msg.msg_flags = 0;
852#endif
853	aiov.iov_base = uap->buf;
854	aiov.iov_len = uap->len;
855	error = sendit(td, uap->s, &msg, uap->flags);
856	return (error);
857}
858
859#ifdef COMPAT_OLDSOCK
860int
861osend(td, uap)
862	struct thread *td;
863	register struct osend_args /* {
864		int	s;
865		caddr_t	buf;
866		int	len;
867		int	flags;
868	} */ *uap;
869{
870	struct msghdr msg;
871	struct iovec aiov;
872	int error;
873
874	msg.msg_name = 0;
875	msg.msg_namelen = 0;
876	msg.msg_iov = &aiov;
877	msg.msg_iovlen = 1;
878	aiov.iov_base = uap->buf;
879	aiov.iov_len = uap->len;
880	msg.msg_control = 0;
881	msg.msg_flags = 0;
882	error = sendit(td, uap->s, &msg, uap->flags);
883	return (error);
884}
885
886int
887osendmsg(td, uap)
888	struct thread *td;
889	struct osendmsg_args /* {
890		int	s;
891		caddr_t	msg;
892		int	flags;
893	} */ *uap;
894{
895	struct msghdr msg;
896	struct iovec *iov;
897	int error;
898
899	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
900	if (error)
901		return (error);
902	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
903	if (error)
904		return (error);
905	msg.msg_iov = iov;
906	msg.msg_flags = MSG_COMPAT;
907	error = sendit(td, uap->s, &msg, uap->flags);
908	free(iov, M_IOV);
909	return (error);
910}
911#endif
912
913int
914sendmsg(td, uap)
915	struct thread *td;
916	struct sendmsg_args /* {
917		int	s;
918		caddr_t	msg;
919		int	flags;
920	} */ *uap;
921{
922	struct msghdr msg;
923	struct iovec *iov;
924	int error;
925
926	error = copyin(uap->msg, &msg, sizeof (msg));
927	if (error)
928		return (error);
929	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
930	if (error)
931		return (error);
932	msg.msg_iov = iov;
933#ifdef COMPAT_OLDSOCK
934	msg.msg_flags = 0;
935#endif
936	error = sendit(td, uap->s, &msg, uap->flags);
937	free(iov, M_IOV);
938	return (error);
939}
940
941int
942kern_recvit(td, s, mp, fromseg, controlp)
943	struct thread *td;
944	int s;
945	struct msghdr *mp;
946	enum uio_seg fromseg;
947	struct mbuf **controlp;
948{
949	struct uio auio;
950	struct iovec *iov;
951	int i;
952	socklen_t len;
953	int error;
954	struct mbuf *m, *control = 0;
955	caddr_t ctlbuf;
956	struct file *fp;
957	struct socket *so;
958	struct sockaddr *fromsa = 0;
959#ifdef KTRACE
960	struct uio *ktruio = NULL;
961#endif
962
963	if(controlp != NULL)
964		*controlp = 0;
965
966	NET_LOCK_GIANT();
967	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
968	if (error) {
969		NET_UNLOCK_GIANT();
970		return (error);
971	}
972	so = fp->f_data;
973
974#ifdef MAC
975	SOCK_LOCK(so);
976	error = mac_check_socket_receive(td->td_ucred, so);
977	SOCK_UNLOCK(so);
978	if (error) {
979		fdrop(fp, td);
980		NET_UNLOCK_GIANT();
981		return (error);
982	}
983#endif
984
985	auio.uio_iov = mp->msg_iov;
986	auio.uio_iovcnt = mp->msg_iovlen;
987	auio.uio_segflg = UIO_USERSPACE;
988	auio.uio_rw = UIO_READ;
989	auio.uio_td = td;
990	auio.uio_offset = 0;			/* XXX */
991	auio.uio_resid = 0;
992	iov = mp->msg_iov;
993	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
994		if ((auio.uio_resid += iov->iov_len) < 0) {
995			fdrop(fp, td);
996			NET_UNLOCK_GIANT();
997			return (EINVAL);
998		}
999	}
1000#ifdef KTRACE
1001	if (KTRPOINT(td, KTR_GENIO))
1002		ktruio = cloneuio(&auio);
1003#endif
1004	len = auio.uio_resid;
1005	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
1006	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
1007	    &mp->msg_flags);
1008	if (error) {
1009		if (auio.uio_resid != (int)len && (error == ERESTART ||
1010		    error == EINTR || error == EWOULDBLOCK))
1011			error = 0;
1012	}
1013#ifdef KTRACE
1014	if (ktruio != NULL) {
1015		ktruio->uio_resid = (int)len - auio.uio_resid;
1016		ktrgenio(s, UIO_READ, ktruio, error);
1017	}
1018#endif
1019	if (error)
1020		goto out;
1021	td->td_retval[0] = (int)len - auio.uio_resid;
1022	if (mp->msg_name) {
1023		len = mp->msg_namelen;
1024		if (len <= 0 || fromsa == 0)
1025			len = 0;
1026		else {
1027			/* save sa_len before it is destroyed by MSG_COMPAT */
1028			len = MIN(len, fromsa->sa_len);
1029#ifdef COMPAT_OLDSOCK
1030			if (mp->msg_flags & MSG_COMPAT)
1031				((struct osockaddr *)fromsa)->sa_family =
1032				    fromsa->sa_family;
1033#endif
1034			if (fromseg == UIO_USERSPACE) {
1035				error = copyout(fromsa, mp->msg_name,
1036				    (unsigned)len);
1037				if (error)
1038					goto out;
1039			} else
1040				bcopy(fromsa, mp->msg_name, len);
1041		}
1042		mp->msg_namelen = len;
1043	}
1044	if (mp->msg_control && controlp == NULL) {
1045#ifdef COMPAT_OLDSOCK
1046		/*
1047		 * We assume that old recvmsg calls won't receive access
1048		 * rights and other control info, esp. as control info
1049		 * is always optional and those options didn't exist in 4.3.
1050		 * If we receive rights, trim the cmsghdr; anything else
1051		 * is tossed.
1052		 */
1053		if (control && mp->msg_flags & MSG_COMPAT) {
1054			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1055			    SOL_SOCKET ||
1056			    mtod(control, struct cmsghdr *)->cmsg_type !=
1057			    SCM_RIGHTS) {
1058				mp->msg_controllen = 0;
1059				goto out;
1060			}
1061			control->m_len -= sizeof (struct cmsghdr);
1062			control->m_data += sizeof (struct cmsghdr);
1063		}
1064#endif
1065		len = mp->msg_controllen;
1066		m = control;
1067		mp->msg_controllen = 0;
1068		ctlbuf = mp->msg_control;
1069
1070		while (m && len > 0) {
1071			unsigned int tocopy;
1072
1073			if (len >= m->m_len)
1074				tocopy = m->m_len;
1075			else {
1076				mp->msg_flags |= MSG_CTRUNC;
1077				tocopy = len;
1078			}
1079
1080			if ((error = copyout(mtod(m, caddr_t),
1081					ctlbuf, tocopy)) != 0)
1082				goto out;
1083
1084			ctlbuf += tocopy;
1085			len -= tocopy;
1086			m = m->m_next;
1087		}
1088		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1089	}
1090out:
1091	fdrop(fp, td);
1092	NET_UNLOCK_GIANT();
1093	if (fromsa)
1094		FREE(fromsa, M_SONAME);
1095
1096	if (error == 0 && controlp != NULL)
1097		*controlp = control;
1098	else  if (control)
1099		m_freem(control);
1100
1101	return (error);
1102}
1103
1104static int
1105recvit(td, s, mp, namelenp)
1106	struct thread *td;
1107	int s;
1108	struct msghdr *mp;
1109	void *namelenp;
1110{
1111	int error;
1112
1113	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1114	if (error)
1115		return (error);
1116	if (namelenp) {
1117		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1118#ifdef COMPAT_OLDSOCK
1119		if (mp->msg_flags & MSG_COMPAT)
1120			error = 0;	/* old recvfrom didn't check */
1121#endif
1122	}
1123	return (error);
1124}
1125
1126int
1127recvfrom(td, uap)
1128	struct thread *td;
1129	register struct recvfrom_args /* {
1130		int	s;
1131		caddr_t	buf;
1132		size_t	len;
1133		int	flags;
1134		struct sockaddr * __restrict	from;
1135		socklen_t * __restrict fromlenaddr;
1136	} */ *uap;
1137{
1138	struct msghdr msg;
1139	struct iovec aiov;
1140	int error;
1141
1142	if (uap->fromlenaddr) {
1143		error = copyin(uap->fromlenaddr,
1144		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1145		if (error)
1146			goto done2;
1147	} else {
1148		msg.msg_namelen = 0;
1149	}
1150	msg.msg_name = uap->from;
1151	msg.msg_iov = &aiov;
1152	msg.msg_iovlen = 1;
1153	aiov.iov_base = uap->buf;
1154	aiov.iov_len = uap->len;
1155	msg.msg_control = 0;
1156	msg.msg_flags = uap->flags;
1157	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1158done2:
1159	return(error);
1160}
1161
#ifdef COMPAT_OLDSOCK
/*
 * Old-style recvfrom: recvfrom() with MSG_COMPAT added to the flags.
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif
1173
1174
1175#ifdef COMPAT_OLDSOCK
1176int
1177orecv(td, uap)
1178	struct thread *td;
1179	register struct orecv_args /* {
1180		int	s;
1181		caddr_t	buf;
1182		int	len;
1183		int	flags;
1184	} */ *uap;
1185{
1186	struct msghdr msg;
1187	struct iovec aiov;
1188	int error;
1189
1190	msg.msg_name = 0;
1191	msg.msg_namelen = 0;
1192	msg.msg_iov = &aiov;
1193	msg.msg_iovlen = 1;
1194	aiov.iov_base = uap->buf;
1195	aiov.iov_len = uap->len;
1196	msg.msg_control = 0;
1197	msg.msg_flags = uap->flags;
1198	error = recvit(td, uap->s, &msg, NULL);
1199	return (error);
1200}
1201
1202/*
1203 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1204 * overlays the new one, missing only the flags, and with the (old) access
1205 * rights where the control fields are now.
1206 */
1207int
1208orecvmsg(td, uap)
1209	struct thread *td;
1210	struct orecvmsg_args /* {
1211		int	s;
1212		struct	omsghdr *msg;
1213		int	flags;
1214	} */ *uap;
1215{
1216	struct msghdr msg;
1217	struct iovec *iov;
1218	int error;
1219
1220	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1221	if (error)
1222		return (error);
1223	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1224	if (error)
1225		return (error);
1226	msg.msg_flags = uap->flags | MSG_COMPAT;
1227	msg.msg_iov = iov;
1228	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1229	if (msg.msg_controllen && error == 0)
1230		error = copyout(&msg.msg_controllen,
1231		    &uap->msg->msg_accrightslen, sizeof (int));
1232	free(iov, M_IOV);
1233	return (error);
1234}
1235#endif
1236
1237int
1238recvmsg(td, uap)
1239	struct thread *td;
1240	struct recvmsg_args /* {
1241		int	s;
1242		struct	msghdr *msg;
1243		int	flags;
1244	} */ *uap;
1245{
1246	struct msghdr msg;
1247	struct iovec *uiov, *iov;
1248	int error;
1249
1250	error = copyin(uap->msg, &msg, sizeof (msg));
1251	if (error)
1252		return (error);
1253	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1254	if (error)
1255		return (error);
1256	msg.msg_flags = uap->flags;
1257#ifdef COMPAT_OLDSOCK
1258	msg.msg_flags &= ~MSG_COMPAT;
1259#endif
1260	uiov = msg.msg_iov;
1261	msg.msg_iov = iov;
1262	error = recvit(td, uap->s, &msg, NULL);
1263	if (error == 0) {
1264		msg.msg_iov = uiov;
1265		error = copyout(&msg, uap->msg, sizeof(msg));
1266	}
1267	free(iov, M_IOV);
1268	return (error);
1269}
1270
1271/* ARGSUSED */
1272int
1273shutdown(td, uap)
1274	struct thread *td;
1275	register struct shutdown_args /* {
1276		int	s;
1277		int	how;
1278	} */ *uap;
1279{
1280	struct socket *so;
1281	struct file *fp;
1282	int error;
1283
1284	NET_LOCK_GIANT();
1285	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1286	if (error == 0) {
1287		so = fp->f_data;
1288		error = soshutdown(so, uap->how);
1289		fdrop(fp, td);
1290	}
1291	NET_UNLOCK_GIANT();
1292	return (error);
1293}
1294
1295/* ARGSUSED */
1296int
1297setsockopt(td, uap)
1298	struct thread *td;
1299	register struct setsockopt_args /* {
1300		int	s;
1301		int	level;
1302		int	name;
1303		caddr_t	val;
1304		int	valsize;
1305	} */ *uap;
1306{
1307
1308	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1309	    uap->val, UIO_USERSPACE, uap->valsize));
1310}
1311
1312int
1313kern_setsockopt(td, s, level, name, val, valseg, valsize)
1314	struct thread *td;
1315	int s;
1316	int level;
1317	int name;
1318	void *val;
1319	enum uio_seg valseg;
1320	socklen_t valsize;
1321{
1322	int error;
1323	struct socket *so;
1324	struct file *fp;
1325	struct sockopt sopt;
1326
1327	if (val == NULL && valsize != 0)
1328		return (EFAULT);
1329	if ((int)valsize < 0)
1330		return (EINVAL);
1331
1332	sopt.sopt_dir = SOPT_SET;
1333	sopt.sopt_level = level;
1334	sopt.sopt_name = name;
1335	sopt.sopt_val = val;
1336	sopt.sopt_valsize = valsize;
1337	switch (valseg) {
1338	case UIO_USERSPACE:
1339		sopt.sopt_td = td;
1340		break;
1341	case UIO_SYSSPACE:
1342		sopt.sopt_td = NULL;
1343		break;
1344	default:
1345		panic("kern_setsockopt called with bad valseg");
1346	}
1347
1348	NET_LOCK_GIANT();
1349	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1350	if (error == 0) {
1351		so = fp->f_data;
1352		error = sosetopt(so, &sopt);
1353		fdrop(fp, td);
1354	}
1355	NET_UNLOCK_GIANT();
1356	return(error);
1357}
1358
1359/* ARGSUSED */
1360int
1361getsockopt(td, uap)
1362	struct thread *td;
1363	register struct getsockopt_args /* {
1364		int	s;
1365		int	level;
1366		int	name;
1367		void * __restrict	val;
1368		socklen_t * __restrict avalsize;
1369	} */ *uap;
1370{
1371	socklen_t valsize;
1372	int	error;
1373
1374	if (uap->val) {
1375		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1376		if (error)
1377			return (error);
1378	}
1379
1380	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1381	    uap->val, UIO_USERSPACE, &valsize);
1382
1383	if (error == 0)
1384		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1385	return (error);
1386}
1387
1388/*
1389 * Kernel version of getsockopt.
1390 * optval can be a userland or userspace. optlen is always a kernel pointer.
1391 */
1392int
1393kern_getsockopt(td, s, level, name, val, valseg, valsize)
1394	struct thread *td;
1395	int s;
1396	int level;
1397	int name;
1398	void *val;
1399	enum uio_seg valseg;
1400	socklen_t *valsize;
1401{
1402	int error;
1403	struct  socket *so;
1404	struct file *fp;
1405	struct	sockopt sopt;
1406
1407	if (val == NULL)
1408		*valsize = 0;
1409	if ((int)*valsize < 0)
1410		return (EINVAL);
1411
1412	sopt.sopt_dir = SOPT_GET;
1413	sopt.sopt_level = level;
1414	sopt.sopt_name = name;
1415	sopt.sopt_val = val;
1416	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1417	switch (valseg) {
1418	case UIO_USERSPACE:
1419		sopt.sopt_td = td;
1420		break;
1421	case UIO_SYSSPACE:
1422		sopt.sopt_td = NULL;
1423		break;
1424	default:
1425		panic("kern_getsockopt called with bad valseg");
1426	}
1427
1428	NET_LOCK_GIANT();
1429	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1430	if (error == 0) {
1431		so = fp->f_data;
1432		error = sogetopt(so, &sopt);
1433		*valsize = sopt.sopt_valsize;
1434		fdrop(fp, td);
1435	}
1436	NET_UNLOCK_GIANT();
1437	return (error);
1438}
1439
1440/*
1441 * getsockname1() - Get socket name.
1442 */
1443/* ARGSUSED */
1444static int
1445getsockname1(td, uap, compat)
1446	struct thread *td;
1447	register struct getsockname_args /* {
1448		int	fdes;
1449		struct sockaddr * __restrict asa;
1450		socklen_t * __restrict alen;
1451	} */ *uap;
1452	int compat;
1453{
1454	struct sockaddr *sa;
1455	socklen_t len;
1456	int error;
1457
1458	error = copyin(uap->alen, &len, sizeof(len));
1459	if (error)
1460		return (error);
1461
1462	error = kern_getsockname(td, uap->fdes, &sa, &len);
1463	if (error)
1464		return (error);
1465
1466	if (len != 0) {
1467#ifdef COMPAT_OLDSOCK
1468		if (compat)
1469			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1470#endif
1471		error = copyout(sa, uap->asa, (u_int)len);
1472	}
1473	free(sa, M_SONAME);
1474	if (error == 0)
1475		error = copyout(&len, uap->alen, sizeof(len));
1476	return (error);
1477}
1478
1479int
1480kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1481    socklen_t *alen)
1482{
1483	struct socket *so;
1484	struct file *fp;
1485	socklen_t len;
1486	int error;
1487
1488	if (*alen < 0)
1489		return (EINVAL);
1490
1491	NET_LOCK_GIANT();
1492	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1493	if (error)
1494		goto done;
1495	so = fp->f_data;
1496	*sa = NULL;
1497	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1498	if (error)
1499		goto bad;
1500	if (*sa == NULL)
1501		len = 0;
1502	else
1503		len = MIN(*alen, (*sa)->sa_len);
1504	*alen = len;
1505bad:
1506	fdrop(fp, td);
1507	if (error && *sa) {
1508		free(*sa, M_SONAME);
1509		*sa = NULL;
1510	}
1511done:
1512	NET_UNLOCK_GIANT();
1513	return (error);
1514}
1515
/*
 * getsockname(2) system call entry point: getsockname1() without the
 * old-sockaddr compatibility rewrite.
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1524
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getsockname entry point: getsockname1() with the compat flag
 * set so the address is returned in the osockaddr layout.
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1535
1536/*
1537 * getpeername1() - Get name of peer for connected socket.
1538 */
1539/* ARGSUSED */
1540static int
1541getpeername1(td, uap, compat)
1542	struct thread *td;
1543	register struct getpeername_args /* {
1544		int	fdes;
1545		struct sockaddr * __restrict	asa;
1546		socklen_t * __restrict	alen;
1547	} */ *uap;
1548	int compat;
1549{
1550	struct sockaddr *sa;
1551	socklen_t len;
1552	int error;
1553
1554	error = copyin(uap->alen, &len, sizeof (len));
1555	if (error)
1556		return (error);
1557
1558	error = kern_getpeername(td, uap->fdes, &sa, &len);
1559	if (error)
1560		return (error);
1561
1562	if (len != 0) {
1563#ifdef COMPAT_OLDSOCK
1564		if (compat)
1565			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1566#endif
1567		error = copyout(sa, uap->asa, (u_int)len);
1568	}
1569	free(sa, M_SONAME);
1570	if (error == 0)
1571		error = copyout(&len, uap->alen, sizeof(len));
1572	return (error);
1573}
1574
1575int
1576kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1577    socklen_t *alen)
1578{
1579	struct socket *so;
1580	struct file *fp;
1581	socklen_t len;
1582	int error;
1583
1584	if (*alen < 0)
1585		return (EINVAL);
1586
1587	NET_LOCK_GIANT();
1588	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1589	if (error)
1590		goto done2;
1591	so = fp->f_data;
1592	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1593		error = ENOTCONN;
1594		goto done1;
1595	}
1596	*sa = NULL;
1597	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1598	if (error)
1599		goto bad;
1600	if (*sa == NULL)
1601		len = 0;
1602	else
1603		len = MIN(*alen, (*sa)->sa_len);
1604	*alen = len;
1605bad:
1606	if (error && *sa) {
1607		free(*sa, M_SONAME);
1608		*sa = NULL;
1609	}
1610done1:
1611	fdrop(fp, td);
1612done2:
1613	NET_UNLOCK_GIANT();
1614	return (error);
1615}
1616
/*
 * getpeername(2) system call entry point: getpeername1() without the
 * old-sockaddr compatibility rewrite.
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1625
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getpeername entry point: getpeername1() with the compat flag
 * set so the address is returned in the osockaddr layout.
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1637
1638int
1639sockargs(mp, buf, buflen, type)
1640	struct mbuf **mp;
1641	caddr_t buf;
1642	int buflen, type;
1643{
1644	register struct sockaddr *sa;
1645	register struct mbuf *m;
1646	int error;
1647
1648	if ((u_int)buflen > MLEN) {
1649#ifdef COMPAT_OLDSOCK
1650		if (type == MT_SONAME && (u_int)buflen <= 112)
1651			buflen = MLEN;		/* unix domain compat. hack */
1652		else
1653#endif
1654			if ((u_int)buflen > MCLBYTES)
1655				return (EINVAL);
1656	}
1657	m = m_get(M_TRYWAIT, type);
1658	if (m == NULL)
1659		return (ENOBUFS);
1660	if ((u_int)buflen > MLEN) {
1661		MCLGET(m, M_TRYWAIT);
1662		if ((m->m_flags & M_EXT) == 0) {
1663			m_free(m);
1664			return (ENOBUFS);
1665		}
1666	}
1667	m->m_len = buflen;
1668	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1669	if (error)
1670		(void) m_free(m);
1671	else {
1672		*mp = m;
1673		if (type == MT_SONAME) {
1674			sa = mtod(m, struct sockaddr *);
1675
1676#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1677			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1678				sa->sa_family = sa->sa_len;
1679#endif
1680			sa->sa_len = buflen;
1681		}
1682	}
1683	return (error);
1684}
1685
1686int
1687getsockaddr(namp, uaddr, len)
1688	struct sockaddr **namp;
1689	caddr_t uaddr;
1690	size_t len;
1691{
1692	struct sockaddr *sa;
1693	int error;
1694
1695	if (len > SOCK_MAXADDRLEN)
1696		return (ENAMETOOLONG);
1697	if (len < offsetof(struct sockaddr, sa_data[0]))
1698		return (EINVAL);
1699	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1700	error = copyin(uaddr, sa, len);
1701	if (error) {
1702		FREE(sa, M_SONAME);
1703	} else {
1704#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1705		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1706			sa->sa_family = sa->sa_len;
1707#endif
1708		sa->sa_len = len;
1709		*namp = sa;
1710	}
1711	return (error);
1712}
1713
1714/*
1715 * Detach mapped page and release resources back to the system.
1716 */
1717void
1718sf_buf_mext(void *addr, void *args)
1719{
1720	vm_page_t m;
1721
1722	m = sf_buf_page(args);
1723	sf_buf_free(args);
1724	vm_page_lock_queues();
1725	vm_page_unwire(m, 0);
1726	/*
1727	 * Check for the object going away on us. This can
1728	 * happen since we don't hold a reference to it.
1729	 * If so, we're responsible for freeing the page.
1730	 */
1731	if (m->wire_count == 0 && m->object == NULL)
1732		vm_page_free(m);
1733	vm_page_unlock_queues();
1734}
1735
1736/*
1737 * sendfile(2)
1738 *
1739 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1740 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1741 *
1742 * Send a file specified by 'fd' and starting at 'offset' to a socket
1743 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
1744 * 0. Optionally add a header and/or trailer to the socket output.  If
1745 * specified, write the total number of bytes sent into *sbytes.
1746 *
1747 */
1748int
1749sendfile(struct thread *td, struct sendfile_args *uap)
1750{
1751
1752	return (do_sendfile(td, uap, 0));
1753}
1754
1755static int
1756do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1757{
1758	struct sf_hdtr hdtr;
1759	struct uio *hdr_uio, *trl_uio;
1760	int error;
1761
1762	hdr_uio = trl_uio = NULL;
1763
1764	if (uap->hdtr != NULL) {
1765		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1766		if (error)
1767			goto out;
1768		if (hdtr.headers != NULL) {
1769			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1770			if (error)
1771				goto out;
1772		}
1773		if (hdtr.trailers != NULL) {
1774			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1775			if (error)
1776				goto out;
1777
1778		}
1779	}
1780
1781	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1782out:
1783	if (hdr_uio)
1784		free(hdr_uio, M_IOV);
1785	if (trl_uio)
1786		free(trl_uio, M_IOV);
1787	return (error);
1788}
1789
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.
 *
 * Repackages the old argument structure field by field into a
 * sendfile_args and calls do_sendfile() with compat == 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args nargs;

	nargs.fd = uap->fd;
	nargs.s = uap->s;
	nargs.offset = uap->offset;
	nargs.nbytes = uap->nbytes;
	nargs.hdtr = uap->hdtr;
	nargs.sbytes = uap->sbytes;
	nargs.flags = uap->flags;

	return (do_sendfile(td, &nargs, 1));
}
#endif /* COMPAT_FREEBSD4 */
1807
1808int
1809kern_sendfile(struct thread *td, struct sendfile_args *uap,
1810    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1811{
1812	struct file *sock_fp;
1813	struct vnode *vp;
1814	struct vm_object *obj = NULL;
1815	struct socket *so = NULL;
1816	struct mbuf *m = NULL;
1817	struct sf_buf *sf;
1818	struct vm_page *pg;
1819	off_t off, xfsize, sbytes = 0, rem = 0;
1820	int error, mnw = 0;
1821	int vfslocked;
1822
1823	NET_LOCK_GIANT();
1824
1825	/*
1826	 * The file descriptor must be a regular file and have a
1827	 * backing VM object.
1828	 * File offset must be positive.  If it goes beyond EOF
1829	 * we send only the header/trailer and no payload data.
1830	 */
1831	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1832		goto out;
1833	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1834	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1835	obj = vp->v_object;
1836	if (obj != NULL) {
1837		/*
1838		 * Temporarily increase the backing VM object's reference
1839		 * count so that a forced reclamation of its vnode does not
1840		 * immediately destroy it.
1841		 */
1842		VM_OBJECT_LOCK(obj);
1843		if ((obj->flags & OBJ_DEAD) == 0) {
1844			vm_object_reference_locked(obj);
1845			VM_OBJECT_UNLOCK(obj);
1846		} else {
1847			VM_OBJECT_UNLOCK(obj);
1848			obj = NULL;
1849		}
1850	}
1851	VOP_UNLOCK(vp, 0, td);
1852	VFS_UNLOCK_GIANT(vfslocked);
1853	if (obj == NULL) {
1854		error = EINVAL;
1855		goto out;
1856	}
1857	if (uap->offset < 0) {
1858		error = EINVAL;
1859		goto out;
1860	}
1861
1862	/*
1863	 * The socket must be a stream socket and connected.
1864	 * Remember if it a blocking or non-blocking socket.
1865	 */
1866	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1867	    NULL)) != 0)
1868		goto out;
1869	so = sock_fp->f_data;
1870	if (so->so_type != SOCK_STREAM) {
1871		error = EINVAL;
1872		goto out;
1873	}
1874	if ((so->so_state & SS_ISCONNECTED) == 0) {
1875		error = ENOTCONN;
1876		goto out;
1877	}
1878	/*
1879	 * Do not wait on memory allocations but return ENOMEM for
1880	 * caller to retry later.
1881	 * XXX: Experimental.
1882	 */
1883	if (uap->flags & SF_MNOWAIT)
1884		mnw = 1;
1885
1886#ifdef MAC
1887	SOCK_LOCK(so);
1888	error = mac_check_socket_send(td->td_ucred, so);
1889	SOCK_UNLOCK(so);
1890	if (error)
1891		goto out;
1892#endif
1893
1894	/* If headers are specified copy them into mbufs. */
1895	if (hdr_uio != NULL) {
1896		hdr_uio->uio_td = td;
1897		hdr_uio->uio_rw = UIO_WRITE;
1898		if (hdr_uio->uio_resid > 0) {
1899			/*
1900			 * In FBSD < 5.0 the nbytes to send also included
1901			 * the header.  If compat is specified subtract the
1902			 * header size from nbytes.
1903			 */
1904			if (compat) {
1905				if (uap->nbytes > hdr_uio->uio_resid)
1906					uap->nbytes -= hdr_uio->uio_resid;
1907				else
1908					uap->nbytes = 0;
1909			}
1910			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1911			    0, 0, 0);
1912			if (m == NULL) {
1913				error = mnw ? EAGAIN : ENOBUFS;
1914				goto out;
1915			}
1916		}
1917	}
1918
1919	/* Protect against multiple writers to the socket. */
1920	SOCKBUF_LOCK(&so->so_snd);
1921	(void) sblock(&so->so_snd, M_WAITOK);
1922	SOCKBUF_UNLOCK(&so->so_snd);
1923
1924	/*
1925	 * Loop through the pages of the file, starting with the requested
1926	 * offset. Get a file page (do I/O if necessary), map the file page
1927	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1928	 * it on the socket.
1929	 * This is done in two loops.  The inner loop turns as many pages
1930	 * as it can, up to available socket buffer space, without blocking
1931	 * into mbufs to have it bulk delivered into the socket send buffer.
1932	 * The outer loop checks the state and available space of the socket
1933	 * and takes care of the overall progress.
1934	 */
1935	for (off = uap->offset; ; ) {
1936		int loopbytes = 0;
1937		int space = 0;
1938		int done = 0;
1939
1940		/*
1941		 * Check the socket state for ongoing connection,
1942		 * no errors and space in socket buffer.
1943		 * If space is low allow for the remainder of the
1944		 * file to be processed if it fits the socket buffer.
1945		 * Otherwise block in waiting for sufficient space
1946		 * to proceed, or if the socket is nonblocking, return
1947		 * to userland with EAGAIN while reporting how far
1948		 * we've come.
1949		 * We wait until the socket buffer has significant free
1950		 * space to do bulk sends.  This makes good use of file
1951		 * system read ahead and allows packet segmentation
1952		 * offloading hardware to take over lots of work.  If
1953		 * we were not careful here we would send off only one
1954		 * sfbuf at a time.
1955		 */
1956		SOCKBUF_LOCK(&so->so_snd);
1957		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1958			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1959retry_space:
1960		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1961			error = EPIPE;
1962			SOCKBUF_UNLOCK(&so->so_snd);
1963			goto done;
1964		} else if (so->so_error) {
1965			error = so->so_error;
1966			so->so_error = 0;
1967			SOCKBUF_UNLOCK(&so->so_snd);
1968			goto done;
1969		}
1970		space = sbspace(&so->so_snd);
1971		if (space < rem &&
1972		    (space <= 0 ||
1973		     space < so->so_snd.sb_lowat)) {
1974			if (so->so_state & SS_NBIO) {
1975				SOCKBUF_UNLOCK(&so->so_snd);
1976				error = EAGAIN;
1977				goto done;
1978			}
1979			/*
1980			 * sbwait drops the lock while sleeping.
1981			 * When we loop back to retry_space the
1982			 * state may have changed and we retest
1983			 * for it.
1984			 */
1985			error = sbwait(&so->so_snd);
1986			/*
1987			 * An error from sbwait usually indicates that we've
1988			 * been interrupted by a signal. If we've sent anything
1989			 * then return bytes sent, otherwise return the error.
1990			 */
1991			if (error) {
1992				SOCKBUF_UNLOCK(&so->so_snd);
1993				goto done;
1994			}
1995			goto retry_space;
1996		}
1997		SOCKBUF_UNLOCK(&so->so_snd);
1998
1999		/*
2000		 * Loop and construct maximum sized mbuf chain to be bulk
2001		 * dumped into socket buffer.
2002		 */
2003		while(space > loopbytes) {
2004			vm_pindex_t pindex;
2005			vm_offset_t pgoff;
2006			struct mbuf *m0;
2007
2008			VM_OBJECT_LOCK(obj);
2009			/*
2010			 * Calculate the amount to transfer.
2011			 * Not to exceed a page, the EOF,
2012			 * or the passed in nbytes.
2013			 */
2014			pgoff = (vm_offset_t)(off & PAGE_MASK);
2015			xfsize = omin(PAGE_SIZE - pgoff,
2016			    obj->un_pager.vnp.vnp_size - off -
2017			    sbytes - loopbytes);
2018			if (uap->nbytes)
2019				rem = (uap->nbytes - sbytes - loopbytes);
2020			else
2021				rem = obj->un_pager.vnp.vnp_size - off -
2022				    sbytes - loopbytes;
2023			xfsize = omin(rem, xfsize);
2024			if (xfsize <= 0) {
2025				VM_OBJECT_UNLOCK(obj);
2026				done = 1;		/* all data sent */
2027				break;
2028			}
2029			/*
2030			 * Don't overflow the send buffer.
2031			 * Stop here and send out what we've
2032			 * already got.
2033			 */
2034			if (space < loopbytes + xfsize) {
2035				VM_OBJECT_UNLOCK(obj);
2036				break;
2037			}
2038retry_lookup:
2039			/*
2040			 * Attempt to look up the page.
2041			 * Allocate if not found or
2042			 * wait and loop if busy.
2043			 */
2044			pindex = OFF_TO_IDX(off);
2045			pg = vm_page_lookup(obj, pindex);
2046			if (pg == NULL) {
2047				pg = vm_page_alloc(obj, pindex,
2048				    VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL |
2049				    VM_ALLOC_WIRED);
2050				if (pg == NULL) {
2051					VM_OBJECT_UNLOCK(obj);
2052					VM_WAIT;
2053					VM_OBJECT_LOCK(obj);
2054					goto retry_lookup;
2055				}
2056			} else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
2057				goto retry_lookup;
2058			else {
2059				/*
2060				 * Wire the page so it does not get
2061				 * ripped out from under us.
2062				 */
2063				vm_page_lock_queues();
2064				vm_page_wire(pg);
2065				vm_page_unlock_queues();
2066			}
2067
2068			/*
2069			 * Check if page is valid for what we need,
2070			 * otherwise initiate I/O.
2071			 * If we already turned some pages into mbufs,
2072			 * send them off before we come here again and
2073			 * block.
2074			 */
2075			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2076				VM_OBJECT_UNLOCK(obj);
2077			else if (m != NULL)
2078				error = EAGAIN;	/* send what we already got */
2079			else if (uap->flags & SF_NODISKIO)
2080				error = EBUSY;
2081			else {
2082				int bsize, resid;
2083
2084				/*
2085				 * Ensure that our page is still around
2086				 * when the I/O completes.
2087				 */
2088				vm_page_io_start(pg);
2089				VM_OBJECT_UNLOCK(obj);
2090
2091				/*
2092				 * Get the page from backing store.
2093				 */
2094				bsize = vp->v_mount->mnt_stat.f_iosize;
2095				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2096				vn_lock(vp, LK_SHARED | LK_RETRY, td);
2097
2098				/*
2099				 * XXXMAC: Because we don't have fp->f_cred
2100				 * here, we pass in NOCRED.  This is probably
2101				 * wrong, but is consistent with our original
2102				 * implementation.
2103				 */
2104				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2105				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2106				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2107				    td->td_ucred, NOCRED, &resid, td);
2108				VOP_UNLOCK(vp, 0, td);
2109				VFS_UNLOCK_GIANT(vfslocked);
2110				VM_OBJECT_LOCK(obj);
2111				vm_page_io_finish(pg);
2112				if (!error)
2113					VM_OBJECT_UNLOCK(obj);
2114				mbstat.sf_iocnt++;
2115			}
2116			if (error) {
2117				vm_page_lock_queues();
2118				vm_page_unwire(pg, 0);
2119				/*
2120				 * See if anyone else might know about
2121				 * this page.  If not and it is not valid,
2122				 * then free it.
2123				 */
2124				if (pg->wire_count == 0 && pg->valid == 0 &&
2125				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2126				    pg->hold_count == 0) {
2127					vm_page_free(pg);
2128				}
2129				vm_page_unlock_queues();
2130				VM_OBJECT_UNLOCK(obj);
2131				if (error == EAGAIN)
2132					error = 0;	/* not a real error */
2133				break;
2134			}
2135
2136			/*
2137			 * Get a sendfile buf.  We usually wait as long
2138			 * as necessary, but this wait can be interrupted.
2139			 */
2140			if ((sf = sf_buf_alloc(pg,
2141			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2142				mbstat.sf_allocfail++;
2143				vm_page_lock_queues();
2144				vm_page_unwire(pg, 0);
2145				/*
2146				 * XXX: Not same check as above!?
2147				 */
2148				if (pg->wire_count == 0 && pg->object == NULL)
2149					vm_page_free(pg);
2150				vm_page_unlock_queues();
2151				error = (mnw ? EAGAIN : EINTR);
2152				break;
2153			}
2154
2155			/*
2156			 * Get an mbuf and set it up as having
2157			 * external storage.
2158			 */
2159			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2160			if (m0 == NULL) {
2161				error = (mnw ? EAGAIN : ENOBUFS);
2162				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2163				break;
2164			}
2165			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2166			    sf, M_RDONLY, EXT_SFBUF);
2167			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2168			m0->m_len = xfsize;
2169
2170			/* Append to mbuf chain. */
2171			if (m != NULL)
2172				m_cat(m, m0);
2173			else
2174				m = m0;
2175
2176			/* Keep track of bits processed. */
2177			loopbytes += xfsize;
2178			off += xfsize;
2179		}
2180
2181		/* Add the buffer chain to the socket buffer. */
2182		if (m != NULL) {
2183			int mlen;
2184
2185			mlen = m_length(m, NULL);
2186			SOCKBUF_LOCK(&so->so_snd);
2187			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2188				error = EPIPE;
2189				SOCKBUF_UNLOCK(&so->so_snd);
2190				goto done;
2191			}
2192			SOCKBUF_UNLOCK(&so->so_snd);
2193			error = (*so->so_proto->pr_usrreqs->pru_send)
2194				    (so, 0, m, NULL, NULL, td);
2195			if (!error)
2196				sbytes += mlen;
2197			m = NULL;	/* pru_send always consumes */
2198		}
2199
2200		/* Quit outer loop on error or when we're done. */
2201		if (error || done)
2202			goto done;
2203	}
2204
2205	/*
2206	 * Send trailers. Wimp out and use writev(2).
2207	 */
2208	if (trl_uio != NULL) {
2209		error = kern_writev(td, uap->s, trl_uio);
2210		if (error)
2211			goto done;
2212		sbytes += td->td_retval[0];
2213	}
2214
2215done:
2216	SOCKBUF_LOCK(&so->so_snd);
2217	sbunlock(&so->so_snd);
2218	SOCKBUF_UNLOCK(&so->so_snd);
2219out:
2220	/*
2221	 * If there was no error we have to clear td->td_retval[0]
2222	 * because it may have been set by writev.
2223	 */
2224	if (error == 0) {
2225		td->td_retval[0] = 0;
2226	}
2227	if (uap->sbytes != NULL) {
2228		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2229	}
2230	if (obj != NULL)
2231		vm_object_deallocate(obj);
2232	if (vp != NULL) {
2233		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2234		vrele(vp);
2235		VFS_UNLOCK_GIANT(vfslocked);
2236	}
2237	if (so)
2238		fdrop(sock_fp, td);
2239	if (m)
2240		m_freem(m);
2241
2242	NET_UNLOCK_GIANT();
2243
2244	if (error == ERESTART)
2245		error = EINTR;
2246
2247	return (error);
2248}
2249
2250/*
2251 * SCTP syscalls.
2252 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2253 * otherwise all return EOPNOTSUPP.
2254 * XXX: We should make this loadable one day.
2255 */
2256int
2257sctp_peeloff(td, uap)
2258	struct thread *td;
2259	struct sctp_peeloff_args /* {
2260		int	sd;
2261		caddr_t	name;
2262	} */ *uap;
2263{
2264#ifdef SCTP
2265	struct filedesc *fdp;
2266	struct file *nfp = NULL;
2267	int error;
2268	struct socket *head, *so;
2269	int fd;
2270	u_int fflag;
2271
2272	fdp = td->td_proc->p_fd;
2273	error = fgetsock(td, uap->sd, &head, &fflag);
2274	if (error)
2275		goto done2;
2276	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2277	if (error)
2278		goto done2;
2279	/*
2280	 * At this point we know we do have a assoc to pull
2281	 * we proceed to get the fd setup. This may block
2282	 * but that is ok.
2283	 */
2284
2285	error = falloc(td, &nfp, &fd);
2286	if (error)
2287		goto done;
2288	td->td_retval[0] = fd;
2289
2290	so = sonewconn(head, SS_ISCONNECTED);
2291	if (so == NULL)
2292		goto noconnection;
2293	/*
2294	 * Before changing the flags on the socket, we have to bump the
2295	 * reference count.  Otherwise, if the protocol calls sofree(),
2296	 * the socket will be released due to a zero refcount.
2297	 */
2298        SOCK_LOCK(so);
2299        soref(so);                      /* file descriptor reference */
2300        SOCK_UNLOCK(so);
2301
2302	ACCEPT_LOCK();
2303
2304	TAILQ_REMOVE(&head->so_comp, so, so_list);
2305	head->so_qlen--;
2306	so->so_state |= (head->so_state & SS_NBIO);
2307	so->so_state &= ~SS_NOFDREF;
2308	so->so_qstate &= ~SQ_COMP;
2309	so->so_head = NULL;
2310
2311	ACCEPT_UNLOCK();
2312
2313	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2314	if (error)
2315		goto noconnection;
2316	if (head->so_sigio != NULL)
2317		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2318
2319	FILE_LOCK(nfp);
2320	nfp->f_data = so;
2321	nfp->f_flag = fflag;
2322	nfp->f_ops = &socketops;
2323	nfp->f_type = DTYPE_SOCKET;
2324	FILE_UNLOCK(nfp);
2325
2326noconnection:
2327	/*
2328	 * close the new descriptor, assuming someone hasn't ripped it
2329	 * out from under us.
2330	 */
2331	if (error)
2332		fdclose(fdp, nfp, fd, td);
2333
2334	/*
2335	 * Release explicitly held references before returning.
2336	 */
2337done:
2338	if (nfp != NULL)
2339		fdrop(nfp, td);
2340	fputsock(head);
2341done2:
2342	return (error);
2343#else  /* SCTP */
2344	return (EOPNOTSUPP);
2345#endif /* SCTP */
2346}
2347
2348int
2349sctp_generic_sendmsg (td, uap)
2350	struct thread *td;
2351	struct sctp_generic_sendmsg_args /* {
2352		int sd,
2353		caddr_t msg,
2354		int mlen,
2355		caddr_t to,
2356		__socklen_t tolen,
2357		struct sctp_sndrcvinfo *sinfo,
2358		int flags
2359	} */ *uap;
2360{
2361#ifdef SCTP
2362	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2363	struct socket *so;
2364	struct file *fp;
2365	int use_rcvinfo = 1;
2366	int error = 0, len;
2367	struct sockaddr *to = NULL;
2368#ifdef KTRACE
2369	struct uio *ktruio = NULL;
2370#endif
2371	struct uio auio;
2372	struct iovec iov[1];
2373
2374	if (uap->sinfo) {
2375		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2376		if (error)
2377			return (error);
2378		u_sinfo = &sinfo;
2379	}
2380	if (uap->tolen) {
2381		error = getsockaddr(&to, uap->to, uap->tolen);
2382		if (error) {
2383			to = NULL;
2384			goto sctp_bad2;
2385		}
2386	}
2387
2388	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2389	if (error)
2390		goto sctp_bad;
2391
2392	iov[0].iov_base = uap->msg;
2393	iov[0].iov_len = uap->mlen;
2394
2395	so = (struct socket *)fp->f_data;
2396#ifdef MAC
2397	SOCK_LOCK(so);
2398	error = mac_check_socket_send(td->td_ucred, so);
2399	SOCK_UNLOCK(so);
2400	if (error)
2401		goto sctp_bad;
2402#endif /* MAC */
2403
2404	auio.uio_iov =  iov;
2405	auio.uio_iovcnt = 1;
2406	auio.uio_segflg = UIO_USERSPACE;
2407	auio.uio_rw = UIO_WRITE;
2408	auio.uio_td = td;
2409	auio.uio_offset = 0;			/* XXX */
2410	auio.uio_resid = 0;
2411	len = auio.uio_resid = uap->mlen;
2412	error = sctp_lower_sosend(so, to, &auio,
2413		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2414		    uap->flags, use_rcvinfo, u_sinfo, td);
2415	if (error) {
2416		if (auio.uio_resid != len && (error == ERESTART ||
2417		    error == EINTR || error == EWOULDBLOCK))
2418			error = 0;
2419		/* Generation of SIGPIPE can be controlled per socket. */
2420		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2421		    !(uap->flags & MSG_NOSIGNAL)) {
2422			PROC_LOCK(td->td_proc);
2423			psignal(td->td_proc, SIGPIPE);
2424			PROC_UNLOCK(td->td_proc);
2425		}
2426	}
2427	if (error == 0)
2428		td->td_retval[0] = len - auio.uio_resid;
2429#ifdef KTRACE
2430	if (ktruio != NULL) {
2431		ktruio->uio_resid = td->td_retval[0];
2432		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2433	}
2434#endif /* KTRACE */
2435sctp_bad:
2436	fdrop(fp, td);
2437sctp_bad2:
2438	if (to)
2439		free(to, M_SONAME);
2440	return (error);
2441#else  /* SCTP */
2442	return (EOPNOTSUPP);
2443#endif /* SCTP */
2444}
2445
2446int
2447sctp_generic_sendmsg_iov(td, uap)
2448	struct thread *td;
2449	struct sctp_generic_sendmsg_iov_args /* {
2450		int sd,
2451		struct iovec *iov,
2452		int iovlen,
2453		caddr_t to,
2454		__socklen_t tolen,
2455		struct sctp_sndrcvinfo *sinfo,
2456		int flags
2457	} */ *uap;
2458{
2459#ifdef SCTP
2460	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2461	struct socket *so;
2462	struct file *fp;
2463	int use_rcvinfo = 1;
2464	int error=0, len, i;
2465	struct sockaddr *to = NULL;
2466#ifdef KTRACE
2467	struct uio *ktruio = NULL;
2468#endif
2469	struct uio auio;
2470	struct iovec *iov, *tiov;
2471
2472	if (uap->sinfo) {
2473		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2474		if (error)
2475			return (error);
2476		u_sinfo = &sinfo;
2477	}
2478	if (uap->tolen) {
2479		error = getsockaddr(&to, uap->to, uap->tolen);
2480		if (error) {
2481			to = NULL;
2482			goto sctp_bad2;
2483		}
2484	}
2485
2486	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2487	if (error)
2488		goto sctp_bad1;
2489
2490	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2491	if (error)
2492		goto sctp_bad1;
2493
2494	so = (struct socket *)fp->f_data;
2495#ifdef MAC
2496	SOCK_LOCK(so);
2497	error = mac_check_socket_send(td->td_ucred, so);
2498	SOCK_UNLOCK(so);
2499	if (error)
2500		goto sctp_bad;
2501#endif /* MAC */
2502
2503	auio.uio_iov =  iov;
2504	auio.uio_iovcnt = uap->iovlen;
2505	auio.uio_segflg = UIO_USERSPACE;
2506	auio.uio_rw = UIO_WRITE;
2507	auio.uio_td = td;
2508	auio.uio_offset = 0;			/* XXX */
2509	auio.uio_resid = 0;
2510	tiov = iov;
2511	for (i = 0; i <uap->iovlen; i++, tiov++) {
2512		if ((auio.uio_resid += tiov->iov_len) < 0) {
2513			error = EINVAL;
2514			goto sctp_bad;
2515		}
2516	}
2517	len = auio.uio_resid;
2518	error = sctp_lower_sosend(so, to, &auio,
2519		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2520		    uap->flags, use_rcvinfo, u_sinfo, td);
2521	if (error) {
2522		if (auio.uio_resid != len && (error == ERESTART ||
2523		    error == EINTR || error == EWOULDBLOCK))
2524			error = 0;
2525		/* Generation of SIGPIPE can be controlled per socket */
2526		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2527		    !(uap->flags & MSG_NOSIGNAL)) {
2528			PROC_LOCK(td->td_proc);
2529			psignal(td->td_proc, SIGPIPE);
2530			PROC_UNLOCK(td->td_proc);
2531		}
2532	}
2533	if (error == 0)
2534		td->td_retval[0] = len - auio.uio_resid;
2535#ifdef KTRACE
2536	if (ktruio != NULL) {
2537		ktruio->uio_resid = td->td_retval[0];
2538		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2539	}
2540#endif /* KTRACE */
2541sctp_bad:
2542	free(iov, M_IOV);
2543sctp_bad1:
2544	fdrop(fp, td);
2545sctp_bad2:
2546	if (to)
2547		free(to, M_SONAME);
2548	return (error);
2549#else  /* SCTP */
2550	return (EOPNOTSUPP);
2551#endif /* SCTP */
2552}
2553
2554int
2555sctp_generic_recvmsg(td, uap)
2556	struct thread *td;
2557	struct sctp_generic_recvmsg_args /* {
2558		int sd,
2559		struct iovec *iov,
2560		int iovlen,
2561		struct sockaddr *from,
2562		__socklen_t *fromlenaddr,
2563		struct sctp_sndrcvinfo *sinfo,
2564		int *msg_flags
2565	} */ *uap;
2566{
2567#ifdef SCTP
2568	u_int8_t sockbufstore[256];
2569	struct uio auio;
2570	struct iovec *iov, *tiov;
2571	struct sctp_sndrcvinfo sinfo;
2572	struct socket *so;
2573	struct file *fp;
2574	struct sockaddr *fromsa;
2575	int fromlen;
2576	int len, i, msg_flags;
2577	int error = 0;
2578#ifdef KTRACE
2579	struct uio *ktruio = NULL;
2580#endif
2581	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2582	if (error) {
2583		return (error);
2584	}
2585	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2586	if (error) {
2587		goto out1;
2588	}
2589
2590	so = fp->f_data;
2591#ifdef MAC
2592	SOCK_LOCK(so);
2593	error = mac_check_socket_receive(td->td_ucred, so);
2594	SOCK_UNLOCK(so);
2595	if (error) {
2596		goto out;
2597		return (error);
2598	}
2599#endif /* MAC */
2600
2601	if (uap->fromlenaddr) {
2602		error = copyin(uap->fromlenaddr,
2603		    &fromlen, sizeof (fromlen));
2604		if (error) {
2605			goto out;
2606		}
2607	} else {
2608		fromlen = 0;
2609	}
2610	if(uap->msg_flags) {
2611		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2612		if (error) {
2613			goto out;
2614		}
2615	} else {
2616		msg_flags = 0;
2617	}
2618	auio.uio_iov = iov;
2619	auio.uio_iovcnt = uap->iovlen;
2620  	auio.uio_segflg = UIO_USERSPACE;
2621	auio.uio_rw = UIO_READ;
2622	auio.uio_td = td;
2623	auio.uio_offset = 0;			/* XXX */
2624	auio.uio_resid = 0;
2625	tiov = iov;
2626	for (i = 0; i <uap->iovlen; i++, tiov++) {
2627		if ((auio.uio_resid += tiov->iov_len) < 0) {
2628			error = EINVAL;
2629			goto out;
2630		}
2631	}
2632	len = auio.uio_resid;
2633	fromsa = (struct sockaddr *)sockbufstore;
2634
2635#ifdef KTRACE
2636	if (KTRPOINT(td, KTR_GENIO))
2637		ktruio = cloneuio(&auio);
2638#endif /* KTRACE */
2639	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2640		    fromsa, fromlen, &msg_flags,
2641		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2642	if (error) {
2643		if (auio.uio_resid != (int)len && (error == ERESTART ||
2644		    error == EINTR || error == EWOULDBLOCK))
2645			error = 0;
2646	} else {
2647		if (uap->sinfo)
2648			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2649	}
2650#ifdef KTRACE
2651	if (ktruio != NULL) {
2652		ktruio->uio_resid = (int)len - auio.uio_resid;
2653		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2654	}
2655#endif /* KTRACE */
2656	if (error)
2657		goto out;
2658	td->td_retval[0] = (int)len - auio.uio_resid;
2659
2660	if (fromlen && uap->from) {
2661		len = fromlen;
2662		if (len <= 0 || fromsa == 0)
2663			len = 0;
2664		else {
2665			len = MIN(len, fromsa->sa_len);
2666			error = copyout(fromsa, uap->from, (unsigned)len);
2667			if (error)
2668				goto out;
2669		}
2670		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2671		if (error) {
2672			goto out;
2673		}
2674	}
2675	if (uap->msg_flags) {
2676		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2677		if (error) {
2678			goto out;
2679		}
2680	}
2681out:
2682	free(iov, M_IOV);
2683out1:
2684	fdrop(fp, td);
2685	return (error);
2686#else  /* SCTP */
2687	return (EOPNOTSUPP);
2688#endif /* SCTP */
2689}
2690