kern_sendfile.c revision 171744
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 171744 2007-08-06 14:26:03Z rwatson $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
/* Shared back ends for the send- and receive-family system calls below. */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * Workers that take a `compat' argument.  accept1() is called with
 * compat set to 1 from the COMPAT_OLDSOCK entry point oaccept() and with
 * 0 from accept().
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
94
/*
 * NSFBUFS-related variables and associated sysctls
 *
 * The three counters are exported under kern.ipc.  nsfbufs is registered
 * with CTLFLAG_RDTUN; the peak and in-use counters are registered with
 * CTLFLAG_RD.
 */
int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs at peak usage */
int nsfbufsused;	/* number of sf_bufs in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry.  A reference on the
111 * file entry is held upon returning.  This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count.  If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120	struct file *fp;
121	int error;
122
123	fp = NULL;
124	if (fdp == NULL)
125		error = EBADF;
126	else {
127		FILEDESC_SLOCK(fdp);
128		fp = fget_locked(fdp, fd);
129		if (fp == NULL)
130			error = EBADF;
131		else if (fp->f_type != DTYPE_SOCKET) {
132			fp = NULL;
133			error = ENOTSOCK;
134		} else {
135			fhold(fp);
136			if (fflagp != NULL)
137				*fflagp = fp->f_flag;
138			error = 0;
139		}
140		FILEDESC_SUNLOCK(fdp);
141	}
142	*fpp = fp;
143	return (error);
144}
145
/*
 * System call interface to the socket abstraction.
 *
 * Kernels built with COMPAT_43 also get COMPAT_OLDSOCK, which guards the
 * old-style entry points (oaccept(), osend(), orecv(), ...) and the
 * MSG_COMPAT handling further down in this file.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
152
153int
154socket(td, uap)
155	struct thread *td;
156	struct socket_args /* {
157		int	domain;
158		int	type;
159		int	protocol;
160	} */ *uap;
161{
162	struct filedesc *fdp;
163	struct socket *so;
164	struct file *fp;
165	int fd, error;
166
167#ifdef MAC
168	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
169	    uap->protocol);
170	if (error)
171		return (error);
172#endif
173	fdp = td->td_proc->p_fd;
174	error = falloc(td, &fp, &fd);
175	if (error)
176		return (error);
177	/* An extra reference on `fp' has been held for us by falloc(). */
178	error = socreate(uap->domain, &so, uap->type, uap->protocol,
179	    td->td_ucred, td);
180	if (error) {
181		fdclose(fdp, fp, fd, td);
182	} else {
183		FILE_LOCK(fp);
184		fp->f_data = so;	/* already has ref count */
185		fp->f_flag = FREAD|FWRITE;
186		fp->f_type = DTYPE_SOCKET;
187		fp->f_ops = &socketops;
188		FILE_UNLOCK(fp);
189		td->td_retval[0] = fd;
190	}
191	fdrop(fp, td);
192	return (error);
193}
194
195/* ARGSUSED */
196int
197bind(td, uap)
198	struct thread *td;
199	struct bind_args /* {
200		int	s;
201		caddr_t	name;
202		int	namelen;
203	} */ *uap;
204{
205	struct sockaddr *sa;
206	int error;
207
208	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
209		return (error);
210
211	error = kern_bind(td, uap->s, sa);
212	free(sa, M_SONAME);
213	return (error);
214}
215
216int
217kern_bind(td, fd, sa)
218	struct thread *td;
219	int fd;
220	struct sockaddr *sa;
221{
222	struct socket *so;
223	struct file *fp;
224	int error;
225
226	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
227	if (error)
228		return (error);
229	so = fp->f_data;
230#ifdef MAC
231	SOCK_LOCK(so);
232	error = mac_check_socket_bind(td->td_ucred, so, sa);
233	SOCK_UNLOCK(so);
234	if (error)
235		goto done;
236#endif
237	error = sobind(so, sa, td);
238#ifdef MAC
239done:
240#endif
241	fdrop(fp, td);
242	return (error);
243}
244
245/* ARGSUSED */
246int
247listen(td, uap)
248	struct thread *td;
249	struct listen_args /* {
250		int	s;
251		int	backlog;
252	} */ *uap;
253{
254	struct socket *so;
255	struct file *fp;
256	int error;
257
258	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
259	if (error == 0) {
260		so = fp->f_data;
261#ifdef MAC
262		SOCK_LOCK(so);
263		error = mac_check_socket_listen(td->td_ucred, so);
264		SOCK_UNLOCK(so);
265		if (error)
266			goto done;
267#endif
268		error = solisten(so, uap->backlog, td);
269#ifdef MAC
270done:
271#endif
272		fdrop(fp, td);
273	}
274	return(error);
275}
276
277/*
278 * accept1()
279 */
280static int
281accept1(td, uap, compat)
282	struct thread *td;
283	struct accept_args /* {
284		int	s;
285		struct sockaddr	* __restrict name;
286		socklen_t	* __restrict anamelen;
287	} */ *uap;
288	int compat;
289{
290	struct sockaddr *name;
291	socklen_t namelen;
292	struct file *fp;
293	int error;
294
295	if (uap->name == NULL)
296		return (kern_accept(td, uap->s, NULL, NULL, NULL));
297
298	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
299	if (error)
300		return (error);
301
302	error = kern_accept(td, uap->s, &name, &namelen, &fp);
303
304	/*
305	 * return a namelen of zero for older code which might
306	 * ignore the return value from accept.
307	 */
308	if (error) {
309		(void) copyout(&namelen,
310		    uap->anamelen, sizeof(*uap->anamelen));
311		return (error);
312	}
313
314	if (error == 0 && name != NULL) {
315#ifdef COMPAT_OLDSOCK
316		if (compat)
317			((struct osockaddr *)name)->sa_family =
318			    name->sa_family;
319#endif
320		error = copyout(name, uap->name, namelen);
321	}
322	if (error == 0)
323		error = copyout(&namelen, uap->anamelen,
324		    sizeof(namelen));
325	if (error)
326		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
327	fdrop(fp, td);
328	free(name, M_SONAME);
329	return (error);
330}
331
/*
 * Accept one connection from the listening socket behind descriptor `s'.
 *
 * A new file/descriptor pair is allocated with falloc(); the descriptor
 * number is stored in td_retval[0].
 *
 * name/namelen: if `name' is non-NULL, *namelen is the caller's buffer
 *	size on entry.  On success *name receives the address returned by
 *	soaccept() (ownership passes to the caller, who frees it with
 *	M_SONAME) and *namelen is clamped to sa_len.  *namelen is set to 0
 *	when soaccept() fails or returns no address.
 * fp:	if non-NULL, receives the new file with a reference held on
 *	success, or NULL on error.
 *
 * Returns EINVAL if the socket is not listening (SO_ACCEPTCONN clear),
 * EWOULDBLOCK for a non-blocking socket with an empty completed queue,
 * a pending so_error from the listening socket, or an error from
 * getsock(), the MAC check, falloc(), msleep() or soaccept().
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
	struct filedesc *fdp;
	struct file *headfp, *nfp = NULL;
	struct sockaddr *sa = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	if (name) {
		*name = NULL;
		/*
		 * NOTE(review): if socklen_t is an unsigned type this
		 * comparison can never be true -- confirm.
		 */
		if (*namelen < 0)
			return (EINVAL);
	}

	fdp = td->td_proc->p_fd;
	error = getsock(fdp, s, &headfp, &fflag);
	if (error)
		return (error);
	head = headfp->f_data;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_check_socket_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking listener with nothing queued: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a completed connection or an error shows up. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* Report and clear a pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_type = DTYPE_SOCKET;
	nfp->f_ops = &socketops;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (name)
			*namelen = 0;
		goto noconnection;
	}
	if (sa == NULL) {
		/* Success, but the protocol supplied no peer address. */
		if (name)
			*namelen = 0;
		goto done;
	}
	if (name) {
		/* check sa_len before it is destroyed */
		if (*namelen > sa->sa_len)
			*namelen = sa->sa_len;
		/* Hand the address to the caller; do not free it below. */
		*name = sa;
		sa = NULL;
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.  We return
	 * a reference on nfp to the caller on success if they request it.
	 */
done:
	if (fp != NULL) {
		if (error == 0) {
			*fp = nfp;
			nfp = NULL;
		} else
			*fp = NULL;
	}
	if (nfp != NULL)
		fdrop(nfp, td);
	fdrop(headfp, td);
	return (error);
}
488
/*
 * accept(2): native entry point; forwards to accept1() with compat
 * disabled.
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
497
498#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept entry point; forwards to accept1() with compat
 * enabled so the address is returned in the old sockaddr layout.
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
507#endif /* COMPAT_OLDSOCK */
508
509/* ARGSUSED */
510int
511connect(td, uap)
512	struct thread *td;
513	struct connect_args /* {
514		int	s;
515		caddr_t	name;
516		int	namelen;
517	} */ *uap;
518{
519	struct sockaddr *sa;
520	int error;
521
522	error = getsockaddr(&sa, uap->name, uap->namelen);
523	if (error)
524		return (error);
525
526	error = kern_connect(td, uap->s, sa);
527	free(sa, M_SONAME);
528	return (error);
529}
530
531
532int
533kern_connect(td, fd, sa)
534	struct thread *td;
535	int fd;
536	struct sockaddr *sa;
537{
538	struct socket *so;
539	struct file *fp;
540	int error;
541	int interrupted = 0;
542
543	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
544	if (error)
545		return (error);
546	so = fp->f_data;
547	if (so->so_state & SS_ISCONNECTING) {
548		error = EALREADY;
549		goto done1;
550	}
551#ifdef MAC
552	SOCK_LOCK(so);
553	error = mac_check_socket_connect(td->td_ucred, so, sa);
554	SOCK_UNLOCK(so);
555	if (error)
556		goto bad;
557#endif
558	error = soconnect(so, sa, td);
559	if (error)
560		goto bad;
561	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
562		error = EINPROGRESS;
563		goto done1;
564	}
565	SOCK_LOCK(so);
566	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
567		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
568		    "connec", 0);
569		if (error) {
570			if (error == EINTR || error == ERESTART)
571				interrupted = 1;
572			break;
573		}
574	}
575	if (error == 0) {
576		error = so->so_error;
577		so->so_error = 0;
578	}
579	SOCK_UNLOCK(so);
580bad:
581	if (!interrupted)
582		so->so_state &= ~SS_ISCONNECTING;
583	if (error == ERESTART)
584		error = EINTR;
585done1:
586	fdrop(fp, td);
587	return (error);
588}
589
/*
 * socketpair(2): create two sockets of the requested domain/type/protocol,
 * connect them to each other, attach each to a new file descriptor and
 * copy the two descriptor numbers out to uap->rsv.
 *
 * Error handling is a ladder of labels (free4 .. free1) that unwinds in
 * reverse order of acquisition; later labels fall through into earlier
 * ones.
 */
int
socketpair(td, uap)
	struct thread *td;
	struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];

#ifdef MAC
	/* We might want to have a separate check for socket pairs. */
	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
	    uap->protocol);
	if (error)
		return (error);
#endif

	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		return (error);
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	FILE_LOCK(fp1);
	fp1->f_flag = FREAD|FWRITE;
	fp1->f_type = DTYPE_SOCKET;
	fp1->f_ops = &socketops;
	FILE_UNLOCK(fp1);
	FILE_LOCK(fp2);
	fp2->f_flag = FREAD|FWRITE;
	fp2->f_type = DTYPE_SOCKET;
	fp2->f_ops = &socketops;
	FILE_UNLOCK(fp2);
	/*
	 * The files now carry socketops, so clear the local pointers: the
	 * explicit soclose() calls at free2/free1 are skipped from here on.
	 * NOTE(review): presumably the sockets are then released through
	 * the file close path in fdclose()/fdrop() -- confirm.
	 */
	so1 = so2 = NULL;
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	if (error)
		goto free4;
	/* Success: drop the extra falloc() references. */
	fdrop(fp1, td);
	fdrop(fp2, td);
	return (0);
free4:
	fdclose(fdp, fp2, sv[1], td);
	fdrop(fp2, td);
free3:
	fdclose(fdp, fp1, sv[0], td);
	fdrop(fp1, td);
free2:
	if (so2 != NULL)
		(void)soclose(so2);
free1:
	if (so1 != NULL)
		(void)soclose(so1);
	return (error);
}
674
675static int
676sendit(td, s, mp, flags)
677	struct thread *td;
678	int s;
679	struct msghdr *mp;
680	int flags;
681{
682	struct mbuf *control;
683	struct sockaddr *to;
684	int error;
685
686	if (mp->msg_name != NULL) {
687		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
688		if (error) {
689			to = NULL;
690			goto bad;
691		}
692		mp->msg_name = to;
693	} else {
694		to = NULL;
695	}
696
697	if (mp->msg_control) {
698		if (mp->msg_controllen < sizeof(struct cmsghdr)
699#ifdef COMPAT_OLDSOCK
700		    && mp->msg_flags != MSG_COMPAT
701#endif
702		) {
703			error = EINVAL;
704			goto bad;
705		}
706		error = sockargs(&control, mp->msg_control,
707		    mp->msg_controllen, MT_CONTROL);
708		if (error)
709			goto bad;
710#ifdef COMPAT_OLDSOCK
711		if (mp->msg_flags == MSG_COMPAT) {
712			struct cmsghdr *cm;
713
714			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
715			if (control == 0) {
716				error = ENOBUFS;
717				goto bad;
718			} else {
719				cm = mtod(control, struct cmsghdr *);
720				cm->cmsg_len = control->m_len;
721				cm->cmsg_level = SOL_SOCKET;
722				cm->cmsg_type = SCM_RIGHTS;
723			}
724		}
725#endif
726	} else {
727		control = NULL;
728	}
729
730	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
731
732bad:
733	if (to)
734		FREE(to, M_SONAME);
735	return (error);
736}
737
738int
739kern_sendit(td, s, mp, flags, control, segflg)
740	struct thread *td;
741	int s;
742	struct msghdr *mp;
743	int flags;
744	struct mbuf *control;
745	enum uio_seg segflg;
746{
747	struct file *fp;
748	struct uio auio;
749	struct iovec *iov;
750	struct socket *so;
751	int i;
752	int len, error;
753#ifdef KTRACE
754	struct uio *ktruio = NULL;
755#endif
756
757	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
758	if (error)
759		return (error);
760	so = (struct socket *)fp->f_data;
761
762#ifdef MAC
763	SOCK_LOCK(so);
764	error = mac_check_socket_send(td->td_ucred, so);
765	SOCK_UNLOCK(so);
766	if (error)
767		goto bad;
768#endif
769
770	auio.uio_iov = mp->msg_iov;
771	auio.uio_iovcnt = mp->msg_iovlen;
772	auio.uio_segflg = segflg;
773	auio.uio_rw = UIO_WRITE;
774	auio.uio_td = td;
775	auio.uio_offset = 0;			/* XXX */
776	auio.uio_resid = 0;
777	iov = mp->msg_iov;
778	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
779		if ((auio.uio_resid += iov->iov_len) < 0) {
780			error = EINVAL;
781			goto bad;
782		}
783	}
784#ifdef KTRACE
785	if (KTRPOINT(td, KTR_GENIO))
786		ktruio = cloneuio(&auio);
787#endif
788	len = auio.uio_resid;
789	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
790	if (error) {
791		if (auio.uio_resid != len && (error == ERESTART ||
792		    error == EINTR || error == EWOULDBLOCK))
793			error = 0;
794		/* Generation of SIGPIPE can be controlled per socket */
795		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
796		    !(flags & MSG_NOSIGNAL)) {
797			PROC_LOCK(td->td_proc);
798			psignal(td->td_proc, SIGPIPE);
799			PROC_UNLOCK(td->td_proc);
800		}
801	}
802	if (error == 0)
803		td->td_retval[0] = len - auio.uio_resid;
804#ifdef KTRACE
805	if (ktruio != NULL) {
806		ktruio->uio_resid = td->td_retval[0];
807		ktrgenio(s, UIO_WRITE, ktruio, error);
808	}
809#endif
810bad:
811	fdrop(fp, td);
812	return (error);
813}
814
815int
816sendto(td, uap)
817	struct thread *td;
818	struct sendto_args /* {
819		int	s;
820		caddr_t	buf;
821		size_t	len;
822		int	flags;
823		caddr_t	to;
824		int	tolen;
825	} */ *uap;
826{
827	struct msghdr msg;
828	struct iovec aiov;
829	int error;
830
831	msg.msg_name = uap->to;
832	msg.msg_namelen = uap->tolen;
833	msg.msg_iov = &aiov;
834	msg.msg_iovlen = 1;
835	msg.msg_control = 0;
836#ifdef COMPAT_OLDSOCK
837	msg.msg_flags = 0;
838#endif
839	aiov.iov_base = uap->buf;
840	aiov.iov_len = uap->len;
841	error = sendit(td, uap->s, &msg, uap->flags);
842	return (error);
843}
844
845#ifdef COMPAT_OLDSOCK
846int
847osend(td, uap)
848	struct thread *td;
849	struct osend_args /* {
850		int	s;
851		caddr_t	buf;
852		int	len;
853		int	flags;
854	} */ *uap;
855{
856	struct msghdr msg;
857	struct iovec aiov;
858	int error;
859
860	msg.msg_name = 0;
861	msg.msg_namelen = 0;
862	msg.msg_iov = &aiov;
863	msg.msg_iovlen = 1;
864	aiov.iov_base = uap->buf;
865	aiov.iov_len = uap->len;
866	msg.msg_control = 0;
867	msg.msg_flags = 0;
868	error = sendit(td, uap->s, &msg, uap->flags);
869	return (error);
870}
871
872int
873osendmsg(td, uap)
874	struct thread *td;
875	struct osendmsg_args /* {
876		int	s;
877		caddr_t	msg;
878		int	flags;
879	} */ *uap;
880{
881	struct msghdr msg;
882	struct iovec *iov;
883	int error;
884
885	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
886	if (error)
887		return (error);
888	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
889	if (error)
890		return (error);
891	msg.msg_iov = iov;
892	msg.msg_flags = MSG_COMPAT;
893	error = sendit(td, uap->s, &msg, uap->flags);
894	free(iov, M_IOV);
895	return (error);
896}
897#endif
898
899int
900sendmsg(td, uap)
901	struct thread *td;
902	struct sendmsg_args /* {
903		int	s;
904		caddr_t	msg;
905		int	flags;
906	} */ *uap;
907{
908	struct msghdr msg;
909	struct iovec *iov;
910	int error;
911
912	error = copyin(uap->msg, &msg, sizeof (msg));
913	if (error)
914		return (error);
915	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
916	if (error)
917		return (error);
918	msg.msg_iov = iov;
919#ifdef COMPAT_OLDSOCK
920	msg.msg_flags = 0;
921#endif
922	error = sendit(td, uap->s, &msg, uap->flags);
923	free(iov, M_IOV);
924	return (error);
925}
926
/*
 * Receive a message on the socket behind descriptor `s' into the buffers
 * described by `mp'.
 *
 * The data iovecs are always treated as user-space addresses
 * (uio_segflg is UIO_USERSPACE).  `fromseg' only selects how the source
 * address is delivered to mp->msg_name: copyout() for UIO_USERSPACE,
 * bcopy() otherwise.
 *
 * Control data: if `controlp' is non-NULL the control mbuf chain is
 * returned through it on success and the caller owns it.  Otherwise, if
 * mp->msg_control is set, the chain is copied out to that buffer
 * (setting MSG_CTRUNC when it does not fit) and then freed here.
 *
 * On success the number of bytes received is stored in td_retval[0] and
 * mp->msg_namelen / mp->msg_controllen / mp->msg_flags are updated.  A
 * partial transfer cut short by ERESTART, EINTR or EWOULDBLOCK is
 * reported as success.
 */
int
kern_recvit(td, s, mp, fromseg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	enum uio_seg fromseg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	if(controlp != NULL)
		*controlp = 0;

	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total the iovec lengths, rejecting a sum that goes negative. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	/* Only ask for control data when someone will consume it. */
	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* Some data arrived before the interruption: succeed. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	/* `len' is reused below for the address and control lengths. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			if (fromseg == UIO_USERSPACE) {
				error = copyout(fromsa, mp->msg_name,
				    (unsigned)len);
				if (error)
					goto out;
			} else
				bcopy(fromsa, mp->msg_name, len);
		}
		mp->msg_namelen = len;
	}
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Copy out mbuf by mbuf until the user buffer is full. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually delivered. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	if (fromsa)
		FREE(fromsa, M_SONAME);

	/* Hand the control chain to the caller on success, else free it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else  if (control)
		m_freem(control);

	return (error);
}
1083
1084static int
1085recvit(td, s, mp, namelenp)
1086	struct thread *td;
1087	int s;
1088	struct msghdr *mp;
1089	void *namelenp;
1090{
1091	int error;
1092
1093	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1094	if (error)
1095		return (error);
1096	if (namelenp) {
1097		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1098#ifdef COMPAT_OLDSOCK
1099		if (mp->msg_flags & MSG_COMPAT)
1100			error = 0;	/* old recvfrom didn't check */
1101#endif
1102	}
1103	return (error);
1104}
1105
1106int
1107recvfrom(td, uap)
1108	struct thread *td;
1109	struct recvfrom_args /* {
1110		int	s;
1111		caddr_t	buf;
1112		size_t	len;
1113		int	flags;
1114		struct sockaddr * __restrict	from;
1115		socklen_t * __restrict fromlenaddr;
1116	} */ *uap;
1117{
1118	struct msghdr msg;
1119	struct iovec aiov;
1120	int error;
1121
1122	if (uap->fromlenaddr) {
1123		error = copyin(uap->fromlenaddr,
1124		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1125		if (error)
1126			goto done2;
1127	} else {
1128		msg.msg_namelen = 0;
1129	}
1130	msg.msg_name = uap->from;
1131	msg.msg_iov = &aiov;
1132	msg.msg_iovlen = 1;
1133	aiov.iov_base = uap->buf;
1134	aiov.iov_len = uap->len;
1135	msg.msg_control = 0;
1136	msg.msg_flags = uap->flags;
1137	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1138done2:
1139	return(error);
1140}
1141
1142#ifdef COMPAT_OLDSOCK
1143int
1144orecvfrom(td, uap)
1145	struct thread *td;
1146	struct recvfrom_args *uap;
1147{
1148
1149	uap->flags |= MSG_COMPAT;
1150	return (recvfrom(td, uap));
1151}
1152#endif
1153
1154#ifdef COMPAT_OLDSOCK
1155int
1156orecv(td, uap)
1157	struct thread *td;
1158	struct orecv_args /* {
1159		int	s;
1160		caddr_t	buf;
1161		int	len;
1162		int	flags;
1163	} */ *uap;
1164{
1165	struct msghdr msg;
1166	struct iovec aiov;
1167	int error;
1168
1169	msg.msg_name = 0;
1170	msg.msg_namelen = 0;
1171	msg.msg_iov = &aiov;
1172	msg.msg_iovlen = 1;
1173	aiov.iov_base = uap->buf;
1174	aiov.iov_len = uap->len;
1175	msg.msg_control = 0;
1176	msg.msg_flags = uap->flags;
1177	error = recvit(td, uap->s, &msg, NULL);
1178	return (error);
1179}
1180
1181/*
1182 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1183 * overlays the new one, missing only the flags, and with the (old) access
1184 * rights where the control fields are now.
1185 */
1186int
1187orecvmsg(td, uap)
1188	struct thread *td;
1189	struct orecvmsg_args /* {
1190		int	s;
1191		struct	omsghdr *msg;
1192		int	flags;
1193	} */ *uap;
1194{
1195	struct msghdr msg;
1196	struct iovec *iov;
1197	int error;
1198
1199	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1200	if (error)
1201		return (error);
1202	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1203	if (error)
1204		return (error);
1205	msg.msg_flags = uap->flags | MSG_COMPAT;
1206	msg.msg_iov = iov;
1207	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1208	if (msg.msg_controllen && error == 0)
1209		error = copyout(&msg.msg_controllen,
1210		    &uap->msg->msg_accrightslen, sizeof (int));
1211	free(iov, M_IOV);
1212	return (error);
1213}
1214#endif
1215
1216int
1217recvmsg(td, uap)
1218	struct thread *td;
1219	struct recvmsg_args /* {
1220		int	s;
1221		struct	msghdr *msg;
1222		int	flags;
1223	} */ *uap;
1224{
1225	struct msghdr msg;
1226	struct iovec *uiov, *iov;
1227	int error;
1228
1229	error = copyin(uap->msg, &msg, sizeof (msg));
1230	if (error)
1231		return (error);
1232	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1233	if (error)
1234		return (error);
1235	msg.msg_flags = uap->flags;
1236#ifdef COMPAT_OLDSOCK
1237	msg.msg_flags &= ~MSG_COMPAT;
1238#endif
1239	uiov = msg.msg_iov;
1240	msg.msg_iov = iov;
1241	error = recvit(td, uap->s, &msg, NULL);
1242	if (error == 0) {
1243		msg.msg_iov = uiov;
1244		error = copyout(&msg, uap->msg, sizeof(msg));
1245	}
1246	free(iov, M_IOV);
1247	return (error);
1248}
1249
1250/* ARGSUSED */
1251int
1252shutdown(td, uap)
1253	struct thread *td;
1254	struct shutdown_args /* {
1255		int	s;
1256		int	how;
1257	} */ *uap;
1258{
1259	struct socket *so;
1260	struct file *fp;
1261	int error;
1262
1263	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1264	if (error == 0) {
1265		so = fp->f_data;
1266		error = soshutdown(so, uap->how);
1267		fdrop(fp, td);
1268	}
1269	return (error);
1270}
1271
1272/* ARGSUSED */
1273int
1274setsockopt(td, uap)
1275	struct thread *td;
1276	struct setsockopt_args /* {
1277		int	s;
1278		int	level;
1279		int	name;
1280		caddr_t	val;
1281		int	valsize;
1282	} */ *uap;
1283{
1284
1285	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1286	    uap->val, UIO_USERSPACE, uap->valsize));
1287}
1288
1289int
1290kern_setsockopt(td, s, level, name, val, valseg, valsize)
1291	struct thread *td;
1292	int s;
1293	int level;
1294	int name;
1295	void *val;
1296	enum uio_seg valseg;
1297	socklen_t valsize;
1298{
1299	int error;
1300	struct socket *so;
1301	struct file *fp;
1302	struct sockopt sopt;
1303
1304	if (val == NULL && valsize != 0)
1305		return (EFAULT);
1306	if ((int)valsize < 0)
1307		return (EINVAL);
1308
1309	sopt.sopt_dir = SOPT_SET;
1310	sopt.sopt_level = level;
1311	sopt.sopt_name = name;
1312	sopt.sopt_val = val;
1313	sopt.sopt_valsize = valsize;
1314	switch (valseg) {
1315	case UIO_USERSPACE:
1316		sopt.sopt_td = td;
1317		break;
1318	case UIO_SYSSPACE:
1319		sopt.sopt_td = NULL;
1320		break;
1321	default:
1322		panic("kern_setsockopt called with bad valseg");
1323	}
1324
1325	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1326	if (error == 0) {
1327		so = fp->f_data;
1328		error = sosetopt(so, &sopt);
1329		fdrop(fp, td);
1330	}
1331	return(error);
1332}
1333
1334/* ARGSUSED */
1335int
1336getsockopt(td, uap)
1337	struct thread *td;
1338	struct getsockopt_args /* {
1339		int	s;
1340		int	level;
1341		int	name;
1342		void * __restrict	val;
1343		socklen_t * __restrict avalsize;
1344	} */ *uap;
1345{
1346	socklen_t valsize;
1347	int	error;
1348
1349	if (uap->val) {
1350		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1351		if (error)
1352			return (error);
1353	}
1354
1355	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1356	    uap->val, UIO_USERSPACE, &valsize);
1357
1358	if (error == 0)
1359		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1360	return (error);
1361}
1362
1363/*
1364 * Kernel version of getsockopt.
1365 * optval can be a userland or userspace. optlen is always a kernel pointer.
1366 */
1367int
1368kern_getsockopt(td, s, level, name, val, valseg, valsize)
1369	struct thread *td;
1370	int s;
1371	int level;
1372	int name;
1373	void *val;
1374	enum uio_seg valseg;
1375	socklen_t *valsize;
1376{
1377	int error;
1378	struct  socket *so;
1379	struct file *fp;
1380	struct	sockopt sopt;
1381
1382	if (val == NULL)
1383		*valsize = 0;
1384	if ((int)*valsize < 0)
1385		return (EINVAL);
1386
1387	sopt.sopt_dir = SOPT_GET;
1388	sopt.sopt_level = level;
1389	sopt.sopt_name = name;
1390	sopt.sopt_val = val;
1391	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1392	switch (valseg) {
1393	case UIO_USERSPACE:
1394		sopt.sopt_td = td;
1395		break;
1396	case UIO_SYSSPACE:
1397		sopt.sopt_td = NULL;
1398		break;
1399	default:
1400		panic("kern_getsockopt called with bad valseg");
1401	}
1402
1403	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1404	if (error == 0) {
1405		so = fp->f_data;
1406		error = sogetopt(so, &sopt);
1407		*valsize = sopt.sopt_valsize;
1408		fdrop(fp, td);
1409	}
1410	return (error);
1411}
1412
1413/*
1414 * getsockname1() - Get socket name.
1415 */
1416/* ARGSUSED */
1417static int
1418getsockname1(td, uap, compat)
1419	struct thread *td;
1420	struct getsockname_args /* {
1421		int	fdes;
1422		struct sockaddr * __restrict asa;
1423		socklen_t * __restrict alen;
1424	} */ *uap;
1425	int compat;
1426{
1427	struct sockaddr *sa;
1428	socklen_t len;
1429	int error;
1430
1431	error = copyin(uap->alen, &len, sizeof(len));
1432	if (error)
1433		return (error);
1434
1435	error = kern_getsockname(td, uap->fdes, &sa, &len);
1436	if (error)
1437		return (error);
1438
1439	if (len != 0) {
1440#ifdef COMPAT_OLDSOCK
1441		if (compat)
1442			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1443#endif
1444		error = copyout(sa, uap->asa, (u_int)len);
1445	}
1446	free(sa, M_SONAME);
1447	if (error == 0)
1448		error = copyout(&len, uap->alen, sizeof(len));
1449	return (error);
1450}
1451
1452int
1453kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1454    socklen_t *alen)
1455{
1456	struct socket *so;
1457	struct file *fp;
1458	socklen_t len;
1459	int error;
1460
1461	if (*alen < 0)
1462		return (EINVAL);
1463
1464	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1465	if (error)
1466		return (error);
1467	so = fp->f_data;
1468	*sa = NULL;
1469	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1470	if (error)
1471		goto bad;
1472	if (*sa == NULL)
1473		len = 0;
1474	else
1475		len = MIN(*alen, (*sa)->sa_len);
1476	*alen = len;
1477bad:
1478	fdrop(fp, td);
1479	if (error && *sa) {
1480		free(*sa, M_SONAME);
1481		*sa = NULL;
1482	}
1483	return (error);
1484}
1485
/*
 * getsockname(2) system call entry point: getsockname1() with the compat
 * flag cleared.
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int rv;

	rv = getsockname1(td, uap, 0);
	return (rv);
}
1494
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI getsockname entry point: getsockname1() with the compat
 * flag set.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int rv;

	rv = getsockname1(td, uap, 1);
	return (rv);
}
#endif /* COMPAT_OLDSOCK */
1505
1506/*
1507 * getpeername1() - Get name of peer for connected socket.
1508 */
1509/* ARGSUSED */
1510static int
1511getpeername1(td, uap, compat)
1512	struct thread *td;
1513	struct getpeername_args /* {
1514		int	fdes;
1515		struct sockaddr * __restrict	asa;
1516		socklen_t * __restrict	alen;
1517	} */ *uap;
1518	int compat;
1519{
1520	struct sockaddr *sa;
1521	socklen_t len;
1522	int error;
1523
1524	error = copyin(uap->alen, &len, sizeof (len));
1525	if (error)
1526		return (error);
1527
1528	error = kern_getpeername(td, uap->fdes, &sa, &len);
1529	if (error)
1530		return (error);
1531
1532	if (len != 0) {
1533#ifdef COMPAT_OLDSOCK
1534		if (compat)
1535			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1536#endif
1537		error = copyout(sa, uap->asa, (u_int)len);
1538	}
1539	free(sa, M_SONAME);
1540	if (error == 0)
1541		error = copyout(&len, uap->alen, sizeof(len));
1542	return (error);
1543}
1544
1545int
1546kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1547    socklen_t *alen)
1548{
1549	struct socket *so;
1550	struct file *fp;
1551	socklen_t len;
1552	int error;
1553
1554	if (*alen < 0)
1555		return (EINVAL);
1556
1557	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1558	if (error)
1559		return (error);
1560	so = fp->f_data;
1561	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1562		error = ENOTCONN;
1563		goto done;
1564	}
1565	*sa = NULL;
1566	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1567	if (error)
1568		goto bad;
1569	if (*sa == NULL)
1570		len = 0;
1571	else
1572		len = MIN(*alen, (*sa)->sa_len);
1573	*alen = len;
1574bad:
1575	if (error && *sa) {
1576		free(*sa, M_SONAME);
1577		*sa = NULL;
1578	}
1579done:
1580	fdrop(fp, td);
1581	return (error);
1582}
1583
/*
 * getpeername(2) system call entry point: getpeername1() with the compat
 * flag cleared.
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int rv;

	rv = getpeername1(td, uap, 0);
	return (rv);
}
1592
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI getpeername entry point: getpeername1() with the compat
 * flag set.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1604
1605int
1606sockargs(mp, buf, buflen, type)
1607	struct mbuf **mp;
1608	caddr_t buf;
1609	int buflen, type;
1610{
1611	struct sockaddr *sa;
1612	struct mbuf *m;
1613	int error;
1614
1615	if ((u_int)buflen > MLEN) {
1616#ifdef COMPAT_OLDSOCK
1617		if (type == MT_SONAME && (u_int)buflen <= 112)
1618			buflen = MLEN;		/* unix domain compat. hack */
1619		else
1620#endif
1621			if ((u_int)buflen > MCLBYTES)
1622				return (EINVAL);
1623	}
1624	m = m_get(M_TRYWAIT, type);
1625	if (m == NULL)
1626		return (ENOBUFS);
1627	if ((u_int)buflen > MLEN) {
1628		MCLGET(m, M_TRYWAIT);
1629		if ((m->m_flags & M_EXT) == 0) {
1630			m_free(m);
1631			return (ENOBUFS);
1632		}
1633	}
1634	m->m_len = buflen;
1635	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1636	if (error)
1637		(void) m_free(m);
1638	else {
1639		*mp = m;
1640		if (type == MT_SONAME) {
1641			sa = mtod(m, struct sockaddr *);
1642
1643#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1644			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1645				sa->sa_family = sa->sa_len;
1646#endif
1647			sa->sa_len = buflen;
1648		}
1649	}
1650	return (error);
1651}
1652
1653int
1654getsockaddr(namp, uaddr, len)
1655	struct sockaddr **namp;
1656	caddr_t uaddr;
1657	size_t len;
1658{
1659	struct sockaddr *sa;
1660	int error;
1661
1662	if (len > SOCK_MAXADDRLEN)
1663		return (ENAMETOOLONG);
1664	if (len < offsetof(struct sockaddr, sa_data[0]))
1665		return (EINVAL);
1666	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1667	error = copyin(uaddr, sa, len);
1668	if (error) {
1669		FREE(sa, M_SONAME);
1670	} else {
1671#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1672		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1673			sa->sa_family = sa->sa_len;
1674#endif
1675		sa->sa_len = len;
1676		*namp = sa;
1677	}
1678	return (error);
1679}
1680
1681/*
1682 * Detach mapped page and release resources back to the system.
1683 */
1684void
1685sf_buf_mext(void *addr, void *args)
1686{
1687	vm_page_t m;
1688
1689	m = sf_buf_page(args);
1690	sf_buf_free(args);
1691	vm_page_lock_queues();
1692	vm_page_unwire(m, 0);
1693	/*
1694	 * Check for the object going away on us. This can
1695	 * happen since we don't hold a reference to it.
1696	 * If so, we're responsible for freeing the page.
1697	 */
1698	if (m->wire_count == 0 && m->object == NULL)
1699		vm_page_free(m);
1700	vm_page_unlock_queues();
1701}
1702
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file named by 'fd', beginning at 'offset', to the socket named
 * by 's'.  'nbytes' limits how much of the file is sent; nbytes == 0
 * means send until EOF.  A header and/or trailer may optionally be added
 * to the socket output.  If 'sbytes' is given, the total number of bytes
 * sent is written to *sbytes.
 *
 * This entry point is do_sendfile() with the compat flag cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int rv;

	rv = do_sendfile(td, uap, 0);
	return (rv);
}
1720
1721static int
1722do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1723{
1724	struct sf_hdtr hdtr;
1725	struct uio *hdr_uio, *trl_uio;
1726	int error;
1727
1728	hdr_uio = trl_uio = NULL;
1729
1730	if (uap->hdtr != NULL) {
1731		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1732		if (error)
1733			goto out;
1734		if (hdtr.headers != NULL) {
1735			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1736			if (error)
1737				goto out;
1738		}
1739		if (hdtr.trailers != NULL) {
1740			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1741			if (error)
1742				goto out;
1743
1744		}
1745	}
1746
1747	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1748out:
1749	if (hdr_uio)
1750		free(hdr_uio, M_IOV);
1751	if (trl_uio)
1752		free(trl_uio, M_IOV);
1753	return (error);
1754}
1755
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.
 *
 * Repackages the freebsd4_sendfile_args into a struct sendfile_args, field
 * by field, and calls do_sendfile() with the compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args newargs;
	int rv;

	newargs.fd = uap->fd;
	newargs.s = uap->s;
	newargs.offset = uap->offset;
	newargs.nbytes = uap->nbytes;
	newargs.hdtr = uap->hdtr;
	newargs.sbytes = uap->sbytes;
	newargs.flags = uap->flags;

	rv = do_sendfile(td, &newargs, 1);
	return (rv);
}
#endif /* COMPAT_FREEBSD4 */
1773
1774int
1775kern_sendfile(struct thread *td, struct sendfile_args *uap,
1776    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1777{
1778	struct file *sock_fp;
1779	struct vnode *vp;
1780	struct vm_object *obj = NULL;
1781	struct socket *so = NULL;
1782	struct mbuf *m = NULL;
1783	struct sf_buf *sf;
1784	struct vm_page *pg;
1785	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1786	int error, hdrlen = 0, mnw = 0;
1787	int vfslocked;
1788
1789	/*
1790	 * The file descriptor must be a regular file and have a
1791	 * backing VM object.
1792	 * File offset must be positive.  If it goes beyond EOF
1793	 * we send only the header/trailer and no payload data.
1794	 */
1795	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1796		goto out;
1797	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1798	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1799	obj = vp->v_object;
1800	if (obj != NULL) {
1801		/*
1802		 * Temporarily increase the backing VM object's reference
1803		 * count so that a forced reclamation of its vnode does not
1804		 * immediately destroy it.
1805		 */
1806		VM_OBJECT_LOCK(obj);
1807		if ((obj->flags & OBJ_DEAD) == 0) {
1808			vm_object_reference_locked(obj);
1809			VM_OBJECT_UNLOCK(obj);
1810		} else {
1811			VM_OBJECT_UNLOCK(obj);
1812			obj = NULL;
1813		}
1814	}
1815	VOP_UNLOCK(vp, 0, td);
1816	VFS_UNLOCK_GIANT(vfslocked);
1817	if (obj == NULL) {
1818		error = EINVAL;
1819		goto out;
1820	}
1821	if (uap->offset < 0) {
1822		error = EINVAL;
1823		goto out;
1824	}
1825
1826	/*
1827	 * The socket must be a stream socket and connected.
1828	 * Remember if it a blocking or non-blocking socket.
1829	 */
1830	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1831	    NULL)) != 0)
1832		goto out;
1833	so = sock_fp->f_data;
1834	if (so->so_type != SOCK_STREAM) {
1835		error = EINVAL;
1836		goto out;
1837	}
1838	if ((so->so_state & SS_ISCONNECTED) == 0) {
1839		error = ENOTCONN;
1840		goto out;
1841	}
1842	/*
1843	 * Do not wait on memory allocations but return ENOMEM for
1844	 * caller to retry later.
1845	 * XXX: Experimental.
1846	 */
1847	if (uap->flags & SF_MNOWAIT)
1848		mnw = 1;
1849
1850#ifdef MAC
1851	SOCK_LOCK(so);
1852	error = mac_check_socket_send(td->td_ucred, so);
1853	SOCK_UNLOCK(so);
1854	if (error)
1855		goto out;
1856#endif
1857
1858	/* If headers are specified copy them into mbufs. */
1859	if (hdr_uio != NULL) {
1860		hdr_uio->uio_td = td;
1861		hdr_uio->uio_rw = UIO_WRITE;
1862		if (hdr_uio->uio_resid > 0) {
1863			/*
1864			 * In FBSD < 5.0 the nbytes to send also included
1865			 * the header.  If compat is specified subtract the
1866			 * header size from nbytes.
1867			 */
1868			if (compat) {
1869				if (uap->nbytes > hdr_uio->uio_resid)
1870					uap->nbytes -= hdr_uio->uio_resid;
1871				else
1872					uap->nbytes = 0;
1873			}
1874			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1875			    0, 0, 0);
1876			if (m == NULL) {
1877				error = mnw ? EAGAIN : ENOBUFS;
1878				goto out;
1879			}
1880			hdrlen = m_length(m, NULL);
1881		}
1882	}
1883
1884	/* Protect against multiple writers to the socket. */
1885	(void) sblock(&so->so_snd, M_WAITOK);
1886
1887	/*
1888	 * Loop through the pages of the file, starting with the requested
1889	 * offset. Get a file page (do I/O if necessary), map the file page
1890	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1891	 * it on the socket.
1892	 * This is done in two loops.  The inner loop turns as many pages
1893	 * as it can, up to available socket buffer space, without blocking
1894	 * into mbufs to have it bulk delivered into the socket send buffer.
1895	 * The outer loop checks the state and available space of the socket
1896	 * and takes care of the overall progress.
1897	 */
1898	for (off = uap->offset, rem = uap->nbytes; ; ) {
1899		int loopbytes = 0;
1900		int space = 0;
1901		int done = 0;
1902
1903		/*
1904		 * Check the socket state for ongoing connection,
1905		 * no errors and space in socket buffer.
1906		 * If space is low allow for the remainder of the
1907		 * file to be processed if it fits the socket buffer.
1908		 * Otherwise block in waiting for sufficient space
1909		 * to proceed, or if the socket is nonblocking, return
1910		 * to userland with EAGAIN while reporting how far
1911		 * we've come.
1912		 * We wait until the socket buffer has significant free
1913		 * space to do bulk sends.  This makes good use of file
1914		 * system read ahead and allows packet segmentation
1915		 * offloading hardware to take over lots of work.  If
1916		 * we were not careful here we would send off only one
1917		 * sfbuf at a time.
1918		 */
1919		SOCKBUF_LOCK(&so->so_snd);
1920		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1921			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1922retry_space:
1923		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1924			error = EPIPE;
1925			SOCKBUF_UNLOCK(&so->so_snd);
1926			goto done;
1927		} else if (so->so_error) {
1928			error = so->so_error;
1929			so->so_error = 0;
1930			SOCKBUF_UNLOCK(&so->so_snd);
1931			goto done;
1932		}
1933		space = sbspace(&so->so_snd);
1934		if (space < rem &&
1935		    (space <= 0 ||
1936		     space < so->so_snd.sb_lowat)) {
1937			if (so->so_state & SS_NBIO) {
1938				SOCKBUF_UNLOCK(&so->so_snd);
1939				error = EAGAIN;
1940				goto done;
1941			}
1942			/*
1943			 * sbwait drops the lock while sleeping.
1944			 * When we loop back to retry_space the
1945			 * state may have changed and we retest
1946			 * for it.
1947			 */
1948			error = sbwait(&so->so_snd);
1949			/*
1950			 * An error from sbwait usually indicates that we've
1951			 * been interrupted by a signal. If we've sent anything
1952			 * then return bytes sent, otherwise return the error.
1953			 */
1954			if (error) {
1955				SOCKBUF_UNLOCK(&so->so_snd);
1956				goto done;
1957			}
1958			goto retry_space;
1959		}
1960		SOCKBUF_UNLOCK(&so->so_snd);
1961
1962		/*
1963		 * Reduce space in the socket buffer by the size of
1964		 * the header mbuf chain.
1965		 * hdrlen is set to 0 after the first loop.
1966		 */
1967		space -= hdrlen;
1968
1969		/*
1970		 * Loop and construct maximum sized mbuf chain to be bulk
1971		 * dumped into socket buffer.
1972		 */
1973		while(space > loopbytes) {
1974			vm_pindex_t pindex;
1975			vm_offset_t pgoff;
1976			struct mbuf *m0;
1977
1978			VM_OBJECT_LOCK(obj);
1979			/*
1980			 * Calculate the amount to transfer.
1981			 * Not to exceed a page, the EOF,
1982			 * or the passed in nbytes.
1983			 */
1984			pgoff = (vm_offset_t)(off & PAGE_MASK);
1985			xfsize = omin(PAGE_SIZE - pgoff,
1986			    obj->un_pager.vnp.vnp_size - uap->offset -
1987			    fsbytes - loopbytes);
1988			if (uap->nbytes)
1989				rem = (uap->nbytes - fsbytes - loopbytes);
1990			else
1991				rem = obj->un_pager.vnp.vnp_size -
1992				    uap->offset - fsbytes - loopbytes;
1993			xfsize = omin(rem, xfsize);
1994			if (xfsize <= 0) {
1995				VM_OBJECT_UNLOCK(obj);
1996				done = 1;		/* all data sent */
1997				break;
1998			}
1999			/*
2000			 * Don't overflow the send buffer.
2001			 * Stop here and send out what we've
2002			 * already got.
2003			 */
2004			if (space < loopbytes + xfsize) {
2005				VM_OBJECT_UNLOCK(obj);
2006				break;
2007			}
2008
2009			/*
2010			 * Attempt to look up the page.  Allocate
2011			 * if not found or wait and loop if busy.
2012			 */
2013			pindex = OFF_TO_IDX(off);
2014			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2015			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2016
2017			/*
2018			 * Check if page is valid for what we need,
2019			 * otherwise initiate I/O.
2020			 * If we already turned some pages into mbufs,
2021			 * send them off before we come here again and
2022			 * block.
2023			 */
2024			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2025				VM_OBJECT_UNLOCK(obj);
2026			else if (m != NULL)
2027				error = EAGAIN;	/* send what we already got */
2028			else if (uap->flags & SF_NODISKIO)
2029				error = EBUSY;
2030			else {
2031				int bsize, resid;
2032
2033				/*
2034				 * Ensure that our page is still around
2035				 * when the I/O completes.
2036				 */
2037				vm_page_io_start(pg);
2038				VM_OBJECT_UNLOCK(obj);
2039
2040				/*
2041				 * Get the page from backing store.
2042				 */
2043				bsize = vp->v_mount->mnt_stat.f_iosize;
2044				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2045				vn_lock(vp, LK_SHARED | LK_RETRY, td);
2046
2047				/*
2048				 * XXXMAC: Because we don't have fp->f_cred
2049				 * here, we pass in NOCRED.  This is probably
2050				 * wrong, but is consistent with our original
2051				 * implementation.
2052				 */
2053				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2054				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2055				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2056				    td->td_ucred, NOCRED, &resid, td);
2057				VOP_UNLOCK(vp, 0, td);
2058				VFS_UNLOCK_GIANT(vfslocked);
2059				VM_OBJECT_LOCK(obj);
2060				vm_page_io_finish(pg);
2061				if (!error)
2062					VM_OBJECT_UNLOCK(obj);
2063				mbstat.sf_iocnt++;
2064			}
2065			if (error) {
2066				vm_page_lock_queues();
2067				vm_page_unwire(pg, 0);
2068				/*
2069				 * See if anyone else might know about
2070				 * this page.  If not and it is not valid,
2071				 * then free it.
2072				 */
2073				if (pg->wire_count == 0 && pg->valid == 0 &&
2074				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2075				    pg->hold_count == 0) {
2076					vm_page_free(pg);
2077				}
2078				vm_page_unlock_queues();
2079				VM_OBJECT_UNLOCK(obj);
2080				if (error == EAGAIN)
2081					error = 0;	/* not a real error */
2082				break;
2083			}
2084
2085			/*
2086			 * Get a sendfile buf.  We usually wait as long
2087			 * as necessary, but this wait can be interrupted.
2088			 */
2089			if ((sf = sf_buf_alloc(pg,
2090			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2091				mbstat.sf_allocfail++;
2092				vm_page_lock_queues();
2093				vm_page_unwire(pg, 0);
2094				/*
2095				 * XXX: Not same check as above!?
2096				 */
2097				if (pg->wire_count == 0 && pg->object == NULL)
2098					vm_page_free(pg);
2099				vm_page_unlock_queues();
2100				error = (mnw ? EAGAIN : EINTR);
2101				break;
2102			}
2103
2104			/*
2105			 * Get an mbuf and set it up as having
2106			 * external storage.
2107			 */
2108			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2109			if (m0 == NULL) {
2110				error = (mnw ? EAGAIN : ENOBUFS);
2111				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2112				break;
2113			}
2114			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2115			    sf, M_RDONLY, EXT_SFBUF);
2116			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2117			m0->m_len = xfsize;
2118
2119			/* Append to mbuf chain. */
2120			if (m != NULL)
2121				m_cat(m, m0);
2122			else
2123				m = m0;
2124
2125			/* Keep track of bits processed. */
2126			loopbytes += xfsize;
2127			off += xfsize;
2128		}
2129
2130		/* Add the buffer chain to the socket buffer. */
2131		if (m != NULL) {
2132			int mlen, err;
2133
2134			mlen = m_length(m, NULL);
2135			SOCKBUF_LOCK(&so->so_snd);
2136			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2137				error = EPIPE;
2138				SOCKBUF_UNLOCK(&so->so_snd);
2139				goto done;
2140			}
2141			SOCKBUF_UNLOCK(&so->so_snd);
2142			/* Avoid error aliasing. */
2143			err = (*so->so_proto->pr_usrreqs->pru_send)
2144				    (so, 0, m, NULL, NULL, td);
2145			if (err == 0) {
2146				/*
2147				 * We need two counters to get the
2148				 * file offset and nbytes to send
2149				 * right:
2150				 * - sbytes contains the total amount
2151				 *   of bytes sent, including headers.
2152				 * - fsbytes contains the total amount
2153				 *   of bytes sent from the file.
2154				 */
2155				sbytes += mlen;
2156				fsbytes += mlen;
2157				if (hdrlen) {
2158					fsbytes -= hdrlen;
2159					hdrlen = 0;
2160				}
2161			} else if (error == 0)
2162				error = err;
2163			m = NULL;	/* pru_send always consumes */
2164		}
2165
2166		/* Quit outer loop on error or when we're done. */
2167		if (error || done)
2168			goto done;
2169	}
2170
2171	/*
2172	 * Send trailers. Wimp out and use writev(2).
2173	 */
2174	if (trl_uio != NULL) {
2175		error = kern_writev(td, uap->s, trl_uio);
2176		if (error)
2177			goto done;
2178		sbytes += td->td_retval[0];
2179	}
2180
2181done:
2182	sbunlock(&so->so_snd);
2183out:
2184	/*
2185	 * If there was no error we have to clear td->td_retval[0]
2186	 * because it may have been set by writev.
2187	 */
2188	if (error == 0) {
2189		td->td_retval[0] = 0;
2190	}
2191	if (uap->sbytes != NULL) {
2192		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2193	}
2194	if (obj != NULL)
2195		vm_object_deallocate(obj);
2196	if (vp != NULL) {
2197		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2198		vrele(vp);
2199		VFS_UNLOCK_GIANT(vfslocked);
2200	}
2201	if (so)
2202		fdrop(sock_fp, td);
2203	if (m)
2204		m_freem(m);
2205
2206	if (error == ERESTART)
2207		error = EINTR;
2208
2209	return (error);
2210}
2211
2212/*
2213 * SCTP syscalls.
2214 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2215 * otherwise all return EOPNOTSUPP.
2216 * XXX: We should make this loadable one day.
2217 */
2218int
2219sctp_peeloff(td, uap)
2220	struct thread *td;
2221	struct sctp_peeloff_args /* {
2222		int	sd;
2223		caddr_t	name;
2224	} */ *uap;
2225{
2226#ifdef SCTP
2227	struct filedesc *fdp;
2228	struct file *nfp = NULL;
2229	int error;
2230	struct socket *head, *so;
2231	int fd;
2232	u_int fflag;
2233
2234	fdp = td->td_proc->p_fd;
2235	error = fgetsock(td, uap->sd, &head, &fflag);
2236	if (error)
2237		goto done2;
2238	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2239	if (error)
2240		goto done2;
2241	/*
2242	 * At this point we know we do have a assoc to pull
2243	 * we proceed to get the fd setup. This may block
2244	 * but that is ok.
2245	 */
2246
2247	error = falloc(td, &nfp, &fd);
2248	if (error)
2249		goto done;
2250	td->td_retval[0] = fd;
2251
2252	so = sonewconn(head, SS_ISCONNECTED);
2253	if (so == NULL)
2254		goto noconnection;
2255	/*
2256	 * Before changing the flags on the socket, we have to bump the
2257	 * reference count.  Otherwise, if the protocol calls sofree(),
2258	 * the socket will be released due to a zero refcount.
2259	 */
2260        SOCK_LOCK(so);
2261        soref(so);                      /* file descriptor reference */
2262        SOCK_UNLOCK(so);
2263
2264	ACCEPT_LOCK();
2265
2266	TAILQ_REMOVE(&head->so_comp, so, so_list);
2267	head->so_qlen--;
2268	so->so_state |= (head->so_state & SS_NBIO);
2269	so->so_state &= ~SS_NOFDREF;
2270	so->so_qstate &= ~SQ_COMP;
2271	so->so_head = NULL;
2272
2273	ACCEPT_UNLOCK();
2274
2275	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2276	if (error)
2277		goto noconnection;
2278	if (head->so_sigio != NULL)
2279		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2280
2281	FILE_LOCK(nfp);
2282	nfp->f_data = so;
2283	nfp->f_flag = fflag;
2284	nfp->f_type = DTYPE_SOCKET;
2285	nfp->f_ops = &socketops;
2286	FILE_UNLOCK(nfp);
2287
2288noconnection:
2289	/*
2290	 * close the new descriptor, assuming someone hasn't ripped it
2291	 * out from under us.
2292	 */
2293	if (error)
2294		fdclose(fdp, nfp, fd, td);
2295
2296	/*
2297	 * Release explicitly held references before returning.
2298	 */
2299done:
2300	if (nfp != NULL)
2301		fdrop(nfp, td);
2302	fputsock(head);
2303done2:
2304	return (error);
2305#else  /* SCTP */
2306	return (EOPNOTSUPP);
2307#endif /* SCTP */
2308}
2309
2310int
2311sctp_generic_sendmsg (td, uap)
2312	struct thread *td;
2313	struct sctp_generic_sendmsg_args /* {
2314		int sd,
2315		caddr_t msg,
2316		int mlen,
2317		caddr_t to,
2318		__socklen_t tolen,
2319		struct sctp_sndrcvinfo *sinfo,
2320		int flags
2321	} */ *uap;
2322{
2323#ifdef SCTP
2324	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2325	struct socket *so;
2326	struct file *fp = NULL;
2327	int use_rcvinfo = 1;
2328	int error = 0, len;
2329	struct sockaddr *to = NULL;
2330#ifdef KTRACE
2331	struct uio *ktruio = NULL;
2332#endif
2333	struct uio auio;
2334	struct iovec iov[1];
2335
2336	if (uap->sinfo) {
2337		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2338		if (error)
2339			return (error);
2340		u_sinfo = &sinfo;
2341	}
2342	if (uap->tolen) {
2343		error = getsockaddr(&to, uap->to, uap->tolen);
2344		if (error) {
2345			to = NULL;
2346			goto sctp_bad2;
2347		}
2348	}
2349
2350	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2351	if (error)
2352		goto sctp_bad;
2353
2354	iov[0].iov_base = uap->msg;
2355	iov[0].iov_len = uap->mlen;
2356
2357	so = (struct socket *)fp->f_data;
2358#ifdef MAC
2359	SOCK_LOCK(so);
2360	error = mac_check_socket_send(td->td_ucred, so);
2361	SOCK_UNLOCK(so);
2362	if (error)
2363		goto sctp_bad;
2364#endif /* MAC */
2365
2366	auio.uio_iov =  iov;
2367	auio.uio_iovcnt = 1;
2368	auio.uio_segflg = UIO_USERSPACE;
2369	auio.uio_rw = UIO_WRITE;
2370	auio.uio_td = td;
2371	auio.uio_offset = 0;			/* XXX */
2372	auio.uio_resid = 0;
2373	len = auio.uio_resid = uap->mlen;
2374	error = sctp_lower_sosend(so, to, &auio,
2375		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2376		    uap->flags, use_rcvinfo, u_sinfo, td);
2377	if (error) {
2378		if (auio.uio_resid != len && (error == ERESTART ||
2379		    error == EINTR || error == EWOULDBLOCK))
2380			error = 0;
2381		/* Generation of SIGPIPE can be controlled per socket. */
2382		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2383		    !(uap->flags & MSG_NOSIGNAL)) {
2384			PROC_LOCK(td->td_proc);
2385			psignal(td->td_proc, SIGPIPE);
2386			PROC_UNLOCK(td->td_proc);
2387		}
2388	}
2389	if (error == 0)
2390		td->td_retval[0] = len - auio.uio_resid;
2391#ifdef KTRACE
2392	if (ktruio != NULL) {
2393		ktruio->uio_resid = td->td_retval[0];
2394		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2395	}
2396#endif /* KTRACE */
2397sctp_bad:
2398	if (fp)
2399		fdrop(fp, td);
2400sctp_bad2:
2401	if (to)
2402		free(to, M_SONAME);
2403	return (error);
2404#else  /* SCTP */
2405	return (EOPNOTSUPP);
2406#endif /* SCTP */
2407}
2408
2409int
2410sctp_generic_sendmsg_iov(td, uap)
2411	struct thread *td;
2412	struct sctp_generic_sendmsg_iov_args /* {
2413		int sd,
2414		struct iovec *iov,
2415		int iovlen,
2416		caddr_t to,
2417		__socklen_t tolen,
2418		struct sctp_sndrcvinfo *sinfo,
2419		int flags
2420	} */ *uap;
2421{
2422#ifdef SCTP
2423	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2424	struct socket *so;
2425	struct file *fp = NULL;
2426	int use_rcvinfo = 1;
2427	int error=0, len, i;
2428	struct sockaddr *to = NULL;
2429#ifdef KTRACE
2430	struct uio *ktruio = NULL;
2431#endif
2432	struct uio auio;
2433	struct iovec *iov, *tiov;
2434
2435	if (uap->sinfo) {
2436		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2437		if (error)
2438			return (error);
2439		u_sinfo = &sinfo;
2440	}
2441	if (uap->tolen) {
2442		error = getsockaddr(&to, uap->to, uap->tolen);
2443		if (error) {
2444			to = NULL;
2445			goto sctp_bad2;
2446		}
2447	}
2448
2449	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2450	if (error)
2451		goto sctp_bad1;
2452
2453	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2454	if (error)
2455		goto sctp_bad1;
2456
2457	so = (struct socket *)fp->f_data;
2458#ifdef MAC
2459	SOCK_LOCK(so);
2460	error = mac_check_socket_send(td->td_ucred, so);
2461	SOCK_UNLOCK(so);
2462	if (error)
2463		goto sctp_bad;
2464#endif /* MAC */
2465
2466	auio.uio_iov =  iov;
2467	auio.uio_iovcnt = uap->iovlen;
2468	auio.uio_segflg = UIO_USERSPACE;
2469	auio.uio_rw = UIO_WRITE;
2470	auio.uio_td = td;
2471	auio.uio_offset = 0;			/* XXX */
2472	auio.uio_resid = 0;
2473	tiov = iov;
2474	for (i = 0; i <uap->iovlen; i++, tiov++) {
2475		if ((auio.uio_resid += tiov->iov_len) < 0) {
2476			error = EINVAL;
2477			goto sctp_bad;
2478		}
2479	}
2480	len = auio.uio_resid;
2481	error = sctp_lower_sosend(so, to, &auio,
2482		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2483		    uap->flags, use_rcvinfo, u_sinfo, td);
2484	if (error) {
2485		if (auio.uio_resid != len && (error == ERESTART ||
2486		    error == EINTR || error == EWOULDBLOCK))
2487			error = 0;
2488		/* Generation of SIGPIPE can be controlled per socket */
2489		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2490		    !(uap->flags & MSG_NOSIGNAL)) {
2491			PROC_LOCK(td->td_proc);
2492			psignal(td->td_proc, SIGPIPE);
2493			PROC_UNLOCK(td->td_proc);
2494		}
2495	}
2496	if (error == 0)
2497		td->td_retval[0] = len - auio.uio_resid;
2498#ifdef KTRACE
2499	if (ktruio != NULL) {
2500		ktruio->uio_resid = td->td_retval[0];
2501		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2502	}
2503#endif /* KTRACE */
2504sctp_bad:
2505	free(iov, M_IOV);
2506sctp_bad1:
2507	if (fp)
2508		fdrop(fp, td);
2509sctp_bad2:
2510	if (to)
2511		free(to, M_SONAME);
2512	return (error);
2513#else  /* SCTP */
2514	return (EOPNOTSUPP);
2515#endif /* SCTP */
2516}
2517
/*
 * sctp_generic_recvmsg: receive one message on the socket uap->sd through
 * sctp_sorecvmsg().
 *
 * Inputs copied in from userland: the iovec array (uap->iov, uap->iovlen),
 * the capacity of the caller's address buffer (*uap->fromlenaddr, treated
 * as 0 when the pointer is NULL) and the message flags (*uap->msg_flags,
 * treated as 0 when the pointer is NULL).
 *
 * On success td->td_retval[0] holds the number of bytes transferred.  When
 * the caller asked for them, the peer address and its length are copied to
 * uap->from / uap->fromlenaddr and the updated flags to uap->msg_flags.
 * The sctp_sndrcvinfo is copied to uap->sinfo only when sctp_sorecvmsg()
 * itself reported no error.
 *
 * Returns 0 or an errno value.  ERESTART, EINTR and EWOULDBLOCK are turned
 * into success when part of the request had already been transferred.
 * EINVAL is returned when the summed iovec lengths go negative.  When the
 * kernel is compiled without SCTP defined the call always returns
 * EOPNOTSUPP.
 *
 * NOTE(review): nothing in this function verifies that the descriptor
 * refers to an SCTP socket before sctp_sorecvmsg() is called on it --
 * confirm that the callee (or getsock()) rejects other protocols.
 */
int
sctp_generic_recvmsg(td, uap)
	struct thread *td;
	struct sctp_generic_recvmsg_args /* {
		int sd,
		struct iovec *iov,
		int iovlen,
		struct sockaddr *from,
		__socklen_t *fromlenaddr,
		struct sctp_sndrcvinfo *sinfo,
		int *msg_flags
	} */ *uap;
{
#ifdef SCTP
	u_int8_t sockbufstore[256];
	struct uio auio;
	struct iovec *iov, *tiov;
	struct sctp_sndrcvinfo sinfo;
	struct socket *so;
	struct file *fp = NULL;
	struct sockaddr *fromsa;
	socklen_t salen;
	int fromlen;
	int len, i, msg_flags;
	int error = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
	if (error)
		return (error);
	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
	if (error)
		goto out1;

	so = fp->f_data;
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error)
		goto out;
#endif /* MAC */

	if (uap->fromlenaddr) {
		error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen));
		if (error)
			goto out;
		/*
		 * fromlen is handed to sctp_sorecvmsg() as the capacity of
		 * sockbufstore; never advertise more room than it has.  The
		 * length reported back is bounded by sa_len (at most 255),
		 * so the clamp does not change what the caller sees.
		 */
		if (fromlen > (int)sizeof(sockbufstore))
			fromlen = sizeof(sockbufstore);
	} else {
		fromlen = 0;
	}
	if (uap->msg_flags) {
		error = copyin(uap->msg_flags, &msg_flags, sizeof(int));
		if (error)
			goto out;
	} else {
		msg_flags = 0;
	}

	/* Describe the user buffers and total up the request size. */
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	tiov = iov;
	for (i = 0; i < uap->iovlen; i++, tiov++) {
		if ((auio.uio_resid += tiov->iov_len) < 0) {
			error = EINVAL;
			goto out;
		}
	}
	len = auio.uio_resid;

	/*
	 * Both sinfo and the address buffer are later copied out to
	 * userland.  Zero them first so that kernel stack contents cannot
	 * leak if sctp_sorecvmsg() leaves either of them partly or wholly
	 * unwritten; a zeroed address buffer also yields sa_len == 0, i.e.
	 * an empty address, instead of a garbage length.
	 */
	bzero(&sinfo, sizeof(sinfo));
	bzero(sockbufstore, sizeof(sockbufstore));
	fromsa = (struct sockaddr *)sockbufstore;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif /* KTRACE */
	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
		    fromsa, fromlen, &msg_flags,
		    (struct sctp_sndrcvinfo *)&sinfo, 1);
	if (error) {
		/* An interrupted receive that moved data counts as success. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	} else {
		if (uap->sinfo)
			error = copyout(&sinfo, uap->sinfo, sizeof(sinfo));
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = len - auio.uio_resid;
		ktrgenio(uap->sd, UIO_READ, ktruio, error);
	}
#endif /* KTRACE */
	if (error)
		goto out;
	td->td_retval[0] = len - auio.uio_resid;

	if (fromlen && uap->from) {
		/* A negative caller-supplied length reports an empty address. */
		salen = 0;
		if (fromlen > 0) {
			salen = MIN(fromlen, fromsa->sa_len);
			error = copyout(fromsa, uap->from, salen);
			if (error)
				goto out;
		}
		/* fromlen != 0 implies uap->fromlenaddr was non-NULL above. */
		error = copyout(&salen, uap->fromlenaddr, sizeof(socklen_t));
		if (error)
			goto out;
	}
	if (uap->msg_flags) {
		error = copyout(&msg_flags, uap->msg_flags, sizeof(int));
		if (error)
			goto out;
	}
out:
	free(iov, M_IOV);
out1:
	if (fp)
		fdrop(fp, td);

	return (error);
#else  /* SCTP */
	return (EOPNOTSUPP);
#endif /* SCTP */
}
2656