kern_sendfile.c revision 236738
1254885Sdumbbell/*-
2254885Sdumbbell * Copyright (c) 1982, 1986, 1989, 1990, 1993
3254885Sdumbbell *	The Regents of the University of California.  All rights reserved.
4254885Sdumbbell *
5254885Sdumbbell * sendfile(2) and related extensions:
6254885Sdumbbell * Copyright (c) 1998, David Greenman. All rights reserved.
7254885Sdumbbell *
8254885Sdumbbell * Redistribution and use in source and binary forms, with or without
9254885Sdumbbell * modification, are permitted provided that the following conditions
10254885Sdumbbell * are met:
11254885Sdumbbell * 1. Redistributions of source code must retain the above copyright
12254885Sdumbbell *    notice, this list of conditions and the following disclaimer.
13254885Sdumbbell * 2. Redistributions in binary form must reproduce the above copyright
14254885Sdumbbell *    notice, this list of conditions and the following disclaimer in the
15254885Sdumbbell *    documentation and/or other materials provided with the distribution.
16254885Sdumbbell * 4. Neither the name of the University nor the names of its contributors
17254885Sdumbbell *    may be used to endorse or promote products derived from this software
18254885Sdumbbell *    without specific prior written permission.
19254885Sdumbbell *
20254885Sdumbbell * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21254885Sdumbbell * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22254885Sdumbbell * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23254885Sdumbbell * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24254885Sdumbbell * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25254885Sdumbbell * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26254885Sdumbbell * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27254885Sdumbbell * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28254885Sdumbbell * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29254885Sdumbbell * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30254885Sdumbbell * SUCH DAMAGE.
31254885Sdumbbell *
32254885Sdumbbell *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33254885Sdumbbell */
34254885Sdumbbell
35254885Sdumbbell#include <sys/cdefs.h>
36254885Sdumbbell__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 236738 2012-06-08 08:04:51Z mjg $");
37254885Sdumbbell
38254885Sdumbbell#include "opt_capsicum.h"
39254885Sdumbbell#include "opt_inet.h"
40254885Sdumbbell#include "opt_inet6.h"
41254885Sdumbbell#include "opt_sctp.h"
42254885Sdumbbell#include "opt_compat.h"
43254885Sdumbbell#include "opt_ktrace.h"
44254885Sdumbbell
45254885Sdumbbell#include <sys/param.h>
46254885Sdumbbell#include <sys/systm.h>
47254885Sdumbbell#include <sys/capability.h>
48254885Sdumbbell#include <sys/kernel.h>
49254885Sdumbbell#include <sys/lock.h>
50254885Sdumbbell#include <sys/mutex.h>
51254885Sdumbbell#include <sys/sysproto.h>
52254885Sdumbbell#include <sys/malloc.h>
53254885Sdumbbell#include <sys/filedesc.h>
54254885Sdumbbell#include <sys/event.h>
55254885Sdumbbell#include <sys/proc.h>
56254885Sdumbbell#include <sys/fcntl.h>
57254885Sdumbbell#include <sys/file.h>
58254885Sdumbbell#include <sys/filio.h>
59254885Sdumbbell#include <sys/jail.h>
60254885Sdumbbell#include <sys/mount.h>
61254885Sdumbbell#include <sys/mbuf.h>
62254885Sdumbbell#include <sys/protosw.h>
63254885Sdumbbell#include <sys/sf_buf.h>
64254885Sdumbbell#include <sys/sysent.h>
65254885Sdumbbell#include <sys/socket.h>
66254885Sdumbbell#include <sys/socketvar.h>
67254885Sdumbbell#include <sys/signalvar.h>
68254885Sdumbbell#include <sys/syscallsubr.h>
69254885Sdumbbell#include <sys/sysctl.h>
70254885Sdumbbell#include <sys/uio.h>
71254885Sdumbbell#include <sys/vnode.h>
72254885Sdumbbell#ifdef KTRACE
73254885Sdumbbell#include <sys/ktrace.h>
74254885Sdumbbell#endif
75254885Sdumbbell#ifdef COMPAT_FREEBSD32
76254885Sdumbbell#include <compat/freebsd32/freebsd32_util.h>
77254885Sdumbbell#endif
78254885Sdumbbell
79254885Sdumbbell#include <net/vnet.h>
80254885Sdumbbell
81254885Sdumbbell#include <security/audit/audit.h>
82254885Sdumbbell#include <security/mac/mac_framework.h>
83254885Sdumbbell
84254885Sdumbbell#include <vm/vm.h>
85254885Sdumbbell#include <vm/vm_object.h>
86254885Sdumbbell#include <vm/vm_page.h>
87254885Sdumbbell#include <vm/vm_pageout.h>
88254885Sdumbbell#include <vm/vm_kern.h>
89254885Sdumbbell#include <vm/vm_extern.h>
90254885Sdumbbell
91254885Sdumbbell#if defined(INET) || defined(INET6)
92254885Sdumbbell#ifdef SCTP
93254885Sdumbbell#include <netinet/sctp.h>
94254885Sdumbbell#include <netinet/sctp_peeloff.h>
95254885Sdumbbell#endif /* SCTP */
96254885Sdumbbell#endif /* INET || INET6 */
97254885Sdumbbell
/* Forward declarations of the file-local system call helpers. */
static int	accept1(struct thread *td, struct accept_args *uap, int compat);
static int	do_sendfile(struct thread *td, struct sendfile_args *uap,
		    int compat);
static int	getpeername1(struct thread *td, struct getpeername_args *uap,
		    int compat);
static int	getsockname1(struct thread *td, struct getsockname_args *uap,
		    int compat);
static int	recvit(struct thread *td, int s, struct msghdr *mp,
		    void *namelenp);
static int	sendit(struct thread *td, int s, struct msghdr *mp, int flags);
107254885Sdumbbell
108254885Sdumbbell/*
109254885Sdumbbell * NSFBUFS-related variables and associated sysctls
110254885Sdumbbell */
111254885Sdumbbellint nsfbufs;
112254885Sdumbbellint nsfbufspeak;
113254885Sdumbbellint nsfbufsused;
114254885Sdumbbell
115254885SdumbbellSYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
116254885Sdumbbell    "Maximum number of sendfile(2) sf_bufs available");
117254885SdumbbellSYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
118254885Sdumbbell    "Number of sendfile(2) sf_bufs at peak usage");
119254885SdumbbellSYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
120254885Sdumbbell    "Number of sendfile(2) sf_bufs in use");
121254885Sdumbbell
122254885Sdumbbell/*
123254885Sdumbbell * Convert a user file descriptor to a kernel file entry and check that, if
124254885Sdumbbell * it is a capability, the right rights are present. A reference on the file
125254885Sdumbbell * entry is held upon returning.
126254885Sdumbbell */
127254885Sdumbbellstatic int
128254885Sdumbbellgetsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
129254885Sdumbbell    struct file **fpp, u_int *fflagp)
130254885Sdumbbell{
131254885Sdumbbell	struct file *fp;
132254885Sdumbbell#ifdef CAPABILITIES
133254885Sdumbbell	struct file *fp_fromcap;
134254885Sdumbbell	int error;
135254885Sdumbbell#endif
136254885Sdumbbell
137254885Sdumbbell	fp = NULL;
138254885Sdumbbell	if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
139254885Sdumbbell		return (EBADF);
140254885Sdumbbell#ifdef CAPABILITIES
141254885Sdumbbell	/*
142254885Sdumbbell	 * If the file descriptor is for a capability, test rights and use
143254885Sdumbbell	 * the file descriptor referenced by the capability.
144254885Sdumbbell	 */
145254885Sdumbbell	error = cap_funwrap(fp, rights, &fp_fromcap);
146254885Sdumbbell	if (error) {
147254885Sdumbbell		fdrop(fp, curthread);
148254885Sdumbbell		return (error);
149254885Sdumbbell	}
150254885Sdumbbell	if (fp != fp_fromcap) {
151254885Sdumbbell		fhold(fp_fromcap);
152254885Sdumbbell		fdrop(fp, curthread);
153254885Sdumbbell		fp = fp_fromcap;
154254885Sdumbbell	}
155254885Sdumbbell#endif /* CAPABILITIES */
156254885Sdumbbell	if (fp->f_type != DTYPE_SOCKET) {
157254885Sdumbbell		fdrop(fp, curthread);
158254885Sdumbbell		return (ENOTSOCK);
159254885Sdumbbell	}
160254885Sdumbbell	if (fflagp != NULL)
161254885Sdumbbell		*fflagp = fp->f_flag;
162254885Sdumbbell	*fpp = fp;
163254885Sdumbbell	return (0);
164254885Sdumbbell}
165254885Sdumbbell
/*
 * System call interface to the socket abstraction.
 *
 * A COMPAT_43 build enables COMPAT_OLDSOCK, which guards the old-style
 * entry points (oaccept, osend, orecv, ...) and the osockaddr/MSG_COMPAT
 * handling further down in this file.
 */
#ifdef COMPAT_43
#define COMPAT_OLDSOCK
#endif
172254885Sdumbbell
173254885Sdumbbellint
174254885Sdumbbellsys_socket(td, uap)
175254885Sdumbbell	struct thread *td;
176254885Sdumbbell	struct socket_args /* {
177254885Sdumbbell		int	domain;
178254885Sdumbbell		int	type;
179254885Sdumbbell		int	protocol;
180254885Sdumbbell	} */ *uap;
181254885Sdumbbell{
182254885Sdumbbell	struct filedesc *fdp;
183254885Sdumbbell	struct socket *so;
184254885Sdumbbell	struct file *fp;
185254885Sdumbbell	int fd, error;
186254885Sdumbbell
187254885Sdumbbell	AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
188254885Sdumbbell#ifdef MAC
189254885Sdumbbell	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
190254885Sdumbbell	    uap->protocol);
191254885Sdumbbell	if (error)
192254885Sdumbbell		return (error);
193254885Sdumbbell#endif
194254885Sdumbbell	fdp = td->td_proc->p_fd;
195254885Sdumbbell	error = falloc(td, &fp, &fd, 0);
196254885Sdumbbell	if (error)
197254885Sdumbbell		return (error);
198254885Sdumbbell	/* An extra reference on `fp' has been held for us by falloc(). */
199254885Sdumbbell	error = socreate(uap->domain, &so, uap->type, uap->protocol,
200254885Sdumbbell	    td->td_ucred, td);
201254885Sdumbbell	if (error) {
202254885Sdumbbell		fdclose(fdp, fp, fd, td);
203254885Sdumbbell	} else {
204254885Sdumbbell		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
205254885Sdumbbell		td->td_retval[0] = fd;
206254885Sdumbbell	}
207254885Sdumbbell	fdrop(fp, td);
208254885Sdumbbell	return (error);
209254885Sdumbbell}
210254885Sdumbbell
211254885Sdumbbell/* ARGSUSED */
212254885Sdumbbellint
213254885Sdumbbellsys_bind(td, uap)
214254885Sdumbbell	struct thread *td;
215254885Sdumbbell	struct bind_args /* {
216254885Sdumbbell		int	s;
217254885Sdumbbell		caddr_t	name;
218254885Sdumbbell		int	namelen;
219254885Sdumbbell	} */ *uap;
220254885Sdumbbell{
221254885Sdumbbell	struct sockaddr *sa;
222254885Sdumbbell	int error;
223254885Sdumbbell
224254885Sdumbbell	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
225254885Sdumbbell		return (error);
226254885Sdumbbell
227254885Sdumbbell	error = kern_bind(td, uap->s, sa);
228254885Sdumbbell	free(sa, M_SONAME);
229254885Sdumbbell	return (error);
230254885Sdumbbell}
231254885Sdumbbell
232254885Sdumbbellint
233254885Sdumbbellkern_bind(td, fd, sa)
234254885Sdumbbell	struct thread *td;
235254885Sdumbbell	int fd;
236254885Sdumbbell	struct sockaddr *sa;
237254885Sdumbbell{
238254885Sdumbbell	struct socket *so;
239254885Sdumbbell	struct file *fp;
240254885Sdumbbell	int error;
241254885Sdumbbell
242254885Sdumbbell	AUDIT_ARG_FD(fd);
243254885Sdumbbell	error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
244254885Sdumbbell	if (error)
245254885Sdumbbell		return (error);
246254885Sdumbbell	so = fp->f_data;
247254885Sdumbbell#ifdef KTRACE
248254885Sdumbbell	if (KTRPOINT(td, KTR_STRUCT))
249254885Sdumbbell		ktrsockaddr(sa);
250254885Sdumbbell#endif
251254885Sdumbbell#ifdef MAC
252254885Sdumbbell	error = mac_socket_check_bind(td->td_ucred, so, sa);
253	if (error == 0)
254#endif
255		error = sobind(so, sa, td);
256	fdrop(fp, td);
257	return (error);
258}
259
260/* ARGSUSED */
261int
262sys_listen(td, uap)
263	struct thread *td;
264	struct listen_args /* {
265		int	s;
266		int	backlog;
267	} */ *uap;
268{
269	struct socket *so;
270	struct file *fp;
271	int error;
272
273	AUDIT_ARG_FD(uap->s);
274	error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
275	if (error == 0) {
276		so = fp->f_data;
277#ifdef MAC
278		error = mac_socket_check_listen(td->td_ucred, so);
279		if (error == 0)
280#endif
281			error = solisten(so, uap->backlog, td);
282		fdrop(fp, td);
283	}
284	return(error);
285}
286
287/*
288 * accept1()
289 */
290static int
291accept1(td, uap, compat)
292	struct thread *td;
293	struct accept_args /* {
294		int	s;
295		struct sockaddr	* __restrict name;
296		socklen_t	* __restrict anamelen;
297	} */ *uap;
298	int compat;
299{
300	struct sockaddr *name;
301	socklen_t namelen;
302	struct file *fp;
303	int error;
304
305	if (uap->name == NULL)
306		return (kern_accept(td, uap->s, NULL, NULL, NULL));
307
308	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
309	if (error)
310		return (error);
311
312	error = kern_accept(td, uap->s, &name, &namelen, &fp);
313
314	/*
315	 * return a namelen of zero for older code which might
316	 * ignore the return value from accept.
317	 */
318	if (error) {
319		(void) copyout(&namelen,
320		    uap->anamelen, sizeof(*uap->anamelen));
321		return (error);
322	}
323
324	if (error == 0 && name != NULL) {
325#ifdef COMPAT_OLDSOCK
326		if (compat)
327			((struct osockaddr *)name)->sa_family =
328			    name->sa_family;
329#endif
330		error = copyout(name, uap->name, namelen);
331	}
332	if (error == 0)
333		error = copyout(&namelen, uap->anamelen,
334		    sizeof(namelen));
335	if (error)
336		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
337	fdrop(fp, td);
338	free(name, M_SONAME);
339	return (error);
340}
341
342int
343kern_accept(struct thread *td, int s, struct sockaddr **name,
344    socklen_t *namelen, struct file **fp)
345{
346	struct filedesc *fdp;
347	struct file *headfp, *nfp = NULL;
348	struct sockaddr *sa = NULL;
349	int error;
350	struct socket *head, *so;
351	int fd;
352	u_int fflag;
353	pid_t pgid;
354	int tmp;
355
356	if (name) {
357		*name = NULL;
358		if (*namelen < 0)
359			return (EINVAL);
360	}
361
362	AUDIT_ARG_FD(s);
363	fdp = td->td_proc->p_fd;
364	error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
365	if (error)
366		return (error);
367	head = headfp->f_data;
368	if ((head->so_options & SO_ACCEPTCONN) == 0) {
369		error = EINVAL;
370		goto done;
371	}
372#ifdef MAC
373	error = mac_socket_check_accept(td->td_ucred, head);
374	if (error != 0)
375		goto done;
376#endif
377	error = falloc(td, &nfp, &fd, 0);
378	if (error)
379		goto done;
380	ACCEPT_LOCK();
381	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
382		ACCEPT_UNLOCK();
383		error = EWOULDBLOCK;
384		goto noconnection;
385	}
386	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
387		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
388			head->so_error = ECONNABORTED;
389			break;
390		}
391		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
392		    "accept", 0);
393		if (error) {
394			ACCEPT_UNLOCK();
395			goto noconnection;
396		}
397	}
398	if (head->so_error) {
399		error = head->so_error;
400		head->so_error = 0;
401		ACCEPT_UNLOCK();
402		goto noconnection;
403	}
404	so = TAILQ_FIRST(&head->so_comp);
405	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
406	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
407
408	/*
409	 * Before changing the flags on the socket, we have to bump the
410	 * reference count.  Otherwise, if the protocol calls sofree(),
411	 * the socket will be released due to a zero refcount.
412	 */
413	SOCK_LOCK(so);			/* soref() and so_state update */
414	soref(so);			/* file descriptor reference */
415
416	TAILQ_REMOVE(&head->so_comp, so, so_list);
417	head->so_qlen--;
418	so->so_state |= (head->so_state & SS_NBIO);
419	so->so_qstate &= ~SQ_COMP;
420	so->so_head = NULL;
421
422	SOCK_UNLOCK(so);
423	ACCEPT_UNLOCK();
424
425	/* An extra reference on `nfp' has been held for us by falloc(). */
426	td->td_retval[0] = fd;
427
428	/* connection has been removed from the listen queue */
429	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
430
431	pgid = fgetown(&head->so_sigio);
432	if (pgid != 0)
433		fsetown(pgid, &so->so_sigio);
434
435	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
436	/* Sync socket nonblocking/async state with file flags */
437	tmp = fflag & FNONBLOCK;
438	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
439	tmp = fflag & FASYNC;
440	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
441	sa = 0;
442	error = soaccept(so, &sa);
443	if (error) {
444		/*
445		 * return a namelen of zero for older code which might
446		 * ignore the return value from accept.
447		 */
448		if (name)
449			*namelen = 0;
450		goto noconnection;
451	}
452	if (sa == NULL) {
453		if (name)
454			*namelen = 0;
455		goto done;
456	}
457	if (name) {
458		/* check sa_len before it is destroyed */
459		if (*namelen > sa->sa_len)
460			*namelen = sa->sa_len;
461#ifdef KTRACE
462		if (KTRPOINT(td, KTR_STRUCT))
463			ktrsockaddr(sa);
464#endif
465		*name = sa;
466		sa = NULL;
467	}
468noconnection:
469	if (sa)
470		free(sa, M_SONAME);
471
472	/*
473	 * close the new descriptor, assuming someone hasn't ripped it
474	 * out from under us.
475	 */
476	if (error)
477		fdclose(fdp, nfp, fd, td);
478
479	/*
480	 * Release explicitly held references before returning.  We return
481	 * a reference on nfp to the caller on success if they request it.
482	 */
483done:
484	if (fp != NULL) {
485		if (error == 0) {
486			*fp = nfp;
487			nfp = NULL;
488		} else
489			*fp = NULL;
490	}
491	if (nfp != NULL)
492		fdrop(nfp, td);
493	fdrop(headfp, td);
494	return (error);
495}
496
/*
 * accept(2) entry point: accept1() without the old-sockaddr conversion.
 */
int
sys_accept(struct thread *td, struct accept_args *uap)
{
	int compat = 0;

	return (accept1(td, uap, compat));
}
505
506#ifdef COMPAT_OLDSOCK
/*
 * Old accept entry point: accept1() with the osockaddr conversion
 * requested.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int compat = 1;

	return (accept1(td, uap, compat));
}
515#endif /* COMPAT_OLDSOCK */
516
517/* ARGSUSED */
518int
519sys_connect(td, uap)
520	struct thread *td;
521	struct connect_args /* {
522		int	s;
523		caddr_t	name;
524		int	namelen;
525	} */ *uap;
526{
527	struct sockaddr *sa;
528	int error;
529
530	error = getsockaddr(&sa, uap->name, uap->namelen);
531	if (error)
532		return (error);
533
534	error = kern_connect(td, uap->s, sa);
535	free(sa, M_SONAME);
536	return (error);
537}
538
539
540int
541kern_connect(td, fd, sa)
542	struct thread *td;
543	int fd;
544	struct sockaddr *sa;
545{
546	struct socket *so;
547	struct file *fp;
548	int error;
549	int interrupted = 0;
550
551	AUDIT_ARG_FD(fd);
552	error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
553	if (error)
554		return (error);
555	so = fp->f_data;
556	if (so->so_state & SS_ISCONNECTING) {
557		error = EALREADY;
558		goto done1;
559	}
560#ifdef KTRACE
561	if (KTRPOINT(td, KTR_STRUCT))
562		ktrsockaddr(sa);
563#endif
564#ifdef MAC
565	error = mac_socket_check_connect(td->td_ucred, so, sa);
566	if (error)
567		goto bad;
568#endif
569	error = soconnect(so, sa, td);
570	if (error)
571		goto bad;
572	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
573		error = EINPROGRESS;
574		goto done1;
575	}
576	SOCK_LOCK(so);
577	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
578		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
579		    "connec", 0);
580		if (error) {
581			if (error == EINTR || error == ERESTART)
582				interrupted = 1;
583			break;
584		}
585	}
586	if (error == 0) {
587		error = so->so_error;
588		so->so_error = 0;
589	}
590	SOCK_UNLOCK(so);
591bad:
592	if (!interrupted)
593		so->so_state &= ~SS_ISCONNECTING;
594	if (error == ERESTART)
595		error = EINTR;
596done1:
597	fdrop(fp, td);
598	return (error);
599}
600
601int
602kern_socketpair(struct thread *td, int domain, int type, int protocol,
603    int *rsv)
604{
605	struct filedesc *fdp = td->td_proc->p_fd;
606	struct file *fp1, *fp2;
607	struct socket *so1, *so2;
608	int fd, error;
609
610	AUDIT_ARG_SOCKET(domain, type, protocol);
611#ifdef MAC
612	/* We might want to have a separate check for socket pairs. */
613	error = mac_socket_check_create(td->td_ucred, domain, type,
614	    protocol);
615	if (error)
616		return (error);
617#endif
618	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
619	if (error)
620		return (error);
621	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
622	if (error)
623		goto free1;
624	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
625	error = falloc(td, &fp1, &fd, 0);
626	if (error)
627		goto free2;
628	rsv[0] = fd;
629	fp1->f_data = so1;	/* so1 already has ref count */
630	error = falloc(td, &fp2, &fd, 0);
631	if (error)
632		goto free3;
633	fp2->f_data = so2;	/* so2 already has ref count */
634	rsv[1] = fd;
635	error = soconnect2(so1, so2);
636	if (error)
637		goto free4;
638	if (type == SOCK_DGRAM) {
639		/*
640		 * Datagram socket connection is asymmetric.
641		 */
642		 error = soconnect2(so2, so1);
643		 if (error)
644			goto free4;
645	}
646	finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
647	finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
648	fdrop(fp1, td);
649	fdrop(fp2, td);
650	return (0);
651free4:
652	fdclose(fdp, fp2, rsv[1], td);
653	fdrop(fp2, td);
654free3:
655	fdclose(fdp, fp1, rsv[0], td);
656	fdrop(fp1, td);
657free2:
658	if (so2 != NULL)
659		(void)soclose(so2);
660free1:
661	if (so1 != NULL)
662		(void)soclose(so1);
663	return (error);
664}
665
666int
667sys_socketpair(struct thread *td, struct socketpair_args *uap)
668{
669	int error, sv[2];
670
671	error = kern_socketpair(td, uap->domain, uap->type,
672	    uap->protocol, sv);
673	if (error)
674		return (error);
675	error = copyout(sv, uap->rsv, 2 * sizeof(int));
676	if (error) {
677		(void)kern_close(td, sv[0]);
678		(void)kern_close(td, sv[1]);
679	}
680	return (error);
681}
682
683static int
684sendit(td, s, mp, flags)
685	struct thread *td;
686	int s;
687	struct msghdr *mp;
688	int flags;
689{
690	struct mbuf *control;
691	struct sockaddr *to;
692	int error;
693
694#ifdef CAPABILITY_MODE
695	if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
696		return (ECAPMODE);
697#endif
698
699	if (mp->msg_name != NULL) {
700		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
701		if (error) {
702			to = NULL;
703			goto bad;
704		}
705		mp->msg_name = to;
706	} else {
707		to = NULL;
708	}
709
710	if (mp->msg_control) {
711		if (mp->msg_controllen < sizeof(struct cmsghdr)
712#ifdef COMPAT_OLDSOCK
713		    && mp->msg_flags != MSG_COMPAT
714#endif
715		) {
716			error = EINVAL;
717			goto bad;
718		}
719		error = sockargs(&control, mp->msg_control,
720		    mp->msg_controllen, MT_CONTROL);
721		if (error)
722			goto bad;
723#ifdef COMPAT_OLDSOCK
724		if (mp->msg_flags == MSG_COMPAT) {
725			struct cmsghdr *cm;
726
727			M_PREPEND(control, sizeof(*cm), M_WAIT);
728			cm = mtod(control, struct cmsghdr *);
729			cm->cmsg_len = control->m_len;
730			cm->cmsg_level = SOL_SOCKET;
731			cm->cmsg_type = SCM_RIGHTS;
732		}
733#endif
734	} else {
735		control = NULL;
736	}
737
738	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
739
740bad:
741	if (to)
742		free(to, M_SONAME);
743	return (error);
744}
745
746int
747kern_sendit(td, s, mp, flags, control, segflg)
748	struct thread *td;
749	int s;
750	struct msghdr *mp;
751	int flags;
752	struct mbuf *control;
753	enum uio_seg segflg;
754{
755	struct file *fp;
756	struct uio auio;
757	struct iovec *iov;
758	struct socket *so;
759	int i, error;
760	ssize_t len;
761	cap_rights_t rights;
762#ifdef KTRACE
763	struct uio *ktruio = NULL;
764#endif
765
766	AUDIT_ARG_FD(s);
767	rights = CAP_WRITE;
768	if (mp->msg_name != NULL)
769		rights |= CAP_CONNECT;
770	error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
771	if (error)
772		return (error);
773	so = (struct socket *)fp->f_data;
774
775#ifdef KTRACE
776	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
777		ktrsockaddr(mp->msg_name);
778#endif
779#ifdef MAC
780	if (mp->msg_name != NULL) {
781		error = mac_socket_check_connect(td->td_ucred, so,
782		    mp->msg_name);
783		if (error)
784			goto bad;
785	}
786	error = mac_socket_check_send(td->td_ucred, so);
787	if (error)
788		goto bad;
789#endif
790
791	auio.uio_iov = mp->msg_iov;
792	auio.uio_iovcnt = mp->msg_iovlen;
793	auio.uio_segflg = segflg;
794	auio.uio_rw = UIO_WRITE;
795	auio.uio_td = td;
796	auio.uio_offset = 0;			/* XXX */
797	auio.uio_resid = 0;
798	iov = mp->msg_iov;
799	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
800		if ((auio.uio_resid += iov->iov_len) < 0) {
801			error = EINVAL;
802			goto bad;
803		}
804	}
805#ifdef KTRACE
806	if (KTRPOINT(td, KTR_GENIO))
807		ktruio = cloneuio(&auio);
808#endif
809	len = auio.uio_resid;
810	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
811	if (error) {
812		if (auio.uio_resid != len && (error == ERESTART ||
813		    error == EINTR || error == EWOULDBLOCK))
814			error = 0;
815		/* Generation of SIGPIPE can be controlled per socket */
816		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
817		    !(flags & MSG_NOSIGNAL)) {
818			PROC_LOCK(td->td_proc);
819			tdsignal(td, SIGPIPE);
820			PROC_UNLOCK(td->td_proc);
821		}
822	}
823	if (error == 0)
824		td->td_retval[0] = len - auio.uio_resid;
825#ifdef KTRACE
826	if (ktruio != NULL) {
827		ktruio->uio_resid = td->td_retval[0];
828		ktrgenio(s, UIO_WRITE, ktruio, error);
829	}
830#endif
831bad:
832	fdrop(fp, td);
833	return (error);
834}
835
836int
837sys_sendto(td, uap)
838	struct thread *td;
839	struct sendto_args /* {
840		int	s;
841		caddr_t	buf;
842		size_t	len;
843		int	flags;
844		caddr_t	to;
845		int	tolen;
846	} */ *uap;
847{
848	struct msghdr msg;
849	struct iovec aiov;
850	int error;
851
852	msg.msg_name = uap->to;
853	msg.msg_namelen = uap->tolen;
854	msg.msg_iov = &aiov;
855	msg.msg_iovlen = 1;
856	msg.msg_control = 0;
857#ifdef COMPAT_OLDSOCK
858	msg.msg_flags = 0;
859#endif
860	aiov.iov_base = uap->buf;
861	aiov.iov_len = uap->len;
862	error = sendit(td, uap->s, &msg, uap->flags);
863	return (error);
864}
865
866#ifdef COMPAT_OLDSOCK
867int
868osend(td, uap)
869	struct thread *td;
870	struct osend_args /* {
871		int	s;
872		caddr_t	buf;
873		int	len;
874		int	flags;
875	} */ *uap;
876{
877	struct msghdr msg;
878	struct iovec aiov;
879	int error;
880
881	msg.msg_name = 0;
882	msg.msg_namelen = 0;
883	msg.msg_iov = &aiov;
884	msg.msg_iovlen = 1;
885	aiov.iov_base = uap->buf;
886	aiov.iov_len = uap->len;
887	msg.msg_control = 0;
888	msg.msg_flags = 0;
889	error = sendit(td, uap->s, &msg, uap->flags);
890	return (error);
891}
892
893int
894osendmsg(td, uap)
895	struct thread *td;
896	struct osendmsg_args /* {
897		int	s;
898		caddr_t	msg;
899		int	flags;
900	} */ *uap;
901{
902	struct msghdr msg;
903	struct iovec *iov;
904	int error;
905
906	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
907	if (error)
908		return (error);
909	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
910	if (error)
911		return (error);
912	msg.msg_iov = iov;
913	msg.msg_flags = MSG_COMPAT;
914	error = sendit(td, uap->s, &msg, uap->flags);
915	free(iov, M_IOV);
916	return (error);
917}
918#endif
919
920int
921sys_sendmsg(td, uap)
922	struct thread *td;
923	struct sendmsg_args /* {
924		int	s;
925		caddr_t	msg;
926		int	flags;
927	} */ *uap;
928{
929	struct msghdr msg;
930	struct iovec *iov;
931	int error;
932
933	error = copyin(uap->msg, &msg, sizeof (msg));
934	if (error)
935		return (error);
936	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
937	if (error)
938		return (error);
939	msg.msg_iov = iov;
940#ifdef COMPAT_OLDSOCK
941	msg.msg_flags = 0;
942#endif
943	error = sendit(td, uap->s, &msg, uap->flags);
944	free(iov, M_IOV);
945	return (error);
946}
947
948int
949kern_recvit(td, s, mp, fromseg, controlp)
950	struct thread *td;
951	int s;
952	struct msghdr *mp;
953	enum uio_seg fromseg;
954	struct mbuf **controlp;
955{
956	struct uio auio;
957	struct iovec *iov;
958	int i;
959	ssize_t len;
960	int error;
961	struct mbuf *m, *control = 0;
962	caddr_t ctlbuf;
963	struct file *fp;
964	struct socket *so;
965	struct sockaddr *fromsa = 0;
966#ifdef KTRACE
967	struct uio *ktruio = NULL;
968#endif
969
970	if (controlp != NULL)
971		*controlp = NULL;
972
973	AUDIT_ARG_FD(s);
974	error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
975	if (error)
976		return (error);
977	so = fp->f_data;
978
979#ifdef MAC
980	error = mac_socket_check_receive(td->td_ucred, so);
981	if (error) {
982		fdrop(fp, td);
983		return (error);
984	}
985#endif
986
987	auio.uio_iov = mp->msg_iov;
988	auio.uio_iovcnt = mp->msg_iovlen;
989	auio.uio_segflg = UIO_USERSPACE;
990	auio.uio_rw = UIO_READ;
991	auio.uio_td = td;
992	auio.uio_offset = 0;			/* XXX */
993	auio.uio_resid = 0;
994	iov = mp->msg_iov;
995	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
996		if ((auio.uio_resid += iov->iov_len) < 0) {
997			fdrop(fp, td);
998			return (EINVAL);
999		}
1000	}
1001#ifdef KTRACE
1002	if (KTRPOINT(td, KTR_GENIO))
1003		ktruio = cloneuio(&auio);
1004#endif
1005	len = auio.uio_resid;
1006	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
1007	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
1008	    &mp->msg_flags);
1009	if (error) {
1010		if (auio.uio_resid != len && (error == ERESTART ||
1011		    error == EINTR || error == EWOULDBLOCK))
1012			error = 0;
1013	}
1014#ifdef KTRACE
1015	if (ktruio != NULL) {
1016		ktruio->uio_resid = len - auio.uio_resid;
1017		ktrgenio(s, UIO_READ, ktruio, error);
1018	}
1019#endif
1020	if (error)
1021		goto out;
1022	td->td_retval[0] = len - auio.uio_resid;
1023	if (mp->msg_name) {
1024		len = mp->msg_namelen;
1025		if (len <= 0 || fromsa == 0)
1026			len = 0;
1027		else {
1028			/* save sa_len before it is destroyed by MSG_COMPAT */
1029			len = MIN(len, fromsa->sa_len);
1030#ifdef COMPAT_OLDSOCK
1031			if (mp->msg_flags & MSG_COMPAT)
1032				((struct osockaddr *)fromsa)->sa_family =
1033				    fromsa->sa_family;
1034#endif
1035			if (fromseg == UIO_USERSPACE) {
1036				error = copyout(fromsa, mp->msg_name,
1037				    (unsigned)len);
1038				if (error)
1039					goto out;
1040			} else
1041				bcopy(fromsa, mp->msg_name, len);
1042		}
1043		mp->msg_namelen = len;
1044	}
1045	if (mp->msg_control && controlp == NULL) {
1046#ifdef COMPAT_OLDSOCK
1047		/*
1048		 * We assume that old recvmsg calls won't receive access
1049		 * rights and other control info, esp. as control info
1050		 * is always optional and those options didn't exist in 4.3.
1051		 * If we receive rights, trim the cmsghdr; anything else
1052		 * is tossed.
1053		 */
1054		if (control && mp->msg_flags & MSG_COMPAT) {
1055			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1056			    SOL_SOCKET ||
1057			    mtod(control, struct cmsghdr *)->cmsg_type !=
1058			    SCM_RIGHTS) {
1059				mp->msg_controllen = 0;
1060				goto out;
1061			}
1062			control->m_len -= sizeof (struct cmsghdr);
1063			control->m_data += sizeof (struct cmsghdr);
1064		}
1065#endif
1066		len = mp->msg_controllen;
1067		m = control;
1068		mp->msg_controllen = 0;
1069		ctlbuf = mp->msg_control;
1070
1071		while (m && len > 0) {
1072			unsigned int tocopy;
1073
1074			if (len >= m->m_len)
1075				tocopy = m->m_len;
1076			else {
1077				mp->msg_flags |= MSG_CTRUNC;
1078				tocopy = len;
1079			}
1080
1081			if ((error = copyout(mtod(m, caddr_t),
1082					ctlbuf, tocopy)) != 0)
1083				goto out;
1084
1085			ctlbuf += tocopy;
1086			len -= tocopy;
1087			m = m->m_next;
1088		}
1089		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1090	}
1091out:
1092	fdrop(fp, td);
1093#ifdef KTRACE
1094	if (fromsa && KTRPOINT(td, KTR_STRUCT))
1095		ktrsockaddr(fromsa);
1096#endif
1097	if (fromsa)
1098		free(fromsa, M_SONAME);
1099
1100	if (error == 0 && controlp != NULL)
1101		*controlp = control;
1102	else  if (control)
1103		m_freem(control);
1104
1105	return (error);
1106}
1107
1108static int
1109recvit(td, s, mp, namelenp)
1110	struct thread *td;
1111	int s;
1112	struct msghdr *mp;
1113	void *namelenp;
1114{
1115	int error;
1116
1117	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1118	if (error)
1119		return (error);
1120	if (namelenp) {
1121		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1122#ifdef COMPAT_OLDSOCK
1123		if (mp->msg_flags & MSG_COMPAT)
1124			error = 0;	/* old recvfrom didn't check */
1125#endif
1126	}
1127	return (error);
1128}
1129
1130int
1131sys_recvfrom(td, uap)
1132	struct thread *td;
1133	struct recvfrom_args /* {
1134		int	s;
1135		caddr_t	buf;
1136		size_t	len;
1137		int	flags;
1138		struct sockaddr * __restrict	from;
1139		socklen_t * __restrict fromlenaddr;
1140	} */ *uap;
1141{
1142	struct msghdr msg;
1143	struct iovec aiov;
1144	int error;
1145
1146	if (uap->fromlenaddr) {
1147		error = copyin(uap->fromlenaddr,
1148		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1149		if (error)
1150			goto done2;
1151	} else {
1152		msg.msg_namelen = 0;
1153	}
1154	msg.msg_name = uap->from;
1155	msg.msg_iov = &aiov;
1156	msg.msg_iovlen = 1;
1157	aiov.iov_base = uap->buf;
1158	aiov.iov_len = uap->len;
1159	msg.msg_control = 0;
1160	msg.msg_flags = uap->flags;
1161	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1162done2:
1163	return(error);
1164}
1165
1166#ifdef COMPAT_OLDSOCK
1167int
1168orecvfrom(td, uap)
1169	struct thread *td;
1170	struct recvfrom_args *uap;
1171{
1172
1173	uap->flags |= MSG_COMPAT;
1174	return (sys_recvfrom(td, uap));
1175}
1176#endif
1177
1178#ifdef COMPAT_OLDSOCK
1179int
1180orecv(td, uap)
1181	struct thread *td;
1182	struct orecv_args /* {
1183		int	s;
1184		caddr_t	buf;
1185		int	len;
1186		int	flags;
1187	} */ *uap;
1188{
1189	struct msghdr msg;
1190	struct iovec aiov;
1191	int error;
1192
1193	msg.msg_name = 0;
1194	msg.msg_namelen = 0;
1195	msg.msg_iov = &aiov;
1196	msg.msg_iovlen = 1;
1197	aiov.iov_base = uap->buf;
1198	aiov.iov_len = uap->len;
1199	msg.msg_control = 0;
1200	msg.msg_flags = uap->flags;
1201	error = recvit(td, uap->s, &msg, NULL);
1202	return (error);
1203}
1204
1205/*
1206 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1207 * overlays the new one, missing only the flags, and with the (old) access
1208 * rights where the control fields are now.
1209 */
1210int
1211orecvmsg(td, uap)
1212	struct thread *td;
1213	struct orecvmsg_args /* {
1214		int	s;
1215		struct	omsghdr *msg;
1216		int	flags;
1217	} */ *uap;
1218{
1219	struct msghdr msg;
1220	struct iovec *iov;
1221	int error;
1222
1223	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1224	if (error)
1225		return (error);
1226	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1227	if (error)
1228		return (error);
1229	msg.msg_flags = uap->flags | MSG_COMPAT;
1230	msg.msg_iov = iov;
1231	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1232	if (msg.msg_controllen && error == 0)
1233		error = copyout(&msg.msg_controllen,
1234		    &uap->msg->msg_accrightslen, sizeof (int));
1235	free(iov, M_IOV);
1236	return (error);
1237}
1238#endif
1239
1240int
1241sys_recvmsg(td, uap)
1242	struct thread *td;
1243	struct recvmsg_args /* {
1244		int	s;
1245		struct	msghdr *msg;
1246		int	flags;
1247	} */ *uap;
1248{
1249	struct msghdr msg;
1250	struct iovec *uiov, *iov;
1251	int error;
1252
1253	error = copyin(uap->msg, &msg, sizeof (msg));
1254	if (error)
1255		return (error);
1256	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1257	if (error)
1258		return (error);
1259	msg.msg_flags = uap->flags;
1260#ifdef COMPAT_OLDSOCK
1261	msg.msg_flags &= ~MSG_COMPAT;
1262#endif
1263	uiov = msg.msg_iov;
1264	msg.msg_iov = iov;
1265	error = recvit(td, uap->s, &msg, NULL);
1266	if (error == 0) {
1267		msg.msg_iov = uiov;
1268		error = copyout(&msg, uap->msg, sizeof(msg));
1269	}
1270	free(iov, M_IOV);
1271	return (error);
1272}
1273
1274/* ARGSUSED */
1275int
1276sys_shutdown(td, uap)
1277	struct thread *td;
1278	struct shutdown_args /* {
1279		int	s;
1280		int	how;
1281	} */ *uap;
1282{
1283	struct socket *so;
1284	struct file *fp;
1285	int error;
1286
1287	AUDIT_ARG_FD(uap->s);
1288	error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
1289	    NULL);
1290	if (error == 0) {
1291		so = fp->f_data;
1292		error = soshutdown(so, uap->how);
1293		fdrop(fp, td);
1294	}
1295	return (error);
1296}
1297
1298/* ARGSUSED */
1299int
1300sys_setsockopt(td, uap)
1301	struct thread *td;
1302	struct setsockopt_args /* {
1303		int	s;
1304		int	level;
1305		int	name;
1306		caddr_t	val;
1307		int	valsize;
1308	} */ *uap;
1309{
1310
1311	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1312	    uap->val, UIO_USERSPACE, uap->valsize));
1313}
1314
1315int
1316kern_setsockopt(td, s, level, name, val, valseg, valsize)
1317	struct thread *td;
1318	int s;
1319	int level;
1320	int name;
1321	void *val;
1322	enum uio_seg valseg;
1323	socklen_t valsize;
1324{
1325	int error;
1326	struct socket *so;
1327	struct file *fp;
1328	struct sockopt sopt;
1329
1330	if (val == NULL && valsize != 0)
1331		return (EFAULT);
1332	if ((int)valsize < 0)
1333		return (EINVAL);
1334
1335	sopt.sopt_dir = SOPT_SET;
1336	sopt.sopt_level = level;
1337	sopt.sopt_name = name;
1338	sopt.sopt_val = val;
1339	sopt.sopt_valsize = valsize;
1340	switch (valseg) {
1341	case UIO_USERSPACE:
1342		sopt.sopt_td = td;
1343		break;
1344	case UIO_SYSSPACE:
1345		sopt.sopt_td = NULL;
1346		break;
1347	default:
1348		panic("kern_setsockopt called with bad valseg");
1349	}
1350
1351	AUDIT_ARG_FD(s);
1352	error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
1353	if (error == 0) {
1354		so = fp->f_data;
1355		error = sosetopt(so, &sopt);
1356		fdrop(fp, td);
1357	}
1358	return(error);
1359}
1360
1361/* ARGSUSED */
1362int
1363sys_getsockopt(td, uap)
1364	struct thread *td;
1365	struct getsockopt_args /* {
1366		int	s;
1367		int	level;
1368		int	name;
1369		void * __restrict	val;
1370		socklen_t * __restrict avalsize;
1371	} */ *uap;
1372{
1373	socklen_t valsize;
1374	int	error;
1375
1376	if (uap->val) {
1377		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1378		if (error)
1379			return (error);
1380	}
1381
1382	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1383	    uap->val, UIO_USERSPACE, &valsize);
1384
1385	if (error == 0)
1386		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1387	return (error);
1388}
1389
1390/*
1391 * Kernel version of getsockopt.
1392 * optval can be a userland or userspace. optlen is always a kernel pointer.
1393 */
1394int
1395kern_getsockopt(td, s, level, name, val, valseg, valsize)
1396	struct thread *td;
1397	int s;
1398	int level;
1399	int name;
1400	void *val;
1401	enum uio_seg valseg;
1402	socklen_t *valsize;
1403{
1404	int error;
1405	struct  socket *so;
1406	struct file *fp;
1407	struct	sockopt sopt;
1408
1409	if (val == NULL)
1410		*valsize = 0;
1411	if ((int)*valsize < 0)
1412		return (EINVAL);
1413
1414	sopt.sopt_dir = SOPT_GET;
1415	sopt.sopt_level = level;
1416	sopt.sopt_name = name;
1417	sopt.sopt_val = val;
1418	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1419	switch (valseg) {
1420	case UIO_USERSPACE:
1421		sopt.sopt_td = td;
1422		break;
1423	case UIO_SYSSPACE:
1424		sopt.sopt_td = NULL;
1425		break;
1426	default:
1427		panic("kern_getsockopt called with bad valseg");
1428	}
1429
1430	AUDIT_ARG_FD(s);
1431	error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
1432	if (error == 0) {
1433		so = fp->f_data;
1434		error = sogetopt(so, &sopt);
1435		*valsize = sopt.sopt_valsize;
1436		fdrop(fp, td);
1437	}
1438	return (error);
1439}
1440
1441/*
1442 * getsockname1() - Get socket name.
1443 */
1444/* ARGSUSED */
1445static int
1446getsockname1(td, uap, compat)
1447	struct thread *td;
1448	struct getsockname_args /* {
1449		int	fdes;
1450		struct sockaddr * __restrict asa;
1451		socklen_t * __restrict alen;
1452	} */ *uap;
1453	int compat;
1454{
1455	struct sockaddr *sa;
1456	socklen_t len;
1457	int error;
1458
1459	error = copyin(uap->alen, &len, sizeof(len));
1460	if (error)
1461		return (error);
1462
1463	error = kern_getsockname(td, uap->fdes, &sa, &len);
1464	if (error)
1465		return (error);
1466
1467	if (len != 0) {
1468#ifdef COMPAT_OLDSOCK
1469		if (compat)
1470			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1471#endif
1472		error = copyout(sa, uap->asa, (u_int)len);
1473	}
1474	free(sa, M_SONAME);
1475	if (error == 0)
1476		error = copyout(&len, uap->alen, sizeof(len));
1477	return (error);
1478}
1479
1480int
1481kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1482    socklen_t *alen)
1483{
1484	struct socket *so;
1485	struct file *fp;
1486	socklen_t len;
1487	int error;
1488
1489	if (*alen < 0)
1490		return (EINVAL);
1491
1492	AUDIT_ARG_FD(fd);
1493	error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
1494	if (error)
1495		return (error);
1496	so = fp->f_data;
1497	*sa = NULL;
1498	CURVNET_SET(so->so_vnet);
1499	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1500	CURVNET_RESTORE();
1501	if (error)
1502		goto bad;
1503	if (*sa == NULL)
1504		len = 0;
1505	else
1506		len = MIN(*alen, (*sa)->sa_len);
1507	*alen = len;
1508#ifdef KTRACE
1509	if (KTRPOINT(td, KTR_STRUCT))
1510		ktrsockaddr(*sa);
1511#endif
1512bad:
1513	fdrop(fp, td);
1514	if (error && *sa) {
1515		free(*sa, M_SONAME);
1516		*sa = NULL;
1517	}
1518	return (error);
1519}
1520
/*
 * getsockname(2) system call: getsockname1() without the old-sockaddr
 * compatibility rewrite.
 */
int
sys_getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1529
1530#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API getsockname: getsockname1() with the old-sockaddr
 * compatibility rewrite enabled.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1539#endif /* COMPAT_OLDSOCK */
1540
1541/*
1542 * getpeername1() - Get name of peer for connected socket.
1543 */
1544/* ARGSUSED */
1545static int
1546getpeername1(td, uap, compat)
1547	struct thread *td;
1548	struct getpeername_args /* {
1549		int	fdes;
1550		struct sockaddr * __restrict	asa;
1551		socklen_t * __restrict	alen;
1552	} */ *uap;
1553	int compat;
1554{
1555	struct sockaddr *sa;
1556	socklen_t len;
1557	int error;
1558
1559	error = copyin(uap->alen, &len, sizeof (len));
1560	if (error)
1561		return (error);
1562
1563	error = kern_getpeername(td, uap->fdes, &sa, &len);
1564	if (error)
1565		return (error);
1566
1567	if (len != 0) {
1568#ifdef COMPAT_OLDSOCK
1569		if (compat)
1570			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1571#endif
1572		error = copyout(sa, uap->asa, (u_int)len);
1573	}
1574	free(sa, M_SONAME);
1575	if (error == 0)
1576		error = copyout(&len, uap->alen, sizeof(len));
1577	return (error);
1578}
1579
1580int
1581kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1582    socklen_t *alen)
1583{
1584	struct socket *so;
1585	struct file *fp;
1586	socklen_t len;
1587	int error;
1588
1589	if (*alen < 0)
1590		return (EINVAL);
1591
1592	AUDIT_ARG_FD(fd);
1593	error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
1594	if (error)
1595		return (error);
1596	so = fp->f_data;
1597	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1598		error = ENOTCONN;
1599		goto done;
1600	}
1601	*sa = NULL;
1602	CURVNET_SET(so->so_vnet);
1603	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1604	CURVNET_RESTORE();
1605	if (error)
1606		goto bad;
1607	if (*sa == NULL)
1608		len = 0;
1609	else
1610		len = MIN(*alen, (*sa)->sa_len);
1611	*alen = len;
1612#ifdef KTRACE
1613	if (KTRPOINT(td, KTR_STRUCT))
1614		ktrsockaddr(*sa);
1615#endif
1616bad:
1617	if (error && *sa) {
1618		free(*sa, M_SONAME);
1619		*sa = NULL;
1620	}
1621done:
1622	fdrop(fp, td);
1623	return (error);
1624}
1625
/*
 * getpeername(2) system call: getpeername1() without the old-sockaddr
 * compatibility rewrite.
 */
int
sys_getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1634
1635#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API getpeername: getpeername1() with the old-sockaddr
 * compatibility rewrite enabled.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1645#endif /* COMPAT_OLDSOCK */
1646
1647int
1648sockargs(mp, buf, buflen, type)
1649	struct mbuf **mp;
1650	caddr_t buf;
1651	int buflen, type;
1652{
1653	struct sockaddr *sa;
1654	struct mbuf *m;
1655	int error;
1656
1657	if ((u_int)buflen > MLEN) {
1658#ifdef COMPAT_OLDSOCK
1659		if (type == MT_SONAME && (u_int)buflen <= 112)
1660			buflen = MLEN;		/* unix domain compat. hack */
1661		else
1662#endif
1663			if ((u_int)buflen > MCLBYTES)
1664				return (EINVAL);
1665	}
1666	m = m_get(M_WAIT, type);
1667	if ((u_int)buflen > MLEN)
1668		MCLGET(m, M_WAIT);
1669	m->m_len = buflen;
1670	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1671	if (error)
1672		(void) m_free(m);
1673	else {
1674		*mp = m;
1675		if (type == MT_SONAME) {
1676			sa = mtod(m, struct sockaddr *);
1677
1678#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1679			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1680				sa->sa_family = sa->sa_len;
1681#endif
1682			sa->sa_len = buflen;
1683		}
1684	}
1685	return (error);
1686}
1687
1688int
1689getsockaddr(namp, uaddr, len)
1690	struct sockaddr **namp;
1691	caddr_t uaddr;
1692	size_t len;
1693{
1694	struct sockaddr *sa;
1695	int error;
1696
1697	if (len > SOCK_MAXADDRLEN)
1698		return (ENAMETOOLONG);
1699	if (len < offsetof(struct sockaddr, sa_data[0]))
1700		return (EINVAL);
1701	sa = malloc(len, M_SONAME, M_WAITOK);
1702	error = copyin(uaddr, sa, len);
1703	if (error) {
1704		free(sa, M_SONAME);
1705	} else {
1706#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1707		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1708			sa->sa_family = sa->sa_len;
1709#endif
1710		sa->sa_len = len;
1711		*namp = sa;
1712	}
1713	return (error);
1714}
1715
1716#include <sys/condvar.h>
1717
1718struct sendfile_sync {
1719	struct mtx	mtx;
1720	struct cv	cv;
1721	unsigned 	count;
1722};
1723
1724/*
1725 * Detach mapped page and release resources back to the system.
1726 */
1727void
1728sf_buf_mext(void *addr, void *args)
1729{
1730	vm_page_t m;
1731	struct sendfile_sync *sfs;
1732
1733	m = sf_buf_page(args);
1734	sf_buf_free(args);
1735	vm_page_lock(m);
1736	vm_page_unwire(m, 0);
1737	/*
1738	 * Check for the object going away on us. This can
1739	 * happen since we don't hold a reference to it.
1740	 * If so, we're responsible for freeing the page.
1741	 */
1742	if (m->wire_count == 0 && m->object == NULL)
1743		vm_page_free(m);
1744	vm_page_unlock(m);
1745	if (addr == NULL)
1746		return;
1747	sfs = addr;
1748	mtx_lock(&sfs->mtx);
1749	KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1750	if (--sfs->count == 0)
1751		cv_signal(&sfs->cv);
1752	mtx_unlock(&sfs->mtx);
1753}
1754
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the
 * socket 's'.  'nbytes' limits how much of the file is sent; a value
 * of 0 means "until EOF".  A header and/or trailer may optionally be
 * added to the socket output through 'hdtr'.  When 'sbytes' is given,
 * the total number of bytes sent is written into *sbytes.
 *
 * This entry point uses the current (non-compat) nbytes semantics.
 */
int
sys_sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1772
1773static int
1774do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1775{
1776	struct sf_hdtr hdtr;
1777	struct uio *hdr_uio, *trl_uio;
1778	int error;
1779
1780	hdr_uio = trl_uio = NULL;
1781
1782	if (uap->hdtr != NULL) {
1783		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1784		if (error)
1785			goto out;
1786		if (hdtr.headers != NULL) {
1787			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1788			if (error)
1789				goto out;
1790		}
1791		if (hdtr.trailers != NULL) {
1792			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1793			if (error)
1794				goto out;
1795
1796		}
1797	}
1798
1799	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1800out:
1801	if (hdr_uio)
1802		free(hdr_uio, M_IOV);
1803	if (trl_uio)
1804		free(trl_uio, M_IOV);
1805	return (error);
1806}
1807
1808#ifdef COMPAT_FREEBSD4
1809int
1810freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1811{
1812	struct sendfile_args args;
1813
1814	args.fd = uap->fd;
1815	args.s = uap->s;
1816	args.offset = uap->offset;
1817	args.nbytes = uap->nbytes;
1818	args.hdtr = uap->hdtr;
1819	args.sbytes = uap->sbytes;
1820	args.flags = uap->flags;
1821
1822	return (do_sendfile(td, &args, 1));
1823}
1824#endif /* COMPAT_FREEBSD4 */
1825
1826int
1827kern_sendfile(struct thread *td, struct sendfile_args *uap,
1828    struct uio *hdr_uio, struct uio *trl_uio, int compat)
1829{
1830	struct file *sock_fp;
1831	struct vnode *vp;
1832	struct vm_object *obj = NULL;
1833	struct socket *so = NULL;
1834	struct mbuf *m = NULL;
1835	struct sf_buf *sf;
1836	struct vm_page *pg;
1837	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1838	int error, hdrlen = 0, mnw = 0;
1839	int vfslocked;
1840	struct sendfile_sync *sfs = NULL;
1841
1842	/*
1843	 * The file descriptor must be a regular file and have a
1844	 * backing VM object.
1845	 * File offset must be positive.  If it goes beyond EOF
1846	 * we send only the header/trailer and no payload data.
1847	 */
1848	AUDIT_ARG_FD(uap->fd);
1849	if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
1850		goto out;
1851	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1852	vn_lock(vp, LK_SHARED | LK_RETRY);
1853	if (vp->v_type == VREG) {
1854		obj = vp->v_object;
1855		if (obj != NULL) {
1856			/*
1857			 * Temporarily increase the backing VM
1858			 * object's reference count so that a forced
1859			 * reclamation of its vnode does not
1860			 * immediately destroy it.
1861			 */
1862			VM_OBJECT_LOCK(obj);
1863			if ((obj->flags & OBJ_DEAD) == 0) {
1864				vm_object_reference_locked(obj);
1865				VM_OBJECT_UNLOCK(obj);
1866			} else {
1867				VM_OBJECT_UNLOCK(obj);
1868				obj = NULL;
1869			}
1870		}
1871	}
1872	VOP_UNLOCK(vp, 0);
1873	VFS_UNLOCK_GIANT(vfslocked);
1874	if (obj == NULL) {
1875		error = EINVAL;
1876		goto out;
1877	}
1878	if (uap->offset < 0) {
1879		error = EINVAL;
1880		goto out;
1881	}
1882
1883	/*
1884	 * The socket must be a stream socket and connected.
1885	 * Remember if it a blocking or non-blocking socket.
1886	 */
1887	if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
1888	    &sock_fp, NULL)) != 0)
1889		goto out;
1890	so = sock_fp->f_data;
1891	if (so->so_type != SOCK_STREAM) {
1892		error = EINVAL;
1893		goto out;
1894	}
1895	if ((so->so_state & SS_ISCONNECTED) == 0) {
1896		error = ENOTCONN;
1897		goto out;
1898	}
1899	/*
1900	 * Do not wait on memory allocations but return ENOMEM for
1901	 * caller to retry later.
1902	 * XXX: Experimental.
1903	 */
1904	if (uap->flags & SF_MNOWAIT)
1905		mnw = 1;
1906
1907	if (uap->flags & SF_SYNC) {
1908		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
1909		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
1910		cv_init(&sfs->cv, "sendfile");
1911	}
1912
1913#ifdef MAC
1914	error = mac_socket_check_send(td->td_ucred, so);
1915	if (error)
1916		goto out;
1917#endif
1918
1919	/* If headers are specified copy them into mbufs. */
1920	if (hdr_uio != NULL) {
1921		hdr_uio->uio_td = td;
1922		hdr_uio->uio_rw = UIO_WRITE;
1923		if (hdr_uio->uio_resid > 0) {
1924			/*
1925			 * In FBSD < 5.0 the nbytes to send also included
1926			 * the header.  If compat is specified subtract the
1927			 * header size from nbytes.
1928			 */
1929			if (compat) {
1930				if (uap->nbytes > hdr_uio->uio_resid)
1931					uap->nbytes -= hdr_uio->uio_resid;
1932				else
1933					uap->nbytes = 0;
1934			}
1935			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1936			    0, 0, 0);
1937			if (m == NULL) {
1938				error = mnw ? EAGAIN : ENOBUFS;
1939				goto out;
1940			}
1941			hdrlen = m_length(m, NULL);
1942		}
1943	}
1944
1945	/*
1946	 * Protect against multiple writers to the socket.
1947	 *
1948	 * XXXRW: Historically this has assumed non-interruptibility, so now
1949	 * we implement that, but possibly shouldn't.
1950	 */
1951	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1952
1953	/*
1954	 * Loop through the pages of the file, starting with the requested
1955	 * offset. Get a file page (do I/O if necessary), map the file page
1956	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1957	 * it on the socket.
1958	 * This is done in two loops.  The inner loop turns as many pages
1959	 * as it can, up to available socket buffer space, without blocking
1960	 * into mbufs to have it bulk delivered into the socket send buffer.
1961	 * The outer loop checks the state and available space of the socket
1962	 * and takes care of the overall progress.
1963	 */
1964	for (off = uap->offset, rem = uap->nbytes; ; ) {
1965		struct mbuf *mtail = NULL;
1966		int loopbytes = 0;
1967		int space = 0;
1968		int done = 0;
1969
1970		/*
1971		 * Check the socket state for ongoing connection,
1972		 * no errors and space in socket buffer.
1973		 * If space is low allow for the remainder of the
1974		 * file to be processed if it fits the socket buffer.
1975		 * Otherwise block in waiting for sufficient space
1976		 * to proceed, or if the socket is nonblocking, return
1977		 * to userland with EAGAIN while reporting how far
1978		 * we've come.
1979		 * We wait until the socket buffer has significant free
1980		 * space to do bulk sends.  This makes good use of file
1981		 * system read ahead and allows packet segmentation
1982		 * offloading hardware to take over lots of work.  If
1983		 * we were not careful here we would send off only one
1984		 * sfbuf at a time.
1985		 */
1986		SOCKBUF_LOCK(&so->so_snd);
1987		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1988			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1989retry_space:
1990		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1991			error = EPIPE;
1992			SOCKBUF_UNLOCK(&so->so_snd);
1993			goto done;
1994		} else if (so->so_error) {
1995			error = so->so_error;
1996			so->so_error = 0;
1997			SOCKBUF_UNLOCK(&so->so_snd);
1998			goto done;
1999		}
2000		space = sbspace(&so->so_snd);
2001		if (space < rem &&
2002		    (space <= 0 ||
2003		     space < so->so_snd.sb_lowat)) {
2004			if (so->so_state & SS_NBIO) {
2005				SOCKBUF_UNLOCK(&so->so_snd);
2006				error = EAGAIN;
2007				goto done;
2008			}
2009			/*
2010			 * sbwait drops the lock while sleeping.
2011			 * When we loop back to retry_space the
2012			 * state may have changed and we retest
2013			 * for it.
2014			 */
2015			error = sbwait(&so->so_snd);
2016			/*
2017			 * An error from sbwait usually indicates that we've
2018			 * been interrupted by a signal. If we've sent anything
2019			 * then return bytes sent, otherwise return the error.
2020			 */
2021			if (error) {
2022				SOCKBUF_UNLOCK(&so->so_snd);
2023				goto done;
2024			}
2025			goto retry_space;
2026		}
2027		SOCKBUF_UNLOCK(&so->so_snd);
2028
2029		/*
2030		 * Reduce space in the socket buffer by the size of
2031		 * the header mbuf chain.
2032		 * hdrlen is set to 0 after the first loop.
2033		 */
2034		space -= hdrlen;
2035
2036		/*
2037		 * Loop and construct maximum sized mbuf chain to be bulk
2038		 * dumped into socket buffer.
2039		 */
2040		while (space > loopbytes) {
2041			vm_pindex_t pindex;
2042			vm_offset_t pgoff;
2043			struct mbuf *m0;
2044
2045			VM_OBJECT_LOCK(obj);
2046			/*
2047			 * Calculate the amount to transfer.
2048			 * Not to exceed a page, the EOF,
2049			 * or the passed in nbytes.
2050			 */
2051			pgoff = (vm_offset_t)(off & PAGE_MASK);
2052			xfsize = omin(PAGE_SIZE - pgoff,
2053			    obj->un_pager.vnp.vnp_size - uap->offset -
2054			    fsbytes - loopbytes);
2055			if (uap->nbytes)
2056				rem = (uap->nbytes - fsbytes - loopbytes);
2057			else
2058				rem = obj->un_pager.vnp.vnp_size -
2059				    uap->offset - fsbytes - loopbytes;
2060			xfsize = omin(rem, xfsize);
2061			xfsize = omin(space - loopbytes, xfsize);
2062			if (xfsize <= 0) {
2063				VM_OBJECT_UNLOCK(obj);
2064				done = 1;		/* all data sent */
2065				break;
2066			}
2067
2068			/*
2069			 * Attempt to look up the page.  Allocate
2070			 * if not found or wait and loop if busy.
2071			 */
2072			pindex = OFF_TO_IDX(off);
2073			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2074			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2075
2076			/*
2077			 * Check if page is valid for what we need,
2078			 * otherwise initiate I/O.
2079			 * If we already turned some pages into mbufs,
2080			 * send them off before we come here again and
2081			 * block.
2082			 */
2083			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2084				VM_OBJECT_UNLOCK(obj);
2085			else if (m != NULL)
2086				error = EAGAIN;	/* send what we already got */
2087			else if (uap->flags & SF_NODISKIO)
2088				error = EBUSY;
2089			else {
2090				int bsize;
2091				ssize_t resid;
2092
2093				/*
2094				 * Ensure that our page is still around
2095				 * when the I/O completes.
2096				 */
2097				vm_page_io_start(pg);
2098				VM_OBJECT_UNLOCK(obj);
2099
2100				/*
2101				 * Get the page from backing store.
2102				 */
2103				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2104				error = vn_lock(vp, LK_SHARED);
2105				if (error != 0)
2106					goto after_read;
2107				bsize = vp->v_mount->mnt_stat.f_iosize;
2108
2109				/*
2110				 * XXXMAC: Because we don't have fp->f_cred
2111				 * here, we pass in NOCRED.  This is probably
2112				 * wrong, but is consistent with our original
2113				 * implementation.
2114				 */
2115				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2116				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2117				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2118				    td->td_ucred, NOCRED, &resid, td);
2119				VOP_UNLOCK(vp, 0);
2120			after_read:
2121				VFS_UNLOCK_GIANT(vfslocked);
2122				VM_OBJECT_LOCK(obj);
2123				vm_page_io_finish(pg);
2124				if (!error)
2125					VM_OBJECT_UNLOCK(obj);
2126				mbstat.sf_iocnt++;
2127			}
2128			if (error) {
2129				vm_page_lock(pg);
2130				vm_page_unwire(pg, 0);
2131				/*
2132				 * See if anyone else might know about
2133				 * this page.  If not and it is not valid,
2134				 * then free it.
2135				 */
2136				if (pg->wire_count == 0 && pg->valid == 0 &&
2137				    pg->busy == 0 && !(pg->oflags & VPO_BUSY))
2138					vm_page_free(pg);
2139				vm_page_unlock(pg);
2140				VM_OBJECT_UNLOCK(obj);
2141				if (error == EAGAIN)
2142					error = 0;	/* not a real error */
2143				break;
2144			}
2145
2146			/*
2147			 * Get a sendfile buf.  When allocating the
2148			 * first buffer for mbuf chain, we usually
2149			 * wait as long as necessary, but this wait
2150			 * can be interrupted.  For consequent
2151			 * buffers, do not sleep, since several
2152			 * threads might exhaust the buffers and then
2153			 * deadlock.
2154			 */
2155			sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
2156			    SFB_CATCH);
2157			if (sf == NULL) {
2158				mbstat.sf_allocfail++;
2159				vm_page_lock(pg);
2160				vm_page_unwire(pg, 0);
2161				KASSERT(pg->object != NULL,
2162				    ("kern_sendfile: object disappeared"));
2163				vm_page_unlock(pg);
2164				if (m == NULL)
2165					error = (mnw ? EAGAIN : EINTR);
2166				break;
2167			}
2168
2169			/*
2170			 * Get an mbuf and set it up as having
2171			 * external storage.
2172			 */
2173			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2174			if (m0 == NULL) {
2175				error = (mnw ? EAGAIN : ENOBUFS);
2176				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2177				break;
2178			}
2179			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2180			    sfs, sf, M_RDONLY, EXT_SFBUF);
2181			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2182			m0->m_len = xfsize;
2183
2184			/* Append to mbuf chain. */
2185			if (mtail != NULL)
2186				mtail->m_next = m0;
2187			else if (m != NULL)
2188				m_last(m)->m_next = m0;
2189			else
2190				m = m0;
2191			mtail = m0;
2192
2193			/* Keep track of bits processed. */
2194			loopbytes += xfsize;
2195			off += xfsize;
2196
2197			if (sfs != NULL) {
2198				mtx_lock(&sfs->mtx);
2199				sfs->count++;
2200				mtx_unlock(&sfs->mtx);
2201			}
2202		}
2203
2204		/* Add the buffer chain to the socket buffer. */
2205		if (m != NULL) {
2206			int mlen, err;
2207
2208			mlen = m_length(m, NULL);
2209			SOCKBUF_LOCK(&so->so_snd);
2210			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2211				error = EPIPE;
2212				SOCKBUF_UNLOCK(&so->so_snd);
2213				goto done;
2214			}
2215			SOCKBUF_UNLOCK(&so->so_snd);
2216			CURVNET_SET(so->so_vnet);
2217			/* Avoid error aliasing. */
2218			err = (*so->so_proto->pr_usrreqs->pru_send)
2219				    (so, 0, m, NULL, NULL, td);
2220			CURVNET_RESTORE();
2221			if (err == 0) {
2222				/*
2223				 * We need two counters to get the
2224				 * file offset and nbytes to send
2225				 * right:
2226				 * - sbytes contains the total amount
2227				 *   of bytes sent, including headers.
2228				 * - fsbytes contains the total amount
2229				 *   of bytes sent from the file.
2230				 */
2231				sbytes += mlen;
2232				fsbytes += mlen;
2233				if (hdrlen) {
2234					fsbytes -= hdrlen;
2235					hdrlen = 0;
2236				}
2237			} else if (error == 0)
2238				error = err;
2239			m = NULL;	/* pru_send always consumes */
2240		}
2241
2242		/* Quit outer loop on error or when we're done. */
2243		if (done)
2244			break;
2245		if (error)
2246			goto done;
2247	}
2248
2249	/*
2250	 * Send trailers. Wimp out and use writev(2).
2251	 */
2252	if (trl_uio != NULL) {
2253		sbunlock(&so->so_snd);
2254		error = kern_writev(td, uap->s, trl_uio);
2255		if (error == 0)
2256			sbytes += td->td_retval[0];
2257		goto out;
2258	}
2259
2260done:
2261	sbunlock(&so->so_snd);
2262out:
2263	/*
2264	 * If there was no error we have to clear td->td_retval[0]
2265	 * because it may have been set by writev.
2266	 */
2267	if (error == 0) {
2268		td->td_retval[0] = 0;
2269	}
2270	if (uap->sbytes != NULL) {
2271		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2272	}
2273	if (obj != NULL)
2274		vm_object_deallocate(obj);
2275	if (vp != NULL) {
2276		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2277		vrele(vp);
2278		VFS_UNLOCK_GIANT(vfslocked);
2279	}
2280	if (so)
2281		fdrop(sock_fp, td);
2282	if (m)
2283		m_freem(m);
2284
2285	if (sfs != NULL) {
2286		mtx_lock(&sfs->mtx);
2287		if (sfs->count != 0)
2288			cv_wait(&sfs->cv, &sfs->mtx);
2289		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2290		cv_destroy(&sfs->cv);
2291		mtx_destroy(&sfs->mtx);
2292		free(sfs, M_TEMP);
2293	}
2294
2295	if (error == ERESTART)
2296		error = EINTR;
2297
2298	return (error);
2299}
2300
2301/*
2302 * SCTP syscalls.
2303 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2304 * otherwise all return EOPNOTSUPP.
2305 * XXX: We should make this loadable one day.
2306 */
2307int
2308sys_sctp_peeloff(td, uap)
2309	struct thread *td;
2310	struct sctp_peeloff_args /* {
2311		int	sd;
2312		caddr_t	name;
2313	} */ *uap;
2314{
2315#if (defined(INET) || defined(INET6)) && defined(SCTP)
2316	struct filedesc *fdp;
2317	struct file *nfp = NULL;
2318	int error;
2319	struct socket *head, *so;
2320	int fd;
2321	u_int fflag;
2322
2323	fdp = td->td_proc->p_fd;
2324	AUDIT_ARG_FD(uap->sd);
2325	error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
2326	if (error)
2327		goto done2;
2328	if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
2329		error = EOPNOTSUPP;
2330		goto done;
2331	}
2332	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2333	if (error)
2334		goto done;
2335	/*
2336	 * At this point we know we do have a assoc to pull
2337	 * we proceed to get the fd setup. This may block
2338	 * but that is ok.
2339	 */
2340
2341	error = falloc(td, &nfp, &fd, 0);
2342	if (error)
2343		goto done;
2344	td->td_retval[0] = fd;
2345
2346	CURVNET_SET(head->so_vnet);
2347	so = sonewconn(head, SS_ISCONNECTED);
2348	if (so == NULL)
2349		goto noconnection;
2350	/*
2351	 * Before changing the flags on the socket, we have to bump the
2352	 * reference count.  Otherwise, if the protocol calls sofree(),
2353	 * the socket will be released due to a zero refcount.
2354	 */
2355        SOCK_LOCK(so);
2356        soref(so);                      /* file descriptor reference */
2357        SOCK_UNLOCK(so);
2358
2359	ACCEPT_LOCK();
2360
2361	TAILQ_REMOVE(&head->so_comp, so, so_list);
2362	head->so_qlen--;
2363	so->so_state |= (head->so_state & SS_NBIO);
2364	so->so_state &= ~SS_NOFDREF;
2365	so->so_qstate &= ~SQ_COMP;
2366	so->so_head = NULL;
2367	ACCEPT_UNLOCK();
2368	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2369	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2370	if (error)
2371		goto noconnection;
2372	if (head->so_sigio != NULL)
2373		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2374
2375noconnection:
2376	/*
2377	 * close the new descriptor, assuming someone hasn't ripped it
2378	 * out from under us.
2379	 */
2380	if (error)
2381		fdclose(fdp, nfp, fd, td);
2382
2383	/*
2384	 * Release explicitly held references before returning.
2385	 */
2386	CURVNET_RESTORE();
2387done:
2388	if (nfp != NULL)
2389		fdrop(nfp, td);
2390	fputsock(head);
2391done2:
2392	return (error);
2393#else  /* SCTP */
2394	return (EOPNOTSUPP);
2395#endif /* SCTP */
2396}
2397
2398int
2399sys_sctp_generic_sendmsg (td, uap)
2400	struct thread *td;
2401	struct sctp_generic_sendmsg_args /* {
2402		int sd,
2403		caddr_t msg,
2404		int mlen,
2405		caddr_t to,
2406		__socklen_t tolen,
2407		struct sctp_sndrcvinfo *sinfo,
2408		int flags
2409	} */ *uap;
2410{
2411#if (defined(INET) || defined(INET6)) && defined(SCTP)
2412	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2413	struct socket *so;
2414	struct file *fp = NULL;
2415	int error = 0, len;
2416	struct sockaddr *to = NULL;
2417#ifdef KTRACE
2418	struct uio *ktruio = NULL;
2419#endif
2420	struct uio auio;
2421	struct iovec iov[1];
2422	cap_rights_t rights;
2423
2424	if (uap->sinfo) {
2425		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2426		if (error)
2427			return (error);
2428		u_sinfo = &sinfo;
2429	}
2430
2431	rights = CAP_WRITE;
2432	if (uap->tolen) {
2433		error = getsockaddr(&to, uap->to, uap->tolen);
2434		if (error) {
2435			to = NULL;
2436			goto sctp_bad2;
2437		}
2438		rights |= CAP_CONNECT;
2439	}
2440
2441	AUDIT_ARG_FD(uap->sd);
2442	error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
2443	if (error)
2444		goto sctp_bad;
2445#ifdef KTRACE
2446	if (to && (KTRPOINT(td, KTR_STRUCT)))
2447		ktrsockaddr(to);
2448#endif
2449
2450	iov[0].iov_base = uap->msg;
2451	iov[0].iov_len = uap->mlen;
2452
2453	so = (struct socket *)fp->f_data;
2454	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2455		error = EOPNOTSUPP;
2456		goto sctp_bad;
2457	}
2458#ifdef MAC
2459	error = mac_socket_check_send(td->td_ucred, so);
2460	if (error)
2461		goto sctp_bad;
2462#endif /* MAC */
2463
2464	auio.uio_iov =  iov;
2465	auio.uio_iovcnt = 1;
2466	auio.uio_segflg = UIO_USERSPACE;
2467	auio.uio_rw = UIO_WRITE;
2468	auio.uio_td = td;
2469	auio.uio_offset = 0;			/* XXX */
2470	auio.uio_resid = 0;
2471	len = auio.uio_resid = uap->mlen;
2472	CURVNET_SET(so->so_vnet);
2473	error = sctp_lower_sosend(so, to, &auio,
2474		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2475		    uap->flags, u_sinfo, td);
2476	CURVNET_RESTORE();
2477	if (error) {
2478		if (auio.uio_resid != len && (error == ERESTART ||
2479		    error == EINTR || error == EWOULDBLOCK))
2480			error = 0;
2481		/* Generation of SIGPIPE can be controlled per socket. */
2482		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2483		    !(uap->flags & MSG_NOSIGNAL)) {
2484			PROC_LOCK(td->td_proc);
2485			tdsignal(td, SIGPIPE);
2486			PROC_UNLOCK(td->td_proc);
2487		}
2488	}
2489	if (error == 0)
2490		td->td_retval[0] = len - auio.uio_resid;
2491#ifdef KTRACE
2492	if (ktruio != NULL) {
2493		ktruio->uio_resid = td->td_retval[0];
2494		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2495	}
2496#endif /* KTRACE */
2497sctp_bad:
2498	if (fp)
2499		fdrop(fp, td);
2500sctp_bad2:
2501	if (to)
2502		free(to, M_SONAME);
2503	return (error);
2504#else  /* SCTP */
2505	return (EOPNOTSUPP);
2506#endif /* SCTP */
2507}
2508
2509int
2510sys_sctp_generic_sendmsg_iov(td, uap)
2511	struct thread *td;
2512	struct sctp_generic_sendmsg_iov_args /* {
2513		int sd,
2514		struct iovec *iov,
2515		int iovlen,
2516		caddr_t to,
2517		__socklen_t tolen,
2518		struct sctp_sndrcvinfo *sinfo,
2519		int flags
2520	} */ *uap;
2521{
2522#if (defined(INET) || defined(INET6)) && defined(SCTP)
2523	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2524	struct socket *so;
2525	struct file *fp = NULL;
2526	int error=0, i;
2527	ssize_t len;
2528	struct sockaddr *to = NULL;
2529#ifdef KTRACE
2530	struct uio *ktruio = NULL;
2531#endif
2532	struct uio auio;
2533	struct iovec *iov, *tiov;
2534	cap_rights_t rights;
2535
2536	if (uap->sinfo) {
2537		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2538		if (error)
2539			return (error);
2540		u_sinfo = &sinfo;
2541	}
2542	rights = CAP_WRITE;
2543	if (uap->tolen) {
2544		error = getsockaddr(&to, uap->to, uap->tolen);
2545		if (error) {
2546			to = NULL;
2547			goto sctp_bad2;
2548		}
2549		rights |= CAP_CONNECT;
2550	}
2551
2552	AUDIT_ARG_FD(uap->sd);
2553	error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
2554	if (error)
2555		goto sctp_bad1;
2556
2557#ifdef COMPAT_FREEBSD32
2558	if (SV_CURPROC_FLAG(SV_ILP32))
2559		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
2560		    uap->iovlen, &iov, EMSGSIZE);
2561	else
2562#endif
2563		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2564	if (error)
2565		goto sctp_bad1;
2566#ifdef KTRACE
2567	if (to && (KTRPOINT(td, KTR_STRUCT)))
2568		ktrsockaddr(to);
2569#endif
2570
2571	so = (struct socket *)fp->f_data;
2572	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2573		error = EOPNOTSUPP;
2574		goto sctp_bad;
2575	}
2576#ifdef MAC
2577	error = mac_socket_check_send(td->td_ucred, so);
2578	if (error)
2579		goto sctp_bad;
2580#endif /* MAC */
2581
2582	auio.uio_iov = iov;
2583	auio.uio_iovcnt = uap->iovlen;
2584	auio.uio_segflg = UIO_USERSPACE;
2585	auio.uio_rw = UIO_WRITE;
2586	auio.uio_td = td;
2587	auio.uio_offset = 0;			/* XXX */
2588	auio.uio_resid = 0;
2589	tiov = iov;
2590	for (i = 0; i <uap->iovlen; i++, tiov++) {
2591		if ((auio.uio_resid += tiov->iov_len) < 0) {
2592			error = EINVAL;
2593			goto sctp_bad;
2594		}
2595	}
2596	len = auio.uio_resid;
2597	CURVNET_SET(so->so_vnet);
2598	error = sctp_lower_sosend(so, to, &auio,
2599		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2600		    uap->flags, u_sinfo, td);
2601	CURVNET_RESTORE();
2602	if (error) {
2603		if (auio.uio_resid != len && (error == ERESTART ||
2604		    error == EINTR || error == EWOULDBLOCK))
2605			error = 0;
2606		/* Generation of SIGPIPE can be controlled per socket */
2607		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2608		    !(uap->flags & MSG_NOSIGNAL)) {
2609			PROC_LOCK(td->td_proc);
2610			tdsignal(td, SIGPIPE);
2611			PROC_UNLOCK(td->td_proc);
2612		}
2613	}
2614	if (error == 0)
2615		td->td_retval[0] = len - auio.uio_resid;
2616#ifdef KTRACE
2617	if (ktruio != NULL) {
2618		ktruio->uio_resid = td->td_retval[0];
2619		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2620	}
2621#endif /* KTRACE */
2622sctp_bad:
2623	free(iov, M_IOV);
2624sctp_bad1:
2625	if (fp)
2626		fdrop(fp, td);
2627sctp_bad2:
2628	if (to)
2629		free(to, M_SONAME);
2630	return (error);
2631#else  /* SCTP */
2632	return (EOPNOTSUPP);
2633#endif /* SCTP */
2634}
2635
2636int
2637sys_sctp_generic_recvmsg(td, uap)
2638	struct thread *td;
2639	struct sctp_generic_recvmsg_args /* {
2640		int sd,
2641		struct iovec *iov,
2642		int iovlen,
2643		struct sockaddr *from,
2644		__socklen_t *fromlenaddr,
2645		struct sctp_sndrcvinfo *sinfo,
2646		int *msg_flags
2647	} */ *uap;
2648{
2649#if (defined(INET) || defined(INET6)) && defined(SCTP)
2650	uint8_t sockbufstore[256];
2651	struct uio auio;
2652	struct iovec *iov, *tiov;
2653	struct sctp_sndrcvinfo sinfo;
2654	struct socket *so;
2655	struct file *fp = NULL;
2656	struct sockaddr *fromsa;
2657	int fromlen;
2658	ssize_t len;
2659	int i, msg_flags;
2660	int error = 0;
2661#ifdef KTRACE
2662	struct uio *ktruio = NULL;
2663#endif
2664
2665	AUDIT_ARG_FD(uap->sd);
2666	error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
2667	if (error) {
2668		return (error);
2669	}
2670#ifdef COMPAT_FREEBSD32
2671	if (SV_CURPROC_FLAG(SV_ILP32))
2672		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
2673		    uap->iovlen, &iov, EMSGSIZE);
2674	else
2675#endif
2676		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2677	if (error)
2678		goto out1;
2679
2680	so = fp->f_data;
2681	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
2682		error = EOPNOTSUPP;
2683		goto out;
2684	}
2685#ifdef MAC
2686	error = mac_socket_check_receive(td->td_ucred, so);
2687	if (error) {
2688		goto out;
2689	}
2690#endif /* MAC */
2691
2692	if (uap->fromlenaddr) {
2693		error = copyin(uap->fromlenaddr,
2694		    &fromlen, sizeof (fromlen));
2695		if (error) {
2696			goto out;
2697		}
2698	} else {
2699		fromlen = 0;
2700	}
2701	if (uap->msg_flags) {
2702		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2703		if (error) {
2704			goto out;
2705		}
2706	} else {
2707		msg_flags = 0;
2708	}
2709	auio.uio_iov = iov;
2710	auio.uio_iovcnt = uap->iovlen;
2711  	auio.uio_segflg = UIO_USERSPACE;
2712	auio.uio_rw = UIO_READ;
2713	auio.uio_td = td;
2714	auio.uio_offset = 0;			/* XXX */
2715	auio.uio_resid = 0;
2716	tiov = iov;
2717	for (i = 0; i <uap->iovlen; i++, tiov++) {
2718		if ((auio.uio_resid += tiov->iov_len) < 0) {
2719			error = EINVAL;
2720			goto out;
2721		}
2722	}
2723	len = auio.uio_resid;
2724	fromsa = (struct sockaddr *)sockbufstore;
2725
2726#ifdef KTRACE
2727	if (KTRPOINT(td, KTR_GENIO))
2728		ktruio = cloneuio(&auio);
2729#endif /* KTRACE */
2730	memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
2731	CURVNET_SET(so->so_vnet);
2732	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2733		    fromsa, fromlen, &msg_flags,
2734		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2735	CURVNET_RESTORE();
2736	if (error) {
2737		if (auio.uio_resid != len && (error == ERESTART ||
2738		    error == EINTR || error == EWOULDBLOCK))
2739			error = 0;
2740	} else {
2741		if (uap->sinfo)
2742			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2743	}
2744#ifdef KTRACE
2745	if (ktruio != NULL) {
2746		ktruio->uio_resid = len - auio.uio_resid;
2747		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2748	}
2749#endif /* KTRACE */
2750	if (error)
2751		goto out;
2752	td->td_retval[0] = len - auio.uio_resid;
2753
2754	if (fromlen && uap->from) {
2755		len = fromlen;
2756		if (len <= 0 || fromsa == 0)
2757			len = 0;
2758		else {
2759			len = MIN(len, fromsa->sa_len);
2760			error = copyout(fromsa, uap->from, (size_t)len);
2761			if (error)
2762				goto out;
2763		}
2764		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2765		if (error) {
2766			goto out;
2767		}
2768	}
2769#ifdef KTRACE
2770	if (KTRPOINT(td, KTR_STRUCT))
2771		ktrsockaddr(fromsa);
2772#endif
2773	if (uap->msg_flags) {
2774		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2775		if (error) {
2776			goto out;
2777		}
2778	}
2779out:
2780	free(iov, M_IOV);
2781out1:
2782	if (fp)
2783		fdrop(fp, td);
2784
2785	return (error);
2786#else  /* SCTP */
2787	return (EOPNOTSUPP);
2788#endif /* SCTP */
2789}
2790