1/*	$NetBSD: uipc_syscalls.c,v 1.211 2024/02/03 19:05:14 jdolecek Exp $	*/
2
3/*-
4 * Copyright (c) 2008, 2009, 2023 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1982, 1986, 1989, 1990, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
61 */
62
63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.211 2024/02/03 19:05:14 jdolecek Exp $");
65
66#ifdef _KERNEL_OPT
67#include "opt_pipe.h"
68#include "opt_sctp.h"
69#endif
70
71#define MBUFTYPES
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
75#include <sys/proc.h>
76#include <sys/file.h>
77#include <sys/buf.h>
78#include <sys/mbuf.h>
79#include <sys/protosw.h>
80#include <sys/socket.h>
81#include <sys/socketvar.h>
82#include <sys/signalvar.h>
83#include <sys/un.h>
84#include <sys/ktrace.h>
85#include <sys/event.h>
86#include <sys/atomic.h>
87#include <sys/kauth.h>
88
89#ifdef SCTP
90#include <netinet/sctp_uio.h>
91#include <netinet/sctp_peeloff.h>
92#endif
93
94#include <sys/mount.h>
95#include <sys/syscallargs.h>
96
97/*
98 * System call interface to the socket abstraction.
99 */
100extern const struct fileops socketops;
101
102static int	sockargs_sb(struct sockaddr_big *, const void *, socklen_t);
103
104int
105sys___socket30(struct lwp *l, const struct sys___socket30_args *uap,
106    register_t *retval)
107{
108	/* {
109		syscallarg(int)	domain;
110		syscallarg(int)	type;
111		syscallarg(int)	protocol;
112	} */
113	int fd, error;
114	file_t *fp;
115
116	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
117	    SCARG(uap, protocol), &fd, &fp, NULL);
118	if (error == 0) {
119		fd_affix(l->l_proc, fp, fd);
120		*retval = fd;
121	}
122	return error;
123}
124
125int
126sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
127{
128	/* {
129		syscallarg(int)				s;
130		syscallarg(const struct sockaddr *)	name;
131		syscallarg(unsigned int)		namelen;
132	} */
133	int		error;
134	struct sockaddr_big sb;
135
136	error = sockargs_sb(&sb, SCARG(uap, name), SCARG(uap, namelen));
137	if (error)
138		return error;
139
140	return do_sys_bind(l, SCARG(uap, s), (struct sockaddr *)&sb);
141}
142
/*
 * Bind the socket behind descriptor fd to the address nam.
 *
 * The descriptor is resolved with fd_getsock() and released again with
 * fd_putfile() after sobind() has run.  Returns the fd_getsock() error
 * if the lookup fails, otherwise the result of sobind().
 */
int
do_sys_bind(struct lwp *l, int fd, struct sockaddr *nam)
{
	struct socket *sock;
	int rv;

	rv = fd_getsock(fd, &sock);
	if (rv != 0)
		return rv;

	rv = sobind(sock, nam, l);
	fd_putfile(fd);
	return rv;
}
155
156int
157sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
158{
159	/* {
160		syscallarg(int)	s;
161		syscallarg(int)	backlog;
162	} */
163	struct socket	*so;
164	int		error;
165
166	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
167		return (error);
168	error = solisten(so, SCARG(uap, backlog), l);
169	fd_putfile(SCARG(uap, s));
170	return error;
171}
172
173int
174do_sys_accept(struct lwp *l, int sock, struct sockaddr *name,
175    register_t *new_sock, const sigset_t *mask, int flags, int clrflags)
176{
177	file_t		*fp, *fp2;
178	int		error, fd;
179	struct socket	*so, *so2;
180	short		wakeup_state = 0;
181
182	if ((fp = fd_getfile(sock)) == NULL)
183		return EBADF;
184	if (fp->f_type != DTYPE_SOCKET) {
185		fd_putfile(sock);
186		return ENOTSOCK;
187	}
188	if ((error = fd_allocfile(&fp2, &fd)) != 0) {
189		fd_putfile(sock);
190		return error;
191	}
192	*new_sock = fd;
193	so = fp->f_socket;
194	solock(so);
195
196	if (__predict_false(mask))
197		sigsuspendsetup(l, mask);
198
199	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
200		error = EOPNOTSUPP;
201		goto bad;
202	}
203	if ((so->so_options & SO_ACCEPTCONN) == 0) {
204		error = EINVAL;
205		goto bad;
206	}
207	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
208		error = EWOULDBLOCK;
209		goto bad;
210	}
211	while (so->so_qlen == 0 && so->so_error == 0) {
212		if (so->so_state & SS_CANTRCVMORE) {
213			so->so_error = ECONNABORTED;
214			break;
215		}
216		if (wakeup_state & SS_RESTARTSYS) {
217			error = ERESTART;
218			goto bad;
219		}
220		error = sowait(so, true, 0);
221		if (error) {
222			goto bad;
223		}
224		wakeup_state = so->so_state;
225	}
226	if (so->so_error) {
227		error = so->so_error;
228		so->so_error = 0;
229		goto bad;
230	}
231	/* connection has been removed from the listen queue */
232	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
233	so2 = TAILQ_FIRST(&so->so_q);
234	if (soqremque(so2, 1) == 0)
235		panic("accept");
236	fp2->f_type = DTYPE_SOCKET;
237	fp2->f_flag = (fp->f_flag & ~clrflags) |
238	    ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
239	    ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
240	fp2->f_ops = &socketops;
241	fp2->f_socket = so2;
242	if (fp2->f_flag & FNONBLOCK)
243		so2->so_state |= SS_NBIO;
244	else
245		so2->so_state &= ~SS_NBIO;
246	error = soaccept(so2, name);
247	so2->so_cred = kauth_cred_hold(so->so_cred);
248	sounlock(so);
249	if (error) {
250		/* an error occurred, free the file descriptor and mbuf */
251		mutex_enter(&fp2->f_lock);
252		fp2->f_count++;
253		mutex_exit(&fp2->f_lock);
254		closef(fp2);
255		fd_abort(curproc, NULL, fd);
256	} else {
257		fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0);
258		fd_affix(curproc, fp2, fd);
259	}
260	fd_putfile(sock);
261	if (__predict_false(mask))
262		sigsuspendteardown(l);
263	return error;
264 bad:
265	sounlock(so);
266	fd_putfile(sock);
267	fd_abort(curproc, fp2, fd);
268	if (__predict_false(mask))
269		sigsuspendteardown(l);
270	return error;
271}
272
273int
274sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
275{
276	/* {
277		syscallarg(int)			s;
278		syscallarg(struct sockaddr *)	name;
279		syscallarg(unsigned int *)	anamelen;
280	} */
281	int error, fd;
282	struct sockaddr_big name;
283
284	name.sb_len = UCHAR_MAX;
285	error = do_sys_accept(l, SCARG(uap, s), (struct sockaddr *)&name,
286	    retval, NULL, 0, 0);
287	if (error != 0)
288		return error;
289	error = copyout_sockname_sb(SCARG(uap, name), SCARG(uap, anamelen),
290	    MSG_LENUSRSPACE, &name);
291	if (error != 0) {
292		fd = (int)*retval;
293		if (fd_getfile(fd) != NULL)
294			(void)fd_close(fd);
295	}
296	return error;
297}
298
299int
300sys_paccept(struct lwp *l, const struct sys_paccept_args *uap,
301    register_t *retval)
302{
303	/* {
304		syscallarg(int)			s;
305		syscallarg(struct sockaddr *)	name;
306		syscallarg(unsigned int *)	anamelen;
307		syscallarg(const sigset_t *)	mask;
308		syscallarg(int)			flags;
309	} */
310	int error, fd;
311	struct sockaddr_big name;
312	sigset_t *mask, amask;
313
314	if (SCARG(uap, mask) != NULL) {
315		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
316		if (error)
317			return error;
318		mask = &amask;
319	} else
320		mask = NULL;
321
322	name.sb_len = UCHAR_MAX;
323	error = do_sys_accept(l, SCARG(uap, s), (struct sockaddr *)&name,
324	    retval, mask, SCARG(uap, flags), FNONBLOCK);
325	if (error != 0)
326		return error;
327	error = copyout_sockname_sb(SCARG(uap, name), SCARG(uap, anamelen),
328	    MSG_LENUSRSPACE, &name);
329	if (error != 0) {
330		fd = (int)*retval;
331		if (fd_getfile(fd) != NULL)
332			(void)fd_close(fd);
333	}
334	return error;
335}
336
337int
338sys_connect(struct lwp *l, const struct sys_connect_args *uap,
339    register_t *retval)
340{
341	/* {
342		syscallarg(int)				s;
343		syscallarg(const struct sockaddr *)	name;
344		syscallarg(unsigned int)		namelen;
345	} */
346	int		error;
347	struct sockaddr_big sbig;
348
349	error = sockargs_sb(&sbig, SCARG(uap, name), SCARG(uap, namelen));
350	if (error)
351		return error;
352	return do_sys_connect(l, SCARG(uap, s), (struct sockaddr *)&sbig);
353}
354
355int
356do_sys_connect(struct lwp *l, int fd, struct sockaddr *nam)
357{
358	struct socket	*so;
359	int		error;
360	int		interrupted = 0;
361
362	if ((error = fd_getsock(fd, &so)) != 0) {
363		return (error);
364	}
365	solock(so);
366	if ((so->so_state & SS_ISCONNECTING) != 0) {
367		error = EALREADY;
368		goto out;
369	}
370
371	error = soconnect(so, nam, l);
372	if (error)
373		goto bad;
374	if ((so->so_state & (SS_NBIO|SS_ISCONNECTING)) ==
375	    (SS_NBIO|SS_ISCONNECTING)) {
376		error = EINPROGRESS;
377		goto out;
378	}
379	while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
380		error = sowait(so, true, 0);
381		if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
382			error = EPIPE;
383			interrupted = 1;
384			break;
385		}
386		if (error) {
387			if (error == EINTR || error == ERESTART)
388				interrupted = 1;
389			break;
390		}
391	}
392	if (error == 0) {
393		error = so->so_error;
394		so->so_error = 0;
395	}
396 bad:
397	if (!interrupted)
398		so->so_state &= ~SS_ISCONNECTING;
399	if (error == ERESTART)
400		error = EINTR;
401 out:
402	sounlock(so);
403	fd_putfile(fd);
404	return error;
405}
406
407int
408sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap,
409    register_t *retval)
410{
411	/* {
412		syscallarg(int)		domain;
413		syscallarg(int)		type;
414		syscallarg(int)		protocol;
415		syscallarg(int *)	rsv;
416	} */
417	file_t		*fp1, *fp2;
418	struct socket	*so1, *so2;
419	int		fd, error, sv[2];
420	proc_t		*p = curproc;
421	int		flags = SCARG(uap, type) & SOCK_FLAGS_MASK;
422	int		type = SCARG(uap, type) & ~SOCK_FLAGS_MASK;
423	int		domain = SCARG(uap, domain);
424	int		proto = SCARG(uap, protocol);
425
426	error = fsocreate(domain, &so1, type|flags, proto, &fd, &fp1, NULL);
427	if (error)
428		return error;
429	sv[0] = fd;
430
431	error = fsocreate(domain, &so2, type|flags, proto, &fd, &fp2, so1);
432	if (error)
433		goto out;
434	sv[1] = fd;
435
436	solock(so1);
437	error = soconnect2(so1, so2);
438	if (error == 0 && type == SOCK_DGRAM) {
439		/*
440		 * Datagram socket connection is asymmetric.
441		 */
442		error = soconnect2(so2, so1);
443	}
444	sounlock(so1);
445
446	if (error == 0)
447		error = copyout(sv, SCARG(uap, rsv), sizeof(sv));
448	if (error == 0) {
449		fd_affix(p, fp2, sv[1]);
450		fd_affix(p, fp1, sv[0]);
451		return 0;
452	}
453	fd_abort(p, fp2, sv[1]);
454	(void)soclose(so2);
455out:
456	fd_abort(p, fp1, sv[0]);
457	(void)soclose(so1);
458	return error;
459}
460
461int
462sys_sendto(struct lwp *l, const struct sys_sendto_args *uap,
463    register_t *retval)
464{
465	/* {
466		syscallarg(int)				s;
467		syscallarg(const void *)		buf;
468		syscallarg(size_t)			len;
469		syscallarg(int)				flags;
470		syscallarg(const struct sockaddr *)	to;
471		syscallarg(unsigned int)		tolen;
472	} */
473	struct msghdr	msg = {0};
474	struct iovec	aiov;
475
476	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
477	msg.msg_namelen = SCARG(uap, tolen);
478	msg.msg_iov = &aiov;
479	msg.msg_iovlen = 1;
480	msg.msg_control = NULL;
481	msg.msg_flags = 0;
482	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
483	aiov.iov_len = SCARG(uap, len);
484	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags),
485	    retval);
486}
487
488int
489sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap,
490    register_t *retval)
491{
492	/* {
493		syscallarg(int)				s;
494		syscallarg(const struct msghdr *)	msg;
495		syscallarg(int)				flags;
496	} */
497	struct msghdr	msg;
498	int		error;
499
500	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
501	if (error)
502		return (error);
503
504	msg.msg_flags = MSG_IOVUSRSPACE;
505	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags),
506	    retval);
507}
508
509int
510do_sys_sendmsg_so(struct lwp *l, int s, struct socket *so, file_t *fp,
511    struct msghdr *mp, int flags, register_t *retsize)
512{
513
514	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
515	struct sockaddr *sa = NULL;
516	struct mbuf	*to, *control;
517	struct uio	auio;
518	size_t		len, iovsz;
519	int		i, error;
520
521	ktrkuser("msghdr", mp, sizeof(*mp));
522
523	/* If the caller passed us stuff in mbufs, we must free them. */
524	to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
525	control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
526	iovsz = mp->msg_iovlen * sizeof(struct iovec);
527
528	if (mp->msg_flags & MSG_IOVUSRSPACE) {
529		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
530			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
531				error = EMSGSIZE;
532				goto bad;
533			}
534			iov = kmem_alloc(iovsz, KM_SLEEP);
535		}
536		if (mp->msg_iovlen != 0) {
537			error = copyin(mp->msg_iov, iov, iovsz);
538			if (error)
539				goto bad;
540		}
541		auio.uio_iov = iov;
542	} else
543		auio.uio_iov = mp->msg_iov;
544
545	auio.uio_iovcnt = mp->msg_iovlen;
546	auio.uio_rw = UIO_WRITE;
547	auio.uio_offset = 0;			/* XXX */
548	auio.uio_resid = 0;
549	KASSERT(l == curlwp);
550	auio.uio_vmspace = l->l_proc->p_vmspace;
551
552	tiov = auio.uio_iov;
553	for (i = 0; i < auio.uio_iovcnt; i++, tiov++) {
554		/*
555		 * Writes return ssize_t because -1 is returned on error.
556		 * Therefore, we must restrict the length to SSIZE_MAX to
557		 * avoid garbage return values.
558		 */
559		auio.uio_resid += tiov->iov_len;
560		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
561			error = EINVAL;
562			goto bad;
563		}
564	}
565
566	if (mp->msg_name && to == NULL) {
567		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
568		    UIO_USERSPACE, MT_SONAME);
569		if (error)
570			goto bad;
571	}
572
573	if (mp->msg_control) {
574		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
575			error = EINVAL;
576			goto bad;
577		}
578		if (control == NULL) {
579			error = sockargs(&control, mp->msg_control,
580			    mp->msg_controllen, UIO_USERSPACE, MT_CONTROL);
581			if (error)
582				goto bad;
583		}
584	}
585
586	if (ktrpoint(KTR_GENIO) && iovsz > 0) {
587		ktriov = kmem_alloc(iovsz, KM_SLEEP);
588		memcpy(ktriov, auio.uio_iov, iovsz);
589	}
590
591	if (mp->msg_name)
592		MCLAIM(to, so->so_mowner);
593	if (mp->msg_control)
594		MCLAIM(control, so->so_mowner);
595
596	if (to) {
597		sa = mtod(to, struct sockaddr *);
598	}
599
600	len = auio.uio_resid;
601	error = (*so->so_send)(so, sa, &auio, NULL, control, flags, l);
602	/* Protocol is responsible for freeing 'control' */
603	control = NULL;
604
605	if (error) {
606		if (auio.uio_resid != len && (error == ERESTART ||
607		    error == EINTR || error == EWOULDBLOCK))
608			error = 0;
609		if (error == EPIPE && (fp->f_flag & FNOSIGPIPE) == 0 &&
610		    (flags & MSG_NOSIGNAL) == 0) {
611			mutex_enter(&proc_lock);
612			psignal(l->l_proc, SIGPIPE);
613			mutex_exit(&proc_lock);
614		}
615	}
616	if (error == 0)
617		*retsize = len - auio.uio_resid;
618
619bad:
620	if (ktriov != NULL) {
621		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
622		kmem_free(ktriov, iovsz);
623	}
624
625	if (iov != aiov)
626		kmem_free(iov, iovsz);
627	if (to)
628		m_freem(to);
629	if (control)
630		m_freem(control);
631
632	return error;
633}
634
635int
636do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
637    register_t *retsize)
638{
639	int		error;
640	struct socket	*so;
641	file_t		*fp;
642
643	if ((error = fd_getsock1(s, &so, &fp)) != 0) {
644		/* We have to free msg_name and msg_control ourselves */
645		if (mp->msg_flags & MSG_NAMEMBUF)
646			m_freem(mp->msg_name);
647		if (mp->msg_flags & MSG_CONTROLMBUF)
648			m_freem(mp->msg_control);
649		return error;
650	}
651	error = do_sys_sendmsg_so(l, s, so, fp, mp, flags, retsize);
652	/* msg_name and msg_control freed */
653	fd_putfile(s);
654	return error;
655}
656
657int
658sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap,
659    register_t *retval)
660{
661	/* {
662		syscallarg(int)			s;
663		syscallarg(void *)		buf;
664		syscallarg(size_t)		len;
665		syscallarg(int)			flags;
666		syscallarg(struct sockaddr *)	from;
667		syscallarg(unsigned int *)	fromlenaddr;
668	} */
669	struct msghdr	msg = {0};
670	struct iovec	aiov;
671	int		error;
672	struct mbuf	*from;
673
674	msg.msg_name = NULL;
675	msg.msg_iov = &aiov;
676	msg.msg_iovlen = 1;
677	aiov.iov_base = SCARG(uap, buf);
678	aiov.iov_len = SCARG(uap, len);
679	msg.msg_control = NULL;
680	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
681
682	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
683	if (error != 0)
684		return error;
685
686	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
687	    MSG_LENUSRSPACE, from);
688	if (from != NULL)
689		m_free(from);
690	return error;
691}
692
693int
694sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap,
695    register_t *retval)
696{
697	/* {
698		syscallarg(int)			s;
699		syscallarg(struct msghdr *)	msg;
700		syscallarg(int)			flags;
701	} */
702	struct msghdr	msg;
703	int		error;
704	struct mbuf	*from, *control;
705
706	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
707	if (error)
708		return error;
709
710	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
711
712	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
713	    msg.msg_control != NULL ? &control : NULL, retval);
714	if (error != 0)
715		return error;
716
717	if (msg.msg_control != NULL)
718		error = copyout_msg_control(l, &msg, control);
719
720	if (error == 0)
721		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
722			from);
723	if (from != NULL)
724		m_free(from);
725	if (error == 0) {
726		ktrkuser("msghdr", &msg, sizeof(msg));
727		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
728	}
729
730	return error;
731}
732
733int
734sys_sendmmsg(struct lwp *l, const struct sys_sendmmsg_args *uap,
735    register_t *retval)
736{
737	/* {
738		syscallarg(int)			s;
739		syscallarg(struct mmsghdr *)	mmsg;
740		syscallarg(unsigned int)	vlen;
741		syscallarg(unsigned int)	flags;
742	} */
743	struct mmsghdr mmsg;
744	struct socket *so;
745	file_t *fp;
746	struct msghdr *msg = &mmsg.msg_hdr;
747	int error, s;
748	unsigned int vlen, flags, dg;
749
750	s = SCARG(uap, s);
751	if ((error = fd_getsock1(s, &so, &fp)) != 0)
752		return error;
753
754	vlen = SCARG(uap, vlen);
755	if (vlen > 1024)
756		vlen = 1024;
757
758	flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
759
760	for (dg = 0; dg < vlen;) {
761		error = copyin(SCARG(uap, mmsg) + dg, &mmsg, sizeof(mmsg));
762		if (error)
763			break;
764
765		msg->msg_flags = flags;
766
767		error = do_sys_sendmsg_so(l, s, so, fp, msg, flags, retval);
768		if (error)
769			break;
770
771		ktrkuser("msghdr", msg, sizeof(*msg));
772		mmsg.msg_len = *retval;
773		error = copyout(&mmsg, SCARG(uap, mmsg) + dg, sizeof(mmsg));
774		if (error)
775			break;
776		dg++;
777
778	}
779
780	*retval = dg;
781
782	fd_putfile(s);
783
784	/*
785	 * If we succeeded at least once, return 0.
786	 */
787	if (dg)
788		return 0;
789	return error;
790}
791
792/*
793 * Adjust for a truncated SCM_RIGHTS control message.
794 *  This means closing any file descriptors that aren't present
795 *  in the returned buffer.
796 *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
797 */
798static void
799free_rights(struct mbuf *m)
800{
801	struct cmsghdr *cm;
802	int *fdv;
803	unsigned int nfds, i;
804
805	KASSERT(sizeof(*cm) <= m->m_len);
806	cm = mtod(m, struct cmsghdr *);
807
808	KASSERT(CMSG_ALIGN(sizeof(*cm)) <= cm->cmsg_len);
809	KASSERT(cm->cmsg_len <= m->m_len);
810	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
811	fdv = (int *)CMSG_DATA(cm);
812
813	for (i = 0; i < nfds; i++)
814		if (fd_getfile(fdv[i]) != NULL)
815			(void)fd_close(fdv[i]);
816}
817
818void
819free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
820{
821	struct mbuf *next;
822	struct cmsghdr *cmsg;
823	bool do_free_rights = false;
824
825	while (control != NULL) {
826		cmsg = mtod(control, struct cmsghdr *);
827		if (control == uncopied)
828			do_free_rights = true;
829		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
830		    && cmsg->cmsg_type == SCM_RIGHTS)
831			free_rights(control);
832		next = control->m_next;
833		m_free(control);
834		control = next;
835	}
836}
837
838/* Copy socket control/CMSG data to user buffer, frees the mbuf */
839int
840copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
841{
842	int i, len, error = 0;
843	struct cmsghdr *cmsg;
844	struct mbuf *m;
845	char *q;
846
847	len = mp->msg_controllen;
848	if (len <= 0 || control == 0) {
849		mp->msg_controllen = 0;
850		free_control_mbuf(l, control, control);
851		return 0;
852	}
853
854	q = (char *)mp->msg_control;
855
856	for (m = control; m != NULL; ) {
857		cmsg = mtod(m, struct cmsghdr *);
858		i = m->m_len;
859		if (len < i) {
860			mp->msg_flags |= MSG_CTRUNC;
861			if (cmsg->cmsg_level == SOL_SOCKET
862			    && cmsg->cmsg_type == SCM_RIGHTS)
863				/* Do not truncate me ... */
864				break;
865			i = len;
866		}
867		error = copyout(mtod(m, void *), q, i);
868		ktrkuser(mbuftypes[MT_CONTROL], cmsg, cmsg->cmsg_len);
869		if (error != 0) {
870			/* We must free all the SCM_RIGHTS */
871			m = control;
872			break;
873		}
874		m = m->m_next;
875		if (m)
876			i = ALIGN(i);
877		q += i;
878		len -= i;
879		if (len <= 0)
880			break;
881	}
882
883	free_control_mbuf(l, control, m);
884
885	mp->msg_controllen = q - (char *)mp->msg_control;
886	return error;
887}
888
889int
890do_sys_recvmsg_so(struct lwp *l, int s, struct socket *so, struct msghdr *mp,
891    struct mbuf **from, struct mbuf **control, register_t *retsize)
892{
893	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
894	struct uio	auio;
895	size_t		len, iovsz;
896	int		i, error;
897
898	ktrkuser("msghdr", mp, sizeof(*mp));
899
900	*from = NULL;
901	if (control != NULL)
902		*control = NULL;
903
904	iovsz = mp->msg_iovlen * sizeof(struct iovec);
905
906	if (mp->msg_flags & MSG_IOVUSRSPACE) {
907		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
908			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
909				error = EMSGSIZE;
910				goto out;
911			}
912			iov = kmem_alloc(iovsz, KM_SLEEP);
913		}
914		if (mp->msg_iovlen != 0) {
915			error = copyin(mp->msg_iov, iov, iovsz);
916			if (error)
917				goto out;
918		}
919		auio.uio_iov = iov;
920	} else
921		auio.uio_iov = mp->msg_iov;
922	auio.uio_iovcnt = mp->msg_iovlen;
923	auio.uio_rw = UIO_READ;
924	auio.uio_offset = 0;			/* XXX */
925	auio.uio_resid = 0;
926	KASSERT(l == curlwp);
927	auio.uio_vmspace = l->l_proc->p_vmspace;
928
929	tiov = auio.uio_iov;
930	for (i = 0; i < auio.uio_iovcnt; i++, tiov++) {
931		/*
932		 * Reads return ssize_t because -1 is returned on error.
933		 * Therefore we must restrict the length to SSIZE_MAX to
934		 * avoid garbage return values.
935		 */
936		auio.uio_resid += tiov->iov_len;
937		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
938			error = EINVAL;
939			goto out;
940		}
941	}
942
943	if (ktrpoint(KTR_GENIO) && iovsz > 0) {
944		ktriov = kmem_alloc(iovsz, KM_SLEEP);
945		memcpy(ktriov, auio.uio_iov, iovsz);
946	}
947
948	len = auio.uio_resid;
949	mp->msg_flags &= MSG_USERFLAGS;
950	error = (*so->so_receive)(so, from, &auio, NULL, control,
951	    &mp->msg_flags);
952	KASSERT(*from == NULL || (*from)->m_next == NULL);
953	len -= auio.uio_resid;
954	*retsize = len;
955	if (error != 0 && len != 0
956	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
957		/* Some data transferred */
958		error = 0;
959
960	if (ktriov != NULL) {
961		ktrgeniov(s, UIO_READ, ktriov, len, error);
962		kmem_free(ktriov, iovsz);
963	}
964
965	if (error != 0) {
966		m_freem(*from);
967		*from = NULL;
968		if (control != NULL) {
969			free_control_mbuf(l, *control, *control);
970			*control = NULL;
971		}
972	}
973 out:
974	if (iov != aiov)
975		kmem_free(iov, iovsz);
976	return error;
977}
978
979
980int
981do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp,
982    struct mbuf **from, struct mbuf **control, register_t *retsize)
983{
984	int error;
985	struct socket *so;
986
987	if ((error = fd_getsock(s, &so)) != 0)
988		return error;
989	error = do_sys_recvmsg_so(l, s, so, mp, from, control, retsize);
990	fd_putfile(s);
991	return error;
992}
993
994int
995sys_recvmmsg(struct lwp *l, const struct sys_recvmmsg_args *uap,
996    register_t *retval)
997{
998	/* {
999		syscallarg(int)			s;
1000		syscallarg(struct mmsghdr *)	mmsg;
1001		syscallarg(unsigned int)	vlen;
1002		syscallarg(unsigned int)	flags;
1003		syscallarg(struct timespec *)	timeout;
1004	} */
1005	struct mmsghdr mmsg;
1006	struct socket *so;
1007	struct msghdr *msg = &mmsg.msg_hdr;
1008	int error, s;
1009	struct mbuf *from, *control;
1010	struct timespec ts, now;
1011	unsigned int vlen, flags, dg;
1012
1013	if (SCARG(uap, timeout)) {
1014		if ((error = copyin(SCARG(uap, timeout), &ts, sizeof(ts))) != 0)
1015			return error;
1016		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000L)
1017			return EINVAL;
1018		getnanotime(&now);
1019		if (timespecaddok(&now, &ts)) {
1020			timespecadd(&now, &ts, &ts);
1021		} else {
1022			ts.tv_sec = __type_max(time_t);
1023			ts.tv_nsec = 999999999L;
1024		}
1025	}
1026
1027	s = SCARG(uap, s);
1028	if ((error = fd_getsock(s, &so)) != 0)
1029		return error;
1030
1031	/*
1032	 * If so->so_rerror holds a deferred error return it now.
1033	 */
1034	if (so->so_rerror) {
1035		error = so->so_rerror;
1036		so->so_rerror = 0;
1037		fd_putfile(s);
1038		return error;
1039	}
1040
1041	vlen = SCARG(uap, vlen);
1042	if (vlen > 1024)
1043		vlen = 1024;
1044
1045	from = NULL;
1046	flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
1047
1048	for (dg = 0; dg < vlen;) {
1049		error = copyin(SCARG(uap, mmsg) + dg, &mmsg, sizeof(mmsg));
1050		if (error)
1051			break;
1052
1053		msg->msg_flags = flags & ~MSG_WAITFORONE;
1054
1055		if (from != NULL) {
1056			m_free(from);
1057			from = NULL;
1058		}
1059
1060		error = do_sys_recvmsg_so(l, s, so, msg, &from,
1061		    msg->msg_control != NULL ? &control : NULL, retval);
1062		if (error) {
1063			if (error == EAGAIN && dg > 0)
1064				error = 0;
1065			break;
1066		}
1067
1068		if (msg->msg_control != NULL)
1069			error = copyout_msg_control(l, msg, control);
1070		if (error)
1071			break;
1072
1073		error = copyout_sockname(msg->msg_name, &msg->msg_namelen, 0,
1074		    from);
1075		if (error)
1076			break;
1077
1078		ktrkuser("msghdr", msg, sizeof *msg);
1079		mmsg.msg_len = *retval;
1080
1081		error = copyout(&mmsg, SCARG(uap, mmsg) + dg, sizeof(mmsg));
1082		if (error)
1083			break;
1084
1085		dg++;
1086		if (msg->msg_flags & MSG_OOB)
1087			break;
1088
1089		if (SCARG(uap, timeout)) {
1090			getnanotime(&now);
1091			if (timespeccmp(&ts, &now, <))
1092				break;
1093		}
1094
1095		if (flags & MSG_WAITFORONE)
1096			flags |= MSG_DONTWAIT;
1097
1098	}
1099
1100	if (from != NULL)
1101		m_free(from);
1102
1103	*retval = dg;
1104
1105	/*
1106	 * If we succeeded at least once, return 0, hopefully so->so_rerror
1107	 * will catch it next time.
1108	 */
1109	if (error && dg > 0) {
1110		so->so_rerror = error;
1111		error = 0;
1112	}
1113
1114	fd_putfile(s);
1115
1116	return error;
1117}
1118
1119int
1120sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap,
1121    register_t *retval)
1122{
1123	/* {
1124		syscallarg(int)	s;
1125		syscallarg(int)	how;
1126	} */
1127	struct socket	*so;
1128	int		error;
1129
1130	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
1131		return error;
1132	solock(so);
1133	error = soshutdown(so, SCARG(uap, how));
1134	sounlock(so);
1135	fd_putfile(SCARG(uap, s));
1136	return error;
1137}
1138
1139int
1140sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap,
1141    register_t *retval)
1142{
1143	/* {
1144		syscallarg(int)			s;
1145		syscallarg(int)			level;
1146		syscallarg(int)			name;
1147		syscallarg(const void *)	val;
1148		syscallarg(unsigned int)	valsize;
1149	} */
1150	struct sockopt	sopt;
1151	struct socket	*so;
1152	file_t		*fp;
1153	int		error;
1154	unsigned int	len;
1155
1156	len = SCARG(uap, valsize);
1157	if (len > 0 && SCARG(uap, val) == NULL)
1158		return EINVAL;
1159
1160	if (len > MCLBYTES)
1161		return EINVAL;
1162
1163	if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
1164		return (error);
1165
1166	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
1167
1168	if (len > 0) {
1169		error = copyin(SCARG(uap, val), sopt.sopt_data, len);
1170		if (error)
1171			goto out;
1172	}
1173
1174	error = sosetopt(so, &sopt);
1175	if (so->so_options & SO_NOSIGPIPE)
1176		atomic_or_uint(&fp->f_flag, FNOSIGPIPE);
1177	else
1178		atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE);
1179
1180 out:
1181	sockopt_destroy(&sopt);
1182	fd_putfile(SCARG(uap, s));
1183	return error;
1184}
1185
1186static int
1187getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap,
1188    register_t *retval, bool copyarg)
1189{
1190	struct sockopt	sopt;
1191	struct socket	*so;
1192	file_t		*fp;
1193	unsigned int	valsize, len;
1194	int		error;
1195
1196	if (SCARG(uap, val) != NULL) {
1197		error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
1198		if (error)
1199			return error;
1200	} else
1201		valsize = 0;
1202
1203	if (valsize > MCLBYTES)
1204		return EINVAL;
1205
1206	if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
1207		return error;
1208
1209	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), valsize);
1210	if (copyarg && valsize > 0) {
1211		error = copyin(SCARG(uap, val), sopt.sopt_data, valsize);
1212		if (error)
1213			goto out;
1214	}
1215
1216	if (fp->f_flag & FNOSIGPIPE)
1217		so->so_options |= SO_NOSIGPIPE;
1218	else
1219		so->so_options &= ~SO_NOSIGPIPE;
1220
1221	error = sogetopt(so, &sopt);
1222	if (error || valsize == 0)
1223		goto out;
1224
1225	len = uimin(valsize, sopt.sopt_retsize);
1226	error = copyout(sopt.sopt_data, SCARG(uap, val), len);
1227	if (error)
1228		goto out;
1229
1230	error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
1231 out:
1232	sockopt_destroy(&sopt);
1233	fd_putfile(SCARG(uap, s));
1234	return error;
1235}
1236
/*
 * getsockopt system call entry point.
 *
 * Forwards to getsockopt() with copyarg set to false, so the contents of
 * the user's val buffer are not copied in before the option is fetched.
 */
int
sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			s;
		syscallarg(int)			level;
		syscallarg(int)			name;
		syscallarg(void *)		val;
		syscallarg(unsigned int *)	avalsize;
	} */
	return getsockopt(l, uap, retval, false);
}
1250
/*
 * getsockopt2 system call entry point.
 *
 * Forwards to getsockopt() with copyarg set to true, so the current
 * contents of the user's val buffer are copied in and are available to
 * the option handler as input.  The argument structure is cast to
 * struct sys_getsockopt_args; the syscallarg list below names the same
 * members in the same order.
 */
int
sys_getsockopt2(struct lwp *l, const struct sys_getsockopt2_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			s;
		syscallarg(int)			level;
		syscallarg(int)			name;
		syscallarg(void *)		val;
		syscallarg(unsigned int *)	avalsize;
	} */
	return getsockopt(l, (const struct sys_getsockopt_args *) uap, retval, true);
}
1264
1265#ifdef PIPE_SOCKETPAIR
1266
1267int
1268pipe1(struct lwp *l, int *fildes, int flags)
1269{
1270	file_t		*rf, *wf;
1271	struct socket	*rso, *wso;
1272	int		error, soflags = 0;
1273	unsigned	rfd, wfd;
1274	proc_t		*p = l->l_proc;
1275
1276	if (flags & ~(O_CLOEXEC|O_NONBLOCK|O_NOSIGPIPE))
1277		return EINVAL;
1278	if (flags & O_CLOEXEC)
1279		soflags |= SOCK_CLOEXEC;
1280	if (flags & O_NONBLOCK)
1281		soflags |= SOCK_NONBLOCK;
1282	if (flags & O_NOSIGPIPE)
1283		soflags |= SOCK_NOSIGPIPE;
1284
1285	error = fsocreate(AF_LOCAL, &rso, SOCK_STREAM|soflags, 0, &rfd, &rf,
1286	    NULL);
1287	if (error)
1288		goto free1;
1289	error = fsocreate(AF_LOCAL, &wso, SOCK_STREAM|soflags, 0, &wfd, &wf,
1290	    rso);
1291	if (error)
1292		goto free2;
1293
1294	/* make sure the descriptors are uni-directional */
1295	rf->f_type = rf->f_type & ~(FWRITE);
1296	wf->f_type = wf->f_type & ~(FREAD);
1297
1298	/* remember this socket pair implements a pipe */
1299	rso->so_state |= SS_ISAPIPE;
1300	wso->so_state |= SS_ISAPIPE;
1301
1302	solock(wso);
1303	/*
1304	 * Pipes must be readable when there is at least 1
1305	 * byte of data available in the receive buffer.
1306	 *
1307	 * Pipes must be writable when there is space for
1308	 * at least PIPE_BUF bytes in the send buffer.
1309	 * If we're increasing the low water mark for the
1310	 * send buffer, then mimic how soreserve() would
1311	 * have set the high water mark.
1312	 */
1313	rso->so_rcv.sb_lowat = 1;
1314	if (wso->so_snd.sb_lowat < PIPE_BUF) {
1315		wso->so_snd.sb_hiwat = PIPE_BUF * 2;
1316	}
1317	wso->so_snd.sb_lowat = PIPE_BUF;
1318	error = unp_connect2(wso, rso);
1319	sounlock(wso);
1320
1321	if (error != 0)
1322		goto free3;
1323
1324	fd_affix(p, wf, wfd);
1325	fd_affix(p, rf, rfd);
1326	fildes[0] = rfd;
1327	fildes[1] = wfd;
1328	return (0);
1329 free3:
1330	(void)soclose(wso);
1331	fd_abort(p, wf, wfd);
1332 free2:
1333	(void)soclose(rso);
1334	fd_abort(p, rf, rfd);
1335 free1:
1336	return error;
1337}
1338#endif /* PIPE_SOCKETPAIR */
1339
1340/*
1341 * Get peer socket name.
1342 */
1343int
1344do_sys_getpeername(int fd, struct sockaddr *nam)
1345{
1346	struct socket	*so;
1347	int		error;
1348
1349	if ((error = fd_getsock(fd, &so)) != 0)
1350		return error;
1351
1352	solock(so);
1353	if ((so->so_state & SS_ISCONNECTED) == 0)
1354		error = ENOTCONN;
1355	else {
1356		error = (*so->so_proto->pr_usrreqs->pr_peeraddr)(so, nam);
1357	}
1358	sounlock(so);
1359	fd_putfile(fd);
1360	return error;
1361}
1362
1363/*
1364 * Get local socket name.
1365 */
1366int
1367do_sys_getsockname(int fd, struct sockaddr *nam)
1368{
1369	struct socket	*so;
1370	int		error;
1371
1372	if ((error = fd_getsock(fd, &so)) != 0)
1373		return error;
1374
1375	solock(so);
1376	error = (*so->so_proto->pr_usrreqs->pr_sockaddr)(so, nam);
1377	sounlock(so);
1378	fd_putfile(fd);
1379	return error;
1380}
1381
1382int
1383copyout_sockname_sb(struct sockaddr *asa, unsigned int *alen, int flags,
1384    struct sockaddr_big *addr)
1385{
1386	unsigned int len;
1387	int error;
1388
1389	if (asa == NULL)
1390		/* Assume application not interested */
1391		return 0;
1392
1393	if (flags & MSG_LENUSRSPACE) {
1394		error = copyin(alen, &len, sizeof(len));
1395		if (error)
1396			return error;
1397	} else
1398		len = *alen;
1399
1400	if (addr == NULL) {
1401		len = 0;
1402		error = 0;
1403	} else {
1404		if (len > addr->sb_len)
1405			len = addr->sb_len;
1406		/* XXX addr isn't an mbuf... */
1407		ktrkuser(mbuftypes[MT_SONAME], addr, len);
1408		error = copyout(addr, asa, len);
1409	}
1410
1411	if (error == 0) {
1412		if (flags & MSG_LENUSRSPACE)
1413			error = copyout(&len, alen, sizeof(len));
1414		else
1415			*alen = len;
1416	}
1417
1418	return error;
1419}
1420
1421int
1422copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1423    struct mbuf *addr)
1424{
1425	int len;
1426	int error;
1427
1428	if (asa == NULL)
1429		/* Assume application not interested */
1430		return 0;
1431
1432	if (flags & MSG_LENUSRSPACE) {
1433		error = copyin(alen, &len, sizeof(len));
1434		if (error)
1435			return error;
1436	} else
1437		len = *alen;
1438	if (len < 0)
1439		return EINVAL;
1440
1441	if (addr == NULL) {
1442		len = 0;
1443		error = 0;
1444	} else {
1445		if (len > addr->m_len)
1446			len = addr->m_len;
1447		/* Maybe this ought to copy a chain ? */
1448		ktrkuser(mbuftypes[MT_SONAME], mtod(addr, void *), len);
1449		error = copyout(mtod(addr, void *), asa, len);
1450	}
1451
1452	if (error == 0) {
1453		if (flags & MSG_LENUSRSPACE)
1454			error = copyout(&len, alen, sizeof(len));
1455		else
1456			*alen = len;
1457	}
1458
1459	return error;
1460}
1461
1462/*
1463 * Get socket name.
1464 */
1465int
1466sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap,
1467    register_t *retval)
1468{
1469	/* {
1470		syscallarg(int)			fdes;
1471		syscallarg(struct sockaddr *)	asa;
1472		syscallarg(unsigned int *)	alen;
1473	} */
1474	struct sockaddr_big sbig;
1475	int		    error;
1476
1477	sbig.sb_len = UCHAR_MAX;
1478	error = do_sys_getsockname(SCARG(uap, fdes), (struct sockaddr *)&sbig);
1479	if (error != 0)
1480		return error;
1481
1482	error = copyout_sockname_sb(SCARG(uap, asa), SCARG(uap, alen),
1483	    MSG_LENUSRSPACE, &sbig);
1484	return error;
1485}
1486
1487/*
1488 * Get name of peer for connected socket.
1489 */
1490int
1491sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap,
1492    register_t *retval)
1493{
1494	/* {
1495		syscallarg(int)			fdes;
1496		syscallarg(struct sockaddr *)	asa;
1497		syscallarg(unsigned int *)	alen;
1498	} */
1499	struct sockaddr_big sbig;
1500	int		    error;
1501
1502	sbig.sb_len = UCHAR_MAX;
1503	error = do_sys_getpeername(SCARG(uap, fdes), (struct sockaddr *)&sbig);
1504	if (error != 0)
1505		return error;
1506
1507	error = copyout_sockname_sb(SCARG(uap, asa), SCARG(uap, alen),
1508	    MSG_LENUSRSPACE, &sbig);
1509	return error;
1510}
1511
1512static int
1513sockargs_sb(struct sockaddr_big *sb, const void *name, socklen_t buflen)
1514{
1515	int error;
1516
1517	/*
1518	 * We can't allow socket names > UCHAR_MAX in length, since that
1519	 * will overflow sb_len. Further no reasonable buflen is <=
1520	 * offsetof(sockaddr_big, sb_data) since it shall be at least
1521	 * the size of the preamble sb_len and sb_family members.
1522	 */
1523	if (buflen > UCHAR_MAX ||
1524	    buflen <= offsetof(struct sockaddr_big, sb_data))
1525		return EINVAL;
1526
1527	error = copyin(name, (void *)sb, buflen);
1528	if (error)
1529		return error;
1530
1531	ktrkuser(mbuftypes[MT_SONAME], sb, buflen);
1532#if BYTE_ORDER != BIG_ENDIAN
1533	/*
1534	 * 4.3BSD compat thing - need to stay, since bind(2),
1535	 * connect(2), sendto(2) were not versioned for COMPAT_43.
1536	 */
1537	if (sb->sb_family == 0 && sb->sb_len < AF_MAX)
1538		sb->sb_family = sb->sb_len;
1539#endif
1540	sb->sb_len = buflen;
1541	return 0;
1542}
1543
1544/*
1545 * XXX In a perfect world, we wouldn't pass around socket control
1546 * XXX arguments in mbufs, and this could go away.
1547 */
1548int
1549sockargs(struct mbuf **mp, const void *bf, size_t buflen, enum uio_seg seg,
1550    int type)
1551{
1552	struct mbuf	*m;
1553	int		error;
1554
1555	/*
1556	 * We can't allow socket names > UCHAR_MAX in length, since that
1557	 * will overflow sa_len.  Control data more than a page size in
1558	 * length is just too much.
1559	 */
1560	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1561		return EINVAL;
1562
1563	/*
1564	 * length must greater than sizeof(sa_family) + sizeof(sa_len)
1565	 */
1566	if (type == MT_SONAME && buflen <= 2)
1567		return EINVAL;
1568
1569	/* Allocate an mbuf to hold the arguments. */
1570	m = m_get(M_WAIT, type);
1571	/* can't claim.  don't who to assign it to. */
1572	if (buflen > MLEN) {
1573		/*
1574		 * Won't fit into a regular mbuf, so we allocate just
1575		 * enough external storage to hold the argument.
1576		 */
1577		MEXTMALLOC(m, buflen, M_WAITOK);
1578	}
1579	m->m_len = buflen;
1580	if (seg == UIO_USERSPACE) {
1581		error = copyin(bf, mtod(m, void *), buflen);
1582		if (error) {
1583			(void)m_free(m);
1584			return error;
1585		}
1586	} else {
1587		memcpy(mtod(m, void *), bf, buflen);
1588	}
1589	*mp = m;
1590	switch (type) {
1591	case MT_SONAME:
1592		ktrkuser(mbuftypes[type], mtod(m, void *), buflen);
1593
1594		struct sockaddr *sa = mtod(m, struct sockaddr *);
1595#if BYTE_ORDER != BIG_ENDIAN
1596		/*
1597		 * 4.3BSD compat thing - need to stay, since bind(2),
1598		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1599		 */
1600		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1601			sa->sa_family = sa->sa_len;
1602#endif
1603		sa->sa_len = buflen;
1604		return 0;
1605	case MT_CONTROL:
1606		if (!KTRPOINT(curproc, KTR_USER))
1607			return 0;
1608
1609		struct msghdr mhdr;
1610		mhdr.msg_control = mtod(m, void *);
1611		mhdr.msg_controllen = buflen;
1612		for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mhdr); cmsg;
1613		    cmsg = CMSG_NXTHDR(&mhdr, cmsg)) {
1614			KASSERT(((char *)cmsg - mtod(m, char *)) <= buflen);
1615			if (cmsg->cmsg_len >
1616			    buflen - ((char *)cmsg - mtod(m, char *)))
1617				break;
1618			ktrkuser(mbuftypes[type], cmsg, cmsg->cmsg_len);
1619		}
1620		return 0;
1621	default:
1622		return EINVAL;
1623	}
1624}
1625
/*
 * Peel an SCTP association off the socket head into a new socket with
 * its own file descriptor.
 *
 * data is used in both directions: on entry it points to a uint32_t
 * naming the association, and once a descriptor has been allocated the
 * same storage is overwritten with the new descriptor number as an int.
 *
 * Returns EOPNOTSUPP when the kernel is built without SCTP; otherwise
 * the error from sctp_can_peel_off(), fd_allocfile() or
 * sctp_get_peeloff().
 */
int
do_sys_peeloff(struct socket *head, void *data)
{
#ifdef SCTP
	/*file_t *lfp = NULL;*/
	file_t *nfp = NULL;
	int error;
	struct socket *so;
	int fd;
	uint32_t name;
	/*short fflag;*/		/* type must match fp->f_flag */

	/* Read the association name before data is reused for the fd. */
	name = *(uint32_t *) data;
	error = sctp_can_peel_off(head, name);
	if (error) {
		printf("peeloff failed\n");
		return error;
	}
	/*
	 * At this point we know we do have a assoc to pull
	 * we proceed to get the fd setup. This may block
	 * but that is ok.
	 */
	error = fd_allocfile(&nfp, &fd);
	if (error) {
		/*
		 * Probably ran out of file descriptors.  No file or
		 * descriptor was obtained by this function, so the
		 * error is simply passed back to the caller.
		 */
		return error;
	}
	/* Note: data now holds the fd even if the peel-off fails below. */
	*(int *) data = fd;

	so = sctp_get_peeloff(head, name, &error);
	if (so == NULL) {
		/*
		 * Either someone else peeled it off OR
		 * we can't get a socket.
		 * close the new descriptor, assuming someone hasn't ripped it
		 * out from under us.
		 *
		 * NOTE(review): the reference count is incremented just
		 * before fd_abort(); confirm that fd_abort() expects a
		 * non-zero f_count for a file that was never affixed.
		 */
		mutex_enter(&nfp->f_lock);
		nfp->f_count++;
		mutex_exit(&nfp->f_lock);
		fd_abort(curlwp->l_proc, nfp, fd);
		return error;
	}
	/* The new socket now has a descriptor and is no longer pending. */
	so->so_state &= ~SS_NOFDREF;
	so->so_state &= ~SS_ISCONNECTING;
	so->so_head = NULL;
	/* The peeled-off socket takes a reference to head's credentials. */
	so->so_cred = kauth_cred_hold(head->so_cred);
	nfp->f_socket = so;
	nfp->f_flag = FREAD|FWRITE;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;

	/* Install the fully initialised file in the descriptor slot. */
	fd_affix(curlwp->l_proc, nfp, fd);

	return error;
#else
	return EOPNOTSUPP;
#endif
}
1691