1/*	$NetBSD$	*/
2
3/*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1982, 1986, 1989, 1990, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
61 */
62
63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD$");
65
66#include "opt_pipe.h"
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/filedesc.h>
71#include <sys/proc.h>
72#include <sys/file.h>
73#include <sys/buf.h>
74#define MBUFTYPES
75#include <sys/mbuf.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/signalvar.h>
80#include <sys/un.h>
81#include <sys/ktrace.h>
82#include <sys/event.h>
83#include <sys/atomic.h>
84#include <sys/kauth.h>
85
86#include <sys/mount.h>
87#include <sys/syscallargs.h>
88
89/*
90 * System call interface to the socket abstraction.
91 */
92extern const struct fileops socketops;
93
94int
95sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
96{
97	/* {
98		syscallarg(int)	domain;
99		syscallarg(int)	type;
100		syscallarg(int)	protocol;
101	} */
102	int		fd, error;
103
104	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
105			 SCARG(uap, protocol), l, &fd);
106	if (error == 0)
107		*retval = fd;
108	return error;
109}
110
111/* ARGSUSED */
112int
113sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
114{
115	/* {
116		syscallarg(int)				s;
117		syscallarg(const struct sockaddr *)	name;
118		syscallarg(unsigned int)		namelen;
119	} */
120	struct mbuf	*nam;
121	int		error;
122
123	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
124	    MT_SONAME);
125	if (error)
126		return error;
127
128	return do_sys_bind(l, SCARG(uap, s), nam);
129}
130
131int
132do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
133{
134	struct socket	*so;
135	int		error;
136
137	if ((error = fd_getsock(fd, &so)) != 0) {
138		m_freem(nam);
139		return (error);
140	}
141	MCLAIM(nam, so->so_mowner);
142	error = sobind(so, nam, l);
143	m_freem(nam);
144	fd_putfile(fd);
145	return error;
146}
147
148/* ARGSUSED */
149int
150sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
151{
152	/* {
153		syscallarg(int)	s;
154		syscallarg(int)	backlog;
155	} */
156	struct socket	*so;
157	int		error;
158
159	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
160		return (error);
161	error = solisten(so, SCARG(uap, backlog), l);
162	fd_putfile(SCARG(uap, s));
163	return error;
164}
165
166int
167do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock,
168    const sigset_t *mask, int flags, int clrflags)
169{
170	file_t		*fp, *fp2;
171	struct mbuf	*nam;
172	int		error, fd;
173	struct socket	*so, *so2;
174	short		wakeup_state = 0;
175
176	if ((fp = fd_getfile(sock)) == NULL)
177		return (EBADF);
178	if (fp->f_type != DTYPE_SOCKET) {
179		fd_putfile(sock);
180		return (ENOTSOCK);
181	}
182	if ((error = fd_allocfile(&fp2, &fd)) != 0) {
183		fd_putfile(sock);
184		return (error);
185	}
186	nam = m_get(M_WAIT, MT_SONAME);
187	nam->m_len = 0;
188	*new_sock = fd;
189	so = fp->f_data;
190	solock(so);
191
192	if (__predict_false(mask))
193		sigsuspendsetup(l, mask);
194
195	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
196		error = EOPNOTSUPP;
197		goto bad;
198	}
199	if ((so->so_options & SO_ACCEPTCONN) == 0) {
200		error = EINVAL;
201		goto bad;
202	}
203	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
204		error = EWOULDBLOCK;
205		goto bad;
206	}
207	while (so->so_qlen == 0 && so->so_error == 0) {
208		if (so->so_state & SS_CANTRCVMORE) {
209			so->so_error = ECONNABORTED;
210			break;
211		}
212		if (wakeup_state & SS_RESTARTSYS) {
213			error = ERESTART;
214			goto bad;
215		}
216		error = sowait(so, true, 0);
217		if (error) {
218			goto bad;
219		}
220		wakeup_state = so->so_state;
221	}
222	if (so->so_error) {
223		error = so->so_error;
224		so->so_error = 0;
225		goto bad;
226	}
227	/* connection has been removed from the listen queue */
228	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
229	so2 = TAILQ_FIRST(&so->so_q);
230	if (soqremque(so2, 1) == 0)
231		panic("accept");
232	fp2->f_type = DTYPE_SOCKET;
233	fp2->f_flag = (fp->f_flag & ~clrflags) |
234	    ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
235	    ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
236	fp2->f_ops = &socketops;
237	fp2->f_data = so2;
238	if (flags & SOCK_NONBLOCK)
239		so2->so_state |= SS_NBIO;
240	error = soaccept(so2, nam);
241	so2->so_cred = kauth_cred_dup(so->so_cred);
242	sounlock(so);
243	if (error) {
244		/* an error occurred, free the file descriptor and mbuf */
245		m_freem(nam);
246		mutex_enter(&fp2->f_lock);
247		fp2->f_count++;
248		mutex_exit(&fp2->f_lock);
249		closef(fp2);
250		fd_abort(curproc, NULL, fd);
251	} else {
252		fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0);
253		fd_affix(curproc, fp2, fd);
254		*name = nam;
255	}
256	fd_putfile(sock);
257	if (__predict_false(mask))
258		sigsuspendteardown(l);
259	return (error);
260 bad:
261 	sounlock(so);
262 	m_freem(nam);
263	fd_putfile(sock);
264 	fd_abort(curproc, fp2, fd);
265	if (__predict_false(mask))
266		sigsuspendteardown(l);
267 	return (error);
268}
269
270int
271sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
272{
273	/* {
274		syscallarg(int)			s;
275		syscallarg(struct sockaddr *)	name;
276		syscallarg(unsigned int *)	anamelen;
277	} */
278	int error, fd;
279	struct mbuf *name;
280
281	error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0);
282	if (error != 0)
283		return error;
284	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
285	    MSG_LENUSRSPACE, name);
286	if (name != NULL)
287		m_free(name);
288	if (error != 0) {
289		fd = (int)*retval;
290		if (fd_getfile(fd) != NULL)
291			(void)fd_close(fd);
292	}
293	return error;
294}
295
296int
297sys_paccept(struct lwp *l, const struct sys_paccept_args *uap,
298    register_t *retval)
299{
300	/* {
301		syscallarg(int)			s;
302		syscallarg(struct sockaddr *)	name;
303		syscallarg(unsigned int *)	anamelen;
304		syscallarg(const sigset_t *)	mask;
305		syscallarg(int)			flags;
306	} */
307	int error, fd;
308	struct mbuf *name;
309	sigset_t *mask, amask;
310
311	if (SCARG(uap, mask) != NULL) {
312		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
313		if (error)
314			return error;
315		mask = &amask;
316	} else
317		mask = NULL;
318
319	error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask,
320	    SCARG(uap, flags), FNONBLOCK);
321	if (error != 0)
322		return error;
323	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
324	    MSG_LENUSRSPACE, name);
325	if (name != NULL)
326		m_free(name);
327	if (error != 0) {
328		fd = (int)*retval;
329		if (fd_getfile(fd) != NULL)
330			(void)fd_close(fd);
331	}
332	return error;
333}
334
335/* ARGSUSED */
336int
337sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
338{
339	/* {
340		syscallarg(int)				s;
341		syscallarg(const struct sockaddr *)	name;
342		syscallarg(unsigned int)		namelen;
343	} */
344	int		error;
345	struct mbuf	*nam;
346
347	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
348	    MT_SONAME);
349	if (error)
350		return error;
351	return do_sys_connect(l,  SCARG(uap, s), nam);
352}
353
354int
355do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
356{
357	struct socket	*so;
358	int		error;
359	int		interrupted = 0;
360
361	if ((error = fd_getsock(fd, &so)) != 0) {
362		m_freem(nam);
363		return (error);
364	}
365	solock(so);
366	MCLAIM(nam, so->so_mowner);
367	if ((so->so_state & SS_ISCONNECTING) != 0) {
368		error = EALREADY;
369		goto out;
370	}
371
372	error = soconnect(so, nam, l);
373	if (error)
374		goto bad;
375	if ((so->so_state & (SS_NBIO|SS_ISCONNECTING)) ==
376	    (SS_NBIO|SS_ISCONNECTING)) {
377		error = EINPROGRESS;
378		goto out;
379	}
380	while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
381		error = sowait(so, true, 0);
382		if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
383			error = EPIPE;
384			interrupted = 1;
385			break;
386		}
387		if (error) {
388			if (error == EINTR || error == ERESTART)
389				interrupted = 1;
390			break;
391		}
392	}
393	if (error == 0) {
394		error = so->so_error;
395		so->so_error = 0;
396	}
397 bad:
398	if (!interrupted)
399		so->so_state &= ~SS_ISCONNECTING;
400	if (error == ERESTART)
401		error = EINTR;
402 out:
403 	sounlock(so);
404 	fd_putfile(fd);
405	m_freem(nam);
406	return (error);
407}
408
409static int
410makesocket(struct lwp *l, file_t **fp, int *fd, int flags, int type,
411    int domain, int proto, struct socket *soo)
412{
413	int error;
414	struct socket *so;
415
416	if ((error = socreate(domain, &so, type, proto, l, soo)) != 0)
417		return error;
418
419	if ((error = fd_allocfile(fp, fd)) != 0) {
420		soclose(so);
421		return error;
422	}
423	fd_set_exclose(l, *fd, (flags & SOCK_CLOEXEC) != 0);
424	(*fp)->f_flag = FREAD|FWRITE|
425	    ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
426	    ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
427	(*fp)->f_type = DTYPE_SOCKET;
428	(*fp)->f_ops = &socketops;
429	(*fp)->f_data = so;
430	if (flags & SOCK_NONBLOCK)
431		so->so_state |= SS_NBIO;
432	return 0;
433}
434
435int
436sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap,
437    register_t *retval)
438{
439	/* {
440		syscallarg(int)		domain;
441		syscallarg(int)		type;
442		syscallarg(int)		protocol;
443		syscallarg(int *)	rsv;
444	} */
445	file_t		*fp1, *fp2;
446	struct socket	*so1, *so2;
447	int		fd, error, sv[2];
448	proc_t		*p;
449	int		flags = SCARG(uap, type) & SOCK_FLAGS_MASK;
450	int		type = SCARG(uap, type) & ~SOCK_FLAGS_MASK;
451	int		domain = SCARG(uap, domain);
452	int		proto = SCARG(uap, protocol);
453
454	p = curproc;
455
456	error = makesocket(l, &fp1, &fd, flags, type, domain, proto, NULL);
457	if (error)
458		return error;
459	so1 = fp1->f_data;
460	sv[0] = fd;
461
462	error = makesocket(l, &fp2, &fd, flags, type, domain, proto, so1);
463	if (error)
464		goto out;
465	so2 = fp2->f_data;
466	sv[1] = fd;
467
468	solock(so1);
469	error = soconnect2(so1, so2);
470	if (error == 0 && type == SOCK_DGRAM) {
471		/*
472		 * Datagram socket connection is asymmetric.
473		 */
474		error = soconnect2(so2, so1);
475	}
476	sounlock(so1);
477
478	if (error == 0)
479		error = copyout(sv, SCARG(uap, rsv), sizeof(sv));
480	if (error == 0) {
481		fd_affix(p, fp2, sv[1]);
482		fd_affix(p, fp1, sv[0]);
483		return 0;
484	}
485	fd_abort(p, fp2, sv[1]);
486	(void)soclose(so2);
487out:
488	fd_abort(p, fp1, sv[0]);
489	(void)soclose(so1);
490	return error;
491}
492
493int
494sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
495{
496	/* {
497		syscallarg(int)				s;
498		syscallarg(const void *)		buf;
499		syscallarg(size_t)			len;
500		syscallarg(int)				flags;
501		syscallarg(const struct sockaddr *)	to;
502		syscallarg(unsigned int)		tolen;
503	} */
504	struct msghdr	msg;
505	struct iovec	aiov;
506
507	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
508	msg.msg_namelen = SCARG(uap, tolen);
509	msg.msg_iov = &aiov;
510	msg.msg_iovlen = 1;
511	msg.msg_control = NULL;
512	msg.msg_flags = 0;
513	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
514	aiov.iov_len = SCARG(uap, len);
515	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
516}
517
518int
519sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
520{
521	/* {
522		syscallarg(int)				s;
523		syscallarg(const struct msghdr *)	msg;
524		syscallarg(int)				flags;
525	} */
526	struct msghdr	msg;
527	int		error;
528
529	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
530	if (error)
531		return (error);
532
533	msg.msg_flags = MSG_IOVUSRSPACE;
534	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
535}
536
537int
538do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
539		register_t *retsize)
540{
541	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
542	struct mbuf	*to, *control;
543	struct socket	*so;
544	file_t		*fp;
545	struct uio	auio;
546	size_t		len, iovsz;
547	int		i, error;
548
549	ktrkuser("msghdr", mp, sizeof *mp);
550
551	/* If the caller passed us stuff in mbufs, we must free them. */
552	to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
553	control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
554	iovsz = mp->msg_iovlen * sizeof(struct iovec);
555
556	if (mp->msg_flags & MSG_IOVUSRSPACE) {
557		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
558			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
559				error = EMSGSIZE;
560				goto bad;
561			}
562			iov = kmem_alloc(iovsz, KM_SLEEP);
563		}
564		if (mp->msg_iovlen != 0) {
565			error = copyin(mp->msg_iov, iov, iovsz);
566			if (error)
567				goto bad;
568		}
569		mp->msg_iov = iov;
570	}
571
572	auio.uio_iov = mp->msg_iov;
573	auio.uio_iovcnt = mp->msg_iovlen;
574	auio.uio_rw = UIO_WRITE;
575	auio.uio_offset = 0;			/* XXX */
576	auio.uio_resid = 0;
577	KASSERT(l == curlwp);
578	auio.uio_vmspace = l->l_proc->p_vmspace;
579
580	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
581		/*
582		 * Writes return ssize_t because -1 is returned on error.
583		 * Therefore, we must restrict the length to SSIZE_MAX to
584		 * avoid garbage return values.
585		 */
586		auio.uio_resid += tiov->iov_len;
587		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
588			error = EINVAL;
589			goto bad;
590		}
591	}
592
593	if (mp->msg_name && to == NULL) {
594		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
595		    MT_SONAME);
596		if (error)
597			goto bad;
598	}
599
600	if (mp->msg_control) {
601		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
602			error = EINVAL;
603			goto bad;
604		}
605		if (control == NULL) {
606			error = sockargs(&control, mp->msg_control,
607			    mp->msg_controllen, MT_CONTROL);
608			if (error)
609				goto bad;
610		}
611	}
612
613	if (ktrpoint(KTR_GENIO) && iovsz > 0) {
614		ktriov = kmem_alloc(iovsz, KM_SLEEP);
615		memcpy(ktriov, auio.uio_iov, iovsz);
616	}
617
618	if ((error = fd_getsock1(s, &so, &fp)) != 0)
619		goto bad;
620
621	if (mp->msg_name)
622		MCLAIM(to, so->so_mowner);
623	if (mp->msg_control)
624		MCLAIM(control, so->so_mowner);
625
626	len = auio.uio_resid;
627	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
628	/* Protocol is responsible for freeing 'control' */
629	control = NULL;
630
631	fd_putfile(s);
632
633	if (error) {
634		if (auio.uio_resid != len && (error == ERESTART ||
635		    error == EINTR || error == EWOULDBLOCK))
636			error = 0;
637		if (error == EPIPE && (fp->f_flag & FNOSIGPIPE) == 0 &&
638		    (flags & MSG_NOSIGNAL) == 0) {
639			mutex_enter(proc_lock);
640			psignal(l->l_proc, SIGPIPE);
641			mutex_exit(proc_lock);
642		}
643	}
644	if (error == 0)
645		*retsize = len - auio.uio_resid;
646
647bad:
648	if (ktriov != NULL) {
649		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
650		kmem_free(ktriov, iovsz);
651	}
652
653 	if (iov != aiov)
654 		kmem_free(iov, iovsz);
655	if (to)
656		m_freem(to);
657	if (control)
658		m_freem(control);
659
660	return (error);
661}
662
663int
664sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
665{
666	/* {
667		syscallarg(int)			s;
668		syscallarg(void *)		buf;
669		syscallarg(size_t)		len;
670		syscallarg(int)			flags;
671		syscallarg(struct sockaddr *)	from;
672		syscallarg(unsigned int *)	fromlenaddr;
673	} */
674	struct msghdr	msg;
675	struct iovec	aiov;
676	int		error;
677	struct mbuf	*from;
678
679	msg.msg_name = NULL;
680	msg.msg_iov = &aiov;
681	msg.msg_iovlen = 1;
682	aiov.iov_base = SCARG(uap, buf);
683	aiov.iov_len = SCARG(uap, len);
684	msg.msg_control = NULL;
685	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
686
687	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
688	if (error != 0)
689		return error;
690
691	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
692	    MSG_LENUSRSPACE, from);
693	if (from != NULL)
694		m_free(from);
695	return error;
696}
697
698int
699sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
700{
701	/* {
702		syscallarg(int)			s;
703		syscallarg(struct msghdr *)	msg;
704		syscallarg(int)			flags;
705	} */
706	struct msghdr	msg;
707	int		error;
708	struct mbuf	*from, *control;
709
710	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
711	if (error)
712		return (error);
713
714	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
715
716	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
717	    msg.msg_control != NULL ? &control : NULL, retval);
718	if (error != 0)
719		return error;
720
721	if (msg.msg_control != NULL)
722		error = copyout_msg_control(l, &msg, control);
723
724	if (error == 0)
725		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
726			from);
727	if (from != NULL)
728		m_free(from);
729	if (error == 0) {
730		ktrkuser("msghdr", &msg, sizeof msg);
731		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
732	}
733
734	return (error);
735}
736
737/*
738 * Adjust for a truncated SCM_RIGHTS control message.
739 *  This means closing any file descriptors that aren't present
740 *  in the returned buffer.
741 *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
742 */
743static void
744free_rights(struct mbuf *m)
745{
746	struct cmsghdr *cm;
747	int *fdv;
748	unsigned int nfds, i;
749
750	KASSERT(sizeof(*cm) <= m->m_len);
751	cm = mtod(m, struct cmsghdr *);
752
753	KASSERT(CMSG_ALIGN(sizeof(*cm)) <= cm->cmsg_len);
754	KASSERT(cm->cmsg_len <= m->m_len);
755	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
756	fdv = (int *)CMSG_DATA(cm);
757
758	for (i = 0; i < nfds; i++)
759		if (fd_getfile(fdv[i]) != NULL)
760			(void)fd_close(fdv[i]);
761}
762
763void
764free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
765{
766	struct mbuf *next;
767	struct cmsghdr *cmsg;
768	bool do_free_rights = false;
769
770	while (control != NULL) {
771		cmsg = mtod(control, struct cmsghdr *);
772		if (control == uncopied)
773			do_free_rights = true;
774		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
775		    && cmsg->cmsg_type == SCM_RIGHTS)
776			free_rights(control);
777		next = control->m_next;
778		m_free(control);
779		control = next;
780	}
781}
782
783/* Copy socket control/CMSG data to user buffer, frees the mbuf */
784int
785copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
786{
787	int i, len, error = 0;
788	struct cmsghdr *cmsg;
789	struct mbuf *m;
790	char *q;
791
792	len = mp->msg_controllen;
793	if (len <= 0 || control == 0) {
794		mp->msg_controllen = 0;
795		free_control_mbuf(l, control, control);
796		return 0;
797	}
798
799	q = (char *)mp->msg_control;
800
801	for (m = control; m != NULL; ) {
802		cmsg = mtod(m, struct cmsghdr *);
803		i = m->m_len;
804		if (len < i) {
805			mp->msg_flags |= MSG_CTRUNC;
806			if (cmsg->cmsg_level == SOL_SOCKET
807			    && cmsg->cmsg_type == SCM_RIGHTS)
808				/* Do not truncate me ... */
809				break;
810			i = len;
811		}
812		error = copyout(mtod(m, void *), q, i);
813		ktrkuser("msgcontrol", mtod(m, void *), i);
814		if (error != 0) {
815			/* We must free all the SCM_RIGHTS */
816			m = control;
817			break;
818		}
819		m = m->m_next;
820		if (m)
821			i = ALIGN(i);
822		q += i;
823		len -= i;
824		if (len <= 0)
825			break;
826	}
827
828	free_control_mbuf(l, control, m);
829
830	mp->msg_controllen = q - (char *)mp->msg_control;
831	return error;
832}
833
834int
835do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
836    struct mbuf **control, register_t *retsize)
837{
838	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
839	struct socket	*so;
840	struct uio	auio;
841	size_t		len, iovsz;
842	int		i, error;
843
844	ktrkuser("msghdr", mp, sizeof *mp);
845
846	*from = NULL;
847	if (control != NULL)
848		*control = NULL;
849
850	if ((error = fd_getsock(s, &so)) != 0)
851		return (error);
852
853	iovsz = mp->msg_iovlen * sizeof(struct iovec);
854
855	if (mp->msg_flags & MSG_IOVUSRSPACE) {
856		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
857			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
858				error = EMSGSIZE;
859				goto out;
860			}
861			iov = kmem_alloc(iovsz, KM_SLEEP);
862		}
863		if (mp->msg_iovlen != 0) {
864			error = copyin(mp->msg_iov, iov, iovsz);
865			if (error)
866				goto out;
867		}
868		auio.uio_iov = iov;
869	} else
870		auio.uio_iov = mp->msg_iov;
871	auio.uio_iovcnt = mp->msg_iovlen;
872	auio.uio_rw = UIO_READ;
873	auio.uio_offset = 0;			/* XXX */
874	auio.uio_resid = 0;
875	KASSERT(l == curlwp);
876	auio.uio_vmspace = l->l_proc->p_vmspace;
877
878	tiov = auio.uio_iov;
879	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
880		/*
881		 * Reads return ssize_t because -1 is returned on error.
882		 * Therefore we must restrict the length to SSIZE_MAX to
883		 * avoid garbage return values.
884		 */
885		auio.uio_resid += tiov->iov_len;
886		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
887			error = EINVAL;
888			goto out;
889		}
890	}
891
892	if (ktrpoint(KTR_GENIO) && iovsz > 0) {
893		ktriov = kmem_alloc(iovsz, KM_SLEEP);
894		memcpy(ktriov, auio.uio_iov, iovsz);
895	}
896
897	len = auio.uio_resid;
898	mp->msg_flags &= MSG_USERFLAGS;
899	error = (*so->so_receive)(so, from, &auio, NULL, control,
900	    &mp->msg_flags);
901	len -= auio.uio_resid;
902	*retsize = len;
903	if (error != 0 && len != 0
904	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
905		/* Some data transferred */
906		error = 0;
907
908	if (ktriov != NULL) {
909		ktrgeniov(s, UIO_READ, ktriov, len, error);
910		kmem_free(ktriov, iovsz);
911	}
912
913	if (error != 0) {
914		m_freem(*from);
915		*from = NULL;
916		if (control != NULL) {
917			free_control_mbuf(l, *control, *control);
918			*control = NULL;
919		}
920	}
921 out:
922	if (iov != aiov)
923		kmem_free(iov, iovsz);
924	fd_putfile(s);
925	return (error);
926}
927
928
929/* ARGSUSED */
930int
931sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
932{
933	/* {
934		syscallarg(int)	s;
935		syscallarg(int)	how;
936	} */
937	struct socket	*so;
938	int		error;
939
940	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
941		return (error);
942	solock(so);
943	error = soshutdown(so, SCARG(uap, how));
944	sounlock(so);
945	fd_putfile(SCARG(uap, s));
946	return (error);
947}
948
949/* ARGSUSED */
950int
951sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
952{
953	/* {
954		syscallarg(int)			s;
955		syscallarg(int)			level;
956		syscallarg(int)			name;
957		syscallarg(const void *)	val;
958		syscallarg(unsigned int)	valsize;
959	} */
960	struct sockopt	sopt;
961	struct socket	*so;
962	file_t		*fp;
963	int		error;
964	unsigned int	len;
965
966	len = SCARG(uap, valsize);
967	if (len > 0 && SCARG(uap, val) == NULL)
968		return (EINVAL);
969
970	if (len > MCLBYTES)
971		return (EINVAL);
972
973	if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
974		return (error);
975
976	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
977
978	if (len > 0) {
979		error = copyin(SCARG(uap, val), sopt.sopt_data, len);
980		if (error)
981			goto out;
982	}
983
984	error = sosetopt(so, &sopt);
985	if (so->so_options & SO_NOSIGPIPE)
986		atomic_or_uint(&fp->f_flag, FNOSIGPIPE);
987	else
988		atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE);
989
990 out:
991	sockopt_destroy(&sopt);
992 	fd_putfile(SCARG(uap, s));
993	return (error);
994}
995
996/* ARGSUSED */
997int
998sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
999{
1000	/* {
1001		syscallarg(int)			s;
1002		syscallarg(int)			level;
1003		syscallarg(int)			name;
1004		syscallarg(void *)		val;
1005		syscallarg(unsigned int *)	avalsize;
1006	} */
1007	struct sockopt	sopt;
1008	struct socket	*so;
1009	file_t		*fp;
1010	unsigned int	valsize, len;
1011	int		error;
1012
1013	if (SCARG(uap, val) != NULL) {
1014		error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
1015		if (error)
1016			return (error);
1017	} else
1018		valsize = 0;
1019
1020	if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
1021		return (error);
1022
1023	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0);
1024
1025	if (fp->f_flag & FNOSIGPIPE)
1026		so->so_options |= SO_NOSIGPIPE;
1027	else
1028		so->so_options &= ~SO_NOSIGPIPE;
1029	error = sogetopt(so, &sopt);
1030	if (error)
1031		goto out;
1032
1033	if (valsize > 0) {
1034		len = min(valsize, sopt.sopt_size);
1035		error = copyout(sopt.sopt_data, SCARG(uap, val), len);
1036		if (error)
1037			goto out;
1038
1039		error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
1040		if (error)
1041			goto out;
1042	}
1043
1044 out:
1045	sockopt_destroy(&sopt);
1046 	fd_putfile(SCARG(uap, s));
1047	return (error);
1048}
1049
1050#ifdef PIPE_SOCKETPAIR
1051/* ARGSUSED */
1052int
1053pipe1(struct lwp *l, register_t *retval, int flags)
1054{
1055	file_t		*rf, *wf;
1056	struct socket	*rso, *wso;
1057	int		fd, error;
1058	proc_t		*p;
1059
1060	if (flags & ~(O_CLOEXEC|O_NONBLOCK|O_NOSIGPIPE))
1061		return EINVAL;
1062	p = curproc;
1063	if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0)
1064		return (error);
1065	if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0)
1066		goto free1;
1067	/* remember this socket pair implements a pipe */
1068	wso->so_state |= SS_ISAPIPE;
1069	rso->so_state |= SS_ISAPIPE;
1070	if ((error = fd_allocfile(&rf, &fd)) != 0)
1071		goto free2;
1072	retval[0] = fd;
1073	rf->f_flag = FREAD | flags;
1074	rf->f_type = DTYPE_SOCKET;
1075	rf->f_ops = &socketops;
1076	rf->f_data = rso;
1077	if ((error = fd_allocfile(&wf, &fd)) != 0)
1078		goto free3;
1079	wf->f_flag = FWRITE | flags;
1080	wf->f_type = DTYPE_SOCKET;
1081	wf->f_ops = &socketops;
1082	wf->f_data = wso;
1083	retval[1] = fd;
1084	solock(wso);
1085	error = unp_connect2(wso, rso, PRU_CONNECT2);
1086	sounlock(wso);
1087	if (error != 0)
1088		goto free4;
1089	fd_affix(p, wf, (int)retval[1]);
1090	fd_affix(p, rf, (int)retval[0]);
1091	return (0);
1092 free4:
1093	fd_abort(p, wf, (int)retval[1]);
1094 free3:
1095	fd_abort(p, rf, (int)retval[0]);
1096 free2:
1097	(void)soclose(wso);
1098 free1:
1099	(void)soclose(rso);
1100	return (error);
1101}
1102#endif /* PIPE_SOCKETPAIR */
1103
1104/*
1105 * Get socket name.
1106 */
1107/* ARGSUSED */
1108int
1109do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1110{
1111	struct socket	*so;
1112	struct mbuf	*m;
1113	int		error;
1114
1115	if ((error = fd_getsock(fd, &so)) != 0)
1116		return error;
1117
1118	m = m_getclr(M_WAIT, MT_SONAME);
1119	MCLAIM(m, so->so_mowner);
1120
1121	solock(so);
1122	if (which == PRU_PEERADDR
1123	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1124		error = ENOTCONN;
1125	} else {
1126		*nam = m;
1127		error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1128		    NULL);
1129	}
1130 	sounlock(so);
1131	if (error != 0)
1132		m_free(m);
1133 	fd_putfile(fd);
1134	return error;
1135}
1136
1137int
1138copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1139    struct mbuf *addr)
1140{
1141	int len;
1142	int error;
1143
1144	if (asa == NULL)
1145		/* Assume application not interested */
1146		return 0;
1147
1148	if (flags & MSG_LENUSRSPACE) {
1149		error = copyin(alen, &len, sizeof(len));
1150		if (error)
1151			return error;
1152	} else
1153		len = *alen;
1154	if (len < 0)
1155		return EINVAL;
1156
1157	if (addr == NULL) {
1158		len = 0;
1159		error = 0;
1160	} else {
1161		if (len > addr->m_len)
1162			len = addr->m_len;
1163		/* Maybe this ought to copy a chain ? */
1164		ktrkuser("sockname", mtod(addr, void *), len);
1165		error = copyout(mtod(addr, void *), asa, len);
1166	}
1167
1168	if (error == 0) {
1169		if (flags & MSG_LENUSRSPACE)
1170			error = copyout(&len, alen, sizeof(len));
1171		else
1172			*alen = len;
1173	}
1174
1175	return error;
1176}
1177
1178/*
1179 * Get socket name.
1180 */
1181/* ARGSUSED */
1182int
1183sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1184{
1185	/* {
1186		syscallarg(int)			fdes;
1187		syscallarg(struct sockaddr *)	asa;
1188		syscallarg(unsigned int *)	alen;
1189	} */
1190	struct mbuf	*m;
1191	int		error;
1192
1193	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1194	if (error != 0)
1195		return error;
1196
1197	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1198	    MSG_LENUSRSPACE, m);
1199	if (m != NULL)
1200		m_free(m);
1201	return error;
1202}
1203
1204/*
1205 * Get name of peer for connected socket.
1206 */
1207/* ARGSUSED */
1208int
1209sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1210{
1211	/* {
1212		syscallarg(int)			fdes;
1213		syscallarg(struct sockaddr *)	asa;
1214		syscallarg(unsigned int *)	alen;
1215	} */
1216	struct mbuf	*m;
1217	int		error;
1218
1219	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1220	if (error != 0)
1221		return error;
1222
1223	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1224	    MSG_LENUSRSPACE, m);
1225	if (m != NULL)
1226		m_free(m);
1227	return error;
1228}
1229
1230/*
1231 * XXX In a perfect world, we wouldn't pass around socket control
1232 * XXX arguments in mbufs, and this could go away.
1233 */
1234int
1235sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1236{
1237	struct sockaddr	*sa;
1238	struct mbuf	*m;
1239	int		error;
1240
1241	/*
1242	 * We can't allow socket names > UCHAR_MAX in length, since that
1243	 * will overflow sa_len.  Control data more than a page size in
1244	 * length is just too much.
1245	 */
1246	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1247		return (EINVAL);
1248
1249	/* Allocate an mbuf to hold the arguments. */
1250	m = m_get(M_WAIT, type);
1251	/* can't claim.  don't who to assign it to. */
1252	if (buflen > MLEN) {
1253		/*
1254		 * Won't fit into a regular mbuf, so we allocate just
1255		 * enough external storage to hold the argument.
1256		 */
1257		MEXTMALLOC(m, buflen, M_WAITOK);
1258	}
1259	m->m_len = buflen;
1260	error = copyin(bf, mtod(m, void *), buflen);
1261	if (error) {
1262		(void) m_free(m);
1263		return (error);
1264	}
1265	ktrkuser(mbuftypes[type], mtod(m, void *), buflen);
1266	*mp = m;
1267	if (type == MT_SONAME) {
1268		sa = mtod(m, struct sockaddr *);
1269#if BYTE_ORDER != BIG_ENDIAN
1270		/*
1271		 * 4.3BSD compat thing - need to stay, since bind(2),
1272		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1273		 */
1274		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1275			sa->sa_family = sa->sa_len;
1276#endif
1277		sa->sa_len = buflen;
1278	}
1279	return (0);
1280}
1281