1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
61 */
62/*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections.  This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/domain.h>
73#include <sys/fcntl.h>
74#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
75#include <sys/file_internal.h>
76#include <sys/guarded.h>
77#include <sys/filedesc.h>
78#include <sys/lock.h>
79#include <sys/mbuf.h>
80#include <sys/namei.h>
81#include <sys/proc_internal.h>
82#include <sys/kauth.h>
83#include <sys/protosw.h>
84#include <sys/socket.h>
85#include <sys/socketvar.h>
86#include <sys/stat.h>
87#include <sys/sysctl.h>
88#include <sys/un.h>
89#include <sys/unpcb.h>
90#include <sys/vnode_internal.h>
91#include <sys/kdebug.h>
92
93#include <kern/zalloc.h>
94#include <kern/locks.h>
95
96#if CONFIG_MACF
97#include <security/mac_framework.h>
98#endif /* CONFIG_MACF */
99
100#include <mach/vm_param.h>
101
/*
 * Shorthand for fileglob fields reached through an f_fglob pointer;
 * e.g. x->f_data expands to x->f_fglob->fg_data.
 */
#define	f_msgcount f_fglob->fg_msgcount
#define	f_cred f_fglob->fg_cred
#define	f_ops f_fglob->fg_ops
#define	f_offset f_fglob->fg_offset
#define	f_data f_fglob->fg_data
/* Zone from which unp_attach() allocates unpcbs. */
struct	zone *unp_zone;
/* Generation counter; incremented by both unp_attach() and unp_detach(). */
static	unp_gen_t unp_gencnt;
/* Count of attached unpcbs (++ in unp_attach(), -- in unp_detach()). */
static	u_int unp_count;

/* Attribute and group handed to lck_mtx_init() for each per-pcb unp_mtx. */
static	lck_attr_t		*unp_mtx_attr;
static	lck_grp_t		*unp_mtx_grp;
static	lck_grp_attr_t		*unp_mtx_grp_attr;
/*
 * Despite the name this is a read/write lock; it is taken exclusively
 * while unp_shead/unp_dhead and the counters above are modified.
 */
static	lck_rw_t		*unp_list_mtx;

/* Protects disconnect_in_progress (see its use in unp_detach()). */
static  lck_mtx_t		*unp_disconnect_lock;
/*
 * Held while a vnode's v_socket back-pointer is cleared (unp_detach())
 * or examined (unp_connect()).
 */
static	lck_mtx_t		*unp_connect_lock;
/* Non-zero while a thread is inside the guarded section of unp_detach(). */
static  u_int                   disconnect_in_progress;

extern lck_mtx_t *uipc_lock;
/* Global lists of stream (unp_shead) and datagram (unp_dhead) unpcbs. */
static	struct unp_head unp_shead, unp_dhead;
122
123/*
124 * mDNSResponder tracing.  When enabled, endpoints connected to
125 * /var/run/mDNSResponder will be traced; during each send on
126 * the traced socket, we log the PID and process name of the
127 * sending process.  We also print out a bit of info related
128 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
129 * of mDNSResponder stays the same.
130 */
/* Filesystem path of the mDNSResponder endpoint that may be traced. */
#define	MDNSRESPONDER_PATH	"/var/run/mDNSResponder"

/* Exported read/write as the "tracemdns" sysctl under net.local.stream. */
static int unpst_tracemdns;	/* enable tracing */

/* Header version that uipc_send() requires before logging a message. */
#define	MDNS_IPC_MSG_HDR_VERSION_1	1

/*
 * Local copy of the header that starts each traced message.
 * uipc_send() byte-swaps 'version' and 'op' with ntohl() before use,
 * i.e. it treats them as network byte order; the remaining fields are
 * not interpreted anywhere in the visible part of this file.
 * NOTE(review): layout must track ipc_msg_hdr in mDNSResponder's
 * dnssd_ipc.h -- confirm when that header changes.
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
148
149/*
150 * Unix communications domain.
151 *
152 * TODO:
153 *	SEQPACKET, RDM
154 *	rethink name space problems
155 *	need a proper out-of-band
156 *	lock pushdown
157 */
/*
 * Placeholder address (AF_LOCAL, no path) that is duplicated and handed
 * back whenever a socket or its peer has no bound address.
 */
static struct	sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };
/* uipc_sense() assigns unp_ino++ to any pcb whose unp_ino is still 0. */
static ino_t	unp_ino;		/* prototype for fake inode numbers */

/* Forward declarations for the pcb-level helpers used by the uipc_* entry points. */
static int	unp_attach(struct socket *);
static void	unp_detach(struct unpcb *);
static int	unp_bind(struct unpcb *, struct sockaddr *, proc_t);
static int	unp_connect(struct socket *, struct sockaddr *, proc_t);
static void	unp_disconnect(struct unpcb *);
static void	unp_shutdown(struct unpcb *);
static void	unp_drop(struct unpcb *, int);
/* The only helper in this list that is not file-static. */
__private_extern__ void	unp_gc(void);
static void	unp_scan(struct mbuf *, void (*)(struct fileglob *));
static void	unp_mark(struct fileglob *);
static void	unp_discard(struct fileglob *);
static void	unp_discard_fdlocked(struct fileglob *, proc_t);
static int	unp_internalize(struct mbuf *, proc_t);
static int	unp_listen(struct unpcb *, proc_t);
static void	unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
177
178static void
179unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
180{
181	if (so < conn_so) {
182		socket_lock(conn_so, 1);
183	} else {
184		struct unpcb *unp = sotounpcb(so);
185		unp->unp_flags |= UNP_DONTDISCONNECT;
186		unp->rw_thrcount++;
187		socket_unlock(so, 0);
188
189		/* Get the locks in the correct order */
190		socket_lock(conn_so, 1);
191		socket_lock(so, 0);
192		unp->rw_thrcount--;
193		if (unp->rw_thrcount == 0) {
194			unp->unp_flags &= ~UNP_DONTDISCONNECT;
195			wakeup(unp);
196		}
197	}
198}
199
200static int
201uipc_abort(struct socket *so)
202{
203	struct unpcb *unp = sotounpcb(so);
204
205	if (unp == 0)
206		return (EINVAL);
207	unp_drop(unp, ECONNABORTED);
208	unp_detach(unp);
209	sofree(so);
210	return (0);
211}
212
213static int
214uipc_accept(struct socket *so, struct sockaddr **nam)
215{
216	struct unpcb *unp = sotounpcb(so);
217
218	if (unp == 0)
219		return (EINVAL);
220
221	/*
222	 * Pass back name of connected socket,
223	 * if it was bound and we are still connected
224	 * (our peer may have closed already!).
225	 */
226	if (unp->unp_conn && unp->unp_conn->unp_addr) {
227		*nam = dup_sockaddr((struct sockaddr *)
228		    unp->unp_conn->unp_addr, 1);
229	} else {
230		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
231	}
232	return (0);
233}
234
235/*
236 * Returns:	0			Success
237 *		EISCONN
238 *	unp_attach:
239 */
240static int
241uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
242{
243	struct unpcb *unp = sotounpcb(so);
244
245	if (unp != 0)
246		return (EISCONN);
247	return (unp_attach(so));
248}
249
250static int
251uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
252{
253	struct unpcb *unp = sotounpcb(so);
254
255	if (unp == 0)
256		return (EINVAL);
257
258	return (unp_bind(unp, nam, p));
259}
260
261/*
262 * Returns:	0			Success
263 *		EINVAL
264 *	unp_connect:???			[See elsewhere in this file]
265 */
266static int
267uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
268{
269	struct unpcb *unp = sotounpcb(so);
270
271	if (unp == 0)
272		return (EINVAL);
273	return (unp_connect(so, nam, p));
274}
275
276/*
277 * Returns:	0			Success
278 *		EINVAL
279 *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
280 *	unp_connect2:EINVAL		Invalid argument
281 */
282static int
283uipc_connect2(struct socket *so1, struct socket *so2)
284{
285	struct unpcb *unp = sotounpcb(so1);
286
287	if (unp == 0)
288		return (EINVAL);
289
290	return (unp_connect2(so1, so2));
291}
292
293/* control is EOPNOTSUPP */
294
295static int
296uipc_detach(struct socket *so)
297{
298	struct unpcb *unp = sotounpcb(so);
299
300	if (unp == 0)
301		return (EINVAL);
302
303	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
304	unp_detach(unp);
305	return (0);
306}
307
308static int
309uipc_disconnect(struct socket *so)
310{
311	struct unpcb *unp = sotounpcb(so);
312
313	if (unp == 0)
314		return (EINVAL);
315	unp_disconnect(unp);
316	return (0);
317}
318
319/*
320 * Returns:	0			Success
321 *		EINVAL
322 */
323static int
324uipc_listen(struct socket *so, __unused proc_t p)
325{
326	struct unpcb *unp = sotounpcb(so);
327
328	if (unp == 0 || unp->unp_vnode == 0)
329		return (EINVAL);
330	return (unp_listen(unp, p));
331}
332
333static int
334uipc_peeraddr(struct socket *so, struct sockaddr **nam)
335{
336	struct unpcb *unp = sotounpcb(so);
337
338	if (unp == NULL)
339		return (EINVAL);
340	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
341		*nam = dup_sockaddr((struct sockaddr *)
342		    unp->unp_conn->unp_addr, 1);
343	} else {
344		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
345	}
346	return (0);
347}
348
349static int
350uipc_rcvd(struct socket *so, __unused int flags)
351{
352	struct unpcb *unp = sotounpcb(so);
353	struct socket *so2;
354
355	if (unp == 0)
356		return (EINVAL);
357	switch (so->so_type) {
358	case SOCK_DGRAM:
359		panic("uipc_rcvd DGRAM?");
360		/*NOTREACHED*/
361
362	case SOCK_STREAM:
363#define	rcv (&so->so_rcv)
364#define	snd (&so2->so_snd)
365		if (unp->unp_conn == 0)
366			break;
367
368		so2 = unp->unp_conn->unp_socket;
369		unp_get_locks_in_order(so, so2);
370		/*
371		 * Adjust backpressure on sender
372		 * and wakeup any waiting to write.
373		 */
374		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
375		unp->unp_mbcnt = rcv->sb_mbcnt;
376		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
377		unp->unp_cc = rcv->sb_cc;
378		sowwakeup(so2);
379
380		socket_unlock(so2, 1);
381
382#undef snd
383#undef rcv
384		break;
385
386	default:
387		panic("uipc_rcvd unknown socktype");
388	}
389	return (0);
390}
391
392/* pru_rcvoob is EOPNOTSUPP */
393
/*
 * pru_send: append the mbuf chain m (plus optional control mbufs) to the
 * receive buffer of the peer of so.
 *
 * For SOCK_DGRAM with an explicit destination (nam) the socket is
 * connected for the duration of the send and disconnected afterwards.
 * For SOCK_STREAM the sender's buffer limits are reduced after the
 * append so that backpressure is maintained (uipc_rcvd() gives the
 * space back).  PRUS_EOF additionally shuts the sending side down.
 *
 * On every path, whatever is still referenced by 'control' and 'm' when
 * the 'release' label is reached is freed there.
 *
 * Returns:	0			Success
 *		EINVAL
 *		EOPNOTSUPP
 *		EPIPE
 *		ENOTCONN
 *		EISCONN
 *	unp_internalize:EINVAL
 *	unp_internalize:EBADF
 *	unp_connect:EAFNOSUPPORT	Address family not supported
 *	unp_connect:EINVAL		Invalid argument
 *	unp_connect:ENOTSOCK		Not a socket
 *	unp_connect:ECONNREFUSED	Connection refused
 *	unp_connect:EISCONN		Socket is connected
 *	unp_connect:EPROTOTYPE		Protocol wrong type for socket
 *	unp_connect:???
 *	sbappendaddr:ENOBUFS		[5th argument, contents modified]
 *	sbappendaddr:???		[whatever a filter author chooses]
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	/* Out-of-band data is not supported on local-domain sockets. */
	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error)
			goto release;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* An explicit destination is only valid when unconnected. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			/* Temporary connection; undone after the append below. */
			error = unp_connect(so, nam, p);
			if (error)
				break;
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/*
		 * Only take the peer's lock when the peer is a different
		 * socket (presumably a datagram socket may be connected to
		 * itself, in which case the caller's lock already covers it).
		 */
		if (so != so2)
			unp_get_locks_in_order(so, so2);

		/* Source address reported to the receiver. */
		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			sorwakeup(so2);
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2)
			socket_unlock(so2, 1);

		/* m is not freed at 'release' once the append was attempted. */
		m = NULL;
		if (nam)
			unp_disconnect(unp);
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0)
			panic("uipc_send connected but no connection?");

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		/*
		 * Optional mDNSResponder tracing: log the sender and the
		 * operation code when the data starts with a version-1 header.
		 */
		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
			    hdr.version  == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
				    __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/*
		 * Charge the sender for what the receive buffer gained since
		 * the counts were last recorded in the peer's pcb.
		 */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		/* Clamp at zero rather than letting sb_hiwat wrap around. */
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			sorwakeup(so2);
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		/* m is not freed at 'release' once the append was attempted. */
		m = NULL;
#undef snd
#undef rcv
		}
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	/*
	 * The append failed with the control message still ours: dispose of
	 * it before it is freed below.  The socket lock is dropped around
	 * unp_dispose().
	 */
	if (control && error != 0) {
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	/* Free whatever this function still references. */
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}
604
605static int
606uipc_sense(struct socket *so, void *ub, int isstat64)
607{
608	struct unpcb *unp = sotounpcb(so);
609	struct socket *so2;
610	blksize_t blksize;
611
612	if (unp == 0)
613		return (EINVAL);
614
615	blksize = so->so_snd.sb_hiwat;
616	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
617		so2 = unp->unp_conn->unp_socket;
618		blksize += so2->so_rcv.sb_cc;
619	}
620	if (unp->unp_ino == 0)
621		unp->unp_ino = unp_ino++;
622
623	if (isstat64 != 0) {
624		struct stat64  *sb64;
625
626		sb64 = (struct stat64 *)ub;
627		sb64->st_blksize = blksize;
628		sb64->st_dev = NODEV;
629		sb64->st_ino = (ino64_t)unp->unp_ino;
630	} else {
631		struct stat *sb;
632
633		sb = (struct stat *)ub;
634		sb->st_blksize = blksize;
635		sb->st_dev = NODEV;
636		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
637	}
638
639	return (0);
640}
641
642/*
643 * Returns:	0		Success
644 *		EINVAL
645 *
646 * Notes:	This is not strictly correct, as unp_shutdown() also calls
647 *		socantrcvmore().  These should maybe both be conditionalized
648 *		on the 'how' argument in soshutdown() as called from the
649 *		shutdown() system call.
650 */
651static int
652uipc_shutdown(struct socket *so)
653{
654	struct unpcb *unp = sotounpcb(so);
655
656	if (unp == 0)
657		return (EINVAL);
658	socantsendmore(so);
659	unp_shutdown(unp);
660	return (0);
661}
662
663/*
664 * Returns:	0			Success
665 *		EINVAL			Invalid argument
666 */
667static int
668uipc_sockaddr(struct socket *so, struct sockaddr **nam)
669{
670	struct unpcb *unp = sotounpcb(so);
671
672	if (unp == NULL)
673		return (EINVAL);
674	if (unp->unp_addr != NULL) {
675		*nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
676	} else {
677		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
678	}
679	return (0);
680}
681
/*
 * User-request dispatch table for local-domain sockets.  Each pru_*
 * slot named here points at the matching uipc_* handler in this file,
 * except pru_sosend and pru_soreceive, which use sosend and soreceive.
 * Slots not named are left to the designated initializer's zero default.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort =		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_connect =		uipc_connect,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_sosend =		sosend,
	.pru_soreceive =	soreceive,
};
701
702int
703uipc_ctloutput(struct socket *so, struct sockopt *sopt)
704{
705	struct unpcb *unp = sotounpcb(so);
706	int error = 0;
707	pid_t peerpid;
708	struct socket *peerso;
709
710	switch (sopt->sopt_dir) {
711	case SOPT_GET:
712		switch (sopt->sopt_name) {
713		case LOCAL_PEERCRED:
714			if (unp->unp_flags & UNP_HAVEPC) {
715				error = sooptcopyout(sopt, &unp->unp_peercred,
716				    sizeof (unp->unp_peercred));
717			} else {
718				if (so->so_type == SOCK_STREAM)
719					error = ENOTCONN;
720				else
721					error = EINVAL;
722			}
723			break;
724		case LOCAL_PEERPID:
725		case LOCAL_PEEREPID:
726			if (unp->unp_conn == NULL) {
727				error = ENOTCONN;
728				break;
729			}
730			peerso = unp->unp_conn->unp_socket;
731			if (peerso == NULL)
732				panic("peer is connected but has no socket?");
733			unp_get_locks_in_order(so, peerso);
734			if (sopt->sopt_name == LOCAL_PEEREPID &&
735			    peerso->so_flags & SOF_DELEGATED)
736				peerpid = peerso->e_pid;
737			else
738				peerpid = peerso->last_pid;
739			socket_unlock(peerso, 1);
740			error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
741			break;
742		case LOCAL_PEERUUID:
743		case LOCAL_PEEREUUID:
744			if (unp->unp_conn == NULL) {
745				error = ENOTCONN;
746				break;
747			}
748			peerso = unp->unp_conn->unp_socket;
749			if (peerso == NULL)
750				panic("peer is connected but has no socket?");
751			unp_get_locks_in_order(so, peerso);
752			if (sopt->sopt_name == LOCAL_PEEREUUID &&
753			    peerso->so_flags & SOF_DELEGATED)
754				error = sooptcopyout(sopt, &peerso->e_uuid,
755				    sizeof (peerso->e_uuid));
756			else
757				error = sooptcopyout(sopt, &peerso->last_uuid,
758				    sizeof (peerso->last_uuid));
759			socket_unlock(peerso, 1);
760			break;
761		default:
762			error = EOPNOTSUPP;
763			break;
764		}
765		break;
766	case SOPT_SET:
767	default:
768		error = EOPNOTSUPP;
769		break;
770	}
771
772	return (error);
773}
774
775/*
776 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
777 * for stream sockets, although the total for sender and receiver is
778 * actually only PIPSIZ.
779 * Datagram sockets really use the sendspace as the maximum datagram size,
780 * and don't really want to reserve the sendspace.  Their recvspace should
781 * be large enough for at least one max-size datagram plus address.
782 */
/* Default stream buffer size; may be overridden before this point. */
#ifndef PIPSIZ
#define	PIPSIZ	8192
#endif
/* Values unp_attach() passes to soreserve() for stream sockets. */
static u_int32_t	unpst_sendspace = PIPSIZ;
static u_int32_t	unpst_recvspace = PIPSIZ;
/* Values unp_attach() passes to soreserve() for datagram sockets. */
static u_int32_t	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_int32_t	unpdg_recvspace = 4*1024;

static int	unp_rights;			/* file descriptors in flight */
static int	unp_disposed;			/* discarded file descriptors */

/*
 * sysctl exports of the variables above:
 *   net.local.stream: sendspace, recvspace, tracemdns	(read/write)
 *   net.local.dgram:  maxdgram, recvspace		(read/write)
 *   net.local:        inflight				(read-only)
 * NOTE(review): the four buffer sizes are u_int32_t but are exported
 * with SYSCTL_INT -- confirm that is intended.
 */
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpst_recvspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpst_tracemdns, 0, "");
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpdg_recvspace, 0, "");
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
808
809/*
810 * Returns:	0			Success
811 *		ENOBUFS
812 *	soreserve:ENOBUFS
813 */
814static int
815unp_attach(struct socket *so)
816{
817	struct unpcb *unp;
818	int error = 0;
819
820	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
821		switch (so->so_type) {
822
823		case SOCK_STREAM:
824			error = soreserve(so, unpst_sendspace, unpst_recvspace);
825			break;
826
827		case SOCK_DGRAM:
828			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
829			break;
830
831		default:
832			panic("unp_attach");
833		}
834		if (error)
835			return (error);
836	}
837	unp = (struct unpcb *)zalloc(unp_zone);
838	if (unp == NULL)
839		return (ENOBUFS);
840	bzero(unp, sizeof (*unp));
841
842	lck_mtx_init(&unp->unp_mtx,
843		unp_mtx_grp, unp_mtx_attr);
844
845	lck_rw_lock_exclusive(unp_list_mtx);
846	LIST_INIT(&unp->unp_refs);
847	unp->unp_socket = so;
848	unp->unp_gencnt = ++unp_gencnt;
849	unp_count++;
850	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
851	    &unp_dhead : &unp_shead, unp, unp_link);
852	lck_rw_done(unp_list_mtx);
853	so->so_pcb = (caddr_t)unp;
854	/*
855	 * Mark AF_UNIX socket buffers accordingly so that:
856	 *
857	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
858	 *    the lack of space; this essentially loosens the sbspace() check,
859	 *    since there is disconnect between sosend() and uipc_send() with
860	 *    respect to flow control that might result in our dropping the
861	 *    data in uipc_send().  By setting this, we allow for slightly
862	 *    more records to be appended to the receiving socket to avoid
863	 *    losing data (which we can't afford in the SOCK_STREAM case).
864	 *    Flow control still takes place since we adjust the sender's
865	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
866	 *    case and append would still fail when the queue overflows.
867	 *
868	 * b. In the presence of control messages containing internalized
869	 *    file descriptors, the append routines will not free them since
870	 *    we'd need to undo the work first via unp_dispose().
871	 */
872	so->so_rcv.sb_flags |= SB_UNIX;
873	so->so_snd.sb_flags |= SB_UNIX;
874	return (0);
875}
876
/*
 * Tear down a pcb: unlink it from the global list, release its vnode
 * (if bound), disconnect it from its peer, reset every socket that is
 * still connected to it, and finally mark its socket as disconnected
 * and as having its pcb cleared.
 *
 * The function starts out assuming the socket lock is held (so_locked
 * is initialised to 1) and re-takes it before returning if it had to
 * drop it along the way.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	/* Remove from the global list; both counters change under the lock. */
	lck_rw_lock_exclusive(unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		/* Drop the socket lock so unp_connect_lock can be taken first. */
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have gone away while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(unp_connect_lock);
		if (tvp != NULL)
			vnode_rele(tvp);		/* drop the usecount */
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	/* Reset every socket that still references this pcb. */
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait for our turn, then claim the disconnect_in_progress flag. */
		lck_mtx_lock(unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
				PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
 			unp2 = unp->unp_refs.lh_first;
 			socket_lock(unp2->unp_socket, 1);
		}

		/* Release the flag and wake anybody waiting for it. */
		lck_mtx_lock(unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
 			unp_drop(unp2, ECONNRESET);
 			socket_unlock(unp2->unp_socket, 1);
		}
	}

	/* Re-take the socket lock if the loop above had to drop it. */
	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
954
955/*
956 * Returns:	0			Success
957 *		EAFNOSUPPORT
958 *		EINVAL
959 *		EADDRINUSE
960 *		namei:???		[anything namei can return]
961 *		vnode_authorize:???	[anything vnode_authorize can return]
962 *
963 * Notes:	p at this point is the current process, as this function is
964 *		only called by sobind().
965 */
966static int
967unp_bind(
968	struct unpcb *unp,
969	struct sockaddr *nam,
970	proc_t p)
971{
972	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
973	struct vnode *vp, *dvp;
974	struct vnode_attr va;
975	vfs_context_t ctx = vfs_context_current();
976	int error, namelen;
977	struct nameidata nd;
978	struct socket *so = unp->unp_socket;
979	char buf[SOCK_MAXADDRLEN];
980
981	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
982		return (EAFNOSUPPORT);
983	}
984
985	if (unp->unp_vnode != NULL)
986		return (EINVAL);
987	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
988	if (namelen <= 0)
989		return (EINVAL);
990
991	socket_unlock(so, 0);
992
993	strlcpy(buf, soun->sun_path, namelen+1);
994	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
995	    CAST_USER_ADDR_T(buf), ctx);
996	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
997	error = namei(&nd);
998	if (error) {
999		socket_lock(so, 0);
1000		return (error);
1001	}
1002	dvp = nd.ni_dvp;
1003	vp = nd.ni_vp;
1004
1005	if (vp != NULL) {
1006		/*
1007		 * need to do this before the vnode_put of dvp
1008		 * since we may have to release an fs_nodelock
1009		 */
1010		nameidone(&nd);
1011
1012		vnode_put(dvp);
1013		vnode_put(vp);
1014
1015		socket_lock(so, 0);
1016		return (EADDRINUSE);
1017	}
1018
1019	VATTR_INIT(&va);
1020	VATTR_SET(&va, va_type, VSOCK);
1021	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1022
1023#if CONFIG_MACF
1024	error = mac_vnode_check_create(ctx,
1025	    nd.ni_dvp, &nd.ni_cnd, &va);
1026
1027	if (error == 0)
1028#endif /* CONFIG_MACF */
1029#if CONFIG_MACF_SOCKET_SUBSET
1030	error = mac_vnode_check_uipc_bind(ctx,
1031	    nd.ni_dvp, &nd.ni_cnd, &va);
1032
1033	if (error == 0)
1034#endif /* MAC_SOCKET_SUBSET */
1035	/* authorize before creating */
1036	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1037
1038	if (!error) {
1039		/* create the socket */
1040		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1041	}
1042
1043	nameidone(&nd);
1044	vnode_put(dvp);
1045
1046	if (error) {
1047		socket_lock(so, 0);
1048		return (error);
1049	}
1050	vnode_ref(vp);	/* gain a longterm reference */
1051	socket_lock(so, 0);
1052	vp->v_socket = unp->unp_socket;
1053	unp->unp_vnode = vp;
1054	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1055	vnode_put(vp);		/* drop the iocount */
1056
1057	return (0);
1058}
1059
1060
1061/*
1062 * Returns:	0			Success
1063 *		EAFNOSUPPORT		Address family not supported
1064 *		EINVAL			Invalid argument
1065 *		ENOTSOCK		Not a socket
1066 *		ECONNREFUSED		Connection refused
1067 *		EPROTOTYPE		Protocol wrong type for socket
1068 *		EISCONN			Socket is connected
1069 *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
1070 *	unp_connect2:EINVAL		Invalid argument
1071 *	namei:???			[anything namei can return]
1072 *	vnode_authorize:????		[anything vnode_authorize can return]
1073 *
1074 * Notes:	p at this point is the current process, as this function is
1075 *		only called by sosend(), sendfile(), and soconnectlock().
1076 */
1077static int
1078unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1079{
1080	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1081	struct vnode *vp;
1082	struct socket *so2, *so3, *list_so=NULL;
1083	struct unpcb *unp, *unp2, *unp3;
1084	vfs_context_t ctx = vfs_context_current();
1085	int error, len;
1086	struct nameidata nd;
1087	char buf[SOCK_MAXADDRLEN];
1088
1089	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1090		return (EAFNOSUPPORT);
1091	}
1092
1093	unp = sotounpcb(so);
1094	so2 = so3 = NULL;
1095
1096	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1097	if (len <= 0)
1098		return (EINVAL);
1099
1100	strlcpy(buf, soun->sun_path, len+1);
1101	socket_unlock(so, 0);
1102
1103	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1104	    CAST_USER_ADDR_T(buf), ctx);
1105	error = namei(&nd);
1106	if (error) {
1107		socket_lock(so, 0);
1108		return (error);
1109	}
1110	nameidone(&nd);
1111	vp = nd.ni_vp;
1112	if (vp->v_type != VSOCK) {
1113		error = ENOTSOCK;
1114		socket_lock(so, 0);
1115		goto out;
1116	}
1117
1118#if CONFIG_MACF_SOCKET_SUBSET
1119	error = mac_vnode_check_uipc_connect(ctx, vp);
1120	if (error) {
1121		socket_lock(so, 0);
1122		goto out;
1123	}
1124#endif /* MAC_SOCKET_SUBSET */
1125
1126	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1127	if (error) {
1128		socket_lock(so, 0);
1129		goto out;
1130	}
1131
1132	lck_mtx_lock(unp_connect_lock);
1133
1134	if (vp->v_socket == 0) {
1135		lck_mtx_unlock(unp_connect_lock);
1136		error = ECONNREFUSED;
1137		socket_lock(so, 0);
1138		goto out;
1139	}
1140
1141	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1142	so2 = vp->v_socket;
1143	lck_mtx_unlock(unp_connect_lock);
1144
1145
1146	if (so2->so_pcb == NULL) {
1147		error = ECONNREFUSED;
1148		if (so != so2) {
1149			socket_unlock(so2, 1);
1150			socket_lock(so, 0);
1151		} else {
1152			/* Release the reference held for the listen socket */
1153			so2->so_usecount--;
1154		}
1155		goto out;
1156	}
1157
1158	if (so < so2) {
1159		socket_unlock(so2, 0);
1160		socket_lock(so, 0);
1161		socket_lock(so2, 0);
1162	} else if (so > so2) {
1163		socket_lock(so, 0);
1164	}
1165	/*
1166	 * Check if socket was connected while we were trying to
1167	 * get the socket locks in order.
1168	 * XXX - probably shouldn't return an error for SOCK_DGRAM
1169	 */
1170	if ((so->so_state & SS_ISCONNECTED) != 0) {
1171		error = EISCONN;
1172		goto decref_out;
1173	}
1174
1175	if (so->so_type != so2->so_type) {
1176		error = EPROTOTYPE;
1177		goto decref_out;
1178	}
1179
1180	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1181		/* Release the incoming socket but keep a reference */
1182		socket_unlock(so, 0);
1183
1184		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1185		    (so3 = sonewconn(so2, 0, nam)) == 0) {
1186			error = ECONNREFUSED;
1187			if (so != so2) {
1188				socket_unlock(so2, 1);
1189				socket_lock(so, 0);
1190			} else {
1191				socket_lock(so, 0);
1192				/* Release the reference held for
1193				 * listen socket.
1194				 */
1195				so2->so_usecount--;
1196			}
1197			goto out;
1198		}
1199		unp2 = sotounpcb(so2);
1200		unp3 = sotounpcb(so3);
1201		if (unp2->unp_addr)
1202			unp3->unp_addr = (struct sockaddr_un *)
1203			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1204
1205		/*
1206		 * unp_peercred management:
1207		 *
1208		 * The connecter's (client's) credentials are copied
1209		 * from its process structure at the time of connect()
1210		 * (which is now).
1211		 */
1212		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1213		unp3->unp_flags |= UNP_HAVEPC;
1214		/*
1215		 * The receiver's (server's) credentials are copied
1216		 * from the unp_peercred member of socket on which the
1217		 * former called listen(); unp_listen() cached that
1218		 * process's credentials at that time so we can use
1219		 * them now.
1220		 */
1221		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1222		    ("unp_connect: listener without cached peercred"));
1223
1224		/* Here we need to have both so and so2 locks and so2
1225		 * is already locked. Lock ordering is required.
1226		 */
1227		if (so < so2) {
1228			socket_unlock(so2, 0);
1229			socket_lock(so, 0);
1230			socket_lock(so2, 0);
1231		} else {
1232			socket_lock(so, 0);
1233		}
1234
1235		/* Check again if the socket state changed when its lock was released */
1236		if ((so->so_state & SS_ISCONNECTED) != 0) {
1237			error = EISCONN;
1238			socket_unlock(so2, 1);
1239			socket_lock(so3, 0);
1240			sofreelastref(so3, 1);
1241                	goto out;
1242		}
1243		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1244		    sizeof (unp->unp_peercred));
1245		unp->unp_flags |= UNP_HAVEPC;
1246
1247#if CONFIG_MACF_SOCKET
1248		/* XXXMAC: recursive lock: SOCK_LOCK(so); */
1249		mac_socketpeer_label_associate_socket(so, so3);
1250		mac_socketpeer_label_associate_socket(so3, so);
1251		/* XXXMAC: SOCK_UNLOCK(so); */
1252#endif /* MAC_SOCKET */
1253
1254		/* Hold the reference on listening socket until the end */
1255		socket_unlock(so2, 0);
1256		list_so = so2;
1257
1258		/* Lock ordering doesn't matter because so3 was just created */
1259		socket_lock(so3, 1);
1260		so2 = so3;
1261
1262		/*
1263		 * Enable tracing for mDNSResponder endpoints.  (The use
1264		 * of sizeof instead of strlen below takes the null
1265		 * terminating character into account.)
1266		 */
1267		if (unpst_tracemdns &&
1268		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1269		    sizeof (MDNSRESPONDER_PATH))) {
1270			unp->unp_flags |= UNP_TRACE_MDNS;
1271			unp2->unp_flags |= UNP_TRACE_MDNS;
1272		}
1273	}
1274
1275	error = unp_connect2(so, so2);
1276
1277decref_out:
1278	if (so2 != NULL) {
1279		if (so != so2) {
1280			socket_unlock(so2, 1);
1281		} else {
1282			/* Release the extra reference held for the listen socket.
1283			 * This is possible only for SOCK_DGRAM sockets. We refuse
1284			 * connecting to the same socket for SOCK_STREAM sockets.
1285			 */
1286			so2->so_usecount--;
1287		}
1288	}
1289
1290	if (list_so != NULL) {
1291		socket_lock(list_so, 0);
1292		socket_unlock(list_so, 1);
1293	}
1294
1295out:
1296	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1297	vnode_put(vp);
1298	return (error);
1299}
1300
1301/*
1302 * Returns:	0			Success
1303 *		EPROTOTYPE		Protocol wrong type for socket
1304 *		EINVAL			Invalid argument
1305 */
1306int
1307unp_connect2(struct socket *so, struct socket *so2)
1308{
1309	struct unpcb *unp = sotounpcb(so);
1310	struct unpcb *unp2;
1311
1312	if (so2->so_type != so->so_type)
1313		return (EPROTOTYPE);
1314
1315	unp2 = sotounpcb(so2);
1316
1317	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1318	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1319
1320	/* Verify both sockets are still opened */
1321	if (unp == 0 || unp2 == 0)
1322		return (EINVAL);
1323
1324	unp->unp_conn = unp2;
1325	so2->so_usecount++;
1326
1327	switch (so->so_type) {
1328
1329	case SOCK_DGRAM:
1330		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1331
1332		if (so != so2) {
1333			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
1334 			/* Keep an extra reference on so2 that will be dropped
1335			 * soon after getting the locks in order
1336			 */
1337			socket_unlock(so2, 0);
1338			soisconnected(so);
1339			unp_get_locks_in_order(so, so2);
1340			so2->so_usecount--;
1341		} else {
1342			soisconnected(so);
1343		}
1344
1345		break;
1346
1347	case SOCK_STREAM:
1348		/* This takes care of socketpair */
1349		if (!(unp->unp_flags & UNP_HAVEPC) &&
1350		    !(unp2->unp_flags & UNP_HAVEPC)) {
1351			cru2x(kauth_cred_get(), &unp->unp_peercred);
1352			unp->unp_flags |= UNP_HAVEPC;
1353
1354			cru2x(kauth_cred_get(), &unp2->unp_peercred);
1355			unp2->unp_flags |= UNP_HAVEPC;
1356		}
1357		unp2->unp_conn = unp;
1358		so->so_usecount++;
1359
1360		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
1361		socket_unlock(so, 0);
1362		soisconnected(so2);
1363
1364		/* Keep an extra reference on so2, that will be dropped soon after
1365		 * getting the locks in order again.
1366		 */
1367		socket_unlock(so2, 0);
1368
1369		socket_lock(so, 0);
1370		soisconnected(so);
1371
1372		unp_get_locks_in_order(so, so2);
1373		/* Decrement the extra reference left before */
1374		so2->so_usecount--;
1375		break;
1376
1377	default:
1378		panic("unknown socket type %d in unp_connect2", so->so_type);
1379	}
1380	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1381	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1382	return (0);
1383}
1384
1385static void
1386unp_disconnect(struct unpcb *unp)
1387{
1388	struct unpcb *unp2 = NULL;
1389	struct socket *so2 = NULL, *so;
1390	struct socket *waitso;
1391	int so_locked = 1, strdisconn = 0;
1392
1393	so = unp->unp_socket;
1394	if (unp->unp_conn == NULL) {
1395		return;
1396	}
1397	lck_mtx_lock(unp_disconnect_lock);
1398	while (disconnect_in_progress != 0) {
1399		if (so_locked == 1) {
1400			socket_unlock(so, 0);
1401			so_locked = 0;
1402		}
1403		(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1404			PSOCK, "disconnect", NULL);
1405	}
1406	disconnect_in_progress = 1;
1407	lck_mtx_unlock(unp_disconnect_lock);
1408
1409	if (so_locked == 0) {
1410		socket_lock(so, 0);
1411		so_locked = 1;
1412	}
1413
1414	unp2 = unp->unp_conn;
1415
1416	if (unp2 == 0 || unp2->unp_socket == NULL) {
1417		goto out;
1418	}
1419	so2 = unp2->unp_socket;
1420
1421try_again:
1422	if (so == so2) {
1423		if (so_locked == 0) {
1424			socket_lock(so, 0);
1425		}
1426		waitso = so;
1427	} else if (so < so2) {
1428		if (so_locked == 0) {
1429			socket_lock(so, 0);
1430		}
1431		socket_lock(so2, 1);
1432		waitso = so2;
1433	} else {
1434		if (so_locked == 1) {
1435			socket_unlock(so, 0);
1436		}
1437		socket_lock(so2, 1);
1438		socket_lock(so, 0);
1439		waitso = so;
1440	}
1441	so_locked = 1;
1442
1443	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1444	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1445
1446	/* Check for the UNP_DONTDISCONNECT flag, if it
1447	 * is set, release both sockets and go to sleep
1448	 */
1449
1450	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1451		if (so != so2) {
1452			socket_unlock(so2, 1);
1453		}
1454		so_locked = 0;
1455
1456		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
1457			PSOCK | PDROP, "unpdisconnect", NULL);
1458		goto try_again;
1459	}
1460
1461	if (unp->unp_conn == NULL) {
1462		panic("unp_conn became NULL after sleep");
1463	}
1464
1465	unp->unp_conn = NULL;
1466	so2->so_usecount--;
1467
1468	if (unp->unp_flags & UNP_TRACE_MDNS)
1469		unp->unp_flags &= ~UNP_TRACE_MDNS;
1470
1471	switch (unp->unp_socket->so_type) {
1472
1473	case SOCK_DGRAM:
1474		LIST_REMOVE(unp, unp_reflink);
1475		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1476		if (so != so2)
1477			socket_unlock(so2, 1);
1478		break;
1479
1480	case SOCK_STREAM:
1481		unp2->unp_conn = NULL;
1482		so->so_usecount--;
1483
1484		/* Set the socket state correctly but do a wakeup later when
1485		 * we release all locks except the socket lock, this will avoid
1486		 * a deadlock.
1487		 */
1488		unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1489		unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1490
1491		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1492		unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1493
1494		if (unp2->unp_flags & UNP_TRACE_MDNS)
1495			unp2->unp_flags &= ~UNP_TRACE_MDNS;
1496
1497		strdisconn = 1;
1498		break;
1499	default:
1500		panic("unknown socket type %d", so->so_type);
1501	}
1502out:
1503	lck_mtx_lock(unp_disconnect_lock);
1504	disconnect_in_progress = 0;
1505	wakeup(&disconnect_in_progress);
1506	lck_mtx_unlock(unp_disconnect_lock);
1507
1508	if (strdisconn) {
1509		socket_unlock(so, 0);
1510		soisdisconnected(so2);
1511		socket_unlock(so2, 1);
1512
1513		socket_lock(so,0);
1514		soisdisconnected(so);
1515	}
1516	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1517	return;
1518}
1519
1520/*
1521 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1522 * The unpcb_compat data structure is passed to user space and must not change.
1523 */
1524static void
1525unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1526{
1527#if defined(__LP64__)
1528	cp->unp_link.le_next = (u_int32_t)
1529	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1530	cp->unp_link.le_prev = (u_int32_t)
1531	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1532#else
1533	cp->unp_link.le_next = (struct unpcb_compat *)
1534	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1535	cp->unp_link.le_prev = (struct unpcb_compat **)
1536	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1537#endif
1538	cp->unp_socket = (_UNPCB_PTR(struct socket *))
1539	    VM_KERNEL_ADDRPERM(up->unp_socket);
1540	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1541	    VM_KERNEL_ADDRPERM(up->unp_vnode);
1542	cp->unp_ino = up->unp_ino;
1543	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1544	    VM_KERNEL_ADDRPERM(up->unp_conn);
1545	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1546#if defined(__LP64__)
1547	cp->unp_reflink.le_next =
1548	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1549	cp->unp_reflink.le_prev =
1550	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1551#else
1552	cp->unp_reflink.le_next =
1553	    (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1554	cp->unp_reflink.le_prev =
1555	    (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1556#endif
1557	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1558	    VM_KERNEL_ADDRPERM(up->unp_addr);
1559	cp->unp_cc = up->unp_cc;
1560	cp->unp_mbcnt = up->unp_mbcnt;
1561	cp->unp_gencnt = up->unp_gencnt;
1562}
1563
1564static int
1565unp_pcblist SYSCTL_HANDLER_ARGS
1566{
1567#pragma unused(oidp,arg2)
1568	int error, i, n;
1569	struct unpcb *unp, **unp_list;
1570	unp_gen_t gencnt;
1571	struct xunpgen xug;
1572	struct unp_head *head;
1573
1574	lck_rw_lock_shared(unp_list_mtx);
1575	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1576
1577	/*
1578	 * The process of preparing the PCB list is too time-consuming and
1579	 * resource-intensive to repeat twice on every request.
1580	 */
1581	if (req->oldptr == USER_ADDR_NULL) {
1582		n = unp_count;
1583		req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1584		    sizeof (struct xunpcb);
1585		lck_rw_done(unp_list_mtx);
1586		return (0);
1587	}
1588
1589	if (req->newptr != USER_ADDR_NULL) {
1590		lck_rw_done(unp_list_mtx);
1591		return (EPERM);
1592	}
1593
1594	/*
1595	 * OK, now we're committed to doing something.
1596	 */
1597	gencnt = unp_gencnt;
1598	n = unp_count;
1599
1600	bzero(&xug, sizeof (xug));
1601	xug.xug_len = sizeof (xug);
1602	xug.xug_count = n;
1603	xug.xug_gen = gencnt;
1604	xug.xug_sogen = so_gencnt;
1605	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1606	if (error) {
1607		lck_rw_done(unp_list_mtx);
1608		return (error);
1609	}
1610
1611	/*
1612	 * We are done if there is no pcb
1613	 */
1614	if (n == 0)  {
1615		lck_rw_done(unp_list_mtx);
1616		return (0);
1617	}
1618
1619	MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1620	    M_TEMP, M_WAITOK);
1621	if (unp_list == 0) {
1622		lck_rw_done(unp_list_mtx);
1623		return (ENOMEM);
1624	}
1625
1626	for (unp = head->lh_first, i = 0; unp && i < n;
1627	    unp = unp->unp_link.le_next) {
1628		if (unp->unp_gencnt <= gencnt)
1629			unp_list[i++] = unp;
1630	}
1631	n = i;			/* in case we lost some during malloc */
1632
1633	error = 0;
1634	for (i = 0; i < n; i++) {
1635		unp = unp_list[i];
1636		if (unp->unp_gencnt <= gencnt) {
1637			struct xunpcb xu;
1638
1639			bzero(&xu, sizeof (xu));
1640			xu.xu_len = sizeof (xu);
1641			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1642			    VM_KERNEL_ADDRPERM(unp);
1643			/*
1644			 * XXX - need more locking here to protect against
1645			 * connect/disconnect races for SMP.
1646			 */
1647			if (unp->unp_addr)
1648				bcopy(unp->unp_addr, &xu.xu_addr,
1649				    unp->unp_addr->sun_len);
1650			if (unp->unp_conn && unp->unp_conn->unp_addr)
1651				bcopy(unp->unp_conn->unp_addr,
1652				    &xu.xu_caddr,
1653				    unp->unp_conn->unp_addr->sun_len);
1654			unpcb_to_compat(unp, &xu.xu_unp);
1655			sotoxsocket(unp->unp_socket, &xu.xu_socket);
1656			error = SYSCTL_OUT(req, &xu, sizeof (xu));
1657		}
1658	}
1659	if (!error) {
1660		/*
1661		 * Give the user an updated idea of our state.
1662		 * If the generation differs from what we told
1663		 * her before, she knows that something happened
1664		 * while we were processing this request, and it
1665		 * might be necessary to retry.
1666		 */
1667		bzero(&xug, sizeof (xug));
1668		xug.xug_len = sizeof (xug);
1669		xug.xug_gen = unp_gencnt;
1670		xug.xug_sogen = so_gencnt;
1671		xug.xug_count = unp_count;
1672		error = SYSCTL_OUT(req, &xug, sizeof (xug));
1673	}
1674	FREE(unp_list, M_TEMP);
1675	lck_rw_done(unp_list_mtx);
1676	return (error);
1677}
1678
/*
 * Read-only "pcblist" sysctl nodes under the _net_local_dgram and
 * _net_local_stream parents.  Both are served by unp_pcblist(); the socket
 * type passed as arg1 tells the handler which pcb list to walk.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
            CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
            (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
            CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
            (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local stream sockets");
1687
1688
1689static int
1690unp_pcblist64 SYSCTL_HANDLER_ARGS
1691{
1692#pragma unused(oidp,arg2)
1693	int error, i, n;
1694	struct unpcb *unp, **unp_list;
1695	unp_gen_t gencnt;
1696	struct xunpgen xug;
1697	struct unp_head *head;
1698
1699	lck_rw_lock_shared(unp_list_mtx);
1700	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1701
1702	/*
1703	 * The process of preparing the PCB list is too time-consuming and
1704	 * resource-intensive to repeat twice on every request.
1705	 */
1706	if (req->oldptr == USER_ADDR_NULL) {
1707		n = unp_count;
1708		req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1709		    (sizeof (struct xunpcb64));
1710		lck_rw_done(unp_list_mtx);
1711		return (0);
1712	}
1713
1714	if (req->newptr != USER_ADDR_NULL) {
1715		lck_rw_done(unp_list_mtx);
1716		return (EPERM);
1717	}
1718
1719	/*
1720	 * OK, now we're committed to doing something.
1721	 */
1722	gencnt = unp_gencnt;
1723	n = unp_count;
1724
1725	bzero(&xug, sizeof (xug));
1726	xug.xug_len = sizeof (xug);
1727	xug.xug_count = n;
1728	xug.xug_gen = gencnt;
1729	xug.xug_sogen = so_gencnt;
1730	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1731	if (error) {
1732		lck_rw_done(unp_list_mtx);
1733		return (error);
1734	}
1735
1736	/*
1737	 * We are done if there is no pcb
1738	 */
1739	if (n == 0)  {
1740		lck_rw_done(unp_list_mtx);
1741		return (0);
1742	}
1743
1744	MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1745	    M_TEMP, M_WAITOK);
1746	if (unp_list == 0) {
1747		lck_rw_done(unp_list_mtx);
1748		return (ENOMEM);
1749	}
1750
1751	for (unp = head->lh_first, i = 0; unp && i < n;
1752	    unp = unp->unp_link.le_next) {
1753		if (unp->unp_gencnt <= gencnt)
1754			unp_list[i++] = unp;
1755	}
1756	n = i;			/* in case we lost some during malloc */
1757
1758	error = 0;
1759	for (i = 0; i < n; i++) {
1760		unp = unp_list[i];
1761		if (unp->unp_gencnt <= gencnt) {
1762			struct xunpcb64 xu;
1763			size_t		xu_len = sizeof(struct xunpcb64);
1764
1765			bzero(&xu, xu_len);
1766			xu.xu_len = xu_len;
1767			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1768			xu.xunp_link.le_next = (u_int64_t)
1769			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1770			xu.xunp_link.le_prev = (u_int64_t)
1771			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1772			xu.xunp_socket = (u_int64_t)
1773			    VM_KERNEL_ADDRPERM(unp->unp_socket);
1774			xu.xunp_vnode = (u_int64_t)
1775			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
1776			xu.xunp_ino = unp->unp_ino;
1777			xu.xunp_conn = (u_int64_t)
1778			    VM_KERNEL_ADDRPERM(unp->unp_conn);
1779			xu.xunp_refs = (u_int64_t)
1780			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1781			xu.xunp_reflink.le_next = (u_int64_t)
1782			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1783			xu.xunp_reflink.le_prev = (u_int64_t)
1784			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1785			xu.xunp_cc = unp->unp_cc;
1786			xu.xunp_mbcnt = unp->unp_mbcnt;
1787			xu.xunp_gencnt = unp->unp_gencnt;
1788
1789			if (unp->unp_socket)
1790				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1791
1792			/*
1793			 * XXX - need more locking here to protect against
1794			 * connect/disconnect races for SMP.
1795			 */
1796                        if (unp->unp_addr)
1797                                bcopy(unp->unp_addr, &xu.xunp_addr,
1798                                    unp->unp_addr->sun_len);
1799                        if (unp->unp_conn && unp->unp_conn->unp_addr)
1800                                bcopy(unp->unp_conn->unp_addr,
1801                                    &xu.xunp_caddr,
1802                                    unp->unp_conn->unp_addr->sun_len);
1803
1804			error = SYSCTL_OUT(req, &xu, xu_len);
1805		}
1806	}
1807	if (!error) {
1808		/*
1809		 * Give the user an updated idea of our state.
1810		 * If the generation differs from what we told
1811		 * her before, she knows that something happened
1812		 * while we were processing this request, and it
1813		 * might be necessary to retry.
1814		 */
1815		bzero(&xug, sizeof (xug));
1816		xug.xug_len = sizeof (xug);
1817		xug.xug_gen = unp_gencnt;
1818		xug.xug_sogen = so_gencnt;
1819		xug.xug_count = unp_count;
1820		error = SYSCTL_OUT(req, &xug, sizeof (xug));
1821	}
1822	FREE(unp_list, M_TEMP);
1823	lck_rw_done(unp_list_mtx);
1824	return (error);
1825}
1826
/*
 * Read-only "pcblist64" sysctl nodes under the _net_local_dgram and
 * _net_local_stream parents.  Both are served by unp_pcblist64(); the
 * socket type passed as arg1 tells the handler which pcb list to walk.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
	    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
	    "List of active local stream sockets 64 bit");
1835
1836
1837static void
1838unp_shutdown(struct unpcb *unp)
1839{
1840	struct socket *so = unp->unp_socket;
1841	struct socket *so2;
1842	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1843		so2 = unp->unp_conn->unp_socket;
1844		unp_get_locks_in_order(so, so2);
1845		socantrcvmore(so2);
1846		socket_unlock(so2, 1);
1847	}
1848}
1849
1850static void
1851unp_drop(struct unpcb *unp, int errno)
1852{
1853	struct socket *so = unp->unp_socket;
1854
1855	so->so_error = errno;
1856	unp_disconnect(unp);
1857}
1858
1859/*
1860 * Returns:	0			Success
1861 *		EMSGSIZE		The new fd's will not fit
1862 *		ENOBUFS			Cannot alloc struct fileproc
1863 */
1864int
1865unp_externalize(struct mbuf *rights)
1866{
1867	proc_t p = current_proc();		/* XXX */
1868	int i;
1869	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1870	struct fileglob **rp = (struct fileglob **)(cm + 1);
1871	int *fds = (int *)(cm + 1);
1872	struct fileproc *fp;
1873	struct fileglob *fg;
1874	int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1875	int f;
1876
1877	proc_fdlock(p);
1878
1879	/*
1880	 * if the new FD's will not fit, then we free them all
1881	 */
1882	if (!fdavail(p, newfds)) {
1883		for (i = 0; i < newfds; i++) {
1884			fg = *rp;
1885			unp_discard_fdlocked(fg, p);
1886			*rp++ = NULL;
1887		}
1888		proc_fdunlock(p);
1889
1890		return (EMSGSIZE);
1891	}
1892	/*
1893	 * now change each pointer to an fd in the global table to
1894	 * an integer that is the index to the local fd table entry
1895	 * that we set up to point to the global one we are transferring.
1896	 * XXX (1) this assumes a pointer and int are the same size,
1897	 * XXX     or the mbuf can hold the expansion
1898	 * XXX (2) allocation failures should be non-fatal
1899	 */
1900	for (i = 0; i < newfds; i++) {
1901#if CONFIG_MACF_SOCKET
1902		/*
1903		 * If receive access is denied, don't pass along
1904		 * and error message, just discard the descriptor.
1905		 */
1906		if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1907			fg = *rp;
1908			*rp++ = 0;
1909			unp_discard_fdlocked(fg, p);
1910			continue;
1911		}
1912#endif
1913		if (fdalloc(p, 0, &f))
1914			panic("unp_externalize:fdalloc");
1915		fg = rp[i];
1916		fp = fileproc_alloc_init(NULL);
1917		if (fp == NULL)
1918			panic("unp_externalize: MALLOC_ZONE");
1919		fp->f_iocount = 0;
1920		fp->f_fglob = fg;
1921		fg_removeuipc(fg);
1922		procfdtbl_releasefd(p, f, fp);
1923		(void) OSAddAtomic(-1, &unp_rights);
1924		fds[i] = f;
1925	}
1926	proc_fdunlock(p);
1927
1928	return (0);
1929}
1930
1931void
1932unp_init(void)
1933{
1934	unp_zone = zinit(sizeof (struct unpcb),
1935	    (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
1936
1937	if (unp_zone == 0)
1938		panic("unp_init");
1939	LIST_INIT(&unp_dhead);
1940	LIST_INIT(&unp_shead);
1941
1942	/*
1943	 * allocate lock group attribute and group for udp pcb mutexes
1944	 */
1945	unp_mtx_grp_attr = lck_grp_attr_alloc_init();
1946
1947	unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
1948
1949	unp_mtx_attr = lck_attr_alloc_init();
1950
1951	if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
1952	    unp_mtx_attr)) == NULL)
1953		return;	/* pretty much dead if this fails... */
1954
1955	if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1956		unp_mtx_attr)) == NULL)
1957		return;
1958
1959	if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1960		unp_mtx_attr)) == NULL)
1961		return;
1962}
1963
/*
 * Fallback MIN() used only when no earlier definition is in effect.
 * The selected argument is evaluated twice, so callers should not pass
 * expressions with side effects.
 */
#ifndef MIN
#define	MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
1967
1968/*
1969 * Returns:	0			Success
1970 *		EINVAL
1971 *	fdgetf_noref:EBADF
1972 */
1973static int
1974unp_internalize(struct mbuf *control, proc_t p)
1975{
1976	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1977	int *fds;
1978	struct fileglob **rp;
1979	struct fileproc *fp;
1980	int i, error;
1981	int oldfds;
1982
1983	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1984	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1985	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
1986		return (EINVAL);
1987	}
1988	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1989
1990	proc_fdlock(p);
1991	fds = (int *)(cm + 1);
1992
1993	for (i = 0; i < oldfds; i++) {
1994		struct fileproc *tmpfp;
1995		if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
1996			proc_fdunlock(p);
1997			return (error);
1998		} else if (!filetype_issendable(FILEGLOB_DTYPE(tmpfp->f_fglob))) {
1999			proc_fdunlock(p);
2000			return (EINVAL);
2001		} else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
2002			error = fp_guard_exception(p,
2003				fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
2004			proc_fdunlock(p);
2005			return (error);
2006		}
2007	}
2008	rp = (struct fileglob **)(cm + 1);
2009
2010	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2011	 * and doing them in-order would result in stomping over unprocessed fd's
2012	 */
2013	for (i = (oldfds - 1); i >= 0; i--) {
2014		(void) fdgetf_noref(p, fds[i], &fp);
2015		fg_insertuipc(fp->f_fglob);
2016		rp[i] = fp->f_fglob;
2017		(void) OSAddAtomic(1, &unp_rights);
2018	}
2019	proc_fdunlock(p);
2020
2021	return (0);
2022}
2023
/*
 * State of the rights garbage collector:
 *   unp_gcing    - non-zero while a unp_gc() pass is running
 *   unp_gcwait   - set by unp_gc_wait() when a thread sleeps on unp_gcing
 *   unp_defer    - reset at the start of a pass and decremented each time
 *                  an FDEFER mark is cleared; the marking loop repeats
 *                  while it is non-zero
 *   unp_gcthread - the thread that started the current unp_gc() pass
 */
static int	unp_defer, unp_gcing, unp_gcwait;
static thread_t unp_gcthread = NULL;
2026
2027/* always called under uipc_lock */
2028void
2029unp_gc_wait(void)
2030{
2031	if (unp_gcthread == current_thread())
2032		return;
2033
2034	while (unp_gcing != 0) {
2035		unp_gcwait = 1;
2036		msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
2037	}
2038}
2039
2040
2041__private_extern__ void
2042unp_gc(void)
2043{
2044	struct fileglob *fg, *nextfg;
2045	struct socket *so;
2046	static struct fileglob **extra_ref;
2047	struct fileglob **fpp;
2048	int nunref, i;
2049	int need_gcwakeup = 0;
2050
2051	lck_mtx_lock(uipc_lock);
2052	if (unp_gcing) {
2053		lck_mtx_unlock(uipc_lock);
2054		return;
2055	}
2056	unp_gcing = 1;
2057	unp_defer = 0;
2058	unp_gcthread = current_thread();
2059	lck_mtx_unlock(uipc_lock);
2060	/*
2061	 * before going through all this, set all FDs to
2062	 * be NOT defered and NOT externally accessible
2063	 */
2064	for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2065		lck_mtx_lock(&fg->fg_lock);
2066		fg->fg_flag &= ~(FMARK|FDEFER);
2067		lck_mtx_unlock(&fg->fg_lock);
2068	}
2069	do {
2070		for (fg = fmsghead.lh_first; fg != 0;
2071		    fg = fg->f_msglist.le_next) {
2072			lck_mtx_lock(&fg->fg_lock);
2073			/*
2074			 * If the file is not open, skip it
2075			 */
2076			if (fg->fg_count == 0) {
2077				lck_mtx_unlock(&fg->fg_lock);
2078				continue;
2079			}
2080			/*
2081			 * If we already marked it as 'defer'  in a
2082			 * previous pass, then try process it this time
2083			 * and un-mark it
2084			 */
2085			if (fg->fg_flag & FDEFER) {
2086				fg->fg_flag &= ~FDEFER;
2087				unp_defer--;
2088			} else {
2089				/*
2090				 * if it's not defered, then check if it's
2091				 * already marked.. if so skip it
2092				 */
2093				if (fg->fg_flag & FMARK) {
2094					lck_mtx_unlock(&fg->fg_lock);
2095					continue;
2096				}
2097				/*
2098				 * If all references are from messages
2099				 * in transit, then skip it. it's not
2100				 * externally accessible.
2101				 */
2102				if (fg->fg_count == fg->fg_msgcount) {
2103					lck_mtx_unlock(&fg->fg_lock);
2104					continue;
2105				}
2106				/*
2107				 * If it got this far then it must be
2108				 * externally accessible.
2109				 */
2110				fg->fg_flag |= FMARK;
2111			}
2112			/*
2113			 * either it was defered, or it is externally
2114			 * accessible and not already marked so.
2115			 * Now check if it is possibly one of OUR sockets.
2116			 */
2117			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2118			    (so = (struct socket *)fg->fg_data) == 0) {
2119				lck_mtx_unlock(&fg->fg_lock);
2120				continue;
2121			}
2122			if (so->so_proto->pr_domain != localdomain ||
2123			    (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
2124				lck_mtx_unlock(&fg->fg_lock);
2125				continue;
2126			}
2127#ifdef notdef
2128			if (so->so_rcv.sb_flags & SB_LOCK) {
2129				/*
2130				 * This is problematical; it's not clear
2131				 * we need to wait for the sockbuf to be
2132				 * unlocked (on a uniprocessor, at least),
2133				 * and it's also not clear what to do
2134				 * if sbwait returns an error due to receipt
2135				 * of a signal.  If sbwait does return
2136				 * an error, we'll go into an infinite
2137				 * loop.  Delete all of this for now.
2138				 */
2139				(void) sbwait(&so->so_rcv);
2140				goto restart;
2141			}
2142#endif
2143			/*
2144			 * So, Ok, it's one of our sockets and it IS externally
2145			 * accessible (or was defered). Now we look
2146			 * to see if we hold any file descriptors in its
2147			 * message buffers. Follow those links and mark them
2148			 * as accessible too.
2149			 *
2150			 * In case a file is passed onto itself we need to
2151			 * release the file lock.
2152			 */
2153			lck_mtx_unlock(&fg->fg_lock);
2154
2155			unp_scan(so->so_rcv.sb_mb, unp_mark);
2156		}
2157	} while (unp_defer);
2158	/*
2159	 * We grab an extra reference to each of the file table entries
2160	 * that are not otherwise accessible and then free the rights
2161	 * that are stored in messages on them.
2162	 *
	 * The bug in the original code is a little tricky, so I'll describe
2164	 * what's wrong with it here.
2165	 *
2166	 * It is incorrect to simply unp_discard each entry for f_msgcount
2167	 * times -- consider the case of sockets A and B that contain
2168	 * references to each other.  On a last close of some other socket,
2169	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code unp_discards,
2171	 * we end up doing a (full) closef on the descriptor.  A closef on A
2172	 * results in the following chain.  Closef calls soo_close, which
2173	 * calls soclose.   Soclose calls first (through the switch
2174	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
2175	 * returns because the previous instance had set unp_gcing, and
2176	 * we return all the way back to soclose, which marks the socket
2177	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
2178	 * to free up the rights that are queued in messages on the socket A,
2179	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
2180	 * switch unp_dispose, which unp_scans with unp_discard.  This second
2181	 * instance of unp_discard just calls closef on B.
2182	 *
2183	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
2184	 * which results in another closef on A.  Unfortunately, A is already
2185	 * being closed, and the descriptor has already been marked with
2186	 * SS_NOFDREF, and soclose panics at this point.
2187	 *
2188	 * Here, we first take an extra reference to each inaccessible
2189	 * descriptor.  Then, we call sorflush ourself, since we know
2190	 * it is a Unix domain socket anyhow.  After we destroy all the
2191	 * rights carried in messages, we do a last closef to get rid
2192	 * of our extra reference.  This is the last close, and the
2193	 * unp_detach etc will shut down the socket.
2194	 *
2195	 * 91/09/19, bsy@cs.cmu.edu
2196	 */
2197	extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
2198	    M_FILEGLOB, M_WAITOK);
2199	if (extra_ref == NULL)
2200		goto bail;
2201	for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
2202	    fg = nextfg) {
2203		lck_mtx_lock(&fg->fg_lock);
2204
2205		nextfg = fg->f_msglist.le_next;
2206		/*
2207		 * If it's not open, skip it
2208		 */
2209		if (fg->fg_count == 0) {
2210			lck_mtx_unlock(&fg->fg_lock);
2211			continue;
2212		}
2213		/*
2214		 * If all refs are from msgs, and it's not marked accessible
2215		 * then it must be referenced from some unreachable cycle
2216		 * of (shut-down) FDs, so include it in our
2217		 * list of FDs to remove
2218		 */
2219		if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
2220			fg->fg_count++;
2221			*fpp++ = fg;
2222			nunref++;
2223		}
2224		lck_mtx_unlock(&fg->fg_lock);
2225	}
2226	/*
2227	 * for each FD on our hit list, do the following two things
2228	 */
2229	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2230		struct fileglob *tfg;
2231
2232		tfg = *fpp;
2233
2234		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2235		    tfg->fg_data != NULL) {
2236			so = (struct socket *)(tfg->fg_data);
2237
2238			socket_lock(so, 0);
2239
2240			sorflush(so);
2241
2242			socket_unlock(so, 0);
2243		}
2244	}
2245	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
2246		closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
2247
2248	FREE((caddr_t)extra_ref, M_FILEGLOB);
2249bail:
2250        lck_mtx_lock(uipc_lock);
2251	unp_gcing = 0;
2252	unp_gcthread = NULL;
2253
2254	if (unp_gcwait != 0) {
2255		unp_gcwait = 0;
2256		need_gcwakeup = 1;
2257	}
2258	lck_mtx_unlock(uipc_lock);
2259
2260	if (need_gcwakeup != 0)
2261		wakeup(&unp_gcing);
2262}
2263
2264void
2265unp_dispose(struct mbuf *m)
2266{
2267	if (m) {
2268		unp_scan(m, unp_discard);
2269	}
2270}
2271
2272/*
2273 * Returns:	0			Success
2274 */
2275static int
2276unp_listen(struct unpcb *unp, proc_t p)
2277{
2278	kauth_cred_t safecred = kauth_cred_proc_ref(p);
2279	cru2x(safecred, &unp->unp_peercred);
2280	kauth_cred_unref(&safecred);
2281	unp->unp_flags |= UNP_HAVEPCCACHED;
2282	return (0);
2283}
2284
2285static void
2286unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
2287{
2288	struct mbuf *m;
2289	struct fileglob **rp;
2290	struct cmsghdr *cm;
2291	int i;
2292	int qfds;
2293
2294	while (m0) {
2295		for (m = m0; m; m = m->m_next)
2296			if (m->m_type == MT_CONTROL &&
2297			    (size_t)m->m_len >= sizeof (*cm)) {
2298				cm = mtod(m, struct cmsghdr *);
2299				if (cm->cmsg_level != SOL_SOCKET ||
2300				    cm->cmsg_type != SCM_RIGHTS)
2301					continue;
2302				qfds = (cm->cmsg_len - sizeof (*cm)) /
2303				    sizeof (int);
2304				rp = (struct fileglob **)(cm + 1);
2305				for (i = 0; i < qfds; i++)
2306					(*op)(*rp++);
2307				break;		/* XXX, but saves time */
2308			}
2309		m0 = m0->m_act;
2310	}
2311}
2312
2313static void
2314unp_mark(struct fileglob *fg)
2315{
2316	lck_mtx_lock(&fg->fg_lock);
2317
2318	if (fg->fg_flag & FMARK) {
2319		lck_mtx_unlock(&fg->fg_lock);
2320		return;
2321	}
2322	fg->fg_flag |= (FMARK|FDEFER);
2323
2324	lck_mtx_unlock(&fg->fg_lock);
2325
2326	unp_defer++;
2327}
2328
2329static void
2330unp_discard(struct fileglob *fg)
2331{
2332	proc_t p = current_proc();		/* XXX */
2333
2334	(void) OSAddAtomic(1, &unp_disposed);
2335
2336	proc_fdlock(p);
2337	unp_discard_fdlocked(fg, p);
2338	proc_fdunlock(p);
2339}
2340static void
2341unp_discard_fdlocked(struct fileglob *fg, proc_t p)
2342{
2343	fg_removeuipc(fg);
2344
2345	(void) OSAddAtomic(-1, &unp_rights);
2346	(void) closef_locked((struct fileproc *)0, fg, p);
2347}
2348
2349int
2350unp_lock(struct socket *so, int refcount, void * lr)
2351 {
2352        void * lr_saved;
2353        if (lr == 0)
2354                lr_saved = (void *)  __builtin_return_address(0);
2355        else lr_saved = lr;
2356
2357        if (so->so_pcb) {
2358                lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2359        } else  {
2360                panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2361			so, lr_saved, so->so_usecount);
2362        }
2363
2364        if (so->so_usecount < 0)
2365                panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2366                so, so->so_pcb, lr_saved, so->so_usecount);
2367
2368        if (refcount)
2369                so->so_usecount++;
2370
2371        so->lock_lr[so->next_lock_lr] = lr_saved;
2372        so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2373        return (0);
2374}
2375
2376int
2377unp_unlock(struct socket *so, int refcount, void * lr)
2378{
2379        void * lr_saved;
2380        lck_mtx_t * mutex_held = NULL;
2381	struct unpcb *unp = sotounpcb(so);
2382
2383        if (lr == 0)
2384                lr_saved = (void *) __builtin_return_address(0);
2385        else lr_saved = lr;
2386
2387        if (refcount)
2388                so->so_usecount--;
2389
2390        if (so->so_usecount < 0)
2391                panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2392        if (so->so_pcb == NULL) {
2393                panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2394        } else {
2395                mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2396        }
2397        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2398        so->unlock_lr[so->next_unlock_lr] = lr_saved;
2399        so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2400
2401        if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2402		sofreelastref(so, 1);
2403
2404		if (unp->unp_addr)
2405			FREE(unp->unp_addr, M_SONAME);
2406
2407		lck_mtx_unlock(mutex_held);
2408
2409		lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2410		zfree(unp_zone, unp);
2411
2412		unp_gc();
2413	} else {
2414		lck_mtx_unlock(mutex_held);
2415	}
2416
2417        return (0);
2418}
2419
2420lck_mtx_t *
2421unp_getlock(struct socket *so, __unused int locktype)
2422{
2423        struct unpcb *unp = (struct unpcb *)so->so_pcb;
2424
2425
2426        if (so->so_pcb)  {
2427                if (so->so_usecount < 0)
2428                        panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2429                return(&unp->unp_mtx);
2430        } else {
2431                panic("unp_getlock: so=%p NULL so_pcb\n", so);
2432                return (so->so_proto->pr_domain->dom_mtx);
2433        }
2434}
2435
2436