1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
61 */
62/*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections.  This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/domain.h>
73#include <sys/fcntl.h>
74#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
75#include <sys/file_internal.h>
76#include <sys/filedesc.h>
77#include <sys/lock.h>
78#include <sys/mbuf.h>
79#include <sys/namei.h>
80#include <sys/proc_internal.h>
81#include <sys/kauth.h>
82#include <sys/protosw.h>
83#include <sys/socket.h>
84#include <sys/socketvar.h>
85#include <sys/stat.h>
86#include <sys/sysctl.h>
87#include <sys/un.h>
88#include <sys/unpcb.h>
89#include <sys/vnode_internal.h>
90#include <sys/kdebug.h>
91
92#include <kern/zalloc.h>
93#include <kern/locks.h>
94
95#if CONFIG_MACF
96#include <security/mac_framework.h>
97#endif /* CONFIG_MACF */
98
99#include <mach/vm_param.h>
100
/*
 * Shorthand accessors for fields shared through the fileglob; used by
 * the garbage-collection / fd-passing code in this file.
 */
#define	f_msgcount f_fglob->fg_msgcount
#define	f_cred f_fglob->fg_cred
#define	f_ops f_fglob->fg_ops
#define	f_offset f_fglob->fg_offset
#define	f_data f_fglob->fg_data
struct	zone *unp_zone;			/* zalloc zone for struct unpcb */
static	unp_gen_t unp_gencnt;		/* bumped on every pcb insert/remove */
static	u_int unp_count;		/* number of live unpcb's */

/* Lock group/attrs plus the rwlock protecting the global unpcb lists */
static	lck_attr_t		*unp_mtx_attr;
static	lck_grp_t		*unp_mtx_grp;
static	lck_grp_attr_t		*unp_mtx_grp_attr;
static	lck_rw_t		*unp_list_mtx;

/*
 * unp_disconnect_lock + disconnect_in_progress serialize datagram
 * disconnects against unp_detach(); unp_connect_lock closes the race
 * between connecting to and closing a listening socket's vnode.
 */
static  lck_mtx_t		*unp_disconnect_lock;
static	lck_mtx_t		*unp_connect_lock;
static  u_int                   disconnect_in_progress;

extern lck_mtx_t *uipc_lock;
/* Global lists of stream (shead) and datagram (dhead) pcbs */
static	struct unp_head unp_shead, unp_dhead;
121
122/*
123 * mDNSResponder tracing.  When enabled, endpoints connected to
124 * /var/run/mDNSResponder will be traced; during each send on
125 * the traced socket, we log the PID and process name of the
126 * sending process.  We also print out a bit of info related
127 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
128 * of mDNSResponder stays the same.
129 */
#define	MDNSRESPONDER_PATH	"/var/run/mDNSResponder"

static int unpst_tracemdns;	/* tracing knob: net.local.stream.tracemdns */

#define	MDNS_IPC_MSG_HDR_VERSION_1	1

/*
 * Mirror of the leading part of ipc_msg_hdr in mDNSResponder's
 * dnssd_ipc.h; uipc_send() peeks at this header on traced sockets.
 * Layout must stay in sync with mDNSResponder (see comment above).
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;	/* checked against ntohl(MDNS_IPC_MSG_HDR_VERSION_1) */
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;		/* operation code; logged (byte-swapped) on send */
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
147
148/*
149 * Unix communications domain.
150 *
151 * TODO:
152 *	SEQPACKET, RDM
153 *	rethink name space problems
154 *	need a proper out-of-band
155 *	lock pushdown
156 */
/* Null name returned whenever a socket (or its peer) has no bound address */
static struct	sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };
static ino_t	unp_ino;		/* prototype for fake inode numbers */

/* Forward declarations for the PF_LOCAL implementation below */
static int	unp_attach(struct socket *);
static void	unp_detach(struct unpcb *);
static int	unp_bind(struct unpcb *, struct sockaddr *, proc_t);
static int	unp_connect(struct socket *, struct sockaddr *, proc_t);
static void	unp_disconnect(struct unpcb *);
static void	unp_shutdown(struct unpcb *);
static void	unp_drop(struct unpcb *, int);
__private_extern__ void	unp_gc(void);
static void	unp_scan(struct mbuf *, void (*)(struct fileglob *));
static void	unp_mark(struct fileglob *);
static void	unp_discard(struct fileglob *);
static void	unp_discard_fdlocked(struct fileglob *, proc_t);
static int	unp_internalize(struct mbuf *, proc_t);
static int	unp_listen(struct unpcb *, proc_t);
static void	unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
176
/*
 * Acquire conn_so's lock while already holding so's lock, honoring the
 * global lock order (socket with the lower address is locked first,
 * per the 'so < conn_so' comparison).
 *
 * When so orders after conn_so we must drop so's lock to avoid a
 * deadlock.  While it is dropped, UNP_DONTDISCONNECT plus rw_thrcount
 * tell unp_disconnect() not to tear this pcb down underneath us.  On
 * return both sockets are locked; conn_so also holds an extra
 * reference (taken by socket_lock(..., 1)) that the caller releases
 * via socket_unlock(conn_so, 1).
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		/* Already in order; just take the second lock. */
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);	/* unblock threads waiting in unp_disconnect() */
		}
	}
}
198
199static int
200uipc_abort(struct socket *so)
201{
202	struct unpcb *unp = sotounpcb(so);
203
204	if (unp == 0)
205		return (EINVAL);
206	unp_drop(unp, ECONNABORTED);
207	unp_detach(unp);
208	sofree(so);
209	return (0);
210}
211
212static int
213uipc_accept(struct socket *so, struct sockaddr **nam)
214{
215	struct unpcb *unp = sotounpcb(so);
216
217	if (unp == 0)
218		return (EINVAL);
219
220	/*
221	 * Pass back name of connected socket,
222	 * if it was bound and we are still connected
223	 * (our peer may have closed already!).
224	 */
225	if (unp->unp_conn && unp->unp_conn->unp_addr) {
226		*nam = dup_sockaddr((struct sockaddr *)
227		    unp->unp_conn->unp_addr, 1);
228	} else {
229		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
230	}
231	return (0);
232}
233
234/*
235 * Returns:	0			Success
236 *		EISCONN
237 *	unp_attach:
238 */
239static int
240uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
241{
242	struct unpcb *unp = sotounpcb(so);
243
244	if (unp != 0)
245		return (EISCONN);
246	return (unp_attach(so));
247}
248
249static int
250uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
251{
252	struct unpcb *unp = sotounpcb(so);
253
254	if (unp == 0)
255		return (EINVAL);
256
257	return (unp_bind(unp, nam, p));
258}
259
260/*
261 * Returns:	0			Success
262 *		EINVAL
263 *	unp_connect:???			[See elsewhere in this file]
264 */
265static int
266uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
267{
268	struct unpcb *unp = sotounpcb(so);
269
270	if (unp == 0)
271		return (EINVAL);
272	return (unp_connect(so, nam, p));
273}
274
275/*
276 * Returns:	0			Success
277 *		EINVAL
278 *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
279 *	unp_connect2:EINVAL		Invalid argument
280 */
281static int
282uipc_connect2(struct socket *so1, struct socket *so2)
283{
284	struct unpcb *unp = sotounpcb(so1);
285
286	if (unp == 0)
287		return (EINVAL);
288
289	return (unp_connect2(so1, so2));
290}
291
292/* control is EOPNOTSUPP */
293
294static int
295uipc_detach(struct socket *so)
296{
297	struct unpcb *unp = sotounpcb(so);
298
299	if (unp == 0)
300		return (EINVAL);
301
302	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
303	unp_detach(unp);
304	return (0);
305}
306
307static int
308uipc_disconnect(struct socket *so)
309{
310	struct unpcb *unp = sotounpcb(so);
311
312	if (unp == 0)
313		return (EINVAL);
314	unp_disconnect(unp);
315	return (0);
316}
317
318/*
319 * Returns:	0			Success
320 *		EINVAL
321 */
322static int
323uipc_listen(struct socket *so, __unused proc_t p)
324{
325	struct unpcb *unp = sotounpcb(so);
326
327	if (unp == 0 || unp->unp_vnode == 0)
328		return (EINVAL);
329	return (unp_listen(unp, p));
330}
331
332static int
333uipc_peeraddr(struct socket *so, struct sockaddr **nam)
334{
335	struct unpcb *unp = sotounpcb(so);
336
337	if (unp == NULL)
338		return (EINVAL);
339	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
340		*nam = dup_sockaddr((struct sockaddr *)
341		    unp->unp_conn->unp_addr, 1);
342	} else {
343		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
344	}
345	return (0);
346}
347
/*
 * pru_rcvd: called after the receiver has consumed data.  For stream
 * sockets, give space back to the sender by re-raising the peer's send
 * buffer limits (they were lowered in uipc_send() to create
 * backpressure), then wake any blocked writers.  Datagram sockets never
 * get here (the panic documents that invariant).
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0)
		return (EINVAL);
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/

	case SOCK_STREAM:
#define	rcv (&so->so_rcv)
#define	snd (&so2->so_snd)
		if (unp->unp_conn == 0)
			break;

		/* Need both socket locks; so2 comes back locked + referenced */
		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		sowwakeup(so2);	/* writers on the peer may proceed */

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return (0);
}
390
391/* pru_rcvoob is EOPNOTSUPP */
392
393/*
394 * Returns:	0			Success
395 *		EINVAL
396 *		EOPNOTSUPP
397 *		EPIPE
398 *		ENOTCONN
399 *		EISCONN
400 *	unp_internalize:EINVAL
401 *	unp_internalize:EBADF
402 *	unp_connect:EAFNOSUPPORT	Address family not supported
403 *	unp_connect:EINVAL		Invalid argument
404 *	unp_connect:ENOTSOCK		Not a socket
405 *	unp_connect:ECONNREFUSED	Connection refused
406 *	unp_connect:EISCONN		Socket is connected
407 *	unp_connect:EPROTOTYPE		Protocol wrong type for socket
408 *	unp_connect:???
409 *	sbappendaddr:ENOBUFS		[5th argument, contents modified]
410 *	sbappendaddr:???		[whatever a filter author chooses]
411 */
/*
 * pru_send: deliver an mbuf chain (and optional control message) to
 * the peer's receive buffer.
 *
 * Ownership: on success m/control belong to the receiver; on failure
 * they are freed at 'release'.  Control messages carrying descriptors
 * are internalized up front and must be unwound with unp_dispose() if
 * the send subsequently fails.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* No out-of-band data on AF_UNIX */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		/* Convert any SCM_RIGHTS descriptors to in-kernel fileglobs */
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error)
			goto release;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		/*
		 * An explicit destination means a temporary connect (undone
		 * below); otherwise the socket must already be connected.
		 */
		if (nam) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error)
				break;
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		/* so == so2 when a socket sends to itself; don't double-lock */
		so2 = unp->unp_conn->unp_socket;
		if (so != so2)
			unp_get_locks_in_order(so, so2);

		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			sorwakeup(so2);
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2)
			socket_unlock(so2, 1);

		/* Receiver (or filter) now owns m */
		m = NULL;
		if (nam)
			unp_disconnect(unp);
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0)
			panic("uipc_send connected but no connection?");

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		/* Optional mDNSResponder tracing; see comment near the top */
		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
			    hdr.version  == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
				    __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/* Mirror the receiver's usage into the sender's limits
		 * (undone in uipc_rcvd() as data is consumed) */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			sorwakeup(so2);
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
		}
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	/* Failed after internalize: release the fileglob references */
	if (control && error != 0) {
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}
598
/*
 * pru_sense: fill in stat(2)/fstat(2) fields for a PF_LOCAL socket.
 * st_blksize is the send buffer high-water mark, plus the peer's
 * queued receive data for a connected stream; a fake inode number is
 * assigned lazily from the global counter.
 *
 * NOTE(review): so2's receive buffer is read without taking so2's
 * lock, and unp_ino++ is not atomic — presumably acceptable for
 * stat-only data; confirm before relying on exact values.
 */
static int
uipc_sense(struct socket *so, void *ub, int isstat64)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	blksize_t blksize;

	if (unp == 0)
		return (EINVAL);

	blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
		so2 = unp->unp_conn->unp_socket;
		blksize += so2->so_rcv.sb_cc;
	}
	/* Hand out a fake inode number on first stat */
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;

	/* ub points at a stat64 or stat depending on the caller's flavor */
	if (isstat64 != 0) {
		struct stat64  *sb64;

		sb64 = (struct stat64 *)ub;
		sb64->st_blksize = blksize;
		sb64->st_dev = NODEV;
		sb64->st_ino = (ino64_t)unp->unp_ino;
	} else {
		struct stat *sb;

		sb = (struct stat *)ub;
		sb->st_blksize = blksize;
		sb->st_dev = NODEV;
		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
	}

	return (0);
}
635
636/*
637 * Returns:	0		Success
638 *		EINVAL
639 *
640 * Notes:	This is not strictly correct, as unp_shutdown() also calls
641 *		socantrcvmore().  These should maybe both be conditionalized
642 *		on the 'how' argument in soshutdown() as called from the
643 *		shutdown() system call.
644 */
645static int
646uipc_shutdown(struct socket *so)
647{
648	struct unpcb *unp = sotounpcb(so);
649
650	if (unp == 0)
651		return (EINVAL);
652	socantsendmore(so);
653	unp_shutdown(unp);
654	return (0);
655}
656
657/*
658 * Returns:	0			Success
659 *		EINVAL			Invalid argument
660 */
661static int
662uipc_sockaddr(struct socket *so, struct sockaddr **nam)
663{
664	struct unpcb *unp = sotounpcb(so);
665
666	if (unp == NULL)
667		return (EINVAL);
668	if (unp->unp_addr != NULL) {
669		*nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
670	} else {
671		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
672	}
673	return (0);
674}
675
/*
 * Protocol user-request vector for PF_LOCAL sockets.  control, rcvoob
 * and sopoll are unsupported (EOPNOTSUPP stubs); send/receive use the
 * generic sosend/soreceive paths.
 */
struct pr_usrreqs uipc_usrreqs = {
	uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
	uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
	uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
	uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
	sosend, soreceive, pru_sopoll_notsupp
};
683
684int
685uipc_ctloutput(struct socket *so, struct sockopt *sopt)
686{
687	struct unpcb *unp = sotounpcb(so);
688	int error;
689
690	switch (sopt->sopt_dir) {
691	case SOPT_GET:
692		switch (sopt->sopt_name) {
693		case LOCAL_PEERCRED:
694			if (unp->unp_flags & UNP_HAVEPC) {
695				error = sooptcopyout(sopt, &unp->unp_peercred,
696				    sizeof (unp->unp_peercred));
697			} else {
698				if (so->so_type == SOCK_STREAM)
699					error = ENOTCONN;
700				else
701					error = EINVAL;
702			}
703			break;
704		case LOCAL_PEERPID:
705			if (unp->unp_conn != NULL) {
706				if (unp->unp_conn->unp_socket != NULL) {
707					pid_t peerpid = unp->unp_conn->unp_socket->last_pid;
708					error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
709				} else {
710					panic("peer is connected but has no socket?");
711				}
712			} else {
713				error = ENOTCONN;
714			}
715			break;
716		default:
717			error = EOPNOTSUPP;
718			break;
719		}
720		break;
721	case SOPT_SET:
722	default:
723		error = EOPNOTSUPP;
724		break;
725	}
726	return (error);
727}
728
729/*
730 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
731 * for stream sockets, although the total for sender and receiver is
732 * actually only PIPSIZ.
733 * Datagram sockets really use the sendspace as the maximum datagram size,
734 * and don't really want to reserve the sendspace.  Their recvspace should
735 * be large enough for at least one max-size datagram plus address.
736 */
#ifndef PIPSIZ
#define	PIPSIZ	8192	/* default per-direction stream buffer reservation */
#endif
static u_int32_t	unpst_sendspace = PIPSIZ;
static u_int32_t	unpst_recvspace = PIPSIZ;
static u_int32_t	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_int32_t	unpdg_recvspace = 4*1024;

static int	unp_rights;			/* file descriptors in flight */
static int	unp_disposed;			/* discarded file descriptors */

/* Tunables: net.local.stream.*, net.local.dgram.*, net.local.inflight */
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpst_recvspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpst_tracemdns, 0, "");
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
   &unpdg_recvspace, 0, "");
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
762
763/*
764 * Returns:	0			Success
765 *		ENOBUFS
766 *	soreserve:ENOBUFS
767 */
/*
 * Allocate and initialize a unpcb for a newly created PF_LOCAL socket:
 * reserve default buffer space (if the caller hasn't), zalloc the pcb,
 * set up its mutex, and link it onto the appropriate global list under
 * the list rwlock.
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve default buffer space unless already configured */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = (struct unpcb *)zalloc(unp_zone);
	if (unp == NULL)
		return (ENOBUFS);
	bzero(unp, sizeof (*unp));

	lck_mtx_init(&unp->unp_mtx,
		unp_mtx_grp, unp_mtx_attr);

	/* Publish the new pcb on the global stream/dgram list */
	lck_rw_lock_exclusive(unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return (0);
}
830
/*
 * Tear down a unpcb: unlink it from the global list, detach it from
 * its vnode (racing carefully against concurrent connects via
 * unp_connect_lock), disconnect from the peer, and drop every datagram
 * socket still referencing us.  Called with the socket locked; the
 * lock is dropped and retaken at several points, which is why
 * so_locked is tracked explicitly.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	/* Remove from the global pcb list first */
	lck_rw_lock_exclusive(unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: another thread may have detached us meanwhile */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(unp_connect_lock);
		if (tvp != NULL)
			vnode_rele(tvp);		/* drop the usecount */
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait until no disconnect is in flight, then claim the slot */
		lck_mtx_lock(unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
				PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
 			unp2 = unp->unp_refs.lh_first;
 			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
 			unp_drop(unp2, ECONNRESET);
 			socket_unlock(unp2->unp_socket, 1);
		}
	}

	/* Re-acquire our own lock before finishing the teardown */
	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
908
909/*
910 * Returns:	0			Success
911 *		EAFNOSUPPORT
912 *		EINVAL
913 *		EADDRINUSE
914 *		namei:???		[anything namei can return]
915 *		vnode_authorize:???	[anything vnode_authorize can return]
916 *
917 * Notes:	p at this point is the current process, as this function is
918 *		only called by sobind().
919 */
/*
 * Bind a PF_LOCAL socket to a filesystem pathname: create a VSOCK
 * vnode at the requested path (EADDRINUSE if something already exists
 * there), wire the vnode and socket together, and remember the bound
 * address.  The socket lock is dropped around the filesystem work and
 * retaken before touching the pcb again.
 */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return (EAFNOSUPPORT);
	}

	/* Only one bind per socket */
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	/* Path length is derived from the caller-supplied sun_len */
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	socket_unlock(so, 0);

	/* Copy to a bounded, NUL-terminated buffer before namei */
	strlcpy(buf, soun->sun_path, namelen+1);
	/* NOTE(review): namei op is OP_MKFIFO although a VSOCK is created —
	 * presumably reused for "create special node"; confirm intent */
	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return (error);
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		/* A node already exists at that path */
		socket_lock(so, 0);
		return (EADDRINUSE);
	}

	/* New node: type VSOCK, mode masked by the process umask */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));

	/* MACF checks chain into vnode_authorize only when each passes */
#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* MAC_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return (error);
	}
	vnode_ref(vp);	/* gain a longterm reference */
	socket_lock(so, 0);
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);		/* drop the iocount */

	return (0);
}
1013
1014
1015/*
1016 * Returns:	0			Success
1017 *		EAFNOSUPPORT		Address family not supported
1018 *		EINVAL			Invalid argument
1019 *		ENOTSOCK		Not a socket
1020 *		ECONNREFUSED		Connection refused
1021 *		EPROTOTYPE		Protocol wrong type for socket
1022 *		EISCONN			Socket is connected
1023 *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
1024 *	unp_connect2:EINVAL		Invalid argument
1025 *	namei:???			[anything namei can return]
1026 *	vnode_authorize:????		[anything vnode_authorize can return]
1027 *
1028 * Notes:	p at this point is the current process, as this function is
1029 *		only called by sosend(), sendfile(), and soconnectlock().
1030 */
1031static int
1032unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1033{
1034	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1035	struct vnode *vp;
1036	struct socket *so2, *so3, *list_so=NULL;
1037	struct unpcb *unp, *unp2, *unp3;
1038	vfs_context_t ctx = vfs_context_current();
1039	int error, len;
1040	struct nameidata nd;
1041	char buf[SOCK_MAXADDRLEN];
1042
1043	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1044		return (EAFNOSUPPORT);
1045	}
1046
1047	unp = sotounpcb(so);
1048	so2 = so3 = NULL;
1049
1050	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1051	if (len <= 0)
1052		return (EINVAL);
1053
1054	strlcpy(buf, soun->sun_path, len+1);
1055	socket_unlock(so, 0);
1056
1057	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1058	    CAST_USER_ADDR_T(buf), ctx);
1059	error = namei(&nd);
1060	if (error) {
1061		socket_lock(so, 0);
1062		return (error);
1063	}
1064	nameidone(&nd);
1065	vp = nd.ni_vp;
1066	if (vp->v_type != VSOCK) {
1067		error = ENOTSOCK;
1068		socket_lock(so, 0);
1069		goto out;
1070	}
1071
1072#if CONFIG_MACF_SOCKET_SUBSET
1073	error = mac_vnode_check_uipc_connect(ctx, vp);
1074	if (error) {
1075		socket_lock(so, 0);
1076		goto out;
1077	}
1078#endif /* MAC_SOCKET_SUBSET */
1079
1080	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1081	if (error) {
1082		socket_lock(so, 0);
1083		goto out;
1084	}
1085
1086	lck_mtx_lock(unp_connect_lock);
1087
1088	if (vp->v_socket == 0) {
1089		lck_mtx_unlock(unp_connect_lock);
1090		error = ECONNREFUSED;
1091		socket_lock(so, 0);
1092		goto out;
1093	}
1094
1095	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1096	so2 = vp->v_socket;
1097	lck_mtx_unlock(unp_connect_lock);
1098
1099
1100	if (so2->so_pcb == NULL) {
1101		error = ECONNREFUSED;
1102		if (so != so2) {
1103			socket_unlock(so2, 1);
1104			socket_lock(so, 0);
1105		} else {
1106			/* Release the reference held for the listen socket */
1107			so2->so_usecount--;
1108		}
1109		goto out;
1110	}
1111
1112	if (so < so2) {
1113		socket_unlock(so2, 0);
1114		socket_lock(so, 0);
1115		socket_lock(so2, 0);
1116	} else if (so > so2) {
1117		socket_lock(so, 0);
1118	}
1119	/*
1120	 * Check if socket was connected while we were trying to
1121	 * get the socket locks in order.
1122	 * XXX - probably shouldn't return an error for SOCK_DGRAM
1123	 */
1124	if ((so->so_state & SS_ISCONNECTED) != 0) {
1125		error = EISCONN;
1126		goto decref_out;
1127	}
1128
1129	if (so->so_type != so2->so_type) {
1130		error = EPROTOTYPE;
1131		goto decref_out;
1132	}
1133
1134	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1135		/* Release the incoming socket but keep a reference */
1136		socket_unlock(so, 0);
1137
1138		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1139		    (so3 = sonewconn(so2, 0, nam)) == 0) {
1140			error = ECONNREFUSED;
1141			if (so != so2) {
1142				socket_unlock(so2, 1);
1143				socket_lock(so, 0);
1144			} else {
1145				socket_lock(so, 0);
1146				/* Release the reference held for
1147				 * listen socket.
1148				 */
1149				so2->so_usecount--;
1150			}
1151			goto out;
1152		}
1153		unp2 = sotounpcb(so2);
1154		unp3 = sotounpcb(so3);
1155		if (unp2->unp_addr)
1156			unp3->unp_addr = (struct sockaddr_un *)
1157			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1158
1159		/*
1160		 * unp_peercred management:
1161		 *
1162		 * The connecter's (client's) credentials are copied
1163		 * from its process structure at the time of connect()
1164		 * (which is now).
1165		 */
1166		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1167		unp3->unp_flags |= UNP_HAVEPC;
1168		/*
1169		 * The receiver's (server's) credentials are copied
1170		 * from the unp_peercred member of socket on which the
1171		 * former called listen(); unp_listen() cached that
1172		 * process's credentials at that time so we can use
1173		 * them now.
1174		 */
1175		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1176		    ("unp_connect: listener without cached peercred"));
1177
1178		/* Here we need to have both so and so2 locks and so2
1179		 * is already locked. Lock ordering is required.
1180		 */
1181		if (so < so2) {
1182			socket_unlock(so2, 0);
1183			socket_lock(so, 0);
1184			socket_lock(so2, 0);
1185		} else {
1186			socket_lock(so, 0);
1187		}
1188
1189		/* Check again if the socket state changed when its lock was released */
1190		if ((so->so_state & SS_ISCONNECTED) != 0) {
1191			error = EISCONN;
1192			socket_unlock(so2, 1);
1193			socket_lock(so3, 0);
1194			sofreelastref(so3, 1);
1195                	goto out;
1196		}
1197		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1198		    sizeof (unp->unp_peercred));
1199		unp->unp_flags |= UNP_HAVEPC;
1200
1201#if CONFIG_MACF_SOCKET
1202		/* XXXMAC: recursive lock: SOCK_LOCK(so); */
1203		mac_socketpeer_label_associate_socket(so, so3);
1204		mac_socketpeer_label_associate_socket(so3, so);
1205		/* XXXMAC: SOCK_UNLOCK(so); */
1206#endif /* MAC_SOCKET */
1207
1208		/* Hold the reference on listening socket until the end */
1209		socket_unlock(so2, 0);
1210		list_so = so2;
1211
1212		/* Lock ordering doesn't matter because so3 was just created */
1213		socket_lock(so3, 1);
1214		so2 = so3;
1215
1216		/*
1217		 * Enable tracing for mDNSResponder endpoints.  (The use
1218		 * of sizeof instead of strlen below takes the null
1219		 * terminating character into account.)
1220		 */
1221		if (unpst_tracemdns &&
1222		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1223		    sizeof (MDNSRESPONDER_PATH))) {
1224			unp->unp_flags |= UNP_TRACE_MDNS;
1225			unp2->unp_flags |= UNP_TRACE_MDNS;
1226		}
1227	}
1228
1229	error = unp_connect2(so, so2);
1230
1231decref_out:
1232	if (so2 != NULL) {
1233		if (so != so2) {
1234			socket_unlock(so2, 1);
1235		} else {
1236			/* Release the extra reference held for the listen socket.
1237			 * This is possible only for SOCK_DGRAM sockets. We refuse
1238			 * connecting to the same socket for SOCK_STREAM sockets.
1239			 */
1240			so2->so_usecount--;
1241		}
1242	}
1243
1244	if (list_so != NULL) {
1245		socket_lock(list_so, 0);
1246		socket_unlock(list_so, 1);
1247	}
1248
1249out:
1250	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1251	vnode_put(vp);
1252	return (error);
1253}
1254
1255/*
1256 * Returns:	0			Success
1257 *		EPROTOTYPE		Protocol wrong type for socket
1258 *		EINVAL			Invalid argument
1259 */
/*
 * Link the pcbs of `so' and `so2' together.  Caller must hold both
 * socket locks on entry (asserted below); both are held again on
 * return, though they may be dropped and re-taken in lock order
 * around the soisconnected() calls.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	/* Connecting sockets must be of the same type. */
	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);

	unp2 = sotounpcb(so2);

	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Verify both sockets are still opened */
	if (unp == 0 || unp2 == 0)
		return (EINVAL);

	/* Link the connecting side and account for it on the peer. */
	unp->unp_conn = unp2;
	so2->so_usecount++;

	switch (so->so_type) {

	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

		if (so != so2) {
			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
 			/* Keep an extra reference on so2 that will be dropped
			 * soon after getting the locks in order
			 */
			socket_unlock(so2, 0);
			soisconnected(so);
			unp_get_locks_in_order(so, so2);
			so2->so_usecount--;
		} else {
			/* Self-connect: both sockets share one lock. */
			soisconnected(so);
		}

		break;

	case SOCK_STREAM:
		/* This takes care of socketpair */
		if (!(unp->unp_flags & UNP_HAVEPC) &&
		    !(unp2->unp_flags & UNP_HAVEPC)) {
			/* Neither end has peer credentials yet; stamp both
			 * with the current thread's credentials. */
			cru2x(kauth_cred_get(), &unp->unp_peercred);
			unp->unp_flags |= UNP_HAVEPC;

			cru2x(kauth_cred_get(), &unp2->unp_peercred);
			unp2->unp_flags |= UNP_HAVEPC;
		}
		/* Stream connections are symmetric: link the peer back and
		 * take a use count on this side as well. */
		unp2->unp_conn = unp;
		so->so_usecount++;

		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
		socket_unlock(so, 0);
		soisconnected(so2);

		/* Keep an extra reference on so2, that will be dropped soon after
		 * getting the locks in order again.
		 */
		socket_unlock(so2, 0);

		socket_lock(so, 0);
		soisconnected(so);

		unp_get_locks_in_order(so, so2);
		/* Decrement the extra reference left before */
		so2->so_usecount--;
		break;

	default:
		panic("unknown socket type %d in unp_connect2", so->so_type);
	}
	/* Both locks must be held again on return. */
	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return (0);
}
1338
/*
 * Sever the connection anchored at `unp'.  Called with the socket lock
 * held; serialized against other disconnects through the global
 * disconnect_in_progress flag so only one disconnect manipulates a
 * socket pair at a time.  Returns with the socket lock held.
 */
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	/* Nothing to do if not connected. */
	if (unp->unp_conn == NULL) {
		return;
	}
	/* Become the single in-progress disconnect; drop the socket lock
	 * while sleeping so the current holder can finish. */
	lck_mtx_lock(unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
			PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	/* The peer may have gone away while we slept above. */
	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/* Take both socket locks in address order to avoid deadlock; a
	 * self-connected socket needs only its own lock. */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP releases the mutex while we sleep. */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
			PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	/* Unlink our side and drop the use count taken at connect time. */
	unp->unp_conn = NULL;
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS)
		unp->unp_flags &= ~UNP_TRACE_MDNS;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		/* Datagram links are one-way: leave the peer's ref list. */
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2)
			socket_unlock(so2, 1);
		break;

	case SOCK_STREAM:
		/* Stream links are symmetric: unlink the peer too. */
		unp2->unp_conn = NULL;
		so->so_usecount--;

		/* Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
		/* NOTE(review): this repeats the unp-> statement from two
		 * lines above; unp2-> looks like the intended target.  so2
		 * does receive disconnected state later through
		 * soisdisconnected(so2) below — confirm against upstream
		 * before changing. */
		unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS)
			unp2->unp_flags &= ~UNP_TRACE_MDNS;

		strdisconn = 1;
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Let the next waiting disconnect proceed. */
	lck_mtx_lock(unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(unp_disconnect_lock);

	if (strdisconn) {
		/* Deferred wakeups: call soisdisconnected() with only one
		 * socket lock held at a time to avoid deadlock. */
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so,0);
		soisdisconnected(so);
	}
	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1473
1474/*
1475 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1476 * The unpcb_compat data structure is passed to user space and must not change.
1477 */
static void
unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
{
	/* Every kernel pointer exported here is scrambled through
	 * VM_KERNEL_ADDRPERM so user space never sees real kernel
	 * addresses.  On LP64 the exported fields are 32-bit wide, so
	 * the permuted values are truncated to u_int32_t. */
#if defined(__LP64__)
	cp->unp_link.le_next = (u_int32_t)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
	cp->unp_link.le_prev = (u_int32_t)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
#else
	cp->unp_link.le_next = (struct unpcb_compat *)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
	cp->unp_link.le_prev = (struct unpcb_compat **)
	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
#endif
	cp->unp_socket = (_UNPCB_PTR(struct socket *))
	    VM_KERNEL_ADDRPERM(up->unp_socket);
	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
	    VM_KERNEL_ADDRPERM(up->unp_vnode);
	cp->unp_ino = up->unp_ino;
	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
	    VM_KERNEL_ADDRPERM(up->unp_conn);
	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
#if defined(__LP64__)
	cp->unp_reflink.le_next =
	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
	cp->unp_reflink.le_prev =
	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
#else
	cp->unp_reflink.le_next =
	    (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
	cp->unp_reflink.le_prev =
	    (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
#endif
	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
	    VM_KERNEL_ADDRPERM(up->unp_addr);
	/* Scalar statistics are copied through unmodified. */
	cp->unp_cc = up->unp_cc;
	cp->unp_mbcnt = up->unp_mbcnt;
	cp->unp_gencnt = up->unp_gencnt;
}
1517
1518static int
1519unp_pcblist SYSCTL_HANDLER_ARGS
1520{
1521#pragma unused(oidp,arg2)
1522	int error, i, n;
1523	struct unpcb *unp, **unp_list;
1524	unp_gen_t gencnt;
1525	struct xunpgen xug;
1526	struct unp_head *head;
1527
1528	lck_rw_lock_shared(unp_list_mtx);
1529	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1530
1531	/*
1532	 * The process of preparing the PCB list is too time-consuming and
1533	 * resource-intensive to repeat twice on every request.
1534	 */
1535	if (req->oldptr == USER_ADDR_NULL) {
1536		n = unp_count;
1537		req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1538		    sizeof (struct xunpcb);
1539		lck_rw_done(unp_list_mtx);
1540		return (0);
1541	}
1542
1543	if (req->newptr != USER_ADDR_NULL) {
1544		lck_rw_done(unp_list_mtx);
1545		return (EPERM);
1546	}
1547
1548	/*
1549	 * OK, now we're committed to doing something.
1550	 */
1551	gencnt = unp_gencnt;
1552	n = unp_count;
1553
1554	bzero(&xug, sizeof (xug));
1555	xug.xug_len = sizeof (xug);
1556	xug.xug_count = n;
1557	xug.xug_gen = gencnt;
1558	xug.xug_sogen = so_gencnt;
1559	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1560	if (error) {
1561		lck_rw_done(unp_list_mtx);
1562		return (error);
1563	}
1564
1565	/*
1566	 * We are done if there is no pcb
1567	 */
1568	if (n == 0)  {
1569		lck_rw_done(unp_list_mtx);
1570		return (0);
1571	}
1572
1573	MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1574	    M_TEMP, M_WAITOK);
1575	if (unp_list == 0) {
1576		lck_rw_done(unp_list_mtx);
1577		return (ENOMEM);
1578	}
1579
1580	for (unp = head->lh_first, i = 0; unp && i < n;
1581	    unp = unp->unp_link.le_next) {
1582		if (unp->unp_gencnt <= gencnt)
1583			unp_list[i++] = unp;
1584	}
1585	n = i;			/* in case we lost some during malloc */
1586
1587	error = 0;
1588	for (i = 0; i < n; i++) {
1589		unp = unp_list[i];
1590		if (unp->unp_gencnt <= gencnt) {
1591			struct xunpcb xu;
1592
1593			bzero(&xu, sizeof (xu));
1594			xu.xu_len = sizeof (xu);
1595			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1596			    VM_KERNEL_ADDRPERM(unp);
1597			/*
1598			 * XXX - need more locking here to protect against
1599			 * connect/disconnect races for SMP.
1600			 */
1601			if (unp->unp_addr)
1602				bcopy(unp->unp_addr, &xu.xu_addr,
1603				    unp->unp_addr->sun_len);
1604			if (unp->unp_conn && unp->unp_conn->unp_addr)
1605				bcopy(unp->unp_conn->unp_addr,
1606				    &xu.xu_caddr,
1607				    unp->unp_conn->unp_addr->sun_len);
1608			unpcb_to_compat(unp, &xu.xu_unp);
1609			sotoxsocket(unp->unp_socket, &xu.xu_socket);
1610			error = SYSCTL_OUT(req, &xu, sizeof (xu));
1611		}
1612	}
1613	if (!error) {
1614		/*
1615		 * Give the user an updated idea of our state.
1616		 * If the generation differs from what we told
1617		 * them before, they know that something happened
1618		 * while we were processing this request, and it
1619		 * might be necessary to retry.
1620		 */
1621		bzero(&xug, sizeof (xug));
1622		xug.xug_len = sizeof (xug);
1623		xug.xug_gen = unp_gencnt;
1624		xug.xug_sogen = so_gencnt;
1625		xug.xug_count = unp_count;
1626		error = SYSCTL_OUT(req, &xug, sizeof (xug));
1627	}
1628	FREE(unp_list, M_TEMP);
1629	lck_rw_done(unp_list_mtx);
1630	return (error);
1631}
1632
/*
 * sysctl nodes exporting the datagram and stream pcb lists in the
 * legacy xunpcb layout; arg1 carries the socket type that
 * unp_pcblist() uses to select the list.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
            (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
            (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local stream sockets");
1639
1640#if !CONFIG_EMBEDDED
1641
/*
 * 64-bit variant of unp_pcblist(): exports the selected pcb list in
 * the xunpcb64 layout.  Same overall structure — leading xunpgen
 * record, one xunpcb64 per pcb, trailing xunpgen record.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report an estimate with 1/8th slack. */
		n = unp_count;
		req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
		    (sizeof (struct xunpcb64));
		lck_rw_done(unp_list_mtx);
		return (0);
	}

	/* This node is read-only. */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(unp_list_mtx);
		return (EPERM);
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading generation record. */
	bzero(&xug, sizeof (xug));
	xug.xug_len = sizeof (xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof (xug));
	if (error) {
		lck_rw_done(unp_list_mtx);
		return (error);
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0)  {
		lck_rw_done(unp_list_mtx);
		return (0);
	}

	MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
	    M_TEMP, M_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(unp_list_mtx);
		return (ENOMEM);
	}

	/* Collect pcbs no newer than the snapshot generation. */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt)
			unp_list[i++] = unp;
	}
	n = i;			/* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t		xu_len = sizeof(struct xunpcb64);

			/* All exported pointers are scrambled through
			 * VM_KERNEL_ADDRPERM before leaving the kernel. */
			bzero(&xu, xu_len);
			xu.xu_len = xu_len;
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket)
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
                        if (unp->unp_addr)
                                bcopy(unp->unp_addr, &xu.xunp_addr,
                                    unp->unp_addr->sun_len);
                        if (unp->unp_conn && unp->unp_conn->unp_addr)
                                bcopy(unp->unp_conn->unp_addr,
                                    &xu.xunp_caddr,
                                    unp->unp_conn->unp_addr->sun_len);

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof (xug));
		xug.xug_len = sizeof (xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof (xug));
	}
	FREE(unp_list, M_TEMP);
	lck_rw_done(unp_list_mtx);
	return (error);
}
1779
/*
 * 64-bit counterparts of the pcblist nodes above, exporting the same
 * lists in the xunpcb64 layout via unp_pcblist64().
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
	    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
	    "List of active local stream sockets 64 bit");
1786
1787#endif /* !CONFIG_EMBEDDED */
1788
1789static void
1790unp_shutdown(struct unpcb *unp)
1791{
1792	struct socket *so = unp->unp_socket;
1793	struct socket *so2;
1794	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1795		so2 = unp->unp_conn->unp_socket;
1796		unp_get_locks_in_order(so, so2);
1797		socantrcvmore(so2);
1798		socket_unlock(so2, 1);
1799	}
1800}
1801
/*
 * Record a pending error on the socket and sever its connection.
 */
static void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	/* NOTE(review): the parameter name "errno" shadows the well-known
	 * userland macro name; harmless in the kernel but a rename would
	 * avoid confusion. */
	so->so_error = errno;
	unp_disconnect(unp);
}
1810
1811/*
1812 * Returns:	0			Success
1813 *		EMSGSIZE		The new fd's will not fit
1814 *		ENOBUFS			Cannot alloc struct fileproc
1815 */
1816int
1817unp_externalize(struct mbuf *rights)
1818{
1819	proc_t p = current_proc();		/* XXX */
1820	int i;
1821	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1822	struct fileglob **rp = (struct fileglob **)(cm + 1);
1823	int *fds = (int *)(cm + 1);
1824	struct fileproc *fp;
1825	struct fileglob *fg;
1826	int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1827	int f;
1828
1829	proc_fdlock(p);
1830
1831	/*
1832	 * if the new FD's will not fit, then we free them all
1833	 */
1834	if (!fdavail(p, newfds)) {
1835		for (i = 0; i < newfds; i++) {
1836			fg = *rp;
1837			unp_discard_fdlocked(fg, p);
1838			*rp++ = NULL;
1839		}
1840		proc_fdunlock(p);
1841
1842		return (EMSGSIZE);
1843	}
1844	/*
1845	 * now change each pointer to an fd in the global table to
1846	 * an integer that is the index to the local fd table entry
1847	 * that we set up to point to the global one we are transferring.
1848	 * XXX (1) this assumes a pointer and int are the same size,
1849	 * XXX     or the mbuf can hold the expansion
1850	 * XXX (2) allocation failures should be non-fatal
1851	 */
1852	for (i = 0; i < newfds; i++) {
1853#if CONFIG_MACF_SOCKET
1854		/*
1855		 * If receive access is denied, don't pass along
1856		 * and error message, just discard the descriptor.
1857		 */
1858		if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1859			fg = *rp;
1860			*rp++ = 0;
1861			unp_discard_fdlocked(fg, p);
1862			continue;
1863		}
1864#endif
1865		if (fdalloc(p, 0, &f))
1866			panic("unp_externalize:fdalloc");
1867		fg = rp[i];
1868		MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc),
1869		    M_FILEPROC, M_WAITOK);
1870		if (fp == NULL)
1871			panic("unp_externalize: MALLOC_ZONE");
1872		bzero(fp, sizeof (struct fileproc));
1873		fp->f_iocount = 0;
1874		fp->f_fglob = fg;
1875		fg_removeuipc(fg);
1876		procfdtbl_releasefd(p, f, fp);
1877		(void) OSAddAtomic(-1, &unp_rights);
1878		fds[i] = f;
1879	}
1880	proc_fdunlock(p);
1881
1882	return (0);
1883}
1884
/*
 * One-time initialization for the unix-domain protocol family: create
 * the unpcb zone, the datagram/stream pcb list heads, and the global
 * locks used by this file.
 */
void
unp_init(void)
{
	unp_zone = zinit(sizeof (struct unpcb),
	    (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");

	if (unp_zone == 0)
		panic("unp_init");
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);

	/*
	 * allocate lock group attribute and group for unp pcb mutexes
	 */
	unp_mtx_grp_attr = lck_grp_attr_alloc_init();

	unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);

	unp_mtx_attr = lck_attr_alloc_init();

	/* Allocation failures below are silently ignored; the system is
	 * effectively unusable at that point anyway. */
	if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
	    unp_mtx_attr)) == NULL)
		return;	/* pretty much dead if this fails... */

	if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
		unp_mtx_attr)) == NULL)
		return;

	if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
		unp_mtx_attr)) == NULL)
		return;
}
1917
1918#ifndef MIN
1919#define	MIN(a, b) (((a) < (b)) ? (a) : (b))
1920#endif
1921
1922/*
1923 * Returns:	0			Success
1924 *		EINVAL
1925 *	fdgetf_noref:EBADF
1926 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return (EINVAL);
	}
	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/* First pass: validate every descriptor before converting any of
	 * them, so a bad fd leaves the message untouched.  The fd table
	 * lock held across both passes keeps the entries stable. */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
			proc_fdunlock(p);
			return (error);
		} else if (!filetype_issendable(tmpfp->f_fglob->fg_type)) {
			proc_fdunlock(p);
			return (EINVAL);
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		/* Lookup cannot fail here: validated above under the same
		 * fd table lock. */
		(void) fdgetf_noref(p, fds[i], &fp);
		fg_insertuipc(fp->f_fglob);
		rp[i] = fp->f_fglob;
		(void) OSAddAtomic(1, &unp_rights);
	}
	proc_fdunlock(p);

	return (0);
}
1972
1973static int	unp_defer, unp_gcing, unp_gcwait;
1974static thread_t unp_gcthread = NULL;
1975
1976/* always called under uipc_lock */
void
unp_gc_wait(void)
{
	/* The gc thread itself must never wait for gc to finish. */
	if (unp_gcthread == current_thread())
		return;

	while (unp_gcing != 0) {
		unp_gcwait = 1;
		/* Woken by unp_gc() when the collection completes. */
		msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
	}
}
1988
1989
/*
 * Garbage-collect file descriptors trapped in unreachable cycles of
 * in-transit SCM_RIGHTS messages.  Classic mark-and-sweep: first mark
 * every fileglob reachable from user space (directly or via rights in
 * a reachable socket's receive buffer), then sweep the unmarked ones.
 * The unp_gcing/unp_gcthread state is guarded by uipc_lock.
 */
__private_extern__ void
unp_gc(void)
{
	struct fileglob *fg, *nextfg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;
	int need_gcwakeup = 0;

	/* Only one collection at a time. */
	lck_mtx_lock(uipc_lock);
	if (unp_gcing) {
		lck_mtx_unlock(uipc_lock);
		return;
	}
	unp_gcing = 1;
	unp_defer = 0;
	unp_gcthread = current_thread();
	lck_mtx_unlock(uipc_lock);
	/*
	 * before going through all this, set all FDs to
	 * be NOT deferred and NOT externally accessible
	 */
	for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_flag &= ~(FMARK|FDEFER);
		lck_mtx_unlock(&fg->fg_lock);
	}
	/* Mark phase: iterate until no deferred entries remain. */
	do {
		for (fg = fmsghead.lh_first; fg != 0;
		    fg = fg->f_msglist.le_next) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (fg->fg_count == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer'  in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				fg->fg_flag &= ~FDEFER;
				unp_defer--;
			} else {
				/*
				 * if it's not deferred, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (fg->fg_count == fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				fg->fg_flag |= FMARK;
			}
			/*
			 * either it was deferred, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (fg->fg_type != DTYPE_SOCKET ||
			    (so = (struct socket *)fg->fg_data) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != &localdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
#ifdef notdef
			/*
			 * if this code is enabled need to run
			 * under network funnel
			 */
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was deferred). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);

			/* unp_mark sets FDEFER/bumps unp_defer for each
			 * fileglob found in the receive buffer, forcing
			 * another pass of the outer do-while loop. */
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);
	/*
	 * Sweep phase.
	 *
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
	    M_FILEGLOB, M_WAITOK);
	if (extra_ref == NULL)
		goto bail;
	for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
	    fg = nextfg) {
		lck_mtx_lock(&fg->fg_lock);

		nextfg = fg->f_msglist.le_next;
		/*
		 * If it's not open, skip it
		 */
		if (fg->fg_count == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
			fg->fg_count++;
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
			so = (struct socket *)(tfg->fg_data);

			socket_lock(so, 0);

			sorflush(so);

			socket_unlock(so, 0);
		}
	}
	/* Drop the extra references taken above; for unreachable cycles
	 * this is the last close. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);

	FREE((caddr_t)extra_ref, M_FILEGLOB);
bail:
        lck_mtx_lock(uipc_lock);
	unp_gcing = 0;
	unp_gcthread = NULL;

	/* Wake any threads parked in unp_gc_wait(); do the wakeup after
	 * dropping uipc_lock. */
	if (unp_gcwait != 0) {
		unp_gcwait = 0;
		need_gcwakeup = 1;
	}
	lck_mtx_unlock(uipc_lock);

	if (need_gcwakeup != 0)
		wakeup(&unp_gcing);
}
2215
2216void
2217unp_dispose(struct mbuf *m)
2218{
2219	if (m) {
2220		unp_scan(m, unp_discard);
2221	}
2222}
2223
2224/*
2225 * Returns:	0			Success
2226 */
2227static int
2228unp_listen(struct unpcb *unp, proc_t p)
2229{
2230	kauth_cred_t safecred = kauth_cred_proc_ref(p);
2231	cru2x(safecred, &unp->unp_peercred);
2232	kauth_cred_unref(&safecred);
2233	unp->unp_flags |= UNP_HAVEPCCACHED;
2234	return (0);
2235}
2236
2237/* should run under kernel funnel */
2238static void
2239unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
2240{
2241	struct mbuf *m;
2242	struct fileglob **rp;
2243	struct cmsghdr *cm;
2244	int i;
2245	int qfds;
2246
2247	while (m0) {
2248		for (m = m0; m; m = m->m_next)
2249			if (m->m_type == MT_CONTROL &&
2250			    (size_t)m->m_len >= sizeof (*cm)) {
2251				cm = mtod(m, struct cmsghdr *);
2252				if (cm->cmsg_level != SOL_SOCKET ||
2253				    cm->cmsg_type != SCM_RIGHTS)
2254					continue;
2255				qfds = (cm->cmsg_len - sizeof (*cm)) /
2256				    sizeof (int);
2257				rp = (struct fileglob **)(cm + 1);
2258				for (i = 0; i < qfds; i++)
2259					(*op)(*rp++);
2260				break;		/* XXX, but saves time */
2261			}
2262		m0 = m0->m_act;
2263	}
2264}
2265
2266/* should run under kernel funnel */
2267static void
2268unp_mark(struct fileglob *fg)
2269{
2270	lck_mtx_lock(&fg->fg_lock);
2271
2272	if (fg->fg_flag & FMARK) {
2273		lck_mtx_unlock(&fg->fg_lock);
2274		return;
2275	}
2276	fg->fg_flag |= (FMARK|FDEFER);
2277
2278	lck_mtx_unlock(&fg->fg_lock);
2279
2280	unp_defer++;
2281}
2282
2283/* should run under kernel funnel */
2284static void
2285unp_discard(struct fileglob *fg)
2286{
2287	proc_t p = current_proc();		/* XXX */
2288
2289	(void) OSAddAtomic(1, &unp_disposed);
2290
2291	proc_fdlock(p);
2292	unp_discard_fdlocked(fg, p);
2293	proc_fdunlock(p);
2294}
/*
 * Release one in-flight reference on fg: remove it from the uipc
 * in-transit bookkeeping, decrement the global count of rights in
 * flight, and drop the reference via closef_locked.
 *
 * Caller must hold the fd lock of p (see unp_discard).
 */
static void
unp_discard_fdlocked(struct fileglob *fg, proc_t p)
{
	/* Take fg off the in-transit message accounting */
	fg_removeuipc(fg);

	/* One fewer descriptor carried in messages system-wide */
	(void) OSAddAtomic(-1, &unp_rights);
	(void) closef_locked((struct fileproc *)0, fg, p);
}
2303
2304int
2305unp_lock(struct socket *so, int refcount, void * lr)
2306 {
2307        void * lr_saved;
2308        if (lr == 0)
2309                lr_saved = (void *)  __builtin_return_address(0);
2310        else lr_saved = lr;
2311
2312        if (so->so_pcb) {
2313                lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2314        } else  {
2315                panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2316			so, lr_saved, so->so_usecount);
2317        }
2318
2319        if (so->so_usecount < 0)
2320                panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2321                so, so->so_pcb, lr_saved, so->so_usecount);
2322
2323        if (refcount)
2324                so->so_usecount++;
2325
2326        so->lock_lr[so->next_lock_lr] = lr_saved;
2327        so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2328        return (0);
2329}
2330
/*
 * pr_unlock routine for PF_LOCAL sockets: drop a use-count reference
 * if requested and release the per-unpcb mutex.  When the last
 * reference goes away on a socket whose pcb is being cleared, this is
 * also where the socket and its unpcb are finally torn down.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
        void * lr_saved;
        lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp = sotounpcb(so);

        if (lr == 0)
                lr_saved = (void *) __builtin_return_address(0);
        else lr_saved = lr;

        if (refcount)
                so->so_usecount--;

        if (so->so_usecount < 0)
                panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
        if (so->so_pcb == NULL) {
                panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
        } else {
                mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
        }
	/* We must already own the pcb mutex to be unlocking it */
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Record who unlocked us, for debugging lock imbalances */
        so->unlock_lr[so->next_unlock_lr] = lr_saved;
        so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

        if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		/*
		 * Last reference on a detaching socket: free the socket,
		 * its cached address, then the mutex and pcb themselves.
		 * Order matters — the mutex must be released before it is
		 * destroyed, and unp must not be touched after zfree.
		 */
		sofreelastref(so, 1);

		if (unp->unp_addr)
			FREE(unp->unp_addr, M_SONAME);

		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
		zfree(unp_zone, unp);

		/* Sweep for descriptor cycles now that a socket died */
		unp_gc();
	} else {
		lck_mtx_unlock(mutex_held);
	}

        return (0);
}
2374
2375lck_mtx_t *
2376unp_getlock(struct socket *so, __unused int locktype)
2377{
2378        struct unpcb *unp = (struct unpcb *)so->so_pcb;
2379
2380
2381        if (so->so_pcb)  {
2382                if (so->so_usecount < 0)
2383                        panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2384                return(&unp->unp_mtx);
2385        } else {
2386                panic("unp_getlock: so=%p NULL so_pcb\n", so);
2387                return (so->so_proto->pr_domain->dom_mtx);
2388        }
2389}
2390
2391