1/*-
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2009 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
32 */
33
34/*
35 * UNIX Domain (Local) Sockets
36 *
37 * This is an implementation of UNIX (local) domain sockets.  Each socket has
38 * an associated struct unpcb (UNIX protocol control block).  Stream sockets
39 * may be connected to 0 or 1 other socket.  Datagram sockets may be
40 * connected to 0, 1, or many other sockets.  Sockets may be created and
41 * connected in pairs (socketpair(2)), or bound/connected to using the file
42 * system name space.  For most purposes, only the receive socket buffer is
43 * used, as sending on one socket delivers directly to the receive socket
44 * buffer of a second socket.
45 *
46 * The implementation is substantially complicated by the fact that
47 * "ancillary data", such as file descriptors or credentials, may be passed
48 * across UNIX domain sockets.  The potential for passing UNIX domain sockets
49 * over other UNIX domain sockets requires the implementation of a simple
50 * garbage collector to find and tear down cycles of disconnected sockets.
51 *
52 * TODO:
53 *	RDM
54 *	rethink name space problems
55 *	need a proper out-of-band
56 */
57
58#include <sys/cdefs.h>
59__FBSDID("$FreeBSD: stable/11/sys/kern/uipc_usrreq.c 350223 2019-07-22 19:27:23Z kib $");
60
61#include "opt_ddb.h"
62
63#include <sys/param.h>
64#include <sys/capsicum.h>
65#include <sys/domain.h>
66#include <sys/fcntl.h>
67#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
68#include <sys/eventhandler.h>
69#include <sys/file.h>
70#include <sys/filedesc.h>
71#include <sys/kernel.h>
72#include <sys/lock.h>
73#include <sys/mbuf.h>
74#include <sys/mount.h>
75#include <sys/mutex.h>
76#include <sys/namei.h>
77#include <sys/proc.h>
78#include <sys/protosw.h>
79#include <sys/queue.h>
80#include <sys/resourcevar.h>
81#include <sys/rwlock.h>
82#include <sys/socket.h>
83#include <sys/socketvar.h>
84#include <sys/signalvar.h>
85#include <sys/stat.h>
86#include <sys/sx.h>
87#include <sys/sysctl.h>
88#include <sys/systm.h>
89#include <sys/taskqueue.h>
90#include <sys/un.h>
91#include <sys/unpcb.h>
92#include <sys/vnode.h>
93
94#include <net/vnet.h>
95
96#ifdef DDB
97#include <ddb/ddb.h>
98#endif
99
100#include <security/mac/mac_framework.h>
101
102#include <vm/uma.h>
103
104MALLOC_DECLARE(M_FILECAPS);
105
106/*
107 * Locking key:
108 * (l)	Locked using list lock
109 * (g)	Locked using linkage lock
110 */
111
112static uma_zone_t	unp_zone;
113static unp_gen_t	unp_gencnt;	/* (l) */
114static u_int		unp_count;	/* (l) Count of local sockets. */
115static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
116static int		unp_rights;	/* (g) File descriptors in flight. */
117static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
118static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
119static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
120
121struct unp_defer {
122	SLIST_ENTRY(unp_defer) ud_link;
123	struct file *ud_fp;
124};
125static SLIST_HEAD(, unp_defer) unp_defers;
126static int unp_defers_count;
127
128static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
129
130/*
131 * Garbage collection of cyclic file descriptor/socket references occurs
132 * asynchronously in a taskqueue context in order to avoid recursion and
133 * reentrance in the UNIX domain socket, file descriptor, and socket layer
134 * code.  See unp_gc() for a full description.
135 */
136static struct timeout_task unp_gc_task;
137
138/*
139 * The close of unix domain sockets attached as SCM_RIGHTS is
140 * postponed to the taskqueue, to avoid arbitrary recursion depth.
141 * The attached sockets might have another sockets attached.
142 */
143static struct task	unp_defer_task;
144
145/*
146 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
147 * stream sockets, although the total for sender and receiver is actually
148 * only PIPSIZ.
149 *
150 * Datagram sockets really use the sendspace as the maximum datagram size,
151 * and don't really want to reserve the sendspace.  Their recvspace should be
152 * large enough for at least one max-size datagram plus address.
153 */
154#ifndef PIPSIZ
155#define	PIPSIZ	8192
156#endif
157static u_long	unpst_sendspace = PIPSIZ;
158static u_long	unpst_recvspace = PIPSIZ;
159static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
160static u_long	unpdg_recvspace = 4*1024;
161static u_long	unpsp_sendspace = PIPSIZ;	/* really max datagram size */
162static u_long	unpsp_recvspace = PIPSIZ;
163
164static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
165static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0,
166    "SOCK_STREAM");
167static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");
168static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0,
169    "SOCK_SEQPACKET");
170
171SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
172	   &unpst_sendspace, 0, "Default stream send space.");
173SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
174	   &unpst_recvspace, 0, "Default stream receive space.");
175SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
176	   &unpdg_sendspace, 0, "Default datagram send space.");
177SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
178	   &unpdg_recvspace, 0, "Default datagram receive space.");
179SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
180	   &unpsp_sendspace, 0, "Default seqpacket send space.");
181SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
182	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
183SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
184    "File descriptors in flight.");
185SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
186    &unp_defers_count, 0,
187    "File descriptors deferred to taskqueue for close.");
188
189/*
190 * Locking and synchronization:
191 *
192 * Three types of locks exist in the local domain socket implementation: a
193 * global list mutex, a global linkage rwlock, and per-unpcb mutexes.  Of the
194 * global locks, the list lock protects the socket count, global generation
195 * number, and stream/datagram global lists.  The linkage lock protects the
196 * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
197 * held exclusively over the acquisition of multiple unpcb locks to prevent
198 * deadlock.
199 *
200 * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
201 * allocated in pru_attach() and freed in pru_detach().  The validity of that
202 * pointer is an invariant, so no lock is required to dereference the so_pcb
203 * pointer if a valid socket reference is held by the caller.  In practice,
204 * this is always true during operations performed on a socket.  Each unpcb
205 * has a back-pointer to its socket, unp_socket, which will be stable under
206 * the same circumstances.
207 *
208 * This pointer may only be safely dereferenced as long as a valid reference
209 * to the unpcb is held.  Typically, this reference will be from the socket,
210 * or from another unpcb when the referring unpcb's lock is held (in order
211 * that the reference not be invalidated during use).  For example, to follow
212 * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn,
213 * as unp_socket remains valid as long as the reference to unp_conn is valid.
214 *
215 * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx.  Individual
216 * atomic reads without the lock may be performed "lockless", but more
217 * complex reads and read-modify-writes require the mutex to be held.  No
218 * lock order is defined between unpcb locks -- multiple unpcb locks may be
219 * acquired at the same time only when holding the linkage rwlock
220 * exclusively, which prevents deadlocks.
221 *
222 * Blocking with UNIX domain sockets is a tricky issue: unlike most network
223 * protocols, bind() is a non-atomic operation, and connect() requires
224 * potential sleeping in the protocol, due to potentially waiting on local or
225 * distributed file systems.  We try to separate "lookup" operations, which
226 * may sleep, and the IPC operations themselves, which typically can occur
227 * with relative atomicity as locks can be held over the entire operation.
228 *
229 * Another tricky issue is simultaneous multi-threaded or multi-process
230 * access to a single UNIX domain socket.  These are handled by the flags
231 * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
232 * binding, both of which involve dropping UNIX domain socket locks in order
233 * to perform namei() and other file system operations.
234 */
235static struct rwlock	unp_link_rwlock;
236static struct mtx	unp_list_lock;
237static struct mtx	unp_defers_lock;
238
239#define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
240					    "unp_link_rwlock")
241
242#define	UNP_LINK_LOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
243					    RA_LOCKED)
244#define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
245					    RA_UNLOCKED)
246
247#define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
248#define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
249#define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
250#define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
251#define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
252					    RA_WLOCKED)
253
254#define	UNP_LIST_LOCK_INIT()		mtx_init(&unp_list_lock,	\
255					    "unp_list_lock", NULL, MTX_DEF)
256#define	UNP_LIST_LOCK()			mtx_lock(&unp_list_lock)
257#define	UNP_LIST_UNLOCK()		mtx_unlock(&unp_list_lock)
258
259#define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
260					    "unp_defer", NULL, MTX_DEF)
261#define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
262#define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
263
264#define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
265					    "unp_mtx", "unp_mtx",	\
266					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
267#define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
268#define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
269#define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
270#define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
271
272static int	uipc_connect2(struct socket *, struct socket *);
273static int	uipc_ctloutput(struct socket *, struct sockopt *);
274static int	unp_connect(struct socket *, struct sockaddr *,
275		    struct thread *);
276static int	unp_connectat(int, struct socket *, struct sockaddr *,
277		    struct thread *);
278static int	unp_connect2(struct socket *so, struct socket *so2, int);
279static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
280static void	unp_dispose(struct mbuf *);
281static void	unp_dispose_so(struct socket *so);
282static void	unp_shutdown(struct unpcb *);
283static void	unp_drop(struct unpcb *);
284static void	unp_gc(__unused void *, int);
285static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
286static void	unp_discard(struct file *);
287static void	unp_freerights(struct filedescent **, int);
288static void	unp_init(void);
289static int	unp_internalize(struct mbuf **, struct thread *);
290static void	unp_internalize_fp(struct file *);
291static int	unp_externalize(struct mbuf *, struct mbuf **, int);
292static int	unp_externalize_fp(struct file *);
293static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);
294static void	unp_process_defers(void * __unused, int);
295
296/*
297 * Definitions of protocols supported in the LOCAL domain.
298 */
299static struct domain localdomain;
300static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
301static struct pr_usrreqs uipc_usrreqs_seqpacket;
302static struct protosw localsw[] = {
303{
304	.pr_type =		SOCK_STREAM,
305	.pr_domain =		&localdomain,
306	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
307	.pr_ctloutput =		&uipc_ctloutput,
308	.pr_usrreqs =		&uipc_usrreqs_stream
309},
310{
311	.pr_type =		SOCK_DGRAM,
312	.pr_domain =		&localdomain,
313	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
314	.pr_ctloutput =		&uipc_ctloutput,
315	.pr_usrreqs =		&uipc_usrreqs_dgram
316},
317{
318	.pr_type =		SOCK_SEQPACKET,
319	.pr_domain =		&localdomain,
320
321	/*
322	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
323	 * due to our use of sbappendaddr.  A new sbappend variants is needed
324	 * that supports both atomic record writes and control data.
325	 */
326	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
327				    PR_RIGHTS,
328	.pr_ctloutput =		&uipc_ctloutput,
329	.pr_usrreqs =		&uipc_usrreqs_seqpacket,
330},
331};
332
333static struct domain localdomain = {
334	.dom_family =		AF_LOCAL,
335	.dom_name =		"local",
336	.dom_init =		unp_init,
337	.dom_externalize =	unp_externalize,
338	.dom_dispose =		unp_dispose_so,
339	.dom_protosw =		localsw,
340	.dom_protoswNPROTOSW =	&localsw[nitems(localsw)]
341};
342DOMAIN_SET(local);
343
344static void
345uipc_abort(struct socket *so)
346{
347	struct unpcb *unp, *unp2;
348
349	unp = sotounpcb(so);
350	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
351
352	UNP_LINK_WLOCK();
353	UNP_PCB_LOCK(unp);
354	unp2 = unp->unp_conn;
355	if (unp2 != NULL) {
356		UNP_PCB_LOCK(unp2);
357		unp_drop(unp2);
358		UNP_PCB_UNLOCK(unp2);
359	}
360	UNP_PCB_UNLOCK(unp);
361	UNP_LINK_WUNLOCK();
362}
363
364static int
365uipc_accept(struct socket *so, struct sockaddr **nam)
366{
367	struct unpcb *unp, *unp2;
368	const struct sockaddr *sa;
369
370	/*
371	 * Pass back name of connected socket, if it was bound and we are
372	 * still connected (our peer may have closed already!).
373	 */
374	unp = sotounpcb(so);
375	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
376
377	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
378	UNP_LINK_RLOCK();
379	unp2 = unp->unp_conn;
380	if (unp2 != NULL && unp2->unp_addr != NULL) {
381		UNP_PCB_LOCK(unp2);
382		sa = (struct sockaddr *) unp2->unp_addr;
383		bcopy(sa, *nam, sa->sa_len);
384		UNP_PCB_UNLOCK(unp2);
385	} else {
386		sa = &sun_noname;
387		bcopy(sa, *nam, sa->sa_len);
388	}
389	UNP_LINK_RUNLOCK();
390	return (0);
391}
392
393static int
394uipc_attach(struct socket *so, int proto, struct thread *td)
395{
396	u_long sendspace, recvspace;
397	struct unpcb *unp;
398	int error;
399
400	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
401	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
402		switch (so->so_type) {
403		case SOCK_STREAM:
404			sendspace = unpst_sendspace;
405			recvspace = unpst_recvspace;
406			break;
407
408		case SOCK_DGRAM:
409			sendspace = unpdg_sendspace;
410			recvspace = unpdg_recvspace;
411			break;
412
413		case SOCK_SEQPACKET:
414			sendspace = unpsp_sendspace;
415			recvspace = unpsp_recvspace;
416			break;
417
418		default:
419			panic("uipc_attach");
420		}
421		error = soreserve(so, sendspace, recvspace);
422		if (error)
423			return (error);
424	}
425	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
426	if (unp == NULL)
427		return (ENOBUFS);
428	LIST_INIT(&unp->unp_refs);
429	UNP_PCB_LOCK_INIT(unp);
430	unp->unp_socket = so;
431	so->so_pcb = unp;
432	unp->unp_refcount = 1;
433	if (so->so_head != NULL)
434		unp->unp_flags |= UNP_NASCENT;
435
436	UNP_LIST_LOCK();
437	unp->unp_gencnt = ++unp_gencnt;
438	unp_count++;
439	switch (so->so_type) {
440	case SOCK_STREAM:
441		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
442		break;
443
444	case SOCK_DGRAM:
445		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
446		break;
447
448	case SOCK_SEQPACKET:
449		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
450		break;
451
452	default:
453		panic("uipc_attach");
454	}
455	UNP_LIST_UNLOCK();
456
457	return (0);
458}
459
460static int
461uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
462{
463	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
464	struct vattr vattr;
465	int error, namelen;
466	struct nameidata nd;
467	struct unpcb *unp;
468	struct vnode *vp;
469	struct mount *mp;
470	cap_rights_t rights;
471	char *buf;
472
473	if (nam->sa_family != AF_UNIX)
474		return (EAFNOSUPPORT);
475
476	unp = sotounpcb(so);
477	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
478
479	if (soun->sun_len > sizeof(struct sockaddr_un))
480		return (EINVAL);
481	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
482	if (namelen <= 0)
483		return (EINVAL);
484
485	/*
486	 * We don't allow simultaneous bind() calls on a single UNIX domain
487	 * socket, so flag in-progress operations, and return an error if an
488	 * operation is already in progress.
489	 *
490	 * Historically, we have not allowed a socket to be rebound, so this
491	 * also returns an error.  Not allowing re-binding simplifies the
492	 * implementation and avoids a great many possible failure modes.
493	 */
494	UNP_PCB_LOCK(unp);
495	if (unp->unp_vnode != NULL) {
496		UNP_PCB_UNLOCK(unp);
497		return (EINVAL);
498	}
499	if (unp->unp_flags & UNP_BINDING) {
500		UNP_PCB_UNLOCK(unp);
501		return (EALREADY);
502	}
503	unp->unp_flags |= UNP_BINDING;
504	UNP_PCB_UNLOCK(unp);
505
506	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
507	bcopy(soun->sun_path, buf, namelen);
508	buf[namelen] = 0;
509
510restart:
511	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
512	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
513/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
514	error = namei(&nd);
515	if (error)
516		goto error;
517	vp = nd.ni_vp;
518	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
519		NDFREE(&nd, NDF_ONLY_PNBUF);
520		if (nd.ni_dvp == vp)
521			vrele(nd.ni_dvp);
522		else
523			vput(nd.ni_dvp);
524		if (vp != NULL) {
525			vrele(vp);
526			error = EADDRINUSE;
527			goto error;
528		}
529		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
530		if (error)
531			goto error;
532		goto restart;
533	}
534	VATTR_NULL(&vattr);
535	vattr.va_type = VSOCK;
536	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
537#ifdef MAC
538	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
539	    &vattr);
540#endif
541	if (error == 0)
542		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
543	NDFREE(&nd, NDF_ONLY_PNBUF);
544	vput(nd.ni_dvp);
545	if (error) {
546		vn_finished_write(mp);
547		goto error;
548	}
549	vp = nd.ni_vp;
550	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
551	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
552
553	UNP_LINK_WLOCK();
554	UNP_PCB_LOCK(unp);
555	VOP_UNP_BIND(vp, unp->unp_socket);
556	unp->unp_vnode = vp;
557	unp->unp_addr = soun;
558	unp->unp_flags &= ~UNP_BINDING;
559	UNP_PCB_UNLOCK(unp);
560	UNP_LINK_WUNLOCK();
561	VOP_UNLOCK(vp, 0);
562	vn_finished_write(mp);
563	free(buf, M_TEMP);
564	return (0);
565
566error:
567	UNP_PCB_LOCK(unp);
568	unp->unp_flags &= ~UNP_BINDING;
569	UNP_PCB_UNLOCK(unp);
570	free(buf, M_TEMP);
571	return (error);
572}
573
574static int
575uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
576{
577
578	return (uipc_bindat(AT_FDCWD, so, nam, td));
579}
580
581static int
582uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
583{
584	int error;
585
586	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
587	UNP_LINK_WLOCK();
588	error = unp_connect(so, nam, td);
589	UNP_LINK_WUNLOCK();
590	return (error);
591}
592
593static int
594uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
595    struct thread *td)
596{
597	int error;
598
599	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
600	UNP_LINK_WLOCK();
601	error = unp_connectat(fd, so, nam, td);
602	UNP_LINK_WUNLOCK();
603	return (error);
604}
605
606static void
607uipc_close(struct socket *so)
608{
609	struct unpcb *unp, *unp2;
610
611	unp = sotounpcb(so);
612	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
613
614	UNP_LINK_WLOCK();
615	UNP_PCB_LOCK(unp);
616	unp2 = unp->unp_conn;
617	if (unp2 != NULL) {
618		UNP_PCB_LOCK(unp2);
619		unp_disconnect(unp, unp2);
620		UNP_PCB_UNLOCK(unp2);
621	}
622	UNP_PCB_UNLOCK(unp);
623	UNP_LINK_WUNLOCK();
624}
625
626static int
627uipc_connect2(struct socket *so1, struct socket *so2)
628{
629	struct unpcb *unp, *unp2;
630	int error;
631
632	UNP_LINK_WLOCK();
633	unp = so1->so_pcb;
634	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
635	UNP_PCB_LOCK(unp);
636	unp2 = so2->so_pcb;
637	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
638	UNP_PCB_LOCK(unp2);
639	error = unp_connect2(so1, so2, PRU_CONNECT2);
640	UNP_PCB_UNLOCK(unp2);
641	UNP_PCB_UNLOCK(unp);
642	UNP_LINK_WUNLOCK();
643	return (error);
644}
645
646static void
647uipc_detach(struct socket *so)
648{
649	struct unpcb *unp, *unp2;
650	struct sockaddr_un *saved_unp_addr;
651	struct vnode *vp;
652	int freeunp, local_unp_rights;
653
654	unp = sotounpcb(so);
655	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
656
657	vp = NULL;
658	local_unp_rights = 0;
659
660	UNP_LIST_LOCK();
661	LIST_REMOVE(unp, unp_link);
662	unp->unp_gencnt = ++unp_gencnt;
663	--unp_count;
664	UNP_LIST_UNLOCK();
665
666	if ((unp->unp_flags & UNP_NASCENT) != 0) {
667		UNP_PCB_LOCK(unp);
668		goto teardown;
669	}
670	UNP_LINK_WLOCK();
671	UNP_PCB_LOCK(unp);
672
673	/*
674	 * XXXRW: Should assert vp->v_socket == so.
675	 */
676	if ((vp = unp->unp_vnode) != NULL) {
677		VOP_UNP_DETACH(vp);
678		unp->unp_vnode = NULL;
679	}
680	unp2 = unp->unp_conn;
681	if (unp2 != NULL) {
682		UNP_PCB_LOCK(unp2);
683		unp_disconnect(unp, unp2);
684		UNP_PCB_UNLOCK(unp2);
685	}
686
687	/*
688	 * We hold the linkage lock exclusively, so it's OK to acquire
689	 * multiple pcb locks at a time.
690	 */
691	while (!LIST_EMPTY(&unp->unp_refs)) {
692		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
693
694		UNP_PCB_LOCK(ref);
695		unp_drop(ref);
696		UNP_PCB_UNLOCK(ref);
697	}
698	local_unp_rights = unp_rights;
699	UNP_LINK_WUNLOCK();
700teardown:
701	unp->unp_socket->so_pcb = NULL;
702	saved_unp_addr = unp->unp_addr;
703	unp->unp_addr = NULL;
704	unp->unp_refcount--;
705	freeunp = (unp->unp_refcount == 0);
706	if (saved_unp_addr != NULL)
707		free(saved_unp_addr, M_SONAME);
708	if (freeunp) {
709		UNP_PCB_LOCK_DESTROY(unp);
710		uma_zfree(unp_zone, unp);
711	} else
712		UNP_PCB_UNLOCK(unp);
713	if (vp)
714		vrele(vp);
715	if (local_unp_rights)
716		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
717}
718
719static int
720uipc_disconnect(struct socket *so)
721{
722	struct unpcb *unp, *unp2;
723
724	unp = sotounpcb(so);
725	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
726
727	UNP_LINK_WLOCK();
728	UNP_PCB_LOCK(unp);
729	unp2 = unp->unp_conn;
730	if (unp2 != NULL) {
731		UNP_PCB_LOCK(unp2);
732		unp_disconnect(unp, unp2);
733		UNP_PCB_UNLOCK(unp2);
734	}
735	UNP_PCB_UNLOCK(unp);
736	UNP_LINK_WUNLOCK();
737	return (0);
738}
739
740static int
741uipc_listen(struct socket *so, int backlog, struct thread *td)
742{
743	struct unpcb *unp;
744	int error;
745
746	unp = sotounpcb(so);
747	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
748
749	UNP_PCB_LOCK(unp);
750	if (unp->unp_vnode == NULL) {
751		/* Already connected or not bound to an address. */
752		error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
753		UNP_PCB_UNLOCK(unp);
754		return (error);
755	}
756
757	SOCK_LOCK(so);
758	error = solisten_proto_check(so);
759	if (error == 0) {
760		cru2x(td->td_ucred, &unp->unp_peercred);
761		unp->unp_flags |= UNP_HAVEPCCACHED;
762		solisten_proto(so, backlog);
763	}
764	SOCK_UNLOCK(so);
765	UNP_PCB_UNLOCK(unp);
766	return (error);
767}
768
769static int
770uipc_peeraddr(struct socket *so, struct sockaddr **nam)
771{
772	struct unpcb *unp, *unp2;
773	const struct sockaddr *sa;
774
775	unp = sotounpcb(so);
776	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
777
778	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
779	UNP_LINK_RLOCK();
780	/*
781	 * XXX: It seems that this test always fails even when connection is
782	 * established.  So, this else clause is added as workaround to
783	 * return PF_LOCAL sockaddr.
784	 */
785	unp2 = unp->unp_conn;
786	if (unp2 != NULL) {
787		UNP_PCB_LOCK(unp2);
788		if (unp2->unp_addr != NULL)
789			sa = (struct sockaddr *) unp2->unp_addr;
790		else
791			sa = &sun_noname;
792		bcopy(sa, *nam, sa->sa_len);
793		UNP_PCB_UNLOCK(unp2);
794	} else {
795		sa = &sun_noname;
796		bcopy(sa, *nam, sa->sa_len);
797	}
798	UNP_LINK_RUNLOCK();
799	return (0);
800}
801
802static int
803uipc_rcvd(struct socket *so, int flags)
804{
805	struct unpcb *unp, *unp2;
806	struct socket *so2;
807	u_int mbcnt, sbcc;
808
809	unp = sotounpcb(so);
810	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
811	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
812	    ("%s: socktype %d", __func__, so->so_type));
813
814	/*
815	 * Adjust backpressure on sender and wakeup any waiting to write.
816	 *
817	 * The unp lock is acquired to maintain the validity of the unp_conn
818	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
819	 * static as long as we don't permit unp2 to disconnect from unp,
820	 * which is prevented by the lock on unp.  We cache values from
821	 * so_rcv to avoid holding the so_rcv lock over the entire
822	 * transaction on the remote so_snd.
823	 */
824	SOCKBUF_LOCK(&so->so_rcv);
825	mbcnt = so->so_rcv.sb_mbcnt;
826	sbcc = sbavail(&so->so_rcv);
827	SOCKBUF_UNLOCK(&so->so_rcv);
828	/*
829	 * There is a benign race condition at this point.  If we're planning to
830	 * clear SB_STOP, but uipc_send is called on the connected socket at
831	 * this instant, it might add data to the sockbuf and set SB_STOP.  Then
832	 * we would erroneously clear SB_STOP below, even though the sockbuf is
833	 * full.  The race is benign because the only ill effect is to allow the
834	 * sockbuf to exceed its size limit, and the size limits are not
835	 * strictly guaranteed anyway.
836	 */
837	UNP_PCB_LOCK(unp);
838	unp2 = unp->unp_conn;
839	if (unp2 == NULL) {
840		UNP_PCB_UNLOCK(unp);
841		return (0);
842	}
843	so2 = unp2->unp_socket;
844	SOCKBUF_LOCK(&so2->so_snd);
845	if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
846		so2->so_snd.sb_flags &= ~SB_STOP;
847	sowwakeup_locked(so2);
848	UNP_PCB_UNLOCK(unp);
849	return (0);
850}
851
852static int
853uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
854    struct mbuf *control, struct thread *td)
855{
856	struct unpcb *unp, *unp2;
857	struct socket *so2;
858	u_int mbcnt, sbcc;
859	int error = 0;
860
861	unp = sotounpcb(so);
862	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
863	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
864	    so->so_type == SOCK_SEQPACKET,
865	    ("%s: socktype %d", __func__, so->so_type));
866
867	if (flags & PRUS_OOB) {
868		error = EOPNOTSUPP;
869		goto release;
870	}
871	if (control != NULL && (error = unp_internalize(&control, td)))
872		goto release;
873	if ((nam != NULL) || (flags & PRUS_EOF))
874		UNP_LINK_WLOCK();
875	else
876		UNP_LINK_RLOCK();
877	switch (so->so_type) {
878	case SOCK_DGRAM:
879	{
880		const struct sockaddr *from;
881
882		unp2 = unp->unp_conn;
883		if (nam != NULL) {
884			UNP_LINK_WLOCK_ASSERT();
885			if (unp2 != NULL) {
886				error = EISCONN;
887				break;
888			}
889			error = unp_connect(so, nam, td);
890			if (error)
891				break;
892			unp2 = unp->unp_conn;
893		}
894
895		/*
896		 * Because connect() and send() are non-atomic in a sendto()
897		 * with a target address, it's possible that the socket will
898		 * have disconnected before the send() can run.  In that case
899		 * return the slightly counter-intuitive but otherwise
900		 * correct error that the socket is not connected.
901		 */
902		if (unp2 == NULL) {
903			error = ENOTCONN;
904			break;
905		}
906		/* Lockless read. */
907		if (unp2->unp_flags & UNP_WANTCRED)
908			control = unp_addsockcred(td, control);
909		UNP_PCB_LOCK(unp);
910		if (unp->unp_addr != NULL)
911			from = (struct sockaddr *)unp->unp_addr;
912		else
913			from = &sun_noname;
914		so2 = unp2->unp_socket;
915		SOCKBUF_LOCK(&so2->so_rcv);
916		if (sbappendaddr_locked(&so2->so_rcv, from, m,
917		    control)) {
918			sorwakeup_locked(so2);
919			m = NULL;
920			control = NULL;
921		} else {
922			SOCKBUF_UNLOCK(&so2->so_rcv);
923			error = ENOBUFS;
924		}
925		if (nam != NULL) {
926			UNP_LINK_WLOCK_ASSERT();
927			UNP_PCB_LOCK(unp2);
928			unp_disconnect(unp, unp2);
929			UNP_PCB_UNLOCK(unp2);
930		}
931		UNP_PCB_UNLOCK(unp);
932		break;
933	}
934
935	case SOCK_SEQPACKET:
936	case SOCK_STREAM:
937		if ((so->so_state & SS_ISCONNECTED) == 0) {
938			if (nam != NULL) {
939				UNP_LINK_WLOCK_ASSERT();
940				error = unp_connect(so, nam, td);
941				if (error)
942					break;	/* XXX */
943			} else {
944				error = ENOTCONN;
945				break;
946			}
947		}
948
949		/* Lockless read. */
950		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
951			error = EPIPE;
952			break;
953		}
954
955		/*
956		 * Because connect() and send() are non-atomic in a sendto()
957		 * with a target address, it's possible that the socket will
958		 * have disconnected before the send() can run.  In that case
959		 * return the slightly counter-intuitive but otherwise
960		 * correct error that the socket is not connected.
961		 *
962		 * Locking here must be done carefully: the linkage lock
963		 * prevents interconnections between unpcbs from changing, so
964		 * we can traverse from unp to unp2 without acquiring unp's
965		 * lock.  Socket buffer locks follow unpcb locks, so we can
966		 * acquire both remote and lock socket buffer locks.
967		 */
968		unp2 = unp->unp_conn;
969		if (unp2 == NULL) {
970			error = ENOTCONN;
971			break;
972		}
973		so2 = unp2->unp_socket;
974		UNP_PCB_LOCK(unp2);
975		SOCKBUF_LOCK(&so2->so_rcv);
976		if (unp2->unp_flags & UNP_WANTCRED) {
977			/*
978			 * Credentials are passed only once on SOCK_STREAM
979			 * and SOCK_SEQPACKET.
980			 */
981			unp2->unp_flags &= ~UNP_WANTCRED;
982			control = unp_addsockcred(td, control);
983		}
984
985		/*
986		 * Send to paired receive port and wake up readers.  Don't
987		 * check for space available in the receive buffer if we're
988		 * attaching ancillary data; Unix domain sockets only check
989		 * for space in the sending sockbuf, and that check is
990		 * performed one level up the stack.  At that level we cannot
991		 * precisely account for the amount of buffer space used
992		 * (e.g., because control messages are not yet internalized).
993		 */
994		switch (so->so_type) {
995		case SOCK_STREAM:
996			if (control != NULL) {
997				sbappendcontrol_locked(&so2->so_rcv, m,
998				    control);
999				control = NULL;
1000			} else
1001				sbappend_locked(&so2->so_rcv, m, flags);
1002			break;
1003
1004		case SOCK_SEQPACKET: {
1005			const struct sockaddr *from;
1006
1007			from = &sun_noname;
1008			if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
1009			    from, m, control))
1010				control = NULL;
1011			break;
1012			}
1013		}
1014
1015		mbcnt = so2->so_rcv.sb_mbcnt;
1016		sbcc = sbavail(&so2->so_rcv);
1017		if (sbcc)
1018			sorwakeup_locked(so2);
1019		else
1020			SOCKBUF_UNLOCK(&so2->so_rcv);
1021
1022		/*
1023		 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
1024		 * it would be possible for uipc_rcvd to be called at this
1025		 * point, drain the receiving sockbuf, clear SB_STOP, and then
1026		 * we would set SB_STOP below.  That could lead to an empty
1027		 * sockbuf having SB_STOP set
1028		 */
1029		SOCKBUF_LOCK(&so->so_snd);
1030		if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
1031			so->so_snd.sb_flags |= SB_STOP;
1032		SOCKBUF_UNLOCK(&so->so_snd);
1033		UNP_PCB_UNLOCK(unp2);
1034		m = NULL;
1035		break;
1036	}
1037
1038	/*
1039	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
1040	 */
1041	if (flags & PRUS_EOF) {
1042		UNP_PCB_LOCK(unp);
1043		socantsendmore(so);
1044		unp_shutdown(unp);
1045		UNP_PCB_UNLOCK(unp);
1046	}
1047
1048	if ((nam != NULL) || (flags & PRUS_EOF))
1049		UNP_LINK_WUNLOCK();
1050	else
1051		UNP_LINK_RUNLOCK();
1052
1053	if (control != NULL && error != 0)
1054		unp_dispose(control);
1055
1056release:
1057	if (control != NULL)
1058		m_freem(control);
1059	/*
1060	 * In case of PRUS_NOTREADY, uipc_ready() is responsible
1061	 * for freeing memory.
1062	 */
1063	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
1064		m_freem(m);
1065	return (error);
1066}
1067
1068static int
1069uipc_ready(struct socket *so, struct mbuf *m, int count)
1070{
1071	struct unpcb *unp, *unp2;
1072	struct socket *so2;
1073	int error;
1074
1075	unp = sotounpcb(so);
1076
1077	UNP_LINK_RLOCK();
1078	if ((unp2 = unp->unp_conn) == NULL) {
1079		UNP_LINK_RUNLOCK();
1080		for (int i = 0; i < count; i++)
1081			m = m_free(m);
1082		return (ECONNRESET);
1083	}
1084	UNP_PCB_LOCK(unp2);
1085	so2 = unp2->unp_socket;
1086
1087	SOCKBUF_LOCK(&so2->so_rcv);
1088	if ((error = sbready(&so2->so_rcv, m, count)) == 0)
1089		sorwakeup_locked(so2);
1090	else
1091		SOCKBUF_UNLOCK(&so2->so_rcv);
1092
1093	UNP_PCB_UNLOCK(unp2);
1094	UNP_LINK_RUNLOCK();
1095
1096	return (error);
1097}
1098
1099static int
1100uipc_sense(struct socket *so, struct stat *sb)
1101{
1102	struct unpcb *unp;
1103
1104	unp = sotounpcb(so);
1105	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
1106
1107	sb->st_blksize = so->so_snd.sb_hiwat;
1108	UNP_PCB_LOCK(unp);
1109	sb->st_dev = NODEV;
1110	if (unp->unp_ino == 0)
1111		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
1112	sb->st_ino = unp->unp_ino;
1113	UNP_PCB_UNLOCK(unp);
1114	return (0);
1115}
1116
1117static int
1118uipc_shutdown(struct socket *so)
1119{
1120	struct unpcb *unp;
1121
1122	unp = sotounpcb(so);
1123	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
1124
1125	UNP_LINK_WLOCK();
1126	UNP_PCB_LOCK(unp);
1127	socantsendmore(so);
1128	unp_shutdown(unp);
1129	UNP_PCB_UNLOCK(unp);
1130	UNP_LINK_WUNLOCK();
1131	return (0);
1132}
1133
1134static int
1135uipc_sockaddr(struct socket *so, struct sockaddr **nam)
1136{
1137	struct unpcb *unp;
1138	const struct sockaddr *sa;
1139
1140	unp = sotounpcb(so);
1141	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
1142
1143	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
1144	UNP_PCB_LOCK(unp);
1145	if (unp->unp_addr != NULL)
1146		sa = (struct sockaddr *) unp->unp_addr;
1147	else
1148		sa = &sun_noname;
1149	bcopy(sa, *nam, sa->sa_len);
1150	UNP_PCB_UNLOCK(unp);
1151	return (0);
1152}
1153
/*
 * User-request dispatch table for datagram local sockets (presumably wired
 * into the SOCK_DGRAM protosw entry elsewhere in the file).  It shares the
 * uipc_* handlers with the other two tables and differs only in using
 * soreceive_dgram for pru_soreceive.
 */
static struct pr_usrreqs uipc_usrreqs_dgram = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_bindat =		uipc_bindat,
	.pru_connect =		uipc_connect,
	.pru_connectat =	uipc_connectat,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_dgram,
	.pru_close =		uipc_close,
};
1175
/*
 * User-request dispatch table for sequenced-packet local sockets
 * (presumably wired into the SOCK_SEQPACKET protosw entry elsewhere in the
 * file).  Uses the generic receive path; unlike the stream table it does
 * not provide a pru_ready handler.
 */
static struct pr_usrreqs uipc_usrreqs_seqpacket = {
	.pru_abort =		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_bindat =		uipc_bindat,
	.pru_connect =		uipc_connect,
	.pru_connectat =	uipc_connectat,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_generic,	/* XXX: or...? */
	.pru_close =		uipc_close,
};
1197
/*
 * User-request dispatch table for stream local sockets (presumably wired
 * into the SOCK_STREAM protosw entry elsewhere in the file).  This is the
 * only one of the three tables that installs a pru_ready handler
 * (uipc_ready).
 */
static struct pr_usrreqs uipc_usrreqs_stream = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_bindat =		uipc_bindat,
	.pru_connect =		uipc_connect,
	.pru_connectat =	uipc_connectat,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_ready =		uipc_ready,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_generic,
	.pru_close =		uipc_close,
};
1220
1221static int
1222uipc_ctloutput(struct socket *so, struct sockopt *sopt)
1223{
1224	struct unpcb *unp;
1225	struct xucred xu;
1226	int error, optval;
1227
1228	if (sopt->sopt_level != 0)
1229		return (EINVAL);
1230
1231	unp = sotounpcb(so);
1232	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
1233	error = 0;
1234	switch (sopt->sopt_dir) {
1235	case SOPT_GET:
1236		switch (sopt->sopt_name) {
1237		case LOCAL_PEERCRED:
1238			UNP_PCB_LOCK(unp);
1239			if (unp->unp_flags & UNP_HAVEPC)
1240				xu = unp->unp_peercred;
1241			else {
1242				if (so->so_type == SOCK_STREAM)
1243					error = ENOTCONN;
1244				else
1245					error = EINVAL;
1246			}
1247			UNP_PCB_UNLOCK(unp);
1248			if (error == 0)
1249				error = sooptcopyout(sopt, &xu, sizeof(xu));
1250			break;
1251
1252		case LOCAL_CREDS:
1253			/* Unlocked read. */
1254			optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
1255			error = sooptcopyout(sopt, &optval, sizeof(optval));
1256			break;
1257
1258		case LOCAL_CONNWAIT:
1259			/* Unlocked read. */
1260			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
1261			error = sooptcopyout(sopt, &optval, sizeof(optval));
1262			break;
1263
1264		default:
1265			error = EOPNOTSUPP;
1266			break;
1267		}
1268		break;
1269
1270	case SOPT_SET:
1271		switch (sopt->sopt_name) {
1272		case LOCAL_CREDS:
1273		case LOCAL_CONNWAIT:
1274			error = sooptcopyin(sopt, &optval, sizeof(optval),
1275					    sizeof(optval));
1276			if (error)
1277				break;
1278
1279#define	OPTSET(bit) do {						\
1280	UNP_PCB_LOCK(unp);						\
1281	if (optval)							\
1282		unp->unp_flags |= bit;					\
1283	else								\
1284		unp->unp_flags &= ~bit;					\
1285	UNP_PCB_UNLOCK(unp);						\
1286} while (0)
1287
1288			switch (sopt->sopt_name) {
1289			case LOCAL_CREDS:
1290				OPTSET(UNP_WANTCRED);
1291				break;
1292
1293			case LOCAL_CONNWAIT:
1294				OPTSET(UNP_CONNWAIT);
1295				break;
1296
1297			default:
1298				break;
1299			}
1300			break;
1301#undef	OPTSET
1302		default:
1303			error = ENOPROTOOPT;
1304			break;
1305		}
1306		break;
1307
1308	default:
1309		error = EOPNOTSUPP;
1310		break;
1311	}
1312	return (error);
1313}
1314
1315static int
1316unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1317{
1318
1319	return (unp_connectat(AT_FDCWD, so, nam, td));
1320}
1321
/*
 * Connect socket 'so' to the local socket bound at the path carried in
 * 'nam'; the path lookup is performed relative to descriptor 'fd' on behalf
 * of thread 'td'.
 *
 * Locking: the linkage write lock must be held on entry (asserted below).
 * Once the arguments have been validated the lock is dropped for the name
 * lookup and access checks, retaken around the actual connection work, and
 * retaken once more before returning, so the function always returns with
 * it held.  While the lock is dropped UNP_CONNECTING is set on the PCB so
 * that a second connect attempt on the same socket fails with EALREADY.
 * The early validation returns happen before the lock is ever released.
 *
 * For protocols with PR_CONNREQUIRED the target must be accepting
 * connections; a new socket is created from it with sonewconn(), it
 * inherits the listener's address and peer credentials, and 'so' is
 * connected to that new socket rather than to the listener itself.
 *
 * Returns 0 on success or an errno value (EAFNOSUPPORT, EINVAL, EALREADY,
 * ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or whatever namei(), the MAC check,
 * VOP_ACCESS() or unp_connect2() report).
 */
static int
unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
    struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;
	cap_rights_t rights;
	int error, len;

	if (nam->sa_family != AF_UNIX)
		return (EAFNOSUPPORT);

	UNP_LINK_WLOCK_ASSERT();

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

	/*
	 * Copy the path out of the sockaddr into a NUL-terminated buffer.
	 * The sa_len check above the copy bounds len by the size of
	 * sun_path.
	 */
	if (nam->sa_len > sizeof(struct sockaddr_un))
		return (EINVAL);
	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	UNP_PCB_LOCK(unp);
	if (unp->unp_flags & UNP_CONNECTING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
	}
	/* Drop the linkage lock for the lookup below; flag the attempt. */
	UNP_LINK_WUNLOCK();
	unp->unp_flags |= UNP_CONNECTING;
	UNP_PCB_UNLOCK(unp);

	/*
	 * Allocated here, with no locks held, so that the listener's
	 * address can later be copied into it while PCB locks are held.
	 */
	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
	error = namei(&nd);
	if (error)
		vp = NULL;
	else
		vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_connect");
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error)
		goto bad;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
	if (error)
		goto bad;
#endif
	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
	if (error)
		goto bad;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

	/*
	 * Lock linkage lock for two reasons: make sure v_socket is stable,
	 * and to protect simultaneous locking of multiple pcbs.
	 */
	UNP_LINK_WLOCK();
	VOP_UNP_CONNECT(vp, &so2);
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad2;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad2;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Connection-oriented: spawn a new socket off the listener. */
		if (so2->so_options & SO_ACCEPTCONN) {
			CURVNET_SET(so2->so_vnet);
			so3 = sonewconn(so2, 0);
			CURVNET_RESTORE();
		} else
			so3 = NULL;
		if (so3 == NULL) {
			error = ECONNREFUSED;
			goto bad2;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		UNP_PCB_LOCK(unp);
		UNP_PCB_LOCK(unp2);
		UNP_PCB_LOCK(unp3);
		if (unp2->unp_addr != NULL) {
			/*
			 * Give the new socket a copy of the listener's
			 * address.  Ownership of 'sa' moves to unp3, so it is
			 * cleared to keep the free() at 'bad' from releasing
			 * it.
			 */
			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
			unp3->unp_addr = (struct sockaddr_un *) sa;
			sa = NULL;
		}

		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		unp_copy_peercred(td, unp3, unp, unp2);

		UNP_PCB_UNLOCK(unp3);
		UNP_PCB_UNLOCK(unp2);
		UNP_PCB_UNLOCK(unp);
#ifdef MAC
		mac_socketpeer_set_from_socket(so, so3);
		mac_socketpeer_set_from_socket(so3, so);
#endif

		/* From here on the peer is the new socket, not the listener. */
		so2 = so3;
	}
	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
	unp2 = sotounpcb(so2);
	KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
	UNP_PCB_LOCK(unp);
	UNP_PCB_LOCK(unp2);
	error = unp_connect2(so, so2, PRU_CONNECT);
	UNP_PCB_UNLOCK(unp2);
	UNP_PCB_UNLOCK(unp);
bad2:
	UNP_LINK_WUNLOCK();
bad:
	/* Common exit: the linkage lock is not held at this point. */
	if (vp != NULL)
		vput(vp);
	free(sa, M_SONAME);
	/* Restore the caller's locking state and clear the in-progress flag. */
	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	unp->unp_flags &= ~UNP_CONNECTING;
	UNP_PCB_UNLOCK(unp);
	return (error);
}
1462
1463/*
1464 * Set socket peer credentials at connection time.
1465 *
1466 * The client's PCB credentials are copied from its process structure.  The
1467 * server's PCB credentials are copied from the socket on which it called
1468 * listen(2).  uipc_listen cached that process's credentials at the time.
1469 */
1470void
1471unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
1472    struct unpcb *server_unp, struct unpcb *listen_unp)
1473{
1474	cru2x(td->td_ucred, &client_unp->unp_peercred);
1475	client_unp->unp_flags |= UNP_HAVEPC;
1476
1477	memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
1478	    sizeof(server_unp->unp_peercred));
1479	server_unp->unp_flags |= UNP_HAVEPC;
1480	if (listen_unp->unp_flags & UNP_WANTCRED)
1481		client_unp->unp_flags |= UNP_WANTCRED;
1482}
1483
1484static int
1485unp_connect2(struct socket *so, struct socket *so2, int req)
1486{
1487	struct unpcb *unp;
1488	struct unpcb *unp2;
1489
1490	unp = sotounpcb(so);
1491	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
1492	unp2 = sotounpcb(so2);
1493	KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
1494
1495	UNP_LINK_WLOCK_ASSERT();
1496	UNP_PCB_LOCK_ASSERT(unp);
1497	UNP_PCB_LOCK_ASSERT(unp2);
1498
1499	if (so2->so_type != so->so_type)
1500		return (EPROTOTYPE);
1501	unp2->unp_flags &= ~UNP_NASCENT;
1502	unp->unp_conn = unp2;
1503
1504	switch (so->so_type) {
1505	case SOCK_DGRAM:
1506		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1507		soisconnected(so);
1508		break;
1509
1510	case SOCK_STREAM:
1511	case SOCK_SEQPACKET:
1512		unp2->unp_conn = unp;
1513		if (req == PRU_CONNECT &&
1514		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
1515			soisconnecting(so);
1516		else
1517			soisconnected(so);
1518		soisconnected(so2);
1519		break;
1520
1521	default:
1522		panic("unp_connect2");
1523	}
1524	return (0);
1525}
1526
1527static void
1528unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
1529{
1530	struct socket *so;
1531
1532	KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL"));
1533
1534	UNP_LINK_WLOCK_ASSERT();
1535	UNP_PCB_LOCK_ASSERT(unp);
1536	UNP_PCB_LOCK_ASSERT(unp2);
1537
1538	unp->unp_conn = NULL;
1539	switch (unp->unp_socket->so_type) {
1540	case SOCK_DGRAM:
1541		LIST_REMOVE(unp, unp_reflink);
1542		so = unp->unp_socket;
1543		SOCK_LOCK(so);
1544		so->so_state &= ~SS_ISCONNECTED;
1545		SOCK_UNLOCK(so);
1546		break;
1547
1548	case SOCK_STREAM:
1549	case SOCK_SEQPACKET:
1550		soisdisconnected(unp->unp_socket);
1551		unp2->unp_conn = NULL;
1552		soisdisconnected(unp2->unp_socket);
1553		break;
1554	}
1555}
1556
1557/*
1558 * unp_pcblist() walks the global list of struct unpcb's to generate a
1559 * pointer list, bumping the refcount on each unpcb.  It then copies them out
1560 * sequentially, validating the generation number on each to see if it has
1561 * been detached.  All of this is necessary because copyout() may sleep on
1562 * disk I/O.
1563 */
1564static int
1565unp_pcblist(SYSCTL_HANDLER_ARGS)
1566{
1567	int error, i, n;
1568	int freeunp;
1569	struct unpcb *unp, **unp_list;
1570	unp_gen_t gencnt;
1571	struct xunpgen *xug;
1572	struct unp_head *head;
1573	struct xunpcb *xu;
1574
1575	switch ((intptr_t)arg1) {
1576	case SOCK_STREAM:
1577		head = &unp_shead;
1578		break;
1579
1580	case SOCK_DGRAM:
1581		head = &unp_dhead;
1582		break;
1583
1584	case SOCK_SEQPACKET:
1585		head = &unp_sphead;
1586		break;
1587
1588	default:
1589		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
1590	}
1591
1592	/*
1593	 * The process of preparing the PCB list is too time-consuming and
1594	 * resource-intensive to repeat twice on every request.
1595	 */
1596	if (req->oldptr == NULL) {
1597		n = unp_count;
1598		req->oldidx = 2 * (sizeof *xug)
1599			+ (n + n/8) * sizeof(struct xunpcb);
1600		return (0);
1601	}
1602
1603	if (req->newptr != NULL)
1604		return (EPERM);
1605
1606	/*
1607	 * OK, now we're committed to doing something.
1608	 */
1609	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
1610	UNP_LIST_LOCK();
1611	gencnt = unp_gencnt;
1612	n = unp_count;
1613	UNP_LIST_UNLOCK();
1614
1615	xug->xug_len = sizeof *xug;
1616	xug->xug_count = n;
1617	xug->xug_gen = gencnt;
1618	xug->xug_sogen = so_gencnt;
1619	error = SYSCTL_OUT(req, xug, sizeof *xug);
1620	if (error) {
1621		free(xug, M_TEMP);
1622		return (error);
1623	}
1624
1625	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
1626
1627	UNP_LIST_LOCK();
1628	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
1629	     unp = LIST_NEXT(unp, unp_link)) {
1630		UNP_PCB_LOCK(unp);
1631		if (unp->unp_gencnt <= gencnt) {
1632			if (cr_cansee(req->td->td_ucred,
1633			    unp->unp_socket->so_cred)) {
1634				UNP_PCB_UNLOCK(unp);
1635				continue;
1636			}
1637			unp_list[i++] = unp;
1638			unp->unp_refcount++;
1639		}
1640		UNP_PCB_UNLOCK(unp);
1641	}
1642	UNP_LIST_UNLOCK();
1643	n = i;			/* In case we lost some during malloc. */
1644
1645	error = 0;
1646	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
1647	for (i = 0; i < n; i++) {
1648		unp = unp_list[i];
1649		UNP_PCB_LOCK(unp);
1650		unp->unp_refcount--;
1651	        if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) {
1652			xu->xu_len = sizeof *xu;
1653			xu->xu_unpp = unp;
1654			/*
1655			 * XXX - need more locking here to protect against
1656			 * connect/disconnect races for SMP.
1657			 */
1658			if (unp->unp_addr != NULL)
1659				bcopy(unp->unp_addr, &xu->xu_addr,
1660				      unp->unp_addr->sun_len);
1661			if (unp->unp_conn != NULL &&
1662			    unp->unp_conn->unp_addr != NULL)
1663				bcopy(unp->unp_conn->unp_addr,
1664				      &xu->xu_caddr,
1665				      unp->unp_conn->unp_addr->sun_len);
1666			bcopy(unp, &xu->xu_unp, sizeof *unp);
1667			sotoxsocket(unp->unp_socket, &xu->xu_socket);
1668			UNP_PCB_UNLOCK(unp);
1669			error = SYSCTL_OUT(req, xu, sizeof *xu);
1670		} else {
1671			freeunp = (unp->unp_refcount == 0);
1672			UNP_PCB_UNLOCK(unp);
1673			if (freeunp) {
1674				UNP_PCB_LOCK_DESTROY(unp);
1675				uma_zfree(unp_zone, unp);
1676			}
1677		}
1678	}
1679	free(xu, M_TEMP);
1680	if (!error) {
1681		/*
1682		 * Give the user an updated idea of our state.  If the
1683		 * generation differs from what we told her before, she knows
1684		 * that something happened while we were processing this
1685		 * request, and it might be necessary to retry.
1686		 */
1687		xug->xug_gen = unp_gencnt;
1688		xug->xug_sogen = so_gencnt;
1689		xug->xug_count = unp_count;
1690		error = SYSCTL_OUT(req, xug, sizeof *xug);
1691	}
1692	free(unp_list, M_TEMP);
1693	free(xug, M_TEMP);
1694	return (error);
1695}
1696
/*
 * Read-only "pcblist" sysctl nodes, one per local socket type.  All three
 * use unp_pcblist() as the handler; the socket type is passed through arg1
 * so the handler can pick the matching PCB list.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
    (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
    (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
    CTLTYPE_OPAQUE | CTLFLAG_RD,
    (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
    "List of active local seqpacket sockets");
1707
1708static void
1709unp_shutdown(struct unpcb *unp)
1710{
1711	struct unpcb *unp2;
1712	struct socket *so;
1713
1714	UNP_LINK_WLOCK_ASSERT();
1715	UNP_PCB_LOCK_ASSERT(unp);
1716
1717	unp2 = unp->unp_conn;
1718	if ((unp->unp_socket->so_type == SOCK_STREAM ||
1719	    (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
1720		so = unp2->unp_socket;
1721		if (so != NULL)
1722			socantrcvmore(so);
1723	}
1724}
1725
1726static void
1727unp_drop(struct unpcb *unp)
1728{
1729	struct socket *so = unp->unp_socket;
1730	struct unpcb *unp2;
1731
1732	UNP_LINK_WLOCK_ASSERT();
1733	UNP_PCB_LOCK_ASSERT(unp);
1734
1735	/*
1736	 * Regardless of whether the socket's peer dropped the connection
1737	 * with this socket by aborting or disconnecting, POSIX requires
1738	 * that ECONNRESET is returned.
1739	 */
1740	so->so_error = ECONNRESET;
1741	unp2 = unp->unp_conn;
1742	if (unp2 == NULL)
1743		return;
1744	UNP_PCB_LOCK(unp2);
1745	unp_disconnect(unp, unp2);
1746	UNP_PCB_UNLOCK(unp2);
1747}
1748
1749static void
1750unp_freerights(struct filedescent **fdep, int fdcount)
1751{
1752	struct file *fp;
1753	int i;
1754
1755	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
1756
1757	for (i = 0; i < fdcount; i++) {
1758		fp = fdep[i]->fde_file;
1759		filecaps_free(&fdep[i]->fde_caps);
1760		unp_discard(fp);
1761	}
1762	free(fdep[0], M_FILECAPS);
1763}
1764
/*
 * Convert the in-kernel control messages in 'control' into the form handed
 * to the receiving process, building a new mbuf chain at *controlp.
 *
 * SCM_RIGHTS messages carry an array of struct filedescent pointers; each
 * is turned into a descriptor number newly allocated in the current
 * thread's descriptor table (close-on-exec when MSG_CMSG_CLOEXEC is set in
 * 'flags').  All other messages are copied unchanged.
 *
 * If controlp is NULL, or once an error has been recorded, nothing further
 * is produced and the rights in any remaining SCM_RIGHTS messages are
 * released instead.  'control' is always freed.  Returns 0 or the first
 * errno recorded (EINVAL, E2BIG, EMSGSIZE or ENOBUFS).  Must be called
 * without the linkage lock held.
 */
static int
unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
	struct thread *td = curthread;		/* XXX */
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int i;
	int *fdp;
	struct filedesc *fdesc = td->td_proc->p_fd;
	struct filedescent **fdep;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, newfds;
	u_int newlen;

	UNP_LINK_UNLOCK_ASSERT();

	error = 0;
	if (controlp != NULL) /* controlp == NULL => free control messages */
		*controlp = NULL;
	while (cm != NULL) {
		/* Stop if the header or its claimed length overruns the mbuf. */
		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
			error = EINVAL;
			break;
		}
		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
		if (cm->cmsg_level == SOL_SOCKET
		    && cm->cmsg_type == SCM_RIGHTS) {
			newfds = datalen / sizeof(*fdep);
			if (newfds == 0)
				goto next;
			fdep = data;

			/* If we're not outputting the descriptors free them. */
			if (error || controlp == NULL) {
				unp_freerights(fdep, newfds);
				goto next;
			}
			FILEDESC_XLOCK(fdesc);

			/*
			 * Now change each pointer to an fd in the global
			 * table to an integer that is the index to the local
			 * fd table entry that we set up to point to the
			 * global one we are transferring.
			 */
			newlen = newfds * sizeof(int);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_XUNLOCK(fdesc);
				error = E2BIG;
				unp_freerights(fdep, newfds);
				goto next;
			}

			fdp = (int *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			/* Reserve all descriptor slots at once, or none. */
			if (fdallocn(td, 0, fdp, newfds) != 0) {
				FILEDESC_XUNLOCK(fdesc);
				error = EMSGSIZE;
				unp_freerights(fdep, newfds);
				m_freem(*controlp);
				*controlp = NULL;
				goto next;
			}
			for (i = 0; i < newfds; i++, fdp++) {
				_finstall(fdesc, fdep[i]->fde_file, *fdp,
				    (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
				    &fdep[i]->fde_caps);
				unp_externalize_fp(fdep[i]->fde_file);
			}

			/*
			 * The new type indicates that the mbuf data refers to
			 * kernel resources that may need to be released before
			 * the mbuf is freed.
			 */
			m_chtype(*controlp, MT_EXTCONTROL);
			FILEDESC_XUNLOCK(fdesc);
			/* Release the filedescent storage itself. */
			free(fdep[0], M_FILECAPS);
		} else {
			/* We can just copy anything else across. */
			if (error || controlp == NULL)
				goto next;
			*controlp = sbcreatecontrol(NULL, datalen,
			    cm->cmsg_type, cm->cmsg_level);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto next;
			}
			bcopy(data,
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
			    datalen);
		}
		/* A message was emitted: chain the next one after it. */
		controlp = &(*controlp)->m_next;

next:
		/* Step to the next message, or stop at the end of the mbuf. */
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

	m_freem(control);
	return (error);
}
1876
/*
 * Event handler (registered for maxsockets_change in unp_init()): re-apply
 * the current value of maxsockets as the limit of the unpcb zone.  'tag' is
 * unused.
 */
static void
unp_zone_change(void *tag)
{

	uma_zone_set_max(unp_zone, maxsockets);
}
1883
/*
 * One-time initialisation of the local-socket subsystem: create and size
 * the unpcb zone, register for maxsockets changes, initialise the per-type
 * PCB lists and the deferred-close list, set up the garbage-collection and
 * deferred-close tasks, and initialise the global locks.  Panics if the
 * zone cannot be created.  With VIMAGE, does nothing outside the default
 * vnet.
 */
static void
unp_init(void)
{

#ifdef VIMAGE
	if (!IS_DEFAULT_VNET(curvnet))
		return;
#endif
	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	if (unp_zone == NULL)
		panic("unp_init");
	/* Cap the zone at maxsockets and follow later changes to it. */
	uma_zone_set_max(unp_zone, maxsockets);
	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
	    NULL, EVENTHANDLER_PRI_ANY);
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
	LIST_INIT(&unp_sphead);
	SLIST_INIT(&unp_defers);
	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
	UNP_LINK_LOCK_INIT();
	UNP_LIST_LOCK_INIT();
	UNP_DEFERRED_LOCK_INIT();
}
1910
1911static void
1912unp_internalize_cleanup_rights(struct mbuf *control)
1913{
1914	struct cmsghdr *cp;
1915	struct mbuf *m;
1916	void *data;
1917	socklen_t datalen;
1918
1919	for (m = control; m != NULL; m = m->m_next) {
1920		cp = mtod(m, struct cmsghdr *);
1921		if (cp->cmsg_level != SOL_SOCKET ||
1922		    cp->cmsg_type != SCM_RIGHTS)
1923			continue;
1924		data = CMSG_DATA(cp);
1925		datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
1926		unp_freerights(data, datalen / sizeof(struct filedesc *));
1927	}
1928}
1929
1930static int
1931unp_internalize(struct mbuf **controlp, struct thread *td)
1932{
1933	struct mbuf *control, **initial_controlp;
1934	struct proc *p;
1935	struct filedesc *fdesc;
1936	struct bintime *bt;
1937	struct cmsghdr *cm;
1938	struct cmsgcred *cmcred;
1939	struct filedescent *fde, **fdep, *fdev;
1940	struct file *fp;
1941	struct timeval *tv;
1942	struct timespec *ts;
1943	void *data;
1944	socklen_t clen, datalen;
1945	int i, error, *fdp, oldfds;
1946	u_int newlen;
1947
1948	UNP_LINK_UNLOCK_ASSERT();
1949
1950	p = td->td_proc;
1951	fdesc = p->p_fd;
1952	error = 0;
1953	control = *controlp;
1954	clen = control->m_len;
1955	*controlp = NULL;
1956	initial_controlp = controlp;
1957	for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
1958		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
1959		    || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
1960			error = EINVAL;
1961			goto out;
1962		}
1963		data = CMSG_DATA(cm);
1964		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1965
1966		switch (cm->cmsg_type) {
1967		/*
1968		 * Fill in credential information.
1969		 */
1970		case SCM_CREDS:
1971			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
1972			    SCM_CREDS, SOL_SOCKET);
1973			if (*controlp == NULL) {
1974				error = ENOBUFS;
1975				goto out;
1976			}
1977			cmcred = (struct cmsgcred *)
1978			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
1979			cmcred->cmcred_pid = p->p_pid;
1980			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
1981			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
1982			cmcred->cmcred_euid = td->td_ucred->cr_uid;
1983			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
1984			    CMGROUP_MAX);
1985			for (i = 0; i < cmcred->cmcred_ngroups; i++)
1986				cmcred->cmcred_groups[i] =
1987				    td->td_ucred->cr_groups[i];
1988			break;
1989
1990		case SCM_RIGHTS:
1991			oldfds = datalen / sizeof (int);
1992			if (oldfds == 0)
1993				break;
1994			/*
1995			 * Check that all the FDs passed in refer to legal
1996			 * files.  If not, reject the entire operation.
1997			 */
1998			fdp = data;
1999			FILEDESC_SLOCK(fdesc);
2000			for (i = 0; i < oldfds; i++, fdp++) {
2001				fp = fget_locked(fdesc, *fdp);
2002				if (fp == NULL) {
2003					FILEDESC_SUNLOCK(fdesc);
2004					error = EBADF;
2005					goto out;
2006				}
2007				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
2008					FILEDESC_SUNLOCK(fdesc);
2009					error = EOPNOTSUPP;
2010					goto out;
2011				}
2012
2013			}
2014
2015			/*
2016			 * Now replace the integer FDs with pointers to the
2017			 * file structure and capability rights.
2018			 */
2019			newlen = oldfds * sizeof(fdep[0]);
2020			*controlp = sbcreatecontrol(NULL, newlen,
2021			    SCM_RIGHTS, SOL_SOCKET);
2022			if (*controlp == NULL) {
2023				FILEDESC_SUNLOCK(fdesc);
2024				error = E2BIG;
2025				goto out;
2026			}
2027			fdp = data;
2028			fdep = (struct filedescent **)
2029			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
2030			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
2031			    M_WAITOK);
2032			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
2033				fde = &fdesc->fd_ofiles[*fdp];
2034				fdep[i] = fdev;
2035				fdep[i]->fde_file = fde->fde_file;
2036				filecaps_copy(&fde->fde_caps,
2037				    &fdep[i]->fde_caps, true);
2038				unp_internalize_fp(fdep[i]->fde_file);
2039			}
2040			FILEDESC_SUNLOCK(fdesc);
2041			break;
2042
2043		case SCM_TIMESTAMP:
2044			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
2045			    SCM_TIMESTAMP, SOL_SOCKET);
2046			if (*controlp == NULL) {
2047				error = ENOBUFS;
2048				goto out;
2049			}
2050			tv = (struct timeval *)
2051			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
2052			microtime(tv);
2053			break;
2054
2055		case SCM_BINTIME:
2056			*controlp = sbcreatecontrol(NULL, sizeof(*bt),
2057			    SCM_BINTIME, SOL_SOCKET);
2058			if (*controlp == NULL) {
2059				error = ENOBUFS;
2060				goto out;
2061			}
2062			bt = (struct bintime *)
2063			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
2064			bintime(bt);
2065			break;
2066
2067		case SCM_REALTIME:
2068			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
2069			    SCM_REALTIME, SOL_SOCKET);
2070			if (*controlp == NULL) {
2071				error = ENOBUFS;
2072				goto out;
2073			}
2074			ts = (struct timespec *)
2075			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
2076			nanotime(ts);
2077			break;
2078
2079		case SCM_MONOTONIC:
2080			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
2081			    SCM_MONOTONIC, SOL_SOCKET);
2082			if (*controlp == NULL) {
2083				error = ENOBUFS;
2084				goto out;
2085			}
2086			ts = (struct timespec *)
2087			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
2088			nanouptime(ts);
2089			break;
2090
2091		default:
2092			error = EINVAL;
2093			goto out;
2094		}
2095
2096		controlp = &(*controlp)->m_next;
2097		if (CMSG_SPACE(datalen) < clen) {
2098			clen -= CMSG_SPACE(datalen);
2099			cm = (struct cmsghdr *)
2100			    ((caddr_t)cm + CMSG_SPACE(datalen));
2101		} else {
2102			clen = 0;
2103			cm = NULL;
2104		}
2105	}
2106
2107out:
2108	if (error != 0 && initial_controlp != NULL)
2109		unp_internalize_cleanup_rights(*initial_controlp);
2110	m_freem(control);
2111	return (error);
2112}
2113
2114static struct mbuf *
2115unp_addsockcred(struct thread *td, struct mbuf *control)
2116{
2117	struct mbuf *m, *n, *n_prev;
2118	struct sockcred *sc;
2119	const struct cmsghdr *cm;
2120	int ngroups;
2121	int i;
2122
2123	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
2124	m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET);
2125	if (m == NULL)
2126		return (control);
2127
2128	sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *));
2129	sc->sc_uid = td->td_ucred->cr_ruid;
2130	sc->sc_euid = td->td_ucred->cr_uid;
2131	sc->sc_gid = td->td_ucred->cr_rgid;
2132	sc->sc_egid = td->td_ucred->cr_gid;
2133	sc->sc_ngroups = ngroups;
2134	for (i = 0; i < sc->sc_ngroups; i++)
2135		sc->sc_groups[i] = td->td_ucred->cr_groups[i];
2136
2137	/*
2138	 * Unlink SCM_CREDS control messages (struct cmsgcred), since just
2139	 * created SCM_CREDS control message (struct sockcred) has another
2140	 * format.
2141	 */
2142	if (control != NULL)
2143		for (n = control, n_prev = NULL; n != NULL;) {
2144			cm = mtod(n, struct cmsghdr *);
2145    			if (cm->cmsg_level == SOL_SOCKET &&
2146			    cm->cmsg_type == SCM_CREDS) {
2147    				if (n_prev == NULL)
2148					control = n->m_next;
2149				else
2150					n_prev->m_next = n->m_next;
2151				n = m_free(n);
2152			} else {
2153				n_prev = n;
2154				n = n->m_next;
2155			}
2156		}
2157
2158	/* Prepend it to the head. */
2159	m->m_next = control;
2160	return (m);
2161}
2162
2163static struct unpcb *
2164fptounp(struct file *fp)
2165{
2166	struct socket *so;
2167
2168	if (fp->f_type != DTYPE_SOCKET)
2169		return (NULL);
2170	if ((so = fp->f_data) == NULL)
2171		return (NULL);
2172	if (so->so_proto->pr_domain != &localdomain)
2173		return (NULL);
2174	return sotounpcb(so);
2175}
2176
2177static void
2178unp_discard(struct file *fp)
2179{
2180	struct unp_defer *dr;
2181
2182	if (unp_externalize_fp(fp)) {
2183		dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
2184		dr->ud_fp = fp;
2185		UNP_DEFERRED_LOCK();
2186		SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
2187		UNP_DEFERRED_UNLOCK();
2188		atomic_add_int(&unp_defers_count, 1);
2189		taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
2190	} else
2191		(void) closef(fp, (struct thread *)NULL);
2192}
2193
2194static void
2195unp_process_defers(void *arg __unused, int pending)
2196{
2197	struct unp_defer *dr;
2198	SLIST_HEAD(, unp_defer) drl;
2199	int count;
2200
2201	SLIST_INIT(&drl);
2202	for (;;) {
2203		UNP_DEFERRED_LOCK();
2204		if (SLIST_FIRST(&unp_defers) == NULL) {
2205			UNP_DEFERRED_UNLOCK();
2206			break;
2207		}
2208		SLIST_SWAP(&unp_defers, &drl, unp_defer);
2209		UNP_DEFERRED_UNLOCK();
2210		count = 0;
2211		while ((dr = SLIST_FIRST(&drl)) != NULL) {
2212			SLIST_REMOVE_HEAD(&drl, ud_link);
2213			closef(dr->ud_fp, NULL);
2214			free(dr, M_TEMP);
2215			count++;
2216		}
2217		atomic_add_int(&unp_defers_count, -count);
2218	}
2219}
2220
2221static void
2222unp_internalize_fp(struct file *fp)
2223{
2224	struct unpcb *unp;
2225
2226	UNP_LINK_WLOCK();
2227	if ((unp = fptounp(fp)) != NULL) {
2228		unp->unp_file = fp;
2229		unp->unp_msgcount++;
2230	}
2231	fhold(fp);
2232	unp_rights++;
2233	UNP_LINK_WUNLOCK();
2234}
2235
2236static int
2237unp_externalize_fp(struct file *fp)
2238{
2239	struct unpcb *unp;
2240	int ret;
2241
2242	UNP_LINK_WLOCK();
2243	if ((unp = fptounp(fp)) != NULL) {
2244		unp->unp_msgcount--;
2245		ret = 1;
2246	} else
2247		ret = 0;
2248	unp_rights--;
2249	UNP_LINK_WUNLOCK();
2250	return (ret);
2251}
2252
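/*
 * unp_marked indicates whether additional work has been deferred for a
 * future pass through the marking loop in unp_gc(): it counts the sockets
 * newly flagged as reachable during the current pass.  unp_unreachable
 * counts the sockets flagged as dead during the current pass.  Both are
 * thread local to the garbage collector and do not require explicit
 * synchronization.
 */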
2258static int	unp_marked;
2259static int	unp_unreachable;
2260
2261static void
2262unp_accessable(struct filedescent **fdep, int fdcount)
2263{
2264	struct unpcb *unp;
2265	struct file *fp;
2266	int i;
2267
2268	for (i = 0; i < fdcount; i++) {
2269		fp = fdep[i]->fde_file;
2270		if ((unp = fptounp(fp)) == NULL)
2271			continue;
2272		if (unp->unp_gcflag & UNPGC_REF)
2273			continue;
2274		unp->unp_gcflag &= ~UNPGC_DEAD;
2275		unp->unp_gcflag |= UNPGC_REF;
2276		unp_marked++;
2277	}
2278}
2279
2280static void
2281unp_gc_process(struct unpcb *unp)
2282{
2283	struct socket *soa;
2284	struct socket *so;
2285	struct file *fp;
2286
2287	/* Already processed. */
2288	if (unp->unp_gcflag & UNPGC_SCANNED)
2289		return;
2290	fp = unp->unp_file;
2291
2292	/*
2293	 * Check for a socket potentially in a cycle.  It must be in a
2294	 * queue as indicated by msgcount, and this must equal the file
2295	 * reference count.  Note that when msgcount is 0 the file is NULL.
2296	 */
2297	if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp &&
2298	    unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) {
2299		unp->unp_gcflag |= UNPGC_DEAD;
2300		unp_unreachable++;
2301		return;
2302	}
2303
2304	/*
2305	 * Mark all sockets we reference with RIGHTS.
2306	 */
2307	so = unp->unp_socket;
2308	if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
2309		SOCKBUF_LOCK(&so->so_rcv);
2310		unp_scan(so->so_rcv.sb_mb, unp_accessable);
2311		SOCKBUF_UNLOCK(&so->so_rcv);
2312	}
2313
2314	/*
2315	 * Mark all sockets in our accept queue.
2316	 */
2317	ACCEPT_LOCK();
2318	TAILQ_FOREACH(soa, &so->so_comp, so_list) {
2319		if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
2320			continue;
2321		SOCKBUF_LOCK(&soa->so_rcv);
2322		unp_scan(soa->so_rcv.sb_mb, unp_accessable);
2323		SOCKBUF_UNLOCK(&soa->so_rcv);
2324	}
2325	ACCEPT_UNLOCK();
2326	unp->unp_gcflag |= UNPGC_SCANNED;
2327}
2328
/*
 * Read-only statistics exported under net.local.  unp_recycled is advanced
 * by unp_gc() by the number of files it released in a run.
 */
static int unp_recycled;
SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0,
    "Number of unreachable sockets claimed by the garbage collector.");

/* unp_taskcount is incremented once on every entry to unp_gc(). */
static int unp_taskcount;
SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0,
    "Number of times the garbage collector has run.");
2336
2337static void
2338unp_gc(__unused void *arg, int pending)
2339{
2340	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
2341				    NULL };
2342	struct unp_head **head;
2343	struct file *f, **unref;
2344	struct unpcb *unp;
2345	int i, total;
2346
2347	unp_taskcount++;
2348	UNP_LIST_LOCK();
2349	/*
2350	 * First clear all gc flags from previous runs, apart from
2351	 * UNPGC_IGNORE_RIGHTS.
2352	 */
2353	for (head = heads; *head != NULL; head++)
2354		LIST_FOREACH(unp, *head, unp_link)
2355			unp->unp_gcflag =
2356			    (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
2357
2358	/*
2359	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
2360	 * is reachable all of the sockets it references are reachable.
2361	 * Stop the scan once we do a complete loop without discovering
2362	 * a new reachable socket.
2363	 */
2364	do {
2365		unp_unreachable = 0;
2366		unp_marked = 0;
2367		for (head = heads; *head != NULL; head++)
2368			LIST_FOREACH(unp, *head, unp_link)
2369				unp_gc_process(unp);
2370	} while (unp_marked);
2371	UNP_LIST_UNLOCK();
2372	if (unp_unreachable == 0)
2373		return;
2374
2375	/*
2376	 * Allocate space for a local list of dead unpcbs.
2377	 */
2378	unref = malloc(unp_unreachable * sizeof(struct file *),
2379	    M_TEMP, M_WAITOK);
2380
2381	/*
2382	 * Iterate looking for sockets which have been specifically marked
2383	 * as as unreachable and store them locally.
2384	 */
2385	UNP_LINK_RLOCK();
2386	UNP_LIST_LOCK();
2387	for (total = 0, head = heads; *head != NULL; head++)
2388		LIST_FOREACH(unp, *head, unp_link)
2389			if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
2390				f = unp->unp_file;
2391				if (unp->unp_msgcount == 0 || f == NULL ||
2392				    f->f_count != unp->unp_msgcount)
2393					continue;
2394				unref[total++] = f;
2395				fhold(f);
2396				KASSERT(total <= unp_unreachable,
2397				    ("unp_gc: incorrect unreachable count."));
2398			}
2399	UNP_LIST_UNLOCK();
2400	UNP_LINK_RUNLOCK();
2401
2402	/*
2403	 * Now flush all sockets, free'ing rights.  This will free the
2404	 * struct files associated with these sockets but leave each socket
2405	 * with one remaining ref.
2406	 */
2407	for (i = 0; i < total; i++) {
2408		struct socket *so;
2409
2410		so = unref[i]->f_data;
2411		CURVNET_SET(so->so_vnet);
2412		sorflush(so);
2413		CURVNET_RESTORE();
2414	}
2415
2416	/*
2417	 * And finally release the sockets so they can be reclaimed.
2418	 */
2419	for (i = 0; i < total; i++)
2420		fdrop(unref[i], NULL);
2421	unp_recycled += total;
2422	free(unref, M_TEMP);
2423}
2424
2425static void
2426unp_dispose(struct mbuf *m)
2427{
2428
2429	if (m)
2430		unp_scan(m, unp_freerights);
2431}
2432
2433/*
2434 * Synchronize against unp_gc, which can trip over data as we are freeing it.
2435 */
2436static void
2437unp_dispose_so(struct socket *so)
2438{
2439	struct unpcb *unp;
2440
2441	unp = sotounpcb(so);
2442	UNP_LIST_LOCK();
2443	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
2444	UNP_LIST_UNLOCK();
2445	unp_dispose(so->so_rcv.sb_mb);
2446}
2447
2448static void
2449unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
2450{
2451	struct mbuf *m;
2452	struct cmsghdr *cm;
2453	void *data;
2454	socklen_t clen, datalen;
2455
2456	while (m0 != NULL) {
2457		for (m = m0; m; m = m->m_next) {
2458			if (m->m_type != MT_CONTROL)
2459				continue;
2460
2461			cm = mtod(m, struct cmsghdr *);
2462			clen = m->m_len;
2463
2464			while (cm != NULL) {
2465				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
2466					break;
2467
2468				data = CMSG_DATA(cm);
2469				datalen = (caddr_t)cm + cm->cmsg_len
2470				    - (caddr_t)data;
2471
2472				if (cm->cmsg_level == SOL_SOCKET &&
2473				    cm->cmsg_type == SCM_RIGHTS) {
2474					(*op)(data, datalen /
2475					    sizeof(struct filedescent *));
2476				}
2477
2478				if (CMSG_SPACE(datalen) < clen) {
2479					clen -= CMSG_SPACE(datalen);
2480					cm = (struct cmsghdr *)
2481					    ((caddr_t)cm + CMSG_SPACE(datalen));
2482				} else {
2483					clen = 0;
2484					cm = NULL;
2485				}
2486			}
2487		}
2488		m0 = m0->m_nextpkt;
2489	}
2490}
2491
2492/*
2493 * A helper function called by VFS before socket-type vnode reclamation.
2494 * For an active vnode it clears unp_vnode pointer and decrements unp_vnode
2495 * use count.
2496 */
2497void
2498vfs_unp_reclaim(struct vnode *vp)
2499{
2500	struct socket *so;
2501	struct unpcb *unp;
2502	int active;
2503
2504	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
2505	KASSERT(vp->v_type == VSOCK,
2506	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
2507
2508	active = 0;
2509	UNP_LINK_WLOCK();
2510	VOP_UNP_CONNECT(vp, &so);
2511	if (so == NULL)
2512		goto done;
2513	unp = sotounpcb(so);
2514	if (unp == NULL)
2515		goto done;
2516	UNP_PCB_LOCK(unp);
2517	if (unp->unp_vnode == vp) {
2518		VOP_UNP_DETACH(vp);
2519		unp->unp_vnode = NULL;
2520		active = 1;
2521	}
2522	UNP_PCB_UNLOCK(unp);
2523done:
2524	UNP_LINK_WUNLOCK();
2525	if (active)
2526		vunref(vp);
2527}
2528
2529#ifdef DDB
/*
 * Emit indent spaces on the debugger console; nothing is printed when
 * indent is zero or negative.
 */
static void
db_print_indent(int indent)
{
	int remaining;

	remaining = indent;
	while (remaining > 0) {
		db_printf(" ");
		remaining--;
	}
}
2538
2539static void
2540db_print_unpflags(int unp_flags)
2541{
2542	int comma;
2543
2544	comma = 0;
2545	if (unp_flags & UNP_HAVEPC) {
2546		db_printf("%sUNP_HAVEPC", comma ? ", " : "");
2547		comma = 1;
2548	}
2549	if (unp_flags & UNP_HAVEPCCACHED) {
2550		db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : "");
2551		comma = 1;
2552	}
2553	if (unp_flags & UNP_WANTCRED) {
2554		db_printf("%sUNP_WANTCRED", comma ? ", " : "");
2555		comma = 1;
2556	}
2557	if (unp_flags & UNP_CONNWAIT) {
2558		db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
2559		comma = 1;
2560	}
2561	if (unp_flags & UNP_CONNECTING) {
2562		db_printf("%sUNP_CONNECTING", comma ? ", " : "");
2563		comma = 1;
2564	}
2565	if (unp_flags & UNP_BINDING) {
2566		db_printf("%sUNP_BINDING", comma ? ", " : "");
2567		comma = 1;
2568	}
2569}
2570
2571static void
2572db_print_xucred(int indent, struct xucred *xu)
2573{
2574	int comma, i;
2575
2576	db_print_indent(indent);
2577	db_printf("cr_version: %u   cr_uid: %u   cr_ngroups: %d\n",
2578	    xu->cr_version, xu->cr_uid, xu->cr_ngroups);
2579	db_print_indent(indent);
2580	db_printf("cr_groups: ");
2581	comma = 0;
2582	for (i = 0; i < xu->cr_ngroups; i++) {
2583		db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
2584		comma = 1;
2585	}
2586	db_printf("\n");
2587}
2588
2589static void
2590db_print_unprefs(int indent, struct unp_head *uh)
2591{
2592	struct unpcb *unp;
2593	int counter;
2594
2595	counter = 0;
2596	LIST_FOREACH(unp, uh, unp_reflink) {
2597		if (counter % 4 == 0)
2598			db_print_indent(indent);
2599		db_printf("%p  ", unp);
2600		if (counter % 4 == 3)
2601			db_printf("\n");
2602		counter++;
2603	}
2604	if (counter != 0 && counter % 4 != 0)
2605		db_printf("\n");
2606}
2607
2608DB_SHOW_COMMAND(unpcb, db_show_unpcb)
2609{
2610	struct unpcb *unp;
2611
2612        if (!have_addr) {
2613                db_printf("usage: show unpcb <addr>\n");
2614                return;
2615        }
2616        unp = (struct unpcb *)addr;
2617
2618	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
2619	    unp->unp_vnode);
2620
2621	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
2622	    unp->unp_conn);
2623
2624	db_printf("unp_refs:\n");
2625	db_print_unprefs(2, &unp->unp_refs);
2626
2627	/* XXXRW: Would be nice to print the full address, if any. */
2628	db_printf("unp_addr: %p\n", unp->unp_addr);
2629
2630	db_printf("unp_gencnt: %llu\n",
2631	    (unsigned long long)unp->unp_gencnt);
2632
2633	db_printf("unp_flags: %x (", unp->unp_flags);
2634	db_print_unpflags(unp->unp_flags);
2635	db_printf(")\n");
2636
2637	db_printf("unp_peercred:\n");
2638	db_print_xucred(2, &unp->unp_peercred);
2639
2640	db_printf("unp_refcount: %u\n", unp->unp_refcount);
2641}
2642#endif
2643