uipc_usrreq.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 *	The Regents of the University of California.
6 * Copyright (c) 2004-2009 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34 */
35
36/*
37 * UNIX Domain (Local) Sockets
38 *
39 * This is an implementation of UNIX (local) domain sockets.  Each socket has
40 * an associated struct unpcb (UNIX protocol control block).  Stream sockets
41 * may be connected to 0 or 1 other socket.  Datagram sockets may be
42 * connected to 0, 1, or many other sockets.  Sockets may be created and
43 * connected in pairs (socketpair(2)), or bound/connected to using the file
44 * system name space.  For most purposes, only the receive socket buffer is
45 * used, as sending on one socket delivers directly to the receive socket
46 * buffer of a second socket.
47 *
48 * The implementation is substantially complicated by the fact that
49 * "ancillary data", such as file descriptors or credentials, may be passed
50 * across UNIX domain sockets.  The potential for passing UNIX domain sockets
51 * over other UNIX domain sockets requires the implementation of a simple
52 * garbage collector to find and tear down cycles of disconnected sockets.
53 *
54 * TODO:
55 *	RDM
56 *	rethink name space problems
57 *	need a proper out-of-band
58 */
59
60#include <sys/cdefs.h>
61__FBSDID("$FreeBSD: stable/11/sys/kern/uipc_usrreq.c 330897 2018-03-14 03:19:51Z eadler $");
62
63#include "opt_ddb.h"
64
65#include <sys/param.h>
66#include <sys/capsicum.h>
67#include <sys/domain.h>
68#include <sys/fcntl.h>
69#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
70#include <sys/eventhandler.h>
71#include <sys/file.h>
72#include <sys/filedesc.h>
73#include <sys/kernel.h>
74#include <sys/lock.h>
75#include <sys/mbuf.h>
76#include <sys/mount.h>
77#include <sys/mutex.h>
78#include <sys/namei.h>
79#include <sys/proc.h>
80#include <sys/protosw.h>
81#include <sys/queue.h>
82#include <sys/resourcevar.h>
83#include <sys/rwlock.h>
84#include <sys/socket.h>
85#include <sys/socketvar.h>
86#include <sys/signalvar.h>
87#include <sys/stat.h>
88#include <sys/sx.h>
89#include <sys/sysctl.h>
90#include <sys/systm.h>
91#include <sys/taskqueue.h>
92#include <sys/un.h>
93#include <sys/unpcb.h>
94#include <sys/vnode.h>
95
96#include <net/vnet.h>
97
98#ifdef DDB
99#include <ddb/ddb.h>
100#endif
101
102#include <security/mac/mac_framework.h>
103
104#include <vm/uma.h>
105
106MALLOC_DECLARE(M_FILECAPS);
107
108/*
109 * Locking key:
110 * (l)	Locked using list lock
111 * (g)	Locked using linkage lock
112 */
113
114static uma_zone_t	unp_zone;
115static unp_gen_t	unp_gencnt;	/* (l) */
116static u_int		unp_count;	/* (l) Count of local sockets. */
117static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
118static int		unp_rights;	/* (g) File descriptors in flight. */
119static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
120static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
121static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
122
123struct unp_defer {
124	SLIST_ENTRY(unp_defer) ud_link;
125	struct file *ud_fp;
126};
127static SLIST_HEAD(, unp_defer) unp_defers;
128static int unp_defers_count;
129
130static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
131
132/*
133 * Garbage collection of cyclic file descriptor/socket references occurs
134 * asynchronously in a taskqueue context in order to avoid recursion and
135 * reentrance in the UNIX domain socket, file descriptor, and socket layer
136 * code.  See unp_gc() for a full description.
137 */
138static struct timeout_task unp_gc_task;
139
140/*
141 * The close of unix domain sockets attached as SCM_RIGHTS is
142 * postponed to the taskqueue, to avoid arbitrary recursion depth.
143 * The attached sockets might have another sockets attached.
144 */
145static struct task	unp_defer_task;
146
147/*
148 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
149 * stream sockets, although the total for sender and receiver is actually
150 * only PIPSIZ.
151 *
152 * Datagram sockets really use the sendspace as the maximum datagram size,
153 * and don't really want to reserve the sendspace.  Their recvspace should be
154 * large enough for at least one max-size datagram plus address.
155 */
156#ifndef PIPSIZ
157#define	PIPSIZ	8192
158#endif
159static u_long	unpst_sendspace = PIPSIZ;
160static u_long	unpst_recvspace = PIPSIZ;
161static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
162static u_long	unpdg_recvspace = 4*1024;
163static u_long	unpsp_sendspace = PIPSIZ;	/* really max datagram size */
164static u_long	unpsp_recvspace = PIPSIZ;
165
166static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
167static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0,
168    "SOCK_STREAM");
169static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");
170static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0,
171    "SOCK_SEQPACKET");
172
173SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
174	   &unpst_sendspace, 0, "Default stream send space.");
175SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
176	   &unpst_recvspace, 0, "Default stream receive space.");
177SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
178	   &unpdg_sendspace, 0, "Default datagram send space.");
179SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
180	   &unpdg_recvspace, 0, "Default datagram receive space.");
181SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
182	   &unpsp_sendspace, 0, "Default seqpacket send space.");
183SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
184	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
185SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
186    "File descriptors in flight.");
187SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
188    &unp_defers_count, 0,
189    "File descriptors deferred to taskqueue for close.");
190
191/*
192 * Locking and synchronization:
193 *
194 * Three types of locks exist in the local domain socket implementation: a
195 * global list mutex, a global linkage rwlock, and per-unpcb mutexes.  Of the
196 * global locks, the list lock protects the socket count, global generation
197 * number, and stream/datagram global lists.  The linkage lock protects the
198 * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
199 * held exclusively over the acquisition of multiple unpcb locks to prevent
200 * deadlock.
201 *
202 * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
203 * allocated in pru_attach() and freed in pru_detach().  The validity of that
204 * pointer is an invariant, so no lock is required to dereference the so_pcb
205 * pointer if a valid socket reference is held by the caller.  In practice,
206 * this is always true during operations performed on a socket.  Each unpcb
207 * has a back-pointer to its socket, unp_socket, which will be stable under
208 * the same circumstances.
209 *
210 * This pointer may only be safely dereferenced as long as a valid reference
211 * to the unpcb is held.  Typically, this reference will be from the socket,
212 * or from another unpcb when the referring unpcb's lock is held (in order
213 * that the reference not be invalidated during use).  For example, to follow
214 * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn,
215 * as unp_socket remains valid as long as the reference to unp_conn is valid.
216 *
217 * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx.  Individual
218 * atomic reads without the lock may be performed "lockless", but more
219 * complex reads and read-modify-writes require the mutex to be held.  No
220 * lock order is defined between unpcb locks -- multiple unpcb locks may be
221 * acquired at the same time only when holding the linkage rwlock
222 * exclusively, which prevents deadlocks.
223 *
224 * Blocking with UNIX domain sockets is a tricky issue: unlike most network
225 * protocols, bind() is a non-atomic operation, and connect() requires
226 * potential sleeping in the protocol, due to potentially waiting on local or
227 * distributed file systems.  We try to separate "lookup" operations, which
228 * may sleep, and the IPC operations themselves, which typically can occur
229 * with relative atomicity as locks can be held over the entire operation.
230 *
231 * Another tricky issue is simultaneous multi-threaded or multi-process
232 * access to a single UNIX domain socket.  These are handled by the flags
233 * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
234 * binding, both of which involve dropping UNIX domain socket locks in order
235 * to perform namei() and other file system operations.
236 */
237static struct rwlock	unp_link_rwlock;
238static struct mtx	unp_list_lock;
239static struct mtx	unp_defers_lock;
240
241#define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
242					    "unp_link_rwlock")
243
244#define	UNP_LINK_LOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
245					    RA_LOCKED)
246#define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
247					    RA_UNLOCKED)
248
249#define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
250#define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
251#define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
252#define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
253#define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
254					    RA_WLOCKED)
255
256#define	UNP_LIST_LOCK_INIT()		mtx_init(&unp_list_lock,	\
257					    "unp_list_lock", NULL, MTX_DEF)
258#define	UNP_LIST_LOCK()			mtx_lock(&unp_list_lock)
259#define	UNP_LIST_UNLOCK()		mtx_unlock(&unp_list_lock)
260
261#define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
262					    "unp_defer", NULL, MTX_DEF)
263#define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
264#define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
265
266#define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
267					    "unp_mtx", "unp_mtx",	\
268					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
269#define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
270#define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
271#define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
272#define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
273
274static int	uipc_connect2(struct socket *, struct socket *);
275static int	uipc_ctloutput(struct socket *, struct sockopt *);
276static int	unp_connect(struct socket *, struct sockaddr *,
277		    struct thread *);
278static int	unp_connectat(int, struct socket *, struct sockaddr *,
279		    struct thread *);
280static int	unp_connect2(struct socket *so, struct socket *so2, int);
281static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
282static void	unp_dispose(struct mbuf *);
283static void	unp_dispose_so(struct socket *so);
284static void	unp_shutdown(struct unpcb *);
285static void	unp_drop(struct unpcb *);
286static void	unp_gc(__unused void *, int);
287static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
288static void	unp_discard(struct file *);
289static void	unp_freerights(struct filedescent **, int);
290static void	unp_init(void);
291static int	unp_internalize(struct mbuf **, struct thread *);
292static void	unp_internalize_fp(struct file *);
293static int	unp_externalize(struct mbuf *, struct mbuf **, int);
294static int	unp_externalize_fp(struct file *);
295static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);
296static void	unp_process_defers(void * __unused, int);
297
298/*
299 * Definitions of protocols supported in the LOCAL domain.
300 */
301static struct domain localdomain;
302static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
303static struct pr_usrreqs uipc_usrreqs_seqpacket;
/*
 * Protocol switch entries for the three local-domain socket types.
 * Each entry selects the per-type pr_usrreqs table; all share
 * uipc_ctloutput() for socket-option handling.
 */
static struct protosw localsw[] = {
{
	.pr_type =		SOCK_STREAM,
	.pr_domain =		&localdomain,
	/* PR_RIGHTS: file descriptors may be passed as ancillary data. */
	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_usrreqs =		&uipc_usrreqs_stream
},
{
	.pr_type =		SOCK_DGRAM,
	.pr_domain =		&localdomain,
	/* PR_ATOMIC|PR_ADDR: record-oriented with per-record addresses. */
	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_usrreqs =		&uipc_usrreqs_dgram
},
{
	.pr_type =		SOCK_SEQPACKET,
	.pr_domain =		&localdomain,

	/*
	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
	 * due to our use of sbappendaddr.  A new sbappend variant is needed
	 * that supports both atomic record writes and control data.
	 */
	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
				    PR_RIGHTS,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_usrreqs =		&uipc_usrreqs_seqpacket,
},
};
334
/*
 * Domain definition for AF_LOCAL.  dom_externalize/dom_dispose handle
 * SCM_RIGHTS file descriptors received or discarded with socket buffers;
 * unp_init() runs at domain registration time.
 */
static struct domain localdomain = {
	.dom_family =		AF_LOCAL,
	.dom_name =		"local",
	.dom_init =		unp_init,
	.dom_externalize =	unp_externalize,
	.dom_dispose =		unp_dispose_so,
	.dom_protosw =		localsw,
	.dom_protoswNPROTOSW =	&localsw[nitems(localsw)]
};
DOMAIN_SET(local);
345
/*
 * pru_abort: forcibly terminate the connection on 'so'.
 *
 * If a peer is connected, drop it via unp_drop() (defined later in this
 * file; presumably disconnects the peer and records an error on it —
 * confirm against its definition).  The linkage write lock is held so
 * unp_conn cannot change while both pcb locks are taken.
 */
static void
uipc_abort(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));

	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		/* Both pcb locks may be held: we own the linkage lock. */
		UNP_PCB_LOCK(unp2);
		unp_drop(unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_LINK_WUNLOCK();
}
365
/*
 * pru_accept: report the peer's bound name for accept(2).
 *
 * Always succeeds; if the peer is gone or was never bound, the empty
 * name sun_noname is returned instead.  The result buffer is allocated
 * with M_WAITOK *before* any lock is taken, since we must not sleep
 * while holding the linkage lock.
 */
static int
uipc_accept(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp, *unp2;
	const struct sockaddr *sa;

	/*
	 * Pass back name of connected socket, if it was bound and we are
	 * still connected (our peer may have closed already!).
	 */
	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_LINK_RLOCK();
	unp2 = unp->unp_conn;
	if (unp2 != NULL && unp2->unp_addr != NULL) {
		/*
		 * NOTE(review): unp2->unp_addr is read before unp2's pcb
		 * lock is taken; the linkage read lock keeps unp_conn
		 * stable here, and the copy itself is made under the lock.
		 */
		UNP_PCB_LOCK(unp2);
		sa = (struct sockaddr *) unp2->unp_addr;
		bcopy(sa, *nam, sa->sa_len);
		UNP_PCB_UNLOCK(unp2);
	} else {
		/* No peer or peer unbound: hand back the empty local name. */
		sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
	}
	UNP_LINK_RUNLOCK();
	return (0);
}
394
/*
 * pru_attach: allocate and initialize the unpcb for a newly created
 * local-domain socket, reserve default socket-buffer space, and insert
 * the pcb on the global list for its socket type.
 *
 * Returns 0 on success, ENOBUFS if the pcb zone is exhausted, or an
 * error from soreserve().
 */
static int
uipc_attach(struct socket *so, int proto, struct thread *td)
{
	u_long sendspace, recvspace;
	struct unpcb *unp;
	int error;

	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		/* Pick default buffer sizes by type (sysctl-tunable). */
		switch (so->so_type) {
		case SOCK_STREAM:
			sendspace = unpst_sendspace;
			recvspace = unpst_recvspace;
			break;

		case SOCK_DGRAM:
			sendspace = unpdg_sendspace;
			recvspace = unpdg_recvspace;
			break;

		case SOCK_SEQPACKET:
			sendspace = unpsp_sendspace;
			recvspace = unpsp_recvspace;
			break;

		default:
			panic("uipc_attach");
		}
		error = soreserve(so, sendspace, recvspace);
		if (error)
			return (error);
	}
	/* M_NOWAIT: attach may be called in contexts where sleeping is bad. */
	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	LIST_INIT(&unp->unp_refs);
	UNP_PCB_LOCK_INIT(unp);
	unp->unp_socket = so;
	so->so_pcb = unp;
	unp->unp_refcount = 1;
	/*
	 * A non-NULL so_head means this socket sits on a listen queue;
	 * mark it nascent so uipc_detach() can take a shortened teardown
	 * path for sockets that were never fully accepted.
	 */
	if (so->so_head != NULL)
		unp->unp_flags |= UNP_NASCENT;

	/* Publish the pcb on the per-type global list under the list lock. */
	UNP_LIST_LOCK();
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	switch (so->so_type) {
	case SOCK_STREAM:
		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
		break;

	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
		break;

	case SOCK_SEQPACKET:
		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
		break;

	default:
		panic("uipc_attach");
	}
	UNP_LIST_UNLOCK();

	return (0);
}
461
/*
 * pru_bindat: bind a file-system name to a local-domain socket on behalf
 * of bind(2)/bindat(2).  Creates a VSOCK vnode at the requested path;
 * an existing path yields EADDRINUSE.  Re-binding an already-bound
 * socket is not permitted (EINVAL), and concurrent bind attempts on the
 * same socket are rejected with EALREADY via the UNP_BINDING flag.
 */
static int
uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vattr vattr;
	int error, namelen;
	struct nameidata nd;
	struct unpcb *unp;
	struct vnode *vp;
	struct mount *mp;
	cap_rights_t rights;
	char *buf;

	if (nam->sa_family != AF_UNIX)
		return (EAFNOSUPPORT);

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));

	if (soun->sun_len > sizeof(struct sockaddr_un))
		return (EINVAL);
	/* Reject empty path names. */
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	/*
	 * We don't allow simultaneous bind() calls on a single UNIX domain
	 * socket, so flag in-progress operations, and return an error if an
	 * operation is already in progress.
	 *
	 * Historically, we have not allowed a socket to be rebound, so this
	 * also returns an error.  Not allowing re-binding simplifies the
	 * implementation and avoids a great many possible failure modes.
	 */
	UNP_PCB_LOCK(unp);
	if (unp->unp_vnode != NULL) {
		UNP_PCB_UNLOCK(unp);
		return (EINVAL);
	}
	if (unp->unp_flags & UNP_BINDING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
	}
	unp->unp_flags |= UNP_BINDING;
	UNP_PCB_UNLOCK(unp);

	/* NUL-terminated copy of the path for namei(); freed on all exits. */
	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

restart:
	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error)
		goto error;
	vp = nd.ni_vp;
	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Either the name already exists (vp != NULL) or the file
		 * system could not be entered for writing without sleeping.
		 * Release whatever namei() handed us first.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp != NULL) {
			vrele(vp);
			error = EADDRINUSE;
			goto error;
		}
		/* Wait (interruptibly) for the mount, then retry lookup. */
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error)
			goto error;
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
#endif
	/* Without MAC, error is still 0 here from the successful namei(). */
	if (error == 0)
		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (error) {
		vn_finished_write(mp);
		goto error;
	}
	vp = nd.ni_vp;
	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);

	/* Publish the binding: link vnode and pcb under both locks. */
	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	VOP_UNP_BIND(vp, unp->unp_socket);
	unp->unp_vnode = vp;
	unp->unp_addr = soun;
	unp->unp_flags &= ~UNP_BINDING;
	UNP_PCB_UNLOCK(unp);
	UNP_LINK_WUNLOCK();
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	free(buf, M_TEMP);
	return (0);

error:
	/* Clear the in-progress flag so a later bind() may be attempted. */
	UNP_PCB_LOCK(unp);
	unp->unp_flags &= ~UNP_BINDING;
	UNP_PCB_UNLOCK(unp);
	free(buf, M_TEMP);
	return (error);
}
575
576static int
577uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
578{
579
580	return (uipc_bindat(AT_FDCWD, so, nam, td));
581}
582
/*
 * pru_connect: connect(2) entry point.  The linkage lock is taken
 * exclusively because unp_connect() may interconnect two pcbs, and
 * dropped again before returning (unp_connect() may sleep in namei(),
 * which is handled inside unp_connect itself).
 */
static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
	UNP_LINK_WLOCK();
	error = unp_connect(so, nam, td);
	UNP_LINK_WUNLOCK();
	return (error);
}
594
/*
 * pru_connectat: connectat(2) entry point; identical to uipc_connect()
 * but resolves the path relative to the directory descriptor 'fd'.
 */
static int
uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
    struct thread *td)
{
	int error;

	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
	UNP_LINK_WLOCK();
	error = unp_connectat(fd, so, nam, td);
	UNP_LINK_WUNLOCK();
	return (error);
}
607
/*
 * pru_close: disconnect from the peer, if any, when the socket is
 * closed.  The linkage write lock allows holding both pcb locks at
 * once for unp_disconnect().
 */
static void
uipc_close(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));

	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_LINK_WUNLOCK();
}
627
/*
 * pru_connect2: connect a pair of already-created sockets, as used by
 * socketpair(2).  Both pcb locks are held simultaneously, which is
 * permitted because the linkage lock is held exclusively.
 */
static int
uipc_connect2(struct socket *so1, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int error;

	UNP_LINK_WLOCK();
	unp = so1->so_pcb;
	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
	UNP_PCB_LOCK(unp);
	unp2 = so2->so_pcb;
	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
	UNP_PCB_LOCK(unp2);
	error = unp_connect2(so1, so2, PRU_CONNECT2);
	UNP_PCB_UNLOCK(unp2);
	UNP_PCB_UNLOCK(unp);
	UNP_LINK_WUNLOCK();
	return (error);
}
647
/*
 * pru_detach: tear down a unpcb when its socket is destroyed.
 *
 * The pcb is first removed from the global list; then, unless the
 * socket was still nascent (never accepted from a listen queue, so it
 * cannot be bound or connected), the vnode binding, peer connection and
 * any connections referring to us are dismantled under the linkage
 * write lock.  Finally the pcb itself is freed once its reference
 * count drops to zero, and the GC task is kicked if descriptors are
 * still in flight.
 */
static void
uipc_detach(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct sockaddr_un *saved_unp_addr;
	struct vnode *vp;
	int freeunp, local_unp_rights;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));

	vp = NULL;
	local_unp_rights = 0;

	UNP_LIST_LOCK();
	LIST_REMOVE(unp, unp_link);
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;
	UNP_LIST_UNLOCK();

	/* Nascent sockets have no vnode, peer, or refs: short teardown. */
	if ((unp->unp_flags & UNP_NASCENT) != 0) {
		UNP_PCB_LOCK(unp);
		goto teardown;
	}
	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);

	/*
	 * XXXRW: Should assert vp->v_socket == so.
	 */
	if ((vp = unp->unp_vnode) != NULL) {
		VOP_UNP_DETACH(vp);
		unp->unp_vnode = NULL;
	}
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}

	/*
	 * We hold the linkage lock exclusively, so it's OK to acquire
	 * multiple pcb locks at a time.
	 */
	while (!LIST_EMPTY(&unp->unp_refs)) {
		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);

		UNP_PCB_LOCK(ref);
		unp_drop(ref);
		UNP_PCB_UNLOCK(ref);
	}
	/* Snapshot in-flight descriptor count for the GC decision below. */
	local_unp_rights = unp_rights;
	UNP_LINK_WUNLOCK();
teardown:
	unp->unp_socket->so_pcb = NULL;
	saved_unp_addr = unp->unp_addr;
	unp->unp_addr = NULL;
	unp->unp_refcount--;
	freeunp = (unp->unp_refcount == 0);
	if (saved_unp_addr != NULL)
		free(saved_unp_addr, M_SONAME);
	if (freeunp) {
		UNP_PCB_LOCK_DESTROY(unp);
		uma_zfree(unp_zone, unp);
	} else
		UNP_PCB_UNLOCK(unp);
	/* vrele() may sleep; it is deferred until all locks are dropped. */
	if (vp)
		vrele(vp);
	/* Descriptors still in flight: schedule garbage collection. */
	if (local_unp_rights)
		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
}
720
/*
 * pru_disconnect: break the connection to the peer, if any.  Always
 * returns 0; disconnecting an unconnected socket is a no-op.
 */
static int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));

	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_LINK_WUNLOCK();
	return (0);
}
741
/*
 * pru_listen: put a bound socket into the listening state.
 *
 * Listening requires a prior bind(2): with no vnode we return EINVAL
 * for a connected socket or EDESTADDRREQ for an unbound one.  On
 * success the caller's credentials are cached in the pcb so they can
 * be passed to sockets later accepted from this listener.
 */
static int
uipc_listen(struct socket *so, int backlog, struct thread *td)
{
	struct unpcb *unp;
	int error;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));

	UNP_PCB_LOCK(unp);
	if (unp->unp_vnode == NULL) {
		/* Already connected or not bound to an address. */
		error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
		UNP_PCB_UNLOCK(unp);
		return (error);
	}

	/* solisten_proto_check()/solisten_proto() run under the sock lock. */
	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	if (error == 0) {
		cru2x(td->td_ucred, &unp->unp_peercred);
		unp->unp_flags |= UNP_HAVEPCCACHED;
		solisten_proto(so, backlog);
	}
	SOCK_UNLOCK(so);
	UNP_PCB_UNLOCK(unp);
	return (error);
}
770
/*
 * pru_peeraddr: report the peer's bound name for getpeername(2).
 * Falls back to the empty name sun_noname when there is no peer or the
 * peer is unbound.  As in uipc_accept(), the buffer is allocated
 * before any lock is taken so we never sleep under the linkage lock.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp, *unp2;
	const struct sockaddr *sa;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_LINK_RLOCK();
	/*
	 * XXX: It seems that this test always fails even when connection is
	 * established.  So, this else clause is added as workaround to
	 * return PF_LOCAL sockaddr.
	 */
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		if (unp2->unp_addr != NULL)
			sa = (struct sockaddr *) unp2->unp_addr;
		else
			sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
		UNP_PCB_UNLOCK(unp2);
	} else {
		sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
	}
	UNP_LINK_RUNLOCK();
	return (0);
}
803
/*
 * pru_rcvd: called after the receiver has consumed data, to relieve
 * backpressure on the connected sender.  Clears SB_STOP on the peer's
 * send buffer when the receive buffer has drained below its limits,
 * and wakes any writers.  Only meaningful for the connection-oriented
 * types (stream/seqpacket).
 */
static int
uipc_rcvd(struct socket *so, int flags)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;
	u_int mbcnt, sbcc;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
	    ("%s: socktype %d", __func__, so->so_type));

	/*
	 * Adjust backpressure on sender and wakeup any waiting to write.
	 *
	 * The unp lock is acquired to maintain the validity of the unp_conn
	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
	 * static as long as we don't permit unp2 to disconnect from unp,
	 * which is prevented by the lock on unp.  We cache values from
	 * so_rcv to avoid holding the so_rcv lock over the entire
	 * transaction on the remote so_snd.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	mbcnt = so->so_rcv.sb_mbcnt;
	sbcc = sbavail(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_rcv);
	/*
	 * There is a benign race condition at this point.  If we're planning to
	 * clear SB_STOP, but uipc_send is called on the connected socket at
	 * this instant, it might add data to the sockbuf and set SB_STOP.  Then
	 * we would erroneously clear SB_STOP below, even though the sockbuf is
	 * full.  The race is benign because the only ill effect is to allow the
	 * sockbuf to exceed its size limit, and the size limits are not
	 * strictly guaranteed anyway.
	 */
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 == NULL) {
		/* Peer already disconnected: nothing to relieve. */
		UNP_PCB_UNLOCK(unp);
		return (0);
	}
	so2 = unp2->unp_socket;
	SOCKBUF_LOCK(&so2->so_snd);
	if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
		so2->so_snd.sb_flags &= ~SB_STOP;
	/* sowwakeup_locked() releases so2's send-buffer lock for us. */
	sowwakeup_locked(so2);
	UNP_PCB_UNLOCK(unp);
	return (0);
}
853
854static int
855uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
856    struct mbuf *control, struct thread *td)
857{
858	struct unpcb *unp, *unp2;
859	struct socket *so2;
860	u_int mbcnt, sbcc;
861	int error = 0;
862
863	unp = sotounpcb(so);
864	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
865	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
866	    so->so_type == SOCK_SEQPACKET,
867	    ("%s: socktype %d", __func__, so->so_type));
868
869	if (flags & PRUS_OOB) {
870		error = EOPNOTSUPP;
871		goto release;
872	}
873	if (control != NULL && (error = unp_internalize(&control, td)))
874		goto release;
875	if ((nam != NULL) || (flags & PRUS_EOF))
876		UNP_LINK_WLOCK();
877	else
878		UNP_LINK_RLOCK();
879	switch (so->so_type) {
880	case SOCK_DGRAM:
881	{
882		const struct sockaddr *from;
883
884		unp2 = unp->unp_conn;
885		if (nam != NULL) {
886			UNP_LINK_WLOCK_ASSERT();
887			if (unp2 != NULL) {
888				error = EISCONN;
889				break;
890			}
891			error = unp_connect(so, nam, td);
892			if (error)
893				break;
894			unp2 = unp->unp_conn;
895		}
896
897		/*
898		 * Because connect() and send() are non-atomic in a sendto()
899		 * with a target address, it's possible that the socket will
900		 * have disconnected before the send() can run.  In that case
901		 * return the slightly counter-intuitive but otherwise
902		 * correct error that the socket is not connected.
903		 */
904		if (unp2 == NULL) {
905			error = ENOTCONN;
906			break;
907		}
908		/* Lockless read. */
909		if (unp2->unp_flags & UNP_WANTCRED)
910			control = unp_addsockcred(td, control);
911		UNP_PCB_LOCK(unp);
912		if (unp->unp_addr != NULL)
913			from = (struct sockaddr *)unp->unp_addr;
914		else
915			from = &sun_noname;
916		so2 = unp2->unp_socket;
917		SOCKBUF_LOCK(&so2->so_rcv);
918		if (sbappendaddr_locked(&so2->so_rcv, from, m,
919		    control)) {
920			sorwakeup_locked(so2);
921			m = NULL;
922			control = NULL;
923		} else {
924			SOCKBUF_UNLOCK(&so2->so_rcv);
925			error = ENOBUFS;
926		}
927		if (nam != NULL) {
928			UNP_LINK_WLOCK_ASSERT();
929			UNP_PCB_LOCK(unp2);
930			unp_disconnect(unp, unp2);
931			UNP_PCB_UNLOCK(unp2);
932		}
933		UNP_PCB_UNLOCK(unp);
934		break;
935	}
936
937	case SOCK_SEQPACKET:
938	case SOCK_STREAM:
939		if ((so->so_state & SS_ISCONNECTED) == 0) {
940			if (nam != NULL) {
941				UNP_LINK_WLOCK_ASSERT();
942				error = unp_connect(so, nam, td);
943				if (error)
944					break;	/* XXX */
945			} else {
946				error = ENOTCONN;
947				break;
948			}
949		}
950
951		/* Lockless read. */
952		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
953			error = EPIPE;
954			break;
955		}
956
957		/*
958		 * Because connect() and send() are non-atomic in a sendto()
959		 * with a target address, it's possible that the socket will
960		 * have disconnected before the send() can run.  In that case
961		 * return the slightly counter-intuitive but otherwise
962		 * correct error that the socket is not connected.
963		 *
964		 * Locking here must be done carefully: the linkage lock
965		 * prevents interconnections between unpcbs from changing, so
966		 * we can traverse from unp to unp2 without acquiring unp's
967		 * lock.  Socket buffer locks follow unpcb locks, so we can
968		 * acquire both remote and lock socket buffer locks.
969		 */
970		unp2 = unp->unp_conn;
971		if (unp2 == NULL) {
972			error = ENOTCONN;
973			break;
974		}
975		so2 = unp2->unp_socket;
976		UNP_PCB_LOCK(unp2);
977		SOCKBUF_LOCK(&so2->so_rcv);
978		if (unp2->unp_flags & UNP_WANTCRED) {
979			/*
980			 * Credentials are passed only once on SOCK_STREAM
981			 * and SOCK_SEQPACKET.
982			 */
983			unp2->unp_flags &= ~UNP_WANTCRED;
984			control = unp_addsockcred(td, control);
985		}
986		/*
987		 * Send to paired receive port, and then reduce send buffer
988		 * hiwater marks to maintain backpressure.  Wake up readers.
989		 */
990		switch (so->so_type) {
991		case SOCK_STREAM:
992			if (control != NULL) {
993				if (sbappendcontrol_locked(&so2->so_rcv, m,
994				    control))
995					control = NULL;
996			} else
997				sbappend_locked(&so2->so_rcv, m, flags);
998			break;
999
1000		case SOCK_SEQPACKET: {
1001			const struct sockaddr *from;
1002
1003			from = &sun_noname;
1004			/*
1005			 * Don't check for space available in so2->so_rcv.
1006			 * Unix domain sockets only check for space in the
1007			 * sending sockbuf, and that check is performed one
1008			 * level up the stack.
1009			 */
1010			if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
1011				from, m, control))
1012				control = NULL;
1013			break;
1014			}
1015		}
1016
1017		mbcnt = so2->so_rcv.sb_mbcnt;
1018		sbcc = sbavail(&so2->so_rcv);
1019		if (sbcc)
1020			sorwakeup_locked(so2);
1021		else
1022			SOCKBUF_UNLOCK(&so2->so_rcv);
1023
1024		/*
1025		 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
1026		 * it would be possible for uipc_rcvd to be called at this
1027		 * point, drain the receiving sockbuf, clear SB_STOP, and then
1028		 * we would set SB_STOP below.  That could lead to an empty
1029		 * sockbuf having SB_STOP set
1030		 */
1031		SOCKBUF_LOCK(&so->so_snd);
1032		if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
1033			so->so_snd.sb_flags |= SB_STOP;
1034		SOCKBUF_UNLOCK(&so->so_snd);
1035		UNP_PCB_UNLOCK(unp2);
1036		m = NULL;
1037		break;
1038	}
1039
1040	/*
1041	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
1042	 */
1043	if (flags & PRUS_EOF) {
1044		UNP_PCB_LOCK(unp);
1045		socantsendmore(so);
1046		unp_shutdown(unp);
1047		UNP_PCB_UNLOCK(unp);
1048	}
1049
1050	if ((nam != NULL) || (flags & PRUS_EOF))
1051		UNP_LINK_WUNLOCK();
1052	else
1053		UNP_LINK_RUNLOCK();
1054
1055	if (control != NULL && error != 0)
1056		unp_dispose(control);
1057
1058release:
1059	if (control != NULL)
1060		m_freem(control);
1061	/*
1062	 * In case of PRUS_NOTREADY, uipc_ready() is responsible
1063	 * for freeing memory.
1064	 */
1065	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
1066		m_freem(m);
1067	return (error);
1068}
1069
/*
 * pru_ready implementation: mark mbufs previously appended to the peer's
 * receive buffer with PRUS_NOTREADY as ready to be read.
 */
static int
uipc_ready(struct socket *so, struct mbuf *m, int count)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;
	int error;

	unp = sotounpcb(so);

	UNP_LINK_RLOCK();
	if ((unp2 = unp->unp_conn) == NULL) {
		UNP_LINK_RUNLOCK();
		/* Peer is gone; free the not-yet-ready mbufs ourselves. */
		for (int i = 0; i < count; i++)
			m = m_free(m);
		return (ECONNRESET);
	}
	UNP_PCB_LOCK(unp2);
	so2 = unp2->unp_socket;

	SOCKBUF_LOCK(&so2->so_rcv);
	/* On success the wakeup consumes the sockbuf lock. */
	if ((error = sbready(&so2->so_rcv, m, count)) == 0)
		sorwakeup_locked(so2);
	else
		SOCKBUF_UNLOCK(&so2->so_rcv);

	UNP_PCB_UNLOCK(unp2);
	UNP_LINK_RUNLOCK();

	return (error);
}
1100
/*
 * pru_sense implementation: fill in struct stat fields for fstat(2) on a
 * UNIX domain socket.
 */
static int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));

	sb->st_blksize = so->so_snd.sb_hiwat;
	UNP_PCB_LOCK(unp);
	sb->st_dev = NODEV;
	/*
	 * Lazily assign a fake inode number from the global counter,
	 * skipping 0 so an assigned value is never mistaken for "unset".
	 */
	if (unp->unp_ino == 0)
		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
	sb->st_ino = unp->unp_ino;
	UNP_PCB_UNLOCK(unp);
	return (0);
}
1118
/*
 * pru_shutdown implementation: disallow further sends on this socket and
 * let the connected peer (if any) know that no more data will arrive.
 */
static int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));

	/* The link write lock lets unp_shutdown() safely reach the peer. */
	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	socantsendmore(so);
	unp_shutdown(unp);
	UNP_PCB_UNLOCK(unp);
	UNP_LINK_WUNLOCK();
	return (0);
}
1135
1136static int
1137uipc_sockaddr(struct socket *so, struct sockaddr **nam)
1138{
1139	struct unpcb *unp;
1140	const struct sockaddr *sa;
1141
1142	unp = sotounpcb(so);
1143	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
1144
1145	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
1146	UNP_PCB_LOCK(unp);
1147	if (unp->unp_addr != NULL)
1148		sa = (struct sockaddr *) unp->unp_addr;
1149	else
1150		sa = &sun_noname;
1151	bcopy(sa, *nam, sa->sa_len);
1152	UNP_PCB_UNLOCK(unp);
1153	return (0);
1154}
1155
/* Protocol user-request switch for SOCK_DGRAM local sockets. */
static struct pr_usrreqs uipc_usrreqs_dgram = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_bindat =		uipc_bindat,
	.pru_connect =		uipc_connect,
	.pru_connectat =	uipc_connectat,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_dgram,
	.pru_close =		uipc_close,
};
1177
/* Protocol user-request switch for SOCK_SEQPACKET local sockets. */
static struct pr_usrreqs uipc_usrreqs_seqpacket = {
	.pru_abort =		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_bindat =		uipc_bindat,
	.pru_connect =		uipc_connect,
	.pru_connectat =	uipc_connectat,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_generic,	/* XXX: or...? */
	.pru_close =		uipc_close,
};
1199
/*
 * Protocol user-request switch for SOCK_STREAM local sockets; the only
 * one of the three switches providing pru_ready (PRUS_NOTREADY support).
 */
static struct pr_usrreqs uipc_usrreqs_stream = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_bindat =		uipc_bindat,
	.pru_connect =		uipc_connect,
	.pru_connectat =	uipc_connectat,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_ready =		uipc_ready,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_generic,
	.pru_close =		uipc_close,
};
1222
/*
 * Handle get/setsockopt(2) at level 0 (SOL_LOCAL): the LOCAL_PEERCRED
 * query and the LOCAL_CREDS / LOCAL_CONNWAIT flag options.
 */
static int
uipc_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct unpcb *unp;
	struct xucred xu;
	int error, optval;

	if (sopt->sopt_level != 0)
		return (EINVAL);

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
	error = 0;
	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			/*
			 * Peer credentials are only valid once UNP_HAVEPC
			 * is set; copy them out under the pcb lock.
			 */
			UNP_PCB_LOCK(unp);
			if (unp->unp_flags & UNP_HAVEPC)
				xu = unp->unp_peercred;
			else {
				if (so->so_type == SOCK_STREAM)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			UNP_PCB_UNLOCK(unp);
			if (error == 0)
				error = sooptcopyout(sopt, &xu, sizeof(xu));
			break;

		case LOCAL_CREDS:
			/* Unlocked read. */
			optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		case LOCAL_CONNWAIT:
			/* Unlocked read. */
			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		default:
			error = EOPNOTSUPP;
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case LOCAL_CREDS:
		case LOCAL_CONNWAIT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
					    sizeof(optval));
			if (error)
				break;

/* Set or clear a pcb flag bit, based on optval, under the pcb lock. */
#define	OPTSET(bit) do {						\
	UNP_PCB_LOCK(unp);						\
	if (optval)							\
		unp->unp_flags |= bit;					\
	else								\
		unp->unp_flags &= ~bit;					\
	UNP_PCB_UNLOCK(unp);						\
} while (0)

			switch (sopt->sopt_name) {
			case LOCAL_CREDS:
				OPTSET(UNP_WANTCRED);
				break;

			case LOCAL_CONNWAIT:
				OPTSET(UNP_CONNWAIT);
				break;

			default:
				break;
			}
			break;
#undef	OPTSET
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}
1316
/*
 * Connect a socket to the UNIX domain address in nam, resolving the
 * pathname relative to the current working directory (thin wrapper
 * around unp_connectat()).
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (unp_connectat(AT_FDCWD, so, nam, td));
}
1323
/*
 * Connect socket so to the UNIX domain socket bound at the pathname in
 * nam, resolved relative to directory descriptor fd (AT_FDCWD for the
 * working directory).  For connection-oriented types a server-side
 * socket is spawned with sonewconn() and peer credentials are exchanged.
 * Entered with the linkage write lock held; it is dropped across the
 * filesystem lookup and reacquired afterwards.
 */
static int
unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
    struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;
	cap_rights_t rights;
	int error, len;

	if (nam->sa_family != AF_UNIX)
		return (EAFNOSUPPORT);

	UNP_LINK_WLOCK_ASSERT();

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

	/* Extract and NUL-terminate the pathname from the sockaddr. */
	if (nam->sa_len > sizeof(struct sockaddr_un))
		return (EINVAL);
	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	UNP_PCB_LOCK(unp);
	if (unp->unp_flags & UNP_CONNECTING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
	}
	/*
	 * Drop the linkage lock across the sleeping namei() lookup below;
	 * UNP_CONNECTING serializes concurrent connect attempts meanwhile.
	 */
	UNP_LINK_WUNLOCK();
	unp->unp_flags |= UNP_CONNECTING;
	UNP_PCB_UNLOCK(unp);

	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
	error = namei(&nd);
	if (error)
		vp = NULL;
	else
		vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_connect");
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error)
		goto bad;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
	if (error)
		goto bad;
#endif
	/* Connecting requires write access to the socket's vnode. */
	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
	if (error)
		goto bad;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

	/*
	 * Lock linkage lock for two reasons: make sure v_socket is stable,
	 * and to protect simultaneous locking of multiple pcbs.
	 */
	UNP_LINK_WLOCK();
	VOP_UNP_CONNECT(vp, &so2);
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad2;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad2;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if (so2->so_options & SO_ACCEPTCONN) {
			CURVNET_SET(so2->so_vnet);
			so3 = sonewconn(so2, 0);
			CURVNET_RESTORE();
		} else
			so3 = NULL;
		if (so3 == NULL) {
			error = ECONNREFUSED;
			goto bad2;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		UNP_PCB_LOCK(unp);
		UNP_PCB_LOCK(unp2);
		UNP_PCB_LOCK(unp3);
		/* Give the spawned connection the listener's bound address. */
		if (unp2->unp_addr != NULL) {
			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
			unp3->unp_addr = (struct sockaddr_un *) sa;
			sa = NULL;
		}

		/*
		 * The connector's (client's) credentials are copied from its
		 * process structure at the time of connect() (which is now).
		 */
		cru2x(td->td_ucred, &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;

		/*
		 * The receiver's (server's) credentials are copied from the
		 * unp_peercred member of socket on which the former called
		 * listen(); uipc_listen() cached that process's credentials
		 * at that time so we can use them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;
		if (unp2->unp_flags & UNP_WANTCRED)
			unp3->unp_flags |= UNP_WANTCRED;
		UNP_PCB_UNLOCK(unp3);
		UNP_PCB_UNLOCK(unp2);
		UNP_PCB_UNLOCK(unp);
#ifdef MAC
		mac_socketpeer_set_from_socket(so, so3);
		mac_socketpeer_set_from_socket(so3, so);
#endif

		/* Complete the handshake against the spawned server socket. */
		so2 = so3;
	}
	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
	unp2 = sotounpcb(so2);
	KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
	UNP_PCB_LOCK(unp);
	UNP_PCB_LOCK(unp2);
	error = unp_connect2(so, so2, PRU_CONNECT);
	UNP_PCB_UNLOCK(unp2);
	UNP_PCB_UNLOCK(unp);
bad2:
	UNP_LINK_WUNLOCK();
bad:
	if (vp != NULL)
		vput(vp);
	free(sa, M_SONAME);
	/* Reacquire the locks only to clear the in-progress flag. */
	UNP_LINK_WLOCK();
	UNP_PCB_LOCK(unp);
	unp->unp_flags &= ~UNP_CONNECTING;
	UNP_PCB_UNLOCK(unp);
	return (error);
}
1480
/*
 * Wire together the pcbs of two sockets of the same type.  The caller
 * holds the linkage write lock and both pcb locks (asserted below).
 * SOCK_DGRAM connections are one-way (unp is added to unp2's reference
 * list); SOCK_STREAM/SOCK_SEQPACKET connections are symmetric.
 */
static int
unp_connect2(struct socket *so, struct socket *so2, int req)
{
	struct unpcb *unp;
	struct unpcb *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
	unp2 = sotounpcb(so2);
	KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));

	UNP_LINK_WLOCK_ASSERT();
	UNP_PCB_LOCK_ASSERT(unp);
	UNP_PCB_LOCK_ASSERT(unp2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2->unp_flags &= ~UNP_NASCENT;
	unp->unp_conn = unp2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		/*
		 * With LOCAL_CONNWAIT set on either side, a connect(2)
		 * leaves the initiating socket in the connecting state
		 * instead of marking it connected immediately.
		 */
		if (req == PRU_CONNECT &&
		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
			soisconnecting(so);
		else
			soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}
1523
/*
 * Break the connection between unp and its peer unp2.  The caller holds
 * the linkage write lock and both pcb locks (asserted below).
 */
static void
unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
{
	struct socket *so;

	KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL"));

	UNP_LINK_WLOCK_ASSERT();
	UNP_PCB_LOCK_ASSERT(unp);
	UNP_PCB_LOCK_ASSERT(unp2);

	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		/* Datagram links are one-way: only unp is on unp2's list. */
		LIST_REMOVE(unp, unp_reflink);
		so = unp->unp_socket;
		SOCK_LOCK(so);
		so->so_state &= ~SS_ISCONNECTED;
		SOCK_UNLOCK(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Connection-oriented links are symmetric; clear both ends. */
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}
1553
1554/*
1555 * unp_pcblist() walks the global list of struct unpcb's to generate a
1556 * pointer list, bumping the refcount on each unpcb.  It then copies them out
1557 * sequentially, validating the generation number on each to see if it has
1558 * been detached.  All of this is necessary because copyout() may sleep on
1559 * disk I/O.
1560 */
static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n;
	int freeunp;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen *xug;
	struct unp_head *head;
	struct xunpcb *xu;

	/* arg1 selects which per-type pcb list to export. */
	switch ((intptr_t)arg1) {
	case SOCK_STREAM:
		head = &unp_shead;
		break;

	case SOCK_DGRAM:
		head = &unp_dhead;
		break;

	case SOCK_SEQPACKET:
		head = &unp_sphead;
		break;

	default:
		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
	}

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = unp_count;
		req->oldidx = 2 * (sizeof *xug)
			+ (n + n/8) * sizeof(struct xunpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
	UNP_LIST_LOCK();
	gencnt = unp_gencnt;
	n = unp_count;
	UNP_LIST_UNLOCK();

	xug->xug_len = sizeof *xug;
	xug->xug_count = n;
	xug->xug_gen = gencnt;
	xug->xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, xug, sizeof *xug);
	if (error) {
		free(xug, M_TEMP);
		return (error);
	}

	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);

	UNP_LIST_LOCK();
	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
	     unp = LIST_NEXT(unp, unp_link)) {
		UNP_PCB_LOCK(unp);
		if (unp->unp_gencnt <= gencnt) {
			/* Skip sockets the requester may not see. */
			if (cr_cansee(req->td->td_ucred,
			    unp->unp_socket->so_cred)) {
				UNP_PCB_UNLOCK(unp);
				continue;
			}
			/* Hold a reference so the pcb survives the copyout. */
			unp_list[i++] = unp;
			unp->unp_refcount++;
		}
		UNP_PCB_UNLOCK(unp);
	}
	UNP_LIST_UNLOCK();
	n = i;			/* In case we lost some during malloc. */

	error = 0;
	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		UNP_PCB_LOCK(unp);
		unp->unp_refcount--;
	        if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) {
			xu->xu_len = sizeof *xu;
			xu->xu_unpp = unp;
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr != NULL)
				bcopy(unp->unp_addr, &xu->xu_addr,
				      unp->unp_addr->sun_len);
			if (unp->unp_conn != NULL &&
			    unp->unp_conn->unp_addr != NULL)
				bcopy(unp->unp_conn->unp_addr,
				      &xu->xu_caddr,
				      unp->unp_conn->unp_addr->sun_len);
			bcopy(unp, &xu->xu_unp, sizeof *unp);
			sotoxsocket(unp->unp_socket, &xu->xu_socket);
			UNP_PCB_UNLOCK(unp);
			error = SYSCTL_OUT(req, xu, sizeof *xu);
		} else {
			/*
			 * The pcb was detached while we held our reference;
			 * if ours was the last reference, free it here.
			 */
			freeunp = (unp->unp_refcount == 0);
			UNP_PCB_UNLOCK(unp);
			if (freeunp) {
				UNP_PCB_LOCK_DESTROY(unp);
				uma_zfree(unp_zone, unp);
			}
		}
	}
	free(xu, M_TEMP);
	if (!error) {
		/*
		 * Give the user an updated idea of our state.  If the
		 * generation differs from what we told her before, she knows
		 * that something happened while we were processing this
		 * request, and it might be necessary to retry.
		 */
		xug->xug_gen = unp_gencnt;
		xug->xug_sogen = so_gencnt;
		xug->xug_count = unp_count;
		error = SYSCTL_OUT(req, xug, sizeof *xug);
	}
	free(unp_list, M_TEMP);
	free(xug, M_TEMP);
	return (error);
}
1693
/* Export the pcb list of each local socket type via sysctl. */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
    (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
    (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
    CTLTYPE_OPAQUE | CTLFLAG_RD,
    (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
    "List of active local seqpacket sockets");
1704
1705static void
1706unp_shutdown(struct unpcb *unp)
1707{
1708	struct unpcb *unp2;
1709	struct socket *so;
1710
1711	UNP_LINK_WLOCK_ASSERT();
1712	UNP_PCB_LOCK_ASSERT(unp);
1713
1714	unp2 = unp->unp_conn;
1715	if ((unp->unp_socket->so_type == SOCK_STREAM ||
1716	    (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
1717		so = unp2->unp_socket;
1718		if (so != NULL)
1719			socantrcvmore(so);
1720	}
1721}
1722
/*
 * Drop a socket's connection because its peer is aborting or being torn
 * down: post ECONNRESET on the socket and disconnect it from the peer.
 * Called with the linkage write lock and unp's pcb lock held (asserted).
 */
static void
unp_drop(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct unpcb *unp2;

	UNP_LINK_WLOCK_ASSERT();
	UNP_PCB_LOCK_ASSERT(unp);

	/*
	 * Regardless of whether the socket's peer dropped the connection
	 * with this socket by aborting or disconnecting, POSIX requires
	 * that ECONNRESET is returned.
	 */
	so->so_error = ECONNRESET;
	unp2 = unp->unp_conn;
	if (unp2 == NULL)
		return;
	UNP_PCB_LOCK(unp2);
	unp_disconnect(unp, unp2);
	UNP_PCB_UNLOCK(unp2);
}
1745
1746static void
1747unp_freerights(struct filedescent **fdep, int fdcount)
1748{
1749	struct file *fp;
1750	int i;
1751
1752	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
1753
1754	for (i = 0; i < fdcount; i++) {
1755		fp = fdep[i]->fde_file;
1756		filecaps_free(&fdep[i]->fde_caps);
1757		unp_discard(fp);
1758	}
1759	free(fdep[0], M_FILECAPS);
1760}
1761
/*
 * Externalize a chain of control messages arriving at a receiving
 * process: convert in-kernel SCM_RIGHTS payloads (arrays of struct
 * filedescent pointers) into file descriptors installed in the
 * receiver's descriptor table, copying every other control message
 * through unchanged.  When controlp is NULL the messages are consumed
 * and any rights are discarded.  Always frees "control".
 */
static int
unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
	struct thread *td = curthread;		/* XXX */
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int i;
	int *fdp;
	struct filedesc *fdesc = td->td_proc->p_fd;
	struct filedescent **fdep;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, newfds;
	u_int newlen;

	UNP_LINK_UNLOCK_ASSERT();

	error = 0;
	if (controlp != NULL) /* controlp == NULL => free control messages */
		*controlp = NULL;
	while (cm != NULL) {
		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
			error = EINVAL;
			break;
		}
		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
		if (cm->cmsg_level == SOL_SOCKET
		    && cm->cmsg_type == SCM_RIGHTS) {
			newfds = datalen / sizeof(*fdep);
			if (newfds == 0)
				goto next;
			fdep = data;

			/* If we're not outputting the descriptors free them. */
			if (error || controlp == NULL) {
				unp_freerights(fdep, newfds);
				goto next;
			}
			FILEDESC_XLOCK(fdesc);

			/*
			 * Now change each pointer to an fd in the global
			 * table to an integer that is the index to the local
			 * fd table entry that we set up to point to the
			 * global one we are transferring.
			 */
			newlen = newfds * sizeof(int);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_XUNLOCK(fdesc);
				error = E2BIG;
				unp_freerights(fdep, newfds);
				goto next;
			}

			fdp = (int *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			/* Reserve descriptor slots for all the files at once. */
			if (fdallocn(td, 0, fdp, newfds) != 0) {
				FILEDESC_XUNLOCK(fdesc);
				error = EMSGSIZE;
				unp_freerights(fdep, newfds);
				m_freem(*controlp);
				*controlp = NULL;
				goto next;
			}
			for (i = 0; i < newfds; i++, fdp++) {
				_finstall(fdesc, fdep[i]->fde_file, *fdp,
				    (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
				    &fdep[i]->fde_caps);
				unp_externalize_fp(fdep[i]->fde_file);
			}
			FILEDESC_XUNLOCK(fdesc);
			free(fdep[0], M_FILECAPS);
		} else {
			/* We can just copy anything else across. */
			if (error || controlp == NULL)
				goto next;
			*controlp = sbcreatecontrol(NULL, datalen,
			    cm->cmsg_type, cm->cmsg_level);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto next;
			}
			bcopy(data,
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
			    datalen);
		}
		controlp = &(*controlp)->m_next;

next:
		/* Advance to the next control message in the buffer. */
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

	m_freem(control);
	return (error);
}
1866
/*
 * maxsockets_change event handler: propagate the new kern.ipc.maxsockets
 * value to the unpcb zone limit.
 */
static void
unp_zone_change(void *tag)
{

	uma_zone_set_max(unp_zone, maxsockets);
}
1873
/*
 * One-time initialization of the UNIX domain socket subsystem: create
 * the unpcb zone, initialize the global pcb lists, the deferred-close
 * list, the garbage collector tasks, and the global locks.
 */
static void
unp_init(void)
{

#ifdef VIMAGE
	/* All state here is global; initialize only in the default vnet. */
	if (!IS_DEFAULT_VNET(curvnet))
		return;
#endif
	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	if (unp_zone == NULL)
		panic("unp_init");
	uma_zone_set_max(unp_zone, maxsockets);
	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
	/* Track future changes to maxsockets in the zone limit. */
	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
	    NULL, EVENTHANDLER_PRI_ANY);
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
	LIST_INIT(&unp_sphead);
	SLIST_INIT(&unp_defers);
	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
	UNP_LINK_LOCK_INIT();
	UNP_LIST_LOCK_INIT();
	UNP_DEFERRED_LOCK_INIT();
}
1900
/*
 * Internalize a chain of control messages being sent on a local socket:
 * build kernel-side equivalents for SCM_CREDS (sender credentials),
 * SCM_RIGHTS (integer fds become held struct filedescent pointers),
 * SCM_TIMESTAMP and SCM_BINTIME, rejecting any other type or level.
 * On success the converted chain is left in *controlp; the input chain
 * is always freed.
 */
static int
unp_internalize(struct mbuf **controlp, struct thread *td)
{
	struct mbuf *control = *controlp;
	struct proc *p = td->td_proc;
	struct filedesc *fdesc = p->p_fd;
	struct bintime *bt;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct cmsgcred *cmcred;
	struct filedescent *fde, **fdep, *fdev;
	struct file *fp;
	struct timeval *tv;
	int i, *fdp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, oldfds;
	u_int newlen;

	UNP_LINK_UNLOCK_ASSERT();

	error = 0;
	*controlp = NULL;
	while (cm != NULL) {
		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
		    || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
			error = EINVAL;
			goto out;
		}
		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		switch (cm->cmsg_type) {
		/*
		 * Fill in credential information.
		 */
		case SCM_CREDS:
			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
			    SCM_CREDS, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}
			cmcred = (struct cmsgcred *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			cmcred->cmcred_pid = p->p_pid;
			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
			cmcred->cmcred_euid = td->td_ucred->cr_uid;
			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
			    CMGROUP_MAX);
			for (i = 0; i < cmcred->cmcred_ngroups; i++)
				cmcred->cmcred_groups[i] =
				    td->td_ucred->cr_groups[i];
			break;

		case SCM_RIGHTS:
			oldfds = datalen / sizeof (int);
			if (oldfds == 0)
				break;
			/*
			 * Check that all the FDs passed in refer to legal
			 * files.  If not, reject the entire operation.
			 */
			fdp = data;
			FILEDESC_SLOCK(fdesc);
			for (i = 0; i < oldfds; i++, fdp++) {
				fp = fget_locked(fdesc, *fdp);
				if (fp == NULL) {
					FILEDESC_SUNLOCK(fdesc);
					error = EBADF;
					goto out;
				}
				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
					FILEDESC_SUNLOCK(fdesc);
					error = EOPNOTSUPP;
					goto out;
				}

			}

			/*
			 * Now replace the integer FDs with pointers to the
			 * file structure and capability rights.
			 */
			newlen = oldfds * sizeof(fdep[0]);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_SUNLOCK(fdesc);
				error = E2BIG;
				goto out;
			}
			fdp = data;
			fdep = (struct filedescent **)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			/*
			 * A single contiguous allocation backs all the
			 * entries; unp_freerights() frees it via fdep[0].
			 */
			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
			    M_WAITOK);
			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
				fde = &fdesc->fd_ofiles[*fdp];
				fdep[i] = fdev;
				fdep[i]->fde_file = fde->fde_file;
				filecaps_copy(&fde->fde_caps,
				    &fdep[i]->fde_caps, true);
				unp_internalize_fp(fdep[i]->fde_file);
			}
			FILEDESC_SUNLOCK(fdesc);
			break;

		case SCM_TIMESTAMP:
			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
			    SCM_TIMESTAMP, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}
			tv = (struct timeval *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			microtime(tv);
			break;

		case SCM_BINTIME:
			*controlp = sbcreatecontrol(NULL, sizeof(*bt),
			    SCM_BINTIME, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}
			bt = (struct bintime *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			bintime(bt);
			break;

		default:
			error = EINVAL;
			goto out;
		}

		/* Advance to the next control message in the buffer. */
		controlp = &(*controlp)->m_next;
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

out:
	m_freem(control);
	return (error);
}
2053
/*
 * Prepend an SCM_CREDS control message (struct sockcred) carrying td's
 * credentials to the control chain, removing any sender-supplied
 * SCM_CREDS messages, which use the incompatible struct cmsgcred layout.
 * Returns the new chain head; if allocation fails the original chain is
 * returned unchanged.
 */
static struct mbuf *
unp_addsockcred(struct thread *td, struct mbuf *control)
{
	struct mbuf *m, *n, *n_prev;
	struct sockcred *sc;
	const struct cmsghdr *cm;
	int ngroups;
	int i;

	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
	m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET);
	if (m == NULL)
		return (control);

	sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *));
	sc->sc_uid = td->td_ucred->cr_ruid;
	sc->sc_euid = td->td_ucred->cr_uid;
	sc->sc_gid = td->td_ucred->cr_rgid;
	sc->sc_egid = td->td_ucred->cr_gid;
	sc->sc_ngroups = ngroups;
	for (i = 0; i < sc->sc_ngroups; i++)
		sc->sc_groups[i] = td->td_ucred->cr_groups[i];

	/*
	 * Unlink SCM_CREDS control messages (struct cmsgcred), since just
	 * created SCM_CREDS control message (struct sockcred) has another
	 * format.
	 */
	if (control != NULL)
		for (n = control, n_prev = NULL; n != NULL;) {
			cm = mtod(n, struct cmsghdr *);
    			if (cm->cmsg_level == SOL_SOCKET &&
			    cm->cmsg_type == SCM_CREDS) {
    				if (n_prev == NULL)
					control = n->m_next;
				else
					n_prev->m_next = n->m_next;
				n = m_free(n);
			} else {
				n_prev = n;
				n = n->m_next;
			}
		}

	/* Prepend it to the head. */
	m->m_next = control;
	return (m);
}
2102
2103static struct unpcb *
2104fptounp(struct file *fp)
2105{
2106	struct socket *so;
2107
2108	if (fp->f_type != DTYPE_SOCKET)
2109		return (NULL);
2110	if ((so = fp->f_data) == NULL)
2111		return (NULL);
2112	if (so->so_proto->pr_domain != &localdomain)
2113		return (NULL);
2114	return sotounpcb(so);
2115}
2116
/*
 * Release a file reference held by an in-flight SCM_RIGHTS message.  If
 * the file is itself a UNIX domain socket (unp_externalize_fp() returns
 * non-zero), queue it for the deferred-close task; otherwise close it
 * directly.
 */
static void
unp_discard(struct file *fp)
{
	struct unp_defer *dr;

	if (unp_externalize_fp(fp)) {
		dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
		dr->ud_fp = fp;
		UNP_DEFERRED_LOCK();
		SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
		UNP_DEFERRED_UNLOCK();
		atomic_add_int(&unp_defers_count, 1);
		taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
	} else
		(void) closef(fp, (struct thread *)NULL);
}
2133
/*
 * Task handler that drains the deferred-close list populated by
 * unp_discard(), closing each queued file and maintaining the
 * unp_defers_count accounting.
 */
static void
unp_process_defers(void *arg __unused, int pending)
{
	struct unp_defer *dr;
	SLIST_HEAD(, unp_defer) drl;
	int count;

	SLIST_INIT(&drl);
	for (;;) {
		UNP_DEFERRED_LOCK();
		if (SLIST_FIRST(&unp_defers) == NULL) {
			UNP_DEFERRED_UNLOCK();
			break;
		}
		/* Steal the whole list so the closes run unlocked. */
		SLIST_SWAP(&unp_defers, &drl, unp_defer);
		UNP_DEFERRED_UNLOCK();
		count = 0;
		while ((dr = SLIST_FIRST(&drl)) != NULL) {
			SLIST_REMOVE_HEAD(&drl, ud_link);
			closef(dr->ud_fp, NULL);
			free(dr, M_TEMP);
			count++;
		}
		atomic_add_int(&unp_defers_count, -count);
	}
}
2160
/*
 * Account for a file being placed in flight inside an SCM_RIGHTS control
 * message: take a hold on the file and bump the global in-flight rights
 * count.  If the file is itself a local-domain socket, also record the
 * back-pointer and per-socket in-flight message count used by unp_gc().
 */
static void
unp_internalize_fp(struct file *fp)
{
	struct unpcb *unp;

	UNP_LINK_WLOCK();
	if ((unp = fptounp(fp)) != NULL) {
		/* Remember the file so the garbage collector can find it. */
		unp->unp_file = fp;
		unp->unp_msgcount++;
	}
	fhold(fp);
	unp_rights++;
	UNP_LINK_WUNLOCK();
}
2175
2176static int
2177unp_externalize_fp(struct file *fp)
2178{
2179	struct unpcb *unp;
2180	int ret;
2181
2182	UNP_LINK_WLOCK();
2183	if ((unp = fptounp(fp)) != NULL) {
2184		unp->unp_msgcount--;
2185		ret = 1;
2186	} else
2187		ret = 0;
2188	unp_rights--;
2189	UNP_LINK_WUNLOCK();
2190	return (ret);
2191}
2192
2193/*
2194 * unp_defer indicates whether additional work has been defered for a future
2195 * pass through unp_gc().  It is thread local and does not require explicit
2196 * synchronization.
2197 */
2198static int	unp_marked;
2199static int	unp_unreachable;
2200
2201static void
2202unp_accessable(struct filedescent **fdep, int fdcount)
2203{
2204	struct unpcb *unp;
2205	struct file *fp;
2206	int i;
2207
2208	for (i = 0; i < fdcount; i++) {
2209		fp = fdep[i]->fde_file;
2210		if ((unp = fptounp(fp)) == NULL)
2211			continue;
2212		if (unp->unp_gcflag & UNPGC_REF)
2213			continue;
2214		unp->unp_gcflag &= ~UNPGC_DEAD;
2215		unp->unp_gcflag |= UNPGC_REF;
2216		unp_marked++;
2217	}
2218}
2219
/*
 * One unp_gc() marking step for a single unpcb: either flag the socket as
 * a potential cycle member (UNPGC_DEAD) or scan the rights queued in its
 * receive buffer(s), marking every referenced socket reachable via
 * unp_accessable().  Called with UNP_LIST_LOCK held by unp_gc().
 */
static void
unp_gc_process(struct unpcb *unp)
{
	struct socket *soa;
	struct socket *so;
	struct file *fp;

	/* Already processed. */
	if (unp->unp_gcflag & UNPGC_SCANNED)
		return;
	fp = unp->unp_file;

	/*
	 * Check for a socket potentially in a cycle.  It must be in a
	 * queue as indicated by msgcount, and this must equal the file
	 * reference count.  Note that when msgcount is 0 the file is NULL.
	 */
	if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp &&
	    unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) {
		/* Tentatively dead; a later pass may clear this via REF. */
		unp->unp_gcflag |= UNPGC_DEAD;
		unp_unreachable++;
		return;
	}

	/*
	 * Mark all sockets we reference with RIGHTS.  Skipped when the
	 * socket is being torn down (see unp_dispose_so()).
	 */
	so = unp->unp_socket;
	if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
		SOCKBUF_LOCK(&so->so_rcv);
		unp_scan(so->so_rcv.sb_mb, unp_accessable);
		SOCKBUF_UNLOCK(&so->so_rcv);
	}

	/*
	 * Mark all sockets in our accept queue.
	 */
	ACCEPT_LOCK();
	TAILQ_FOREACH(soa, &so->so_comp, so_list) {
		if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
			continue;
		SOCKBUF_LOCK(&soa->so_rcv);
		unp_scan(soa->so_rcv.sb_mb, unp_accessable);
		SOCKBUF_UNLOCK(&soa->so_rcv);
	}
	ACCEPT_UNLOCK();
	unp->unp_gcflag |= UNPGC_SCANNED;
}
2268
/* Garbage-collector statistics exported under the net.local sysctl tree. */
static int unp_recycled;
SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0,
    "Number of unreachable sockets claimed by the garbage collector.");

static int unp_taskcount;
SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0,
    "Number of times the garbage collector has run.");
2276
/*
 * Garbage collector for in-flight rights: finds cycles of local-domain
 * sockets that are referenced only by SCM_RIGHTS messages queued on each
 * other (and hence can never be closed by userland), then flushes those
 * sockets so their files can be reclaimed.  Runs as a task (signature
 * matches a taskqueue handler).
 */
static void
unp_gc(__unused void *arg, int pending)
{
	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
				    NULL };
	struct unp_head **head;
	struct file *f, **unref;
	struct unpcb *unp;
	int i, total;

	unp_taskcount++;
	UNP_LIST_LOCK();
	/*
	 * First clear all gc flags from previous runs, apart from
	 * UNPGC_IGNORE_RIGHTS.
	 */
	for (head = heads; *head != NULL; head++)
		LIST_FOREACH(unp, *head, unp_link)
			unp->unp_gcflag =
			    (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);

	/*
	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
	 * is reachable all of the sockets it references are reachable.
	 * Stop the scan once we do a complete loop without discovering
	 * a new reachable socket.
	 */
	do {
		unp_unreachable = 0;
		unp_marked = 0;
		for (head = heads; *head != NULL; head++)
			LIST_FOREACH(unp, *head, unp_link)
				unp_gc_process(unp);
	} while (unp_marked);
	UNP_LIST_UNLOCK();
	if (unp_unreachable == 0)
		return;

	/*
	 * Allocate space for a local list of dead unpcbs.
	 */
	unref = malloc(unp_unreachable * sizeof(struct file *),
	    M_TEMP, M_WAITOK);

	/*
	 * Iterate looking for sockets which have been specifically marked
	 * as unreachable and store them locally.
	 */
	UNP_LINK_RLOCK();
	UNP_LIST_LOCK();
	for (total = 0, head = heads; *head != NULL; head++)
		LIST_FOREACH(unp, *head, unp_link)
			if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
				/* Re-check the cycle condition under the locks. */
				f = unp->unp_file;
				if (unp->unp_msgcount == 0 || f == NULL ||
				    f->f_count != unp->unp_msgcount)
					continue;
				unref[total++] = f;
				fhold(f);
				KASSERT(total <= unp_unreachable,
				    ("unp_gc: incorrect unreachable count."));
			}
	UNP_LIST_UNLOCK();
	UNP_LINK_RUNLOCK();

	/*
	 * Now flush all sockets, free'ing rights.  This will free the
	 * struct files associated with these sockets but leave each socket
	 * with one remaining ref.
	 */
	for (i = 0; i < total; i++) {
		struct socket *so;

		so = unref[i]->f_data;
		CURVNET_SET(so->so_vnet);
		sorflush(so);
		CURVNET_RESTORE();
	}

	/*
	 * And finally release the sockets so they can be reclaimed.
	 */
	for (i = 0; i < total; i++)
		fdrop(unref[i], NULL);
	unp_recycled += total;
	free(unref, M_TEMP);
}
2364
2365static void
2366unp_dispose(struct mbuf *m)
2367{
2368
2369	if (m)
2370		unp_scan(m, unp_freerights);
2371}
2372
2373/*
2374 * Synchronize against unp_gc, which can trip over data as we are freeing it.
2375 */
2376static void
2377unp_dispose_so(struct socket *so)
2378{
2379	struct unpcb *unp;
2380
2381	unp = sotounpcb(so);
2382	UNP_LIST_LOCK();
2383	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
2384	UNP_LIST_UNLOCK();
2385	unp_dispose(so->so_rcv.sb_mb);
2386}
2387
/*
 * Walk every MT_CONTROL mbuf in every packet of the chain m0 and invoke
 * *op on the filedescent array of each SCM_RIGHTS control message found.
 * Used both to mark reachable sockets (unp_accessable) and to release
 * rights (unp_freerights).
 */
static void
unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
{
	struct mbuf *m;
	struct cmsghdr *cm;
	void *data;
	socklen_t clen, datalen;

	while (m0 != NULL) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;

			cm = mtod(m, struct cmsghdr *);
			clen = m->m_len;

			/* Multiple cmsgs may be packed into one mbuf. */
			while (cm != NULL) {
				/* Stop on a truncated header or payload. */
				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
					break;

				/*
				 * NOTE(review): assumes cmsg_len >=
				 * sizeof(*cm), i.e. headers were validated
				 * at internalize time — confirm.
				 */
				data = CMSG_DATA(cm);
				datalen = (caddr_t)cm + cm->cmsg_len
				    - (caddr_t)data;

				if (cm->cmsg_level == SOL_SOCKET &&
				    cm->cmsg_type == SCM_RIGHTS) {
					(*op)(data, datalen /
					    sizeof(struct filedescent *));
				}

				/* Advance to the next aligned cmsghdr. */
				if (CMSG_SPACE(datalen) < clen) {
					clen -= CMSG_SPACE(datalen);
					cm = (struct cmsghdr *)
					    ((caddr_t)cm + CMSG_SPACE(datalen));
				} else {
					clen = 0;
					cm = NULL;
				}
			}
		}
		m0 = m0->m_nextpkt;
	}
}
2431
2432/*
2433 * A helper function called by VFS before socket-type vnode reclamation.
2434 * For an active vnode it clears unp_vnode pointer and decrements unp_vnode
2435 * use count.
2436 */
2437void
2438vfs_unp_reclaim(struct vnode *vp)
2439{
2440	struct socket *so;
2441	struct unpcb *unp;
2442	int active;
2443
2444	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
2445	KASSERT(vp->v_type == VSOCK,
2446	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
2447
2448	active = 0;
2449	UNP_LINK_WLOCK();
2450	VOP_UNP_CONNECT(vp, &so);
2451	if (so == NULL)
2452		goto done;
2453	unp = sotounpcb(so);
2454	if (unp == NULL)
2455		goto done;
2456	UNP_PCB_LOCK(unp);
2457	if (unp->unp_vnode == vp) {
2458		VOP_UNP_DETACH(vp);
2459		unp->unp_vnode = NULL;
2460		active = 1;
2461	}
2462	UNP_PCB_UNLOCK(unp);
2463done:
2464	UNP_LINK_WUNLOCK();
2465	if (active)
2466		vunref(vp);
2467}
2468
2469#ifdef DDB
/* Emit `indent' spaces to position subsequent DDB output. */
static void
db_print_indent(int indent)
{
	int n;

	n = indent;
	while (n-- > 0)
		db_printf(" ");
}
2478
2479static void
2480db_print_unpflags(int unp_flags)
2481{
2482	int comma;
2483
2484	comma = 0;
2485	if (unp_flags & UNP_HAVEPC) {
2486		db_printf("%sUNP_HAVEPC", comma ? ", " : "");
2487		comma = 1;
2488	}
2489	if (unp_flags & UNP_HAVEPCCACHED) {
2490		db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : "");
2491		comma = 1;
2492	}
2493	if (unp_flags & UNP_WANTCRED) {
2494		db_printf("%sUNP_WANTCRED", comma ? ", " : "");
2495		comma = 1;
2496	}
2497	if (unp_flags & UNP_CONNWAIT) {
2498		db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
2499		comma = 1;
2500	}
2501	if (unp_flags & UNP_CONNECTING) {
2502		db_printf("%sUNP_CONNECTING", comma ? ", " : "");
2503		comma = 1;
2504	}
2505	if (unp_flags & UNP_BINDING) {
2506		db_printf("%sUNP_BINDING", comma ? ", " : "");
2507		comma = 1;
2508	}
2509}
2510
2511static void
2512db_print_xucred(int indent, struct xucred *xu)
2513{
2514	int comma, i;
2515
2516	db_print_indent(indent);
2517	db_printf("cr_version: %u   cr_uid: %u   cr_ngroups: %d\n",
2518	    xu->cr_version, xu->cr_uid, xu->cr_ngroups);
2519	db_print_indent(indent);
2520	db_printf("cr_groups: ");
2521	comma = 0;
2522	for (i = 0; i < xu->cr_ngroups; i++) {
2523		db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
2524		comma = 1;
2525	}
2526	db_printf("\n");
2527}
2528
2529static void
2530db_print_unprefs(int indent, struct unp_head *uh)
2531{
2532	struct unpcb *unp;
2533	int counter;
2534
2535	counter = 0;
2536	LIST_FOREACH(unp, uh, unp_reflink) {
2537		if (counter % 4 == 0)
2538			db_print_indent(indent);
2539		db_printf("%p  ", unp);
2540		if (counter % 4 == 3)
2541			db_printf("\n");
2542		counter++;
2543	}
2544	if (counter != 0 && counter % 4 != 0)
2545		db_printf("\n");
2546}
2547
2548DB_SHOW_COMMAND(unpcb, db_show_unpcb)
2549{
2550	struct unpcb *unp;
2551
2552        if (!have_addr) {
2553                db_printf("usage: show unpcb <addr>\n");
2554                return;
2555        }
2556        unp = (struct unpcb *)addr;
2557
2558	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
2559	    unp->unp_vnode);
2560
2561	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
2562	    unp->unp_conn);
2563
2564	db_printf("unp_refs:\n");
2565	db_print_unprefs(2, &unp->unp_refs);
2566
2567	/* XXXRW: Would be nice to print the full address, if any. */
2568	db_printf("unp_addr: %p\n", unp->unp_addr);
2569
2570	db_printf("unp_gencnt: %llu\n",
2571	    (unsigned long long)unp->unp_gencnt);
2572
2573	db_printf("unp_flags: %x (", unp->unp_flags);
2574	db_print_unpflags(unp->unp_flags);
2575	db_printf(")\n");
2576
2577	db_printf("unp_peercred:\n");
2578	db_print_xucred(2, &unp->unp_peercred);
2579
2580	db_printf("unp_refcount: %u\n", unp->unp_refcount);
2581}
2582#endif
2583