uipc_usrreq.c revision 183563
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004-2008 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 32 */ 33 34/* 35 * UNIX Domain (Local) Sockets 36 * 37 * This is an implementation of UNIX (local) domain sockets. Each socket has 38 * an associated struct unpcb (UNIX protocol control block). 
Stream sockets 39 * may be connected to 0 or 1 other socket. Datagram sockets may be 40 * connected to 0, 1, or many other sockets. Sockets may be created and 41 * connected in pairs (socketpair(2)), or bound/connected to using the file 42 * system name space. For most purposes, only the receive socket buffer is 43 * used, as sending on one socket delivers directly to the receive socket 44 * buffer of a second socket. 45 * 46 * The implementation is substantially complicated by the fact that 47 * "ancillary data", such as file descriptors or credentials, may be passed 48 * across UNIX domain sockets. The potential for passing UNIX domain sockets 49 * over other UNIX domain sockets requires the implementation of a simple 50 * garbage collector to find and tear down cycles of disconnected sockets. 51 * 52 * TODO: 53 * SEQPACKET, RDM 54 * rethink name space problems 55 * need a proper out-of-band 56 */ 57 58#include <sys/cdefs.h> 59__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 183563 2008-10-03 09:01:55Z rwatson $"); 60 61#include "opt_ddb.h" 62#include "opt_mac.h" 63 64#include <sys/param.h> 65#include <sys/domain.h> 66#include <sys/fcntl.h> 67#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 68#include <sys/eventhandler.h> 69#include <sys/file.h> 70#include <sys/filedesc.h> 71#include <sys/jail.h> 72#include <sys/kernel.h> 73#include <sys/lock.h> 74#include <sys/mbuf.h> 75#include <sys/mount.h> 76#include <sys/mutex.h> 77#include <sys/namei.h> 78#include <sys/proc.h> 79#include <sys/protosw.h> 80#include <sys/resourcevar.h> 81#include <sys/rwlock.h> 82#include <sys/socket.h> 83#include <sys/socketvar.h> 84#include <sys/signalvar.h> 85#include <sys/stat.h> 86#include <sys/sx.h> 87#include <sys/sysctl.h> 88#include <sys/systm.h> 89#include <sys/taskqueue.h> 90#include <sys/un.h> 91#include <sys/unpcb.h> 92#include <sys/vnode.h> 93 94#ifdef DDB 95#include <ddb/ddb.h> 96#endif 97 98#include <security/mac/mac_framework.h> 99 100#include <vm/uma.h> 101 
/* Global UNIX-domain socket state; all protected by unp_global_rwlock. */
static uma_zone_t	unp_zone;		/* Allocator for struct unpcb. */
static unp_gen_t	unp_gencnt;		/* Generation count, bumped on attach/detach. */
static u_int		unp_count;	/* Count of local sockets. */
static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
static int		unp_rights;	/* File descriptors in flight. */
static struct unp_head	unp_shead;	/* List of local stream sockets. */
static struct unp_head	unp_dhead;	/* List of local datagram sockets. */

/* Returned as the peer/local name for unbound sockets (len, AF_LOCAL only). */
static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };

/*
 * Garbage collection of cyclic file descriptor/socket references occurs
 * asynchronously in a taskqueue context in order to avoid recursion and
 * reentrance in the UNIX domain socket, file descriptor, and socket layer
 * code.  See unp_gc() for a full description.
 */
static struct task	unp_gc_task;

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
 * stream sockets, although the total for sender and receiver is actually
 * only PIPSIZ.
 *
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should be
 * large enough for at least one max-size datagram plus address.
 */
#ifndef PIPSIZ
#define	PIPSIZ	8192
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

/* Tunables exported under net.local.{stream,dgram}. */
SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM");
SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");

SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
	   &unpst_sendspace, 0, "Default stream send space.");
SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpst_recvspace, 0, "Default stream receive space.");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
	   &unpdg_sendspace, 0, "Default datagram send space.");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpdg_recvspace, 0, "Default datagram receive space.");
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
    "File descriptors in flight.");

/*-
 * Locking and synchronization:
 *
 * The global UNIX domain socket rwlock (unp_global_rwlock) protects all
 * global variables, including the linked lists tracking the set of allocated
 * UNIX domain sockets.  The global rwlock also serves to prevent deadlock
 * when more than one PCB lock is acquired at a time (i.e., during
 * connect()).  Finally, the global rwlock protects uncounted references from
 * vnodes to sockets bound to those vnodes: to safely dereference the
 * v_socket pointer, the global rwlock must be held while a full reference is
 * acquired.
 *
 * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
 * allocated in pru_attach() and freed in pru_detach().
The validity of that 165 * pointer is an invariant, so no lock is required to dereference the so_pcb 166 * pointer if a valid socket reference is held by the caller. In practice, 167 * this is always true during operations performed on a socket. Each unpcb 168 * has a back-pointer to its socket, unp_socket, which will be stable under 169 * the same circumstances. 170 * 171 * This pointer may only be safely dereferenced as long as a valid reference 172 * to the unpcb is held. Typically, this reference will be from the socket, 173 * or from another unpcb when the referring unpcb's lock is held (in order 174 * that the reference not be invalidated during use). For example, to follow 175 * unp->unp_conn->unp_socket, you need to lock unp, not unp_conn, 176 * as unp_socket remains valid as long as the reference to unp_conn is valid. 177 * 178 * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx. Individual 179 * atomic reads without the lock may be performed "lockless", but more 180 * complex reads and read-modify-writes require the mutex to be held. No 181 * lock order is defined between unpcb locks -- multiple unpcb locks may be 182 * acquired at the same time only when holding the global UNIX domain socket 183 * rwlock exclusively, which prevents deadlocks. 184 * 185 * Blocking with UNIX domain sockets is a tricky issue: unlike most network 186 * protocols, bind() is a non-atomic operation, and connect() requires 187 * potential sleeping in the protocol, due to potentially waiting on local or 188 * distributed file systems. We try to separate "lookup" operations, which 189 * may sleep, and the IPC operations themselves, which typically can occur 190 * with relative atomicity as locks can be held over the entire operation. 191 * 192 * Another tricky issue is simultaneous multi-threaded or multi-process 193 * access to a single UNIX domain socket. 
These are handled by the flags
 * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
 * binding, both of which involve dropping UNIX domain socket locks in order
 * to perform namei() and other file system operations.
 */
static struct rwlock	unp_global_rwlock;

#define	UNP_GLOBAL_LOCK_INIT()		rw_init(&unp_global_rwlock,	\
					    "unp_global_rwlock")

#define	UNP_GLOBAL_LOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_LOCKED)
#define	UNP_GLOBAL_UNLOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_UNLOCKED)

#define	UNP_GLOBAL_WLOCK()		rw_wlock(&unp_global_rwlock)
#define	UNP_GLOBAL_WUNLOCK()		rw_wunlock(&unp_global_rwlock)
#define	UNP_GLOBAL_WLOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_WLOCKED)
#define	UNP_GLOBAL_WOWNED()		rw_wowned(&unp_global_rwlock)

#define	UNP_GLOBAL_RLOCK()		rw_rlock(&unp_global_rwlock)
#define	UNP_GLOBAL_RUNLOCK()		rw_runlock(&unp_global_rwlock)
#define	UNP_GLOBAL_RLOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_RLOCKED)

/*
 * Per-unpcb mutex.  MTX_DUPOK/MTX_RECURSE because multiple unpcb locks may
 * be held at once (only while the global rwlock is held exclusively).
 */
#define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
					    "unp_mtx", "unp_mtx",	\
					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
#define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
#define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
#define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
#define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)

static int	unp_connect(struct socket *, struct sockaddr *,
		    struct thread *);
static int	unp_connect2(struct socket *so, struct socket *so2, int);
static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
static void	unp_shutdown(struct unpcb *);
static void	unp_drop(struct unpcb *, int);
static void	unp_gc(__unused void *, int);
static void	unp_scan(struct mbuf *, void (*)(struct file *));
static void	unp_discard(struct file *);
static void	unp_freerights(struct file **, int);
static int	unp_internalize(struct mbuf **, struct thread *);
static void	unp_internalize_fp(struct file *);
static void	unp_externalize_fp(struct file *);
static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);

/*
 * Definitions of protocols supported in the LOCAL domain.
 */
static struct domain localdomain;
static struct protosw localsw[] = {
{
	.pr_type =		SOCK_STREAM,
	.pr_domain =		&localdomain,
	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_usrreqs =		&uipc_usrreqs
},
{
	.pr_type =		SOCK_DGRAM,
	.pr_domain =		&localdomain,
	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
	.pr_usrreqs =		&uipc_usrreqs
},
};

static struct domain localdomain = {
	.dom_family =		AF_LOCAL,
	.dom_name =		"local",
	.dom_init =		unp_init,
	.dom_externalize =	unp_externalize,
	.dom_dispose =		unp_dispose,
	.dom_protosw =		localsw,
	.dom_protoswNPROTOSW =	&localsw[sizeof(localsw)/sizeof(localsw[0])]
};
DOMAIN_SET(local);

/*
 * Abort the socket: if connected, signal ECONNABORTED to the peer.  The
 * global write lock permits safely acquiring both pcb locks.
 */
static void
uipc_abort(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_drop(unp2, ECONNABORTED);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
}

static int
uipc_accept(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp, *unp2;
	const struct sockaddr *sa;

	/*
	 * Pass back name of connected socket, if it was bound and we are
	 * still connected (our peer may have closed already!).
	 */
	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_GLOBAL_RLOCK();
	unp2 = unp->unp_conn;
	/*
	 * NOTE(review): unp2->unp_addr is read here before UNP_PCB_LOCK(unp2)
	 * is taken; presumably the global rlock keeps the connection (and
	 * hence unp_addr) stable -- confirm against the locking protocol.
	 */
	if (unp2 != NULL && unp2->unp_addr != NULL) {
		UNP_PCB_LOCK(unp2);
		sa = (struct sockaddr *) unp2->unp_addr;
		bcopy(sa, *nam, sa->sa_len);
		UNP_PCB_UNLOCK(unp2);
	} else {
		sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
	}
	UNP_GLOBAL_RUNLOCK();
	return (0);
}

/*
 * Allocate and initialize the unpcb for a new socket; reserve default
 * buffer space by socket type and link the pcb onto the global list.
 */
static int
uipc_attach(struct socket *so, int proto, struct thread *td)
{
	u_long sendspace, recvspace;
	struct unpcb *unp;
	int error, locked;

	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			sendspace = unpst_sendspace;
			recvspace = unpst_recvspace;
			break;

		case SOCK_DGRAM:
			sendspace = unpdg_sendspace;
			recvspace = unpdg_recvspace;
			break;

		default:
			panic("uipc_attach");
		}
		error = soreserve(so, sendspace, recvspace);
		if (error)
			return (error);
	}
	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	LIST_INIT(&unp->unp_refs);
	UNP_PCB_LOCK_INIT(unp);
	unp->unp_socket = so;
	so->so_pcb = unp;
	unp->unp_refcount = 1;

	/*
	 * uipc_attach() may be called indirectly from within the UNIX domain
	 * socket code via sonewconn() in unp_connect().  Since rwlocks can
	 * not be recursed, we do the closest thing.
	 */
	locked = 0;
	if (!UNP_GLOBAL_WOWNED()) {
		UNP_GLOBAL_WLOCK();
		locked = 1;
	}
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
&unp_dhead : &unp_shead,
	    unp, unp_link);
	if (locked)
		UNP_GLOBAL_WUNLOCK();

	return (0);
}

/*
 * Bind the socket to a path name: create a VSOCK vnode via namei()/
 * VOP_CREATE() and point it at this socket.  UNP_BINDING serializes
 * concurrent bind() attempts while all locks are dropped for the
 * (potentially sleeping) file system operations.
 */
static int
uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vattr vattr;
	int error, namelen, vfslocked;
	struct nameidata nd;
	struct unpcb *unp;
	struct vnode *vp;
	struct mount *mp;
	char *buf;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	/*
	 * We don't allow simultaneous bind() calls on a single UNIX domain
	 * socket, so flag in-progress operations, and return an error if an
	 * operation is already in progress.
	 *
	 * Historically, we have not allowed a socket to be rebound, so this
	 * also returns an error.  Not allowing re-binding simplifies the
	 * implementation and avoids a great many possible failure modes.
	 */
	UNP_PCB_LOCK(unp);
	if (unp->unp_vnode != NULL) {
		UNP_PCB_UNLOCK(unp);
		return (EINVAL);
	}
	if (unp->unp_flags & UNP_BINDING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
	}
	unp->unp_flags |= UNP_BINDING;
	UNP_PCB_UNLOCK(unp);

	/* NUL-terminated copy of the path for namei(). */
	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

restart:
	vfslocked = 0;
	NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME,
	    UIO_SYSSPACE, buf, td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error)
		goto error;
	vp = nd.ni_vp;
	vfslocked = NDHASGIANT(&nd);
	/*
	 * An existing vnode means the name is taken; a failed non-blocking
	 * vn_start_write() means we must sleep for the mount and retry.
	 */
	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp != NULL) {
			vrele(vp);
			error = EADDRINUSE;
			goto error;
		}
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error)
			goto error;
		VFS_UNLOCK_GIANT(vfslocked);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
#endif
	if (error == 0) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (error) {
		vn_finished_write(mp);
		goto error;
	}
	vp = nd.ni_vp;
	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);

	/* Publish vnode<->socket binding under both locks. */
	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = soun;
	unp->unp_flags &= ~UNP_BINDING;
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	free(buf, M_TEMP);
	return (0);

error:
	/* On any failure, clear UNP_BINDING so a later bind() may retry. */
	VFS_UNLOCK_GIANT(vfslocked);
	UNP_PCB_LOCK(unp);
	unp->unp_flags &= ~UNP_BINDING;
	UNP_PCB_UNLOCK(unp);
	free(buf, M_TEMP);
	return (error);
}

static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
	UNP_GLOBAL_WLOCK();
	error = unp_connect(so, nam, td);
	UNP_GLOBAL_WUNLOCK();
	return (error);
}

/* Disconnect from the peer (if any) on close. */
static void
uipc_close(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
}

/* socketpair(2) support: wire two sockets directly together. */
int
uipc_connect2(struct socket *so1, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int error;

	UNP_GLOBAL_WLOCK();
	unp = so1->so_pcb;
	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
	UNP_PCB_LOCK(unp);
	unp2 = so2->so_pcb;
	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
	UNP_PCB_LOCK(unp2);
	error = unp_connect2(so1, so2, PRU_CONNECT2);
	UNP_PCB_UNLOCK(unp2);
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	return (error);
}

/* control is EOPNOTSUPP */

/*
 * Tear down the unpcb: unlink from the global list, detach any bound
 * vnode, disconnect the peer and all pending connections, then release
 * the pcb (freed here only if ours was the last reference).
 */
static void
uipc_detach(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct sockaddr_un *saved_unp_addr;
	struct vnode *vp;
	int freeunp, local_unp_rights;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);

	LIST_REMOVE(unp, unp_link);
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;

	/*
	 * XXXRW: Should assert vp->v_socket == so.
	 */
	if ((vp = unp->unp_vnode) != NULL) {
		unp->unp_vnode->v_socket = NULL;
		unp->unp_vnode = NULL;
	}
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}

	/*
	 * We hold the global lock exclusively, so it's OK to acquire
	 * multiple pcb locks at a time.
	 */
	while (!LIST_EMPTY(&unp->unp_refs)) {
		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);

		UNP_PCB_LOCK(ref);
		unp_drop(ref, ECONNRESET);
		UNP_PCB_UNLOCK(ref);
	}
	local_unp_rights = unp_rights;
	UNP_GLOBAL_WUNLOCK();
	unp->unp_socket->so_pcb = NULL;
	saved_unp_addr = unp->unp_addr;
	unp->unp_addr = NULL;
	unp->unp_refcount--;
	freeunp = (unp->unp_refcount == 0);
	if (saved_unp_addr != NULL)
		FREE(saved_unp_addr, M_SONAME);
	if (freeunp) {
		UNP_PCB_LOCK_DESTROY(unp);
		uma_zfree(unp_zone, unp);
	} else
		UNP_PCB_UNLOCK(unp);
	if (vp) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	/* Descriptors may have been in flight; kick the garbage collector. */
	if (local_unp_rights)
		taskqueue_enqueue(taskqueue_thread, &unp_gc_task);
}

static int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	return (0);
}

/*
 * Put the socket into listening state.  Requires a prior bind(); caches
 * the listener's credentials for LOCAL_PEERCRED on accepted connections.
 */
static int
uipc_listen(struct socket *so, int backlog, struct thread *td)
{
	struct unpcb *unp;
	int error;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));

	UNP_PCB_LOCK(unp);
	if (unp->unp_vnode == NULL) {
		UNP_PCB_UNLOCK(unp);
		return (EINVAL);
	}

	SOCK_LOCK(so);
	error =
solisten_proto_check(so);
	if (error == 0) {
		/* Cache our credentials for peers to fetch after accept(). */
		cru2x(td->td_ucred, &unp->unp_peercred);
		unp->unp_flags |= UNP_HAVEPCCACHED;
		solisten_proto(so, backlog);
	}
	SOCK_UNLOCK(so);
	UNP_PCB_UNLOCK(unp);
	return (error);
}

static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp, *unp2;
	const struct sockaddr *sa;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_PCB_LOCK(unp);
	/*
	 * XXX: It seems that this test always fails even when connection is
	 * established.  So, this else clause is added as workaround to
	 * return PF_LOCAL sockaddr.
	 */
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		if (unp2->unp_addr != NULL)
			sa = (struct sockaddr *) unp->unp_conn->unp_addr;
		else
			sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
		UNP_PCB_UNLOCK(unp2);
	} else {
		sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
	}
	UNP_PCB_UNLOCK(unp);
	return (0);
}

/*
 * Receive-side bookkeeping for stream sockets: after the reader consumed
 * data, restore the peer's send-buffer limits and wake waiting writers.
 */
static int
uipc_rcvd(struct socket *so, int flags)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;
	u_int mbcnt, sbcc;
	u_long newhiwat;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));

	if (so->so_type == SOCK_DGRAM)
		panic("uipc_rcvd DGRAM?");

	if (so->so_type != SOCK_STREAM)
		panic("uipc_rcvd unknown socktype");

	/*
	 * Adjust backpressure on sender and wakeup any waiting to write.
	 *
	 * The unp lock is acquired to maintain the validity of the unp_conn
	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
	 * static as long as we don't permit unp2 to disconnect from unp,
	 * which is prevented by the lock on unp.  We cache values from
	 * so_rcv to avoid holding the so_rcv lock over the entire
	 * transaction on the remote so_snd.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	mbcnt = so->so_rcv.sb_mbcnt;
	sbcc = so->so_rcv.sb_cc;
	SOCKBUF_UNLOCK(&so->so_rcv);
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 == NULL) {
		UNP_PCB_UNLOCK(unp);
		return (0);
	}
	so2 = unp2->unp_socket;
	SOCKBUF_LOCK(&so2->so_snd);
	so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
	newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
	(void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
	    newhiwat, RLIM_INFINITY);
	sowwakeup_locked(so2);
	unp->unp_mbcnt = mbcnt;
	unp->unp_cc = sbcc;
	UNP_PCB_UNLOCK(unp);
	return (0);
}

/*
 * Send data (and optional control/rights) by appending directly to the
 * peer's receive buffer.  The global lock is taken exclusively when the
 * connection topology may change (implicit connect via nam, or PRUS_EOF),
 * shared otherwise.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct thread *td)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;
	u_int mbcnt, sbcc;
	u_long newhiwat;
	int error = 0;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_send: unp == NULL"));

	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (control != NULL && (error = unp_internalize(&control, td)))
		goto release;
	if ((nam != NULL) || (flags & PRUS_EOF))
		UNP_GLOBAL_WLOCK();
	else
		UNP_GLOBAL_RLOCK();
	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		const struct sockaddr *from;

		unp2 = unp->unp_conn;
		if (nam != NULL) {
			/* sendto(): temporary connect, send, disconnect. */
			UNP_GLOBAL_WLOCK_ASSERT();
			if (unp2 != NULL) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, td);
			if (error)
				break;
			unp2 = unp->unp_conn;
		}

		/*
		 * Because connect() and send() are non-atomic in a sendto()
		 * with a target address, it's possible that the socket will
		 * have disconnected before the send() can run.  In that case
		 * return the slightly counter-intuitive but otherwise
		 * correct error that the socket is not connected.
		 */
		if (unp2 == NULL) {
			error = ENOTCONN;
			break;
		}
		/* Lockless read. */
		if (unp2->unp_flags & UNP_WANTCRED)
			control = unp_addsockcred(td, control);
		UNP_PCB_LOCK(unp);
		if (unp->unp_addr != NULL)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;
		so2 = unp2->unp_socket;
		SOCKBUF_LOCK(&so2->so_rcv);
		if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
			sorwakeup_locked(so2);
			m = NULL;
			control = NULL;
		} else {
			SOCKBUF_UNLOCK(&so2->so_rcv);
			error = ENOBUFS;
		}
		if (nam != NULL) {
			UNP_GLOBAL_WLOCK_ASSERT();
			UNP_PCB_LOCK(unp2);
			unp_disconnect(unp, unp2);
			UNP_PCB_UNLOCK(unp2);
		}
		UNP_PCB_UNLOCK(unp);
		break;
	}

	case SOCK_STREAM:
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam != NULL) {
				UNP_GLOBAL_WLOCK_ASSERT();
				error = unp_connect(so, nam, td);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		/* Lockless read. */
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		/*
		 * Because connect() and send() are non-atomic in a sendto()
		 * with a target address, it's possible that the socket will
		 * have disconnected before the send() can run.  In that case
		 * return the slightly counter-intuitive but otherwise
		 * correct error that the socket is not connected.
		 *
		 * Locking here must be done carefully: the global lock
		 * prevents interconnections between unpcbs from changing, so
		 * we can traverse from unp to unp2 without acquiring unp's
		 * lock.  Socket buffer locks follow unpcb locks, so we can
		 * acquire both remote and lock socket buffer locks.
		 */
		unp2 = unp->unp_conn;
		if (unp2 == NULL) {
			error = ENOTCONN;
			break;
		}
		so2 = unp2->unp_socket;
		UNP_PCB_LOCK(unp2);
		SOCKBUF_LOCK(&so2->so_rcv);
		if (unp2->unp_flags & UNP_WANTCRED) {
			/*
			 * Credentials are passed only once on SOCK_STREAM.
			 */
			unp2->unp_flags &= ~UNP_WANTCRED;
			control = unp_addsockcred(td, control);
		}
		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 */
		if (control != NULL) {
			if (sbappendcontrol_locked(&so2->so_rcv, m, control))
				control = NULL;
		} else
			sbappend_locked(&so2->so_rcv, m);
		mbcnt = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
		unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
		sbcc = so2->so_rcv.sb_cc;
		sorwakeup_locked(so2);

		SOCKBUF_LOCK(&so->so_snd);
		newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
		(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
		    newhiwat, RLIM_INFINITY);
		so->so_snd.sb_mbmax -= mbcnt;
		SOCKBUF_UNLOCK(&so->so_snd);
		unp2->unp_cc = sbcc;
		UNP_PCB_UNLOCK(unp2);
		m = NULL;
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
	 */
	if (flags & PRUS_EOF) {
		UNP_PCB_LOCK(unp);
		socantsendmore(so);
		unp_shutdown(unp);
		UNP_PCB_UNLOCK(unp);
	}

	if ((nam != NULL) || (flags & PRUS_EOF))
		UNP_GLOBAL_WUNLOCK();
	else
		UNP_GLOBAL_RUNLOCK();

	/* On error, release any file descriptors held in the control data. */
	if (control != NULL && error != 0)
		unp_dispose(control);

release:
	if (control != NULL)
		m_freem(control);
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * fstat(2) on a local socket: report buffering as the block size and
 * hand out a fake inode number, assigned lazily from unp_ino.
 */
static int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));

	sb->st_blksize = so->so_snd.sb_hiwat;
	UNP_GLOBAL_RLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (so->so_type == SOCK_STREAM && unp2 != NULL) {
		so2 = unp2->unp_socket;
		sb->st_blksize += so2->so_rcv.sb_cc;
	}
	sb->st_dev = NODEV;
	/* Skip 0 if the counter wraps; 0 means "not yet assigned". */
	if (unp->unp_ino == 0)
		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
	sb->st_ino = unp->unp_ino;
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_RUNLOCK();
	return (0);
}

static int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	socantsendmore(so);
	unp_shutdown(unp);
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	return (0);
}

/* Return the socket's own bound address, or sun_noname if unbound. */
static int
uipc_sockaddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp;
	const struct sockaddr *sa;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_PCB_LOCK(unp);
	if (unp->unp_addr != NULL)
		sa = (struct sockaddr *) unp->unp_addr;
	else
		sa = &sun_noname;
	bcopy(sa, *nam, sa->sa_len);
	UNP_PCB_UNLOCK(unp);
	return (0);
}

struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_connect =		uipc_connect,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_close =		uipc_close,
};

/*
 * Socket option handler for the LOCAL_* options (LOCAL_PEERCRED,
 * LOCAL_CREDS, LOCAL_CONNWAIT); only level 0 is accepted.
 */
int
uipc_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct unpcb *unp;
	struct xucred xu;
	int error, optval;

	if (sopt->sopt_level != 0)
		return (EINVAL);

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
	error = 0;
	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			UNP_PCB_LOCK(unp);
			if (unp->unp_flags & UNP_HAVEPC)
				xu = unp->unp_peercred;
			else {
				if (so->so_type == SOCK_STREAM)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			UNP_PCB_UNLOCK(unp);
			if (error == 0)
				error = sooptcopyout(sopt, &xu, sizeof(xu));
			break;

		case LOCAL_CREDS:
			/* Unlocked read. */
			optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		case LOCAL_CONNWAIT:
			/* Unlocked read. */
			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		default:
			error = EOPNOTSUPP;
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case LOCAL_CREDS:
		case LOCAL_CONNWAIT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error)
				break;

/* Set or clear a flag bit in unp_flags under the pcb lock. */
#define	OPTSET(bit) do {						\
	UNP_PCB_LOCK(unp);						\
	if (optval)							\
		unp->unp_flags |= bit;					\
	else								\
		unp->unp_flags &= ~bit;					\
	UNP_PCB_UNLOCK(unp);						\
} while (0)

			switch (sopt->sopt_name) {
			case LOCAL_CREDS:
				OPTSET(UNP_WANTCRED);
				break;

			case LOCAL_CONNWAIT:
				OPTSET(UNP_CONNWAIT);
				break;

			default:
				break;
			}
			break;
#undef	OPTSET
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

/*
 * Connect to a socket bound in the file system name space.  Caller holds
 * the global write lock; it is dropped around the sleeping namei() lookup
 * with UNP_CONNECTING guarding against concurrent connect() attempts.
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error, len, vfslocked;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;

	UNP_GLOBAL_WLOCK_ASSERT();

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	UNP_PCB_LOCK(unp);
	if (unp->unp_flags & UNP_CONNECTING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
	}
	UNP_GLOBAL_WUNLOCK();
	unp->unp_flags |= UNP_CONNECTING;
	UNP_PCB_UNLOCK(unp);

	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf,
	    td);
1140 error = namei(&nd); 1141 if (error) 1142 vp = NULL; 1143 else 1144 vp = nd.ni_vp; 1145 ASSERT_VOP_LOCKED(vp, "unp_connect"); 1146 vfslocked = NDHASGIANT(&nd); 1147 NDFREE(&nd, NDF_ONLY_PNBUF); 1148 if (error) 1149 goto bad; 1150 1151 if (vp->v_type != VSOCK) { 1152 error = ENOTSOCK; 1153 goto bad; 1154 } 1155#ifdef MAC 1156 error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); 1157 if (error) 1158 goto bad; 1159#endif 1160 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 1161 if (error) 1162 goto bad; 1163 VFS_UNLOCK_GIANT(vfslocked); 1164 1165 unp = sotounpcb(so); 1166 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1167 1168 /* 1169 * Lock global lock for two reasons: make sure v_socket is stable, 1170 * and to protect simultaneous locking of multiple pcbs. 1171 */ 1172 UNP_GLOBAL_WLOCK(); 1173 so2 = vp->v_socket; 1174 if (so2 == NULL) { 1175 error = ECONNREFUSED; 1176 goto bad2; 1177 } 1178 if (so->so_type != so2->so_type) { 1179 error = EPROTOTYPE; 1180 goto bad2; 1181 } 1182 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 1183 if (so2->so_options & SO_ACCEPTCONN) { 1184 /* 1185 * We can't drop the global lock here or 'so2' may 1186 * become invalid. As a result, we need to handle 1187 * possibly lock recursion in uipc_attach. 1188 */ 1189 so3 = sonewconn(so2, 0); 1190 } else 1191 so3 = NULL; 1192 if (so3 == NULL) { 1193 error = ECONNREFUSED; 1194 goto bad2; 1195 } 1196 unp = sotounpcb(so); 1197 unp2 = sotounpcb(so2); 1198 unp3 = sotounpcb(so3); 1199 UNP_PCB_LOCK(unp); 1200 UNP_PCB_LOCK(unp2); 1201 UNP_PCB_LOCK(unp3); 1202 if (unp2->unp_addr != NULL) { 1203 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 1204 unp3->unp_addr = (struct sockaddr_un *) sa; 1205 sa = NULL; 1206 } 1207 /* 1208 * unp_peercred management: 1209 * 1210 * The connecter's (client's) credentials are copied from its 1211 * process structure at the time of connect() (which is now). 
1212 */ 1213 cru2x(td->td_ucred, &unp3->unp_peercred); 1214 unp3->unp_flags |= UNP_HAVEPC; 1215 /* 1216 * The receiver's (server's) credentials are copied from the 1217 * unp_peercred member of socket on which the former called 1218 * listen(); uipc_listen() cached that process's credentials 1219 * at that time so we can use them now. 1220 */ 1221 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 1222 ("unp_connect: listener without cached peercred")); 1223 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 1224 sizeof(unp->unp_peercred)); 1225 unp->unp_flags |= UNP_HAVEPC; 1226 if (unp2->unp_flags & UNP_WANTCRED) 1227 unp3->unp_flags |= UNP_WANTCRED; 1228 UNP_PCB_UNLOCK(unp3); 1229 UNP_PCB_UNLOCK(unp2); 1230 UNP_PCB_UNLOCK(unp); 1231#ifdef MAC 1232 SOCK_LOCK(so); 1233 mac_socketpeer_set_from_socket(so, so3); 1234 mac_socketpeer_set_from_socket(so3, so); 1235 SOCK_UNLOCK(so); 1236#endif 1237 1238 so2 = so3; 1239 } 1240 unp = sotounpcb(so); 1241 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1242 unp2 = sotounpcb(so2); 1243 KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); 1244 UNP_PCB_LOCK(unp); 1245 UNP_PCB_LOCK(unp2); 1246 error = unp_connect2(so, so2, PRU_CONNECT); 1247 UNP_PCB_UNLOCK(unp2); 1248 UNP_PCB_UNLOCK(unp); 1249bad2: 1250 UNP_GLOBAL_WUNLOCK(); 1251 if (vfslocked) 1252 /* 1253 * Giant has been previously acquired. This means filesystem 1254 * isn't MPSAFE. Do it once again. 
1255 */ 1256 mtx_lock(&Giant); 1257bad: 1258 if (vp != NULL) 1259 vput(vp); 1260 VFS_UNLOCK_GIANT(vfslocked); 1261 free(sa, M_SONAME); 1262 UNP_GLOBAL_WLOCK(); 1263 UNP_PCB_LOCK(unp); 1264 unp->unp_flags &= ~UNP_CONNECTING; 1265 UNP_PCB_UNLOCK(unp); 1266 return (error); 1267} 1268 1269static int 1270unp_connect2(struct socket *so, struct socket *so2, int req) 1271{ 1272 struct unpcb *unp; 1273 struct unpcb *unp2; 1274 1275 unp = sotounpcb(so); 1276 KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); 1277 unp2 = sotounpcb(so2); 1278 KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); 1279 1280 UNP_GLOBAL_WLOCK_ASSERT(); 1281 UNP_PCB_LOCK_ASSERT(unp); 1282 UNP_PCB_LOCK_ASSERT(unp2); 1283 1284 if (so2->so_type != so->so_type) 1285 return (EPROTOTYPE); 1286 unp->unp_conn = unp2; 1287 1288 switch (so->so_type) { 1289 case SOCK_DGRAM: 1290 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1291 soisconnected(so); 1292 break; 1293 1294 case SOCK_STREAM: 1295 unp2->unp_conn = unp; 1296 if (req == PRU_CONNECT && 1297 ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) 1298 soisconnecting(so); 1299 else 1300 soisconnected(so); 1301 soisconnected(so2); 1302 break; 1303 1304 default: 1305 panic("unp_connect2"); 1306 } 1307 return (0); 1308} 1309 1310static void 1311unp_disconnect(struct unpcb *unp, struct unpcb *unp2) 1312{ 1313 struct socket *so; 1314 1315 KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); 1316 1317 UNP_GLOBAL_WLOCK_ASSERT(); 1318 UNP_PCB_LOCK_ASSERT(unp); 1319 UNP_PCB_LOCK_ASSERT(unp2); 1320 1321 unp->unp_conn = NULL; 1322 switch (unp->unp_socket->so_type) { 1323 case SOCK_DGRAM: 1324 LIST_REMOVE(unp, unp_reflink); 1325 so = unp->unp_socket; 1326 SOCK_LOCK(so); 1327 so->so_state &= ~SS_ISCONNECTED; 1328 SOCK_UNLOCK(so); 1329 break; 1330 1331 case SOCK_STREAM: 1332 soisdisconnected(unp->unp_socket); 1333 unp2->unp_conn = NULL; 1334 soisdisconnected(unp2->unp_socket); 1335 break; 1336 } 1337} 1338 1339/* 1340 * unp_pcblist() walks the global list 
of struct unpcb's to generate a 1341 * pointer list, bumping the refcount on each unpcb. It then copies them out 1342 * sequentially, validating the generation number on each to see if it has 1343 * been detached. All of this is necessary because copyout() may sleep on 1344 * disk I/O. 1345 */ 1346static int 1347unp_pcblist(SYSCTL_HANDLER_ARGS) 1348{ 1349 int error, i, n; 1350 int freeunp; 1351 struct unpcb *unp, **unp_list; 1352 unp_gen_t gencnt; 1353 struct xunpgen *xug; 1354 struct unp_head *head; 1355 struct xunpcb *xu; 1356 1357 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1358 1359 /* 1360 * The process of preparing the PCB list is too time-consuming and 1361 * resource-intensive to repeat twice on every request. 1362 */ 1363 if (req->oldptr == NULL) { 1364 n = unp_count; 1365 req->oldidx = 2 * (sizeof *xug) 1366 + (n + n/8) * sizeof(struct xunpcb); 1367 return (0); 1368 } 1369 1370 if (req->newptr != NULL) 1371 return (EPERM); 1372 1373 /* 1374 * OK, now we're committed to doing something. 1375 */ 1376 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 1377 UNP_GLOBAL_RLOCK(); 1378 gencnt = unp_gencnt; 1379 n = unp_count; 1380 UNP_GLOBAL_RUNLOCK(); 1381 1382 xug->xug_len = sizeof *xug; 1383 xug->xug_count = n; 1384 xug->xug_gen = gencnt; 1385 xug->xug_sogen = so_gencnt; 1386 error = SYSCTL_OUT(req, xug, sizeof *xug); 1387 if (error) { 1388 free(xug, M_TEMP); 1389 return (error); 1390 } 1391 1392 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1393 1394 UNP_GLOBAL_RLOCK(); 1395 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1396 unp = LIST_NEXT(unp, unp_link)) { 1397 UNP_PCB_LOCK(unp); 1398 if (unp->unp_gencnt <= gencnt) { 1399 if (cr_cansee(req->td->td_ucred, 1400 unp->unp_socket->so_cred)) { 1401 UNP_PCB_UNLOCK(unp); 1402 continue; 1403 } 1404 unp_list[i++] = unp; 1405 unp->unp_refcount++; 1406 } 1407 UNP_PCB_UNLOCK(unp); 1408 } 1409 UNP_GLOBAL_RUNLOCK(); 1410 n = i; /* In case we lost some during malloc. 
*/ 1411 1412 error = 0; 1413 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); 1414 for (i = 0; i < n; i++) { 1415 unp = unp_list[i]; 1416 UNP_PCB_LOCK(unp); 1417 unp->unp_refcount--; 1418 if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { 1419 xu->xu_len = sizeof *xu; 1420 xu->xu_unpp = unp; 1421 /* 1422 * XXX - need more locking here to protect against 1423 * connect/disconnect races for SMP. 1424 */ 1425 if (unp->unp_addr != NULL) 1426 bcopy(unp->unp_addr, &xu->xu_addr, 1427 unp->unp_addr->sun_len); 1428 if (unp->unp_conn != NULL && 1429 unp->unp_conn->unp_addr != NULL) 1430 bcopy(unp->unp_conn->unp_addr, 1431 &xu->xu_caddr, 1432 unp->unp_conn->unp_addr->sun_len); 1433 bcopy(unp, &xu->xu_unp, sizeof *unp); 1434 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1435 UNP_PCB_UNLOCK(unp); 1436 error = SYSCTL_OUT(req, xu, sizeof *xu); 1437 } else { 1438 freeunp = (unp->unp_refcount == 0); 1439 UNP_PCB_UNLOCK(unp); 1440 if (freeunp) { 1441 UNP_PCB_LOCK_DESTROY(unp); 1442 uma_zfree(unp_zone, unp); 1443 } 1444 } 1445 } 1446 free(xu, M_TEMP); 1447 if (!error) { 1448 /* 1449 * Give the user an updated idea of our state. If the 1450 * generation differs from what we told her before, she knows 1451 * that something happened while we were processing this 1452 * request, and it might be necessary to retry. 
1453 */ 1454 xug->xug_gen = unp_gencnt; 1455 xug->xug_sogen = so_gencnt; 1456 xug->xug_count = unp_count; 1457 error = SYSCTL_OUT(req, xug, sizeof *xug); 1458 } 1459 free(unp_list, M_TEMP); 1460 free(xug, M_TEMP); 1461 return (error); 1462} 1463 1464SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1465 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1466 "List of active local datagram sockets"); 1467SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1468 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1469 "List of active local stream sockets"); 1470 1471static void 1472unp_shutdown(struct unpcb *unp) 1473{ 1474 struct unpcb *unp2; 1475 struct socket *so; 1476 1477 UNP_GLOBAL_WLOCK_ASSERT(); 1478 UNP_PCB_LOCK_ASSERT(unp); 1479 1480 unp2 = unp->unp_conn; 1481 if (unp->unp_socket->so_type == SOCK_STREAM && unp2 != NULL) { 1482 so = unp2->unp_socket; 1483 if (so != NULL) 1484 socantrcvmore(so); 1485 } 1486} 1487 1488static void 1489unp_drop(struct unpcb *unp, int errno) 1490{ 1491 struct socket *so = unp->unp_socket; 1492 struct unpcb *unp2; 1493 1494 UNP_GLOBAL_WLOCK_ASSERT(); 1495 UNP_PCB_LOCK_ASSERT(unp); 1496 1497 so->so_error = errno; 1498 unp2 = unp->unp_conn; 1499 if (unp2 == NULL) 1500 return; 1501 1502 UNP_PCB_LOCK(unp2); 1503 unp_disconnect(unp, unp2); 1504 UNP_PCB_UNLOCK(unp2); 1505} 1506 1507static void 1508unp_freerights(struct file **rp, int fdcount) 1509{ 1510 int i; 1511 struct file *fp; 1512 1513 for (i = 0; i < fdcount; i++) { 1514 /* 1515 * Zero the pointer before calling unp_discard since it may 1516 * end up in unp_gc().. 1517 * 1518 * XXXRW: This is less true than it used to be. 
1519 */ 1520 fp = *rp; 1521 *rp++ = NULL; 1522 unp_discard(fp); 1523 } 1524} 1525 1526int 1527unp_externalize(struct mbuf *control, struct mbuf **controlp) 1528{ 1529 struct thread *td = curthread; /* XXX */ 1530 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1531 int i; 1532 int *fdp; 1533 struct file **rp; 1534 struct file *fp; 1535 void *data; 1536 socklen_t clen = control->m_len, datalen; 1537 int error, newfds; 1538 int f; 1539 u_int newlen; 1540 1541 UNP_GLOBAL_UNLOCK_ASSERT(); 1542 1543 error = 0; 1544 if (controlp != NULL) /* controlp == NULL => free control messages */ 1545 *controlp = NULL; 1546 1547 while (cm != NULL) { 1548 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1549 error = EINVAL; 1550 break; 1551 } 1552 1553 data = CMSG_DATA(cm); 1554 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1555 1556 if (cm->cmsg_level == SOL_SOCKET 1557 && cm->cmsg_type == SCM_RIGHTS) { 1558 newfds = datalen / sizeof(struct file *); 1559 rp = data; 1560 1561 /* If we're not outputting the descriptors free them. */ 1562 if (error || controlp == NULL) { 1563 unp_freerights(rp, newfds); 1564 goto next; 1565 } 1566 FILEDESC_XLOCK(td->td_proc->p_fd); 1567 /* if the new FD's will not fit free them. */ 1568 if (!fdavail(td, newfds)) { 1569 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1570 error = EMSGSIZE; 1571 unp_freerights(rp, newfds); 1572 goto next; 1573 } 1574 1575 /* 1576 * Now change each pointer to an fd in the global 1577 * table to an integer that is the index to the local 1578 * fd table entry that we set up to point to the 1579 * global one we are transferring. 
1580 */ 1581 newlen = newfds * sizeof(int); 1582 *controlp = sbcreatecontrol(NULL, newlen, 1583 SCM_RIGHTS, SOL_SOCKET); 1584 if (*controlp == NULL) { 1585 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1586 error = E2BIG; 1587 unp_freerights(rp, newfds); 1588 goto next; 1589 } 1590 1591 fdp = (int *) 1592 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1593 for (i = 0; i < newfds; i++) { 1594 if (fdalloc(td, 0, &f)) 1595 panic("unp_externalize fdalloc failed"); 1596 fp = *rp++; 1597 td->td_proc->p_fd->fd_ofiles[f] = fp; 1598 unp_externalize_fp(fp); 1599 *fdp++ = f; 1600 } 1601 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1602 } else { 1603 /* We can just copy anything else across. */ 1604 if (error || controlp == NULL) 1605 goto next; 1606 *controlp = sbcreatecontrol(NULL, datalen, 1607 cm->cmsg_type, cm->cmsg_level); 1608 if (*controlp == NULL) { 1609 error = ENOBUFS; 1610 goto next; 1611 } 1612 bcopy(data, 1613 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1614 datalen); 1615 } 1616 1617 controlp = &(*controlp)->m_next; 1618 1619next: 1620 if (CMSG_SPACE(datalen) < clen) { 1621 clen -= CMSG_SPACE(datalen); 1622 cm = (struct cmsghdr *) 1623 ((caddr_t)cm + CMSG_SPACE(datalen)); 1624 } else { 1625 clen = 0; 1626 cm = NULL; 1627 } 1628 } 1629 1630 m_freem(control); 1631 1632 return (error); 1633} 1634 1635static void 1636unp_zone_change(void *tag) 1637{ 1638 1639 uma_zone_set_max(unp_zone, maxsockets); 1640} 1641 1642void 1643unp_init(void) 1644{ 1645 1646 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1647 NULL, NULL, UMA_ALIGN_PTR, 0); 1648 if (unp_zone == NULL) 1649 panic("unp_init"); 1650 uma_zone_set_max(unp_zone, maxsockets); 1651 EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, 1652 NULL, EVENTHANDLER_PRI_ANY); 1653 LIST_INIT(&unp_dhead); 1654 LIST_INIT(&unp_shead); 1655 TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); 1656 UNP_GLOBAL_LOCK_INIT(); 1657} 1658 1659static int 1660unp_internalize(struct mbuf **controlp, struct thread *td) 1661{ 1662 struct mbuf 
*control = *controlp; 1663 struct proc *p = td->td_proc; 1664 struct filedesc *fdescp = p->p_fd; 1665 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1666 struct cmsgcred *cmcred; 1667 struct file **rp; 1668 struct file *fp; 1669 struct timeval *tv; 1670 int i, fd, *fdp; 1671 void *data; 1672 socklen_t clen = control->m_len, datalen; 1673 int error, oldfds; 1674 u_int newlen; 1675 1676 UNP_GLOBAL_UNLOCK_ASSERT(); 1677 1678 error = 0; 1679 *controlp = NULL; 1680 1681 while (cm != NULL) { 1682 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1683 || cm->cmsg_len > clen) { 1684 error = EINVAL; 1685 goto out; 1686 } 1687 1688 data = CMSG_DATA(cm); 1689 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1690 1691 switch (cm->cmsg_type) { 1692 /* 1693 * Fill in credential information. 1694 */ 1695 case SCM_CREDS: 1696 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1697 SCM_CREDS, SOL_SOCKET); 1698 if (*controlp == NULL) { 1699 error = ENOBUFS; 1700 goto out; 1701 } 1702 1703 cmcred = (struct cmsgcred *) 1704 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1705 cmcred->cmcred_pid = p->p_pid; 1706 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1707 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1708 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1709 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1710 CMGROUP_MAX); 1711 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1712 cmcred->cmcred_groups[i] = 1713 td->td_ucred->cr_groups[i]; 1714 break; 1715 1716 case SCM_RIGHTS: 1717 oldfds = datalen / sizeof (int); 1718 /* 1719 * Check that all the FDs passed in refer to legal 1720 * files. If not, reject the entire operation. 
1721 */ 1722 fdp = data; 1723 FILEDESC_SLOCK(fdescp); 1724 for (i = 0; i < oldfds; i++) { 1725 fd = *fdp++; 1726 if ((unsigned)fd >= fdescp->fd_nfiles || 1727 fdescp->fd_ofiles[fd] == NULL) { 1728 FILEDESC_SUNLOCK(fdescp); 1729 error = EBADF; 1730 goto out; 1731 } 1732 fp = fdescp->fd_ofiles[fd]; 1733 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1734 FILEDESC_SUNLOCK(fdescp); 1735 error = EOPNOTSUPP; 1736 goto out; 1737 } 1738 1739 } 1740 1741 /* 1742 * Now replace the integer FDs with pointers to 1743 * the associated global file table entry.. 1744 */ 1745 newlen = oldfds * sizeof(struct file *); 1746 *controlp = sbcreatecontrol(NULL, newlen, 1747 SCM_RIGHTS, SOL_SOCKET); 1748 if (*controlp == NULL) { 1749 FILEDESC_SUNLOCK(fdescp); 1750 error = E2BIG; 1751 goto out; 1752 } 1753 1754 fdp = data; 1755 rp = (struct file **) 1756 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1757 for (i = 0; i < oldfds; i++) { 1758 fp = fdescp->fd_ofiles[*fdp++]; 1759 *rp++ = fp; 1760 unp_internalize_fp(fp); 1761 } 1762 FILEDESC_SUNLOCK(fdescp); 1763 break; 1764 1765 case SCM_TIMESTAMP: 1766 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1767 SCM_TIMESTAMP, SOL_SOCKET); 1768 if (*controlp == NULL) { 1769 error = ENOBUFS; 1770 goto out; 1771 } 1772 tv = (struct timeval *) 1773 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1774 microtime(tv); 1775 break; 1776 1777 default: 1778 error = EINVAL; 1779 goto out; 1780 } 1781 1782 controlp = &(*controlp)->m_next; 1783 1784 if (CMSG_SPACE(datalen) < clen) { 1785 clen -= CMSG_SPACE(datalen); 1786 cm = (struct cmsghdr *) 1787 ((caddr_t)cm + CMSG_SPACE(datalen)); 1788 } else { 1789 clen = 0; 1790 cm = NULL; 1791 } 1792 } 1793 1794out: 1795 m_freem(control); 1796 1797 return (error); 1798} 1799 1800static struct mbuf * 1801unp_addsockcred(struct thread *td, struct mbuf *control) 1802{ 1803 struct mbuf *m, *n, *n_prev; 1804 struct sockcred *sc; 1805 const struct cmsghdr *cm; 1806 int ngroups; 1807 int i; 1808 1809 ngroups = 
MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); 1810 1811 m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); 1812 if (m == NULL) 1813 return (control); 1814 1815 sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); 1816 sc->sc_uid = td->td_ucred->cr_ruid; 1817 sc->sc_euid = td->td_ucred->cr_uid; 1818 sc->sc_gid = td->td_ucred->cr_rgid; 1819 sc->sc_egid = td->td_ucred->cr_gid; 1820 sc->sc_ngroups = ngroups; 1821 for (i = 0; i < sc->sc_ngroups; i++) 1822 sc->sc_groups[i] = td->td_ucred->cr_groups[i]; 1823 1824 /* 1825 * Unlink SCM_CREDS control messages (struct cmsgcred), since just 1826 * created SCM_CREDS control message (struct sockcred) has another 1827 * format. 1828 */ 1829 if (control != NULL) 1830 for (n = control, n_prev = NULL; n != NULL;) { 1831 cm = mtod(n, struct cmsghdr *); 1832 if (cm->cmsg_level == SOL_SOCKET && 1833 cm->cmsg_type == SCM_CREDS) { 1834 if (n_prev == NULL) 1835 control = n->m_next; 1836 else 1837 n_prev->m_next = n->m_next; 1838 n = m_free(n); 1839 } else { 1840 n_prev = n; 1841 n = n->m_next; 1842 } 1843 } 1844 1845 /* Prepend it to the head. 
*/ 1846 m->m_next = control; 1847 1848 return (m); 1849} 1850 1851static struct unpcb * 1852fptounp(struct file *fp) 1853{ 1854 struct socket *so; 1855 1856 if (fp->f_type != DTYPE_SOCKET) 1857 return (NULL); 1858 if ((so = fp->f_data) == NULL) 1859 return (NULL); 1860 if (so->so_proto->pr_domain != &localdomain) 1861 return (NULL); 1862 return sotounpcb(so); 1863} 1864 1865static void 1866unp_discard(struct file *fp) 1867{ 1868 1869 unp_externalize_fp(fp); 1870 (void) closef(fp, (struct thread *)NULL); 1871} 1872 1873static void 1874unp_internalize_fp(struct file *fp) 1875{ 1876 struct unpcb *unp; 1877 1878 UNP_GLOBAL_WLOCK(); 1879 if ((unp = fptounp(fp)) != NULL) { 1880 unp->unp_file = fp; 1881 unp->unp_msgcount++; 1882 } 1883 fhold(fp); 1884 unp_rights++; 1885 UNP_GLOBAL_WUNLOCK(); 1886} 1887 1888static void 1889unp_externalize_fp(struct file *fp) 1890{ 1891 struct unpcb *unp; 1892 1893 UNP_GLOBAL_WLOCK(); 1894 if ((unp = fptounp(fp)) != NULL) 1895 unp->unp_msgcount--; 1896 unp_rights--; 1897 UNP_GLOBAL_WUNLOCK(); 1898} 1899 1900/* 1901 * unp_defer indicates whether additional work has been defered for a future 1902 * pass through unp_gc(). It is thread local and does not require explicit 1903 * synchronization. 1904 */ 1905static int unp_marked; 1906static int unp_unreachable; 1907 1908static void 1909unp_accessable(struct file *fp) 1910{ 1911 struct unpcb *unp; 1912 1913 if ((unp = fptounp(fp)) == NULL) 1914 return; 1915 if (unp->unp_gcflag & UNPGC_REF) 1916 return; 1917 unp->unp_gcflag &= ~UNPGC_DEAD; 1918 unp->unp_gcflag |= UNPGC_REF; 1919 unp_marked++; 1920} 1921 1922static void 1923unp_gc_process(struct unpcb *unp) 1924{ 1925 struct socket *soa; 1926 struct socket *so; 1927 struct file *fp; 1928 1929 /* Already processed. */ 1930 if (unp->unp_gcflag & UNPGC_SCANNED) 1931 return; 1932 fp = unp->unp_file; 1933 1934 /* 1935 * Check for a socket potentially in a cycle. 
It must be in a 1936 * queue as indicated by msgcount, and this must equal the file 1937 * reference count. Note that when msgcount is 0 the file is NULL. 1938 */ 1939 if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && 1940 unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { 1941 unp->unp_gcflag |= UNPGC_DEAD; 1942 unp_unreachable++; 1943 return; 1944 } 1945 1946 /* 1947 * Mark all sockets we reference with RIGHTS. 1948 */ 1949 so = unp->unp_socket; 1950 SOCKBUF_LOCK(&so->so_rcv); 1951 unp_scan(so->so_rcv.sb_mb, unp_accessable); 1952 SOCKBUF_UNLOCK(&so->so_rcv); 1953 1954 /* 1955 * Mark all sockets in our accept queue. 1956 */ 1957 ACCEPT_LOCK(); 1958 TAILQ_FOREACH(soa, &so->so_comp, so_list) { 1959 SOCKBUF_LOCK(&soa->so_rcv); 1960 unp_scan(soa->so_rcv.sb_mb, unp_accessable); 1961 SOCKBUF_UNLOCK(&soa->so_rcv); 1962 } 1963 ACCEPT_UNLOCK(); 1964 unp->unp_gcflag |= UNPGC_SCANNED; 1965} 1966 1967static int unp_recycled; 1968SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 1969 "Number of unreachable sockets claimed by the garbage collector."); 1970 1971static int unp_taskcount; 1972SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 1973 "Number of times the garbage collector has run."); 1974 1975static void 1976unp_gc(__unused void *arg, int pending) 1977{ 1978 struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL }; 1979 struct unp_head **head; 1980 struct file **unref; 1981 struct unpcb *unp; 1982 int i; 1983 1984 unp_taskcount++; 1985 UNP_GLOBAL_RLOCK(); 1986 /* 1987 * First clear all gc flags from previous runs. 1988 */ 1989 for (head = heads; *head != NULL; head++) 1990 LIST_FOREACH(unp, *head, unp_link) 1991 unp->unp_gcflag = 0; 1992 1993 /* 1994 * Scan marking all reachable sockets with UNPGC_REF. Once a socket 1995 * is reachable all of the sockets it references are reachable. 1996 * Stop the scan once we do a complete loop without discovering 1997 * a new reachable socket. 
1998 */ 1999 do { 2000 unp_unreachable = 0; 2001 unp_marked = 0; 2002 for (head = heads; *head != NULL; head++) 2003 LIST_FOREACH(unp, *head, unp_link) 2004 unp_gc_process(unp); 2005 } while (unp_marked); 2006 UNP_GLOBAL_RUNLOCK(); 2007 if (unp_unreachable == 0) 2008 return; 2009 2010 /* 2011 * Allocate space for a local list of dead unpcbs. 2012 */ 2013 unref = malloc(unp_unreachable * sizeof(struct file *), 2014 M_TEMP, M_WAITOK); 2015 2016 /* 2017 * Iterate looking for sockets which have been specifically marked 2018 * as as unreachable and store them locally. 2019 */ 2020 UNP_GLOBAL_RLOCK(); 2021 for (i = 0, head = heads; *head != NULL; head++) 2022 LIST_FOREACH(unp, *head, unp_link) 2023 if (unp->unp_gcflag & UNPGC_DEAD) { 2024 unref[i++] = unp->unp_file; 2025 fhold(unp->unp_file); 2026 KASSERT(unp->unp_file != NULL, 2027 ("unp_gc: Invalid unpcb.")); 2028 KASSERT(i <= unp_unreachable, 2029 ("unp_gc: incorrect unreachable count.")); 2030 } 2031 UNP_GLOBAL_RUNLOCK(); 2032 2033 /* 2034 * Now flush all sockets, free'ing rights. This will free the 2035 * struct files associated with these sockets but leave each socket 2036 * with one remaining ref. 2037 */ 2038 for (i = 0; i < unp_unreachable; i++) 2039 sorflush(unref[i]->f_data); 2040 2041 /* 2042 * And finally release the sockets so they can be reclaimed. 
2043 */ 2044 for (i = 0; i < unp_unreachable; i++) 2045 fdrop(unref[i], NULL); 2046 unp_recycled += unp_unreachable; 2047 free(unref, M_TEMP); 2048} 2049 2050void 2051unp_dispose(struct mbuf *m) 2052{ 2053 2054 if (m) 2055 unp_scan(m, unp_discard); 2056} 2057 2058static void 2059unp_scan(struct mbuf *m0, void (*op)(struct file *)) 2060{ 2061 struct mbuf *m; 2062 struct file **rp; 2063 struct cmsghdr *cm; 2064 void *data; 2065 int i; 2066 socklen_t clen, datalen; 2067 int qfds; 2068 2069 while (m0 != NULL) { 2070 for (m = m0; m; m = m->m_next) { 2071 if (m->m_type != MT_CONTROL) 2072 continue; 2073 2074 cm = mtod(m, struct cmsghdr *); 2075 clen = m->m_len; 2076 2077 while (cm != NULL) { 2078 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 2079 break; 2080 2081 data = CMSG_DATA(cm); 2082 datalen = (caddr_t)cm + cm->cmsg_len 2083 - (caddr_t)data; 2084 2085 if (cm->cmsg_level == SOL_SOCKET && 2086 cm->cmsg_type == SCM_RIGHTS) { 2087 qfds = datalen / sizeof (struct file *); 2088 rp = data; 2089 for (i = 0; i < qfds; i++) 2090 (*op)(*rp++); 2091 } 2092 2093 if (CMSG_SPACE(datalen) < clen) { 2094 clen -= CMSG_SPACE(datalen); 2095 cm = (struct cmsghdr *) 2096 ((caddr_t)cm + CMSG_SPACE(datalen)); 2097 } else { 2098 clen = 0; 2099 cm = NULL; 2100 } 2101 } 2102 } 2103 m0 = m0->m_act; 2104 } 2105} 2106 2107#ifdef DDB 2108static void 2109db_print_indent(int indent) 2110{ 2111 int i; 2112 2113 for (i = 0; i < indent; i++) 2114 db_printf(" "); 2115} 2116 2117static void 2118db_print_unpflags(int unp_flags) 2119{ 2120 int comma; 2121 2122 comma = 0; 2123 if (unp_flags & UNP_HAVEPC) { 2124 db_printf("%sUNP_HAVEPC", comma ? ", " : ""); 2125 comma = 1; 2126 } 2127 if (unp_flags & UNP_HAVEPCCACHED) { 2128 db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : ""); 2129 comma = 1; 2130 } 2131 if (unp_flags & UNP_WANTCRED) { 2132 db_printf("%sUNP_WANTCRED", comma ? ", " : ""); 2133 comma = 1; 2134 } 2135 if (unp_flags & UNP_CONNWAIT) { 2136 db_printf("%sUNP_CONNWAIT", comma ? 
", " : ""); 2137 comma = 1; 2138 } 2139 if (unp_flags & UNP_CONNECTING) { 2140 db_printf("%sUNP_CONNECTING", comma ? ", " : ""); 2141 comma = 1; 2142 } 2143 if (unp_flags & UNP_BINDING) { 2144 db_printf("%sUNP_BINDING", comma ? ", " : ""); 2145 comma = 1; 2146 } 2147} 2148 2149static void 2150db_print_xucred(int indent, struct xucred *xu) 2151{ 2152 int comma, i; 2153 2154 db_print_indent(indent); 2155 db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n", 2156 xu->cr_version, xu->cr_uid, xu->cr_ngroups); 2157 db_print_indent(indent); 2158 db_printf("cr_groups: "); 2159 comma = 0; 2160 for (i = 0; i < xu->cr_ngroups; i++) { 2161 db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]); 2162 comma = 1; 2163 } 2164 db_printf("\n"); 2165} 2166 2167static void 2168db_print_unprefs(int indent, struct unp_head *uh) 2169{ 2170 struct unpcb *unp; 2171 int counter; 2172 2173 counter = 0; 2174 LIST_FOREACH(unp, uh, unp_reflink) { 2175 if (counter % 4 == 0) 2176 db_print_indent(indent); 2177 db_printf("%p ", unp); 2178 if (counter % 4 == 3) 2179 db_printf("\n"); 2180 counter++; 2181 } 2182 if (counter != 0 && counter % 4 != 0) 2183 db_printf("\n"); 2184} 2185 2186DB_SHOW_COMMAND(unpcb, db_show_unpcb) 2187{ 2188 struct unpcb *unp; 2189 2190 if (!have_addr) { 2191 db_printf("usage: show unpcb <addr>\n"); 2192 return; 2193 } 2194 unp = (struct unpcb *)addr; 2195 2196 db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, 2197 unp->unp_vnode); 2198 2199 db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino, 2200 unp->unp_conn); 2201 2202 db_printf("unp_refs:\n"); 2203 db_print_unprefs(2, &unp->unp_refs); 2204 2205 /* XXXRW: Would be nice to print the full address, if any. 
*/ 2206 db_printf("unp_addr: %p\n", unp->unp_addr); 2207 2208 db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n", 2209 unp->unp_cc, unp->unp_mbcnt, 2210 (unsigned long long)unp->unp_gencnt); 2211 2212 db_printf("unp_flags: %x (", unp->unp_flags); 2213 db_print_unpflags(unp->unp_flags); 2214 db_printf(")\n"); 2215 2216 db_printf("unp_peercred:\n"); 2217 db_print_xucred(2, &unp->unp_peercred); 2218 2219 db_printf("unp_refcount: %u\n", unp->unp_refcount); 2220} 2221#endif 2222