uipc_usrreq.c revision 183690
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004-2008 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 32 */ 33 34/* 35 * UNIX Domain (Local) Sockets 36 * 37 * This is an implementation of UNIX (local) domain sockets. Each socket has 38 * an associated struct unpcb (UNIX protocol control block). 
Stream sockets 39 * may be connected to 0 or 1 other socket. Datagram sockets may be 40 * connected to 0, 1, or many other sockets. Sockets may be created and 41 * connected in pairs (socketpair(2)), or bound/connected to using the file 42 * system name space. For most purposes, only the receive socket buffer is 43 * used, as sending on one socket delivers directly to the receive socket 44 * buffer of a second socket. 45 * 46 * The implementation is substantially complicated by the fact that 47 * "ancillary data", such as file descriptors or credentials, may be passed 48 * across UNIX domain sockets. The potential for passing UNIX domain sockets 49 * over other UNIX domain sockets requires the implementation of a simple 50 * garbage collector to find and tear down cycles of disconnected sockets. 51 * 52 * TODO: 53 * SEQPACKET, RDM 54 * rethink name space problems 55 * need a proper out-of-band 56 */ 57 58#include <sys/cdefs.h> 59__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 183690 2008-10-08 06:26:51Z rwatson $"); 60 61#include "opt_ddb.h" 62#include "opt_mac.h" 63 64#include <sys/param.h> 65#include <sys/domain.h> 66#include <sys/fcntl.h> 67#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 68#include <sys/eventhandler.h> 69#include <sys/file.h> 70#include <sys/filedesc.h> 71#include <sys/jail.h> 72#include <sys/kernel.h> 73#include <sys/lock.h> 74#include <sys/mbuf.h> 75#include <sys/mount.h> 76#include <sys/mutex.h> 77#include <sys/namei.h> 78#include <sys/proc.h> 79#include <sys/protosw.h> 80#include <sys/resourcevar.h> 81#include <sys/rwlock.h> 82#include <sys/socket.h> 83#include <sys/socketvar.h> 84#include <sys/signalvar.h> 85#include <sys/stat.h> 86#include <sys/sx.h> 87#include <sys/sysctl.h> 88#include <sys/systm.h> 89#include <sys/taskqueue.h> 90#include <sys/un.h> 91#include <sys/unpcb.h> 92#include <sys/vnode.h> 93 94#ifdef DDB 95#include <ddb/ddb.h> 96#endif 97 98#include <security/mac/mac_framework.h> 99 100#include <vm/uma.h> 101 
102static uma_zone_t unp_zone; 103static unp_gen_t unp_gencnt; 104static u_int unp_count; /* Count of local sockets. */ 105static ino_t unp_ino; /* Prototype for fake inode numbers. */ 106static int unp_rights; /* File descriptors in flight. */ 107static struct unp_head unp_shead; /* List of local stream sockets. */ 108static struct unp_head unp_dhead; /* List of local datagram sockets. */ 109 110static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 111 112/* 113 * Garbage collection of cyclic file descriptor/socket references occurs 114 * asynchronously in a taskqueue context in order to avoid recursion and 115 * reentrance in the UNIX domain socket, file descriptor, and socket layer 116 * code. See unp_gc() for a full description. 117 */ 118static struct task unp_gc_task; 119 120/* 121 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for 122 * stream sockets, although the total for sender and receiver is actually 123 * only PIPSIZ. 124 * 125 * Datagram sockets really use the sendspace as the maximum datagram size, 126 * and don't really want to reserve the sendspace. Their recvspace should be 127 * large enough for at least one max-size datagram plus address. 
128 */ 129#ifndef PIPSIZ 130#define PIPSIZ 8192 131#endif 132static u_long unpst_sendspace = PIPSIZ; 133static u_long unpst_recvspace = PIPSIZ; 134static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 135static u_long unpdg_recvspace = 4*1024; 136 137SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain"); 138SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM"); 139SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM"); 140 141SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 142 &unpst_sendspace, 0, "Default stream send space."); 143SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 144 &unpst_recvspace, 0, "Default stream receive space."); 145SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 146 &unpdg_sendspace, 0, "Default datagram send space."); 147SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 148 &unpdg_recvspace, 0, "Default datagram receive space."); 149SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, 150 "File descriptors in flight."); 151 152/*- 153 * Locking and synchronization: 154 * 155 * The global UNIX domain socket rwlock (unp_global_rwlock) protects all 156 * global variables, including the linked lists tracking the set of allocated 157 * UNIX domain sockets. The global rwlock also serves to prevent deadlock 158 * when more than one PCB lock is acquired at a time (i.e., during 159 * connect()). Finally, the global rwlock protects uncounted references from 160 * vnodes to sockets bound to those vnodes: to safely dereference the 161 * v_socket pointer, the global rwlock must be held while a full reference is 162 * acquired. 163 * 164 * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, 165 * allocated in pru_attach() and freed in pru_detach(). 
The validity of that 166 * pointer is an invariant, so no lock is required to dereference the so_pcb 167 * pointer if a valid socket reference is held by the caller. In practice, 168 * this is always true during operations performed on a socket. Each unpcb 169 * has a back-pointer to its socket, unp_socket, which will be stable under 170 * the same circumstances. 171 * 172 * This pointer may only be safely dereferenced as long as a valid reference 173 * to the unpcb is held. Typically, this reference will be from the socket, 174 * or from another unpcb when the referring unpcb's lock is held (in order 175 * that the reference not be invalidated during use). For example, to follow 176 * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn, 177 * as unp_socket remains valid as long as the reference to unp_conn is valid. 178 * 179 * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx. Individual 180 * atomic reads without the lock may be performed "lockless", but more 181 * complex reads and read-modify-writes require the mutex to be held. No 182 * lock order is defined between unpcb locks -- multiple unpcb locks may be 183 * acquired at the same time only when holding the global UNIX domain socket 184 * rwlock exclusively, which prevents deadlocks. 185 * 186 * Blocking with UNIX domain sockets is a tricky issue: unlike most network 187 * protocols, bind() is a non-atomic operation, and connect() requires 188 * potential sleeping in the protocol, due to potentially waiting on local or 189 * distributed file systems. We try to separate "lookup" operations, which 190 * may sleep, and the IPC operations themselves, which typically can occur 191 * with relative atomicity as locks can be held over the entire operation. 192 * 193 * Another tricky issue is simultaneous multi-threaded or multi-process 194 * access to a single UNIX domain socket. 
These are handled by the flags 195 * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or 196 * binding, both of which involve dropping UNIX domain socket locks in order 197 * to perform namei() and other file system operations. 198 */ 199static struct rwlock unp_global_rwlock; 200 201#define UNP_GLOBAL_LOCK_INIT() rw_init(&unp_global_rwlock, \ 202 "unp_global_rwlock") 203 204#define UNP_GLOBAL_LOCK_ASSERT() rw_assert(&unp_global_rwlock, \ 205 RA_LOCKED) 206#define UNP_GLOBAL_UNLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ 207 RA_UNLOCKED) 208 209#define UNP_GLOBAL_WLOCK() rw_wlock(&unp_global_rwlock) 210#define UNP_GLOBAL_WUNLOCK() rw_wunlock(&unp_global_rwlock) 211#define UNP_GLOBAL_WLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ 212 RA_WLOCKED) 213#define UNP_GLOBAL_WOWNED() rw_wowned(&unp_global_rwlock) 214 215#define UNP_GLOBAL_RLOCK() rw_rlock(&unp_global_rwlock) 216#define UNP_GLOBAL_RUNLOCK() rw_runlock(&unp_global_rwlock) 217#define UNP_GLOBAL_RLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ 218 RA_RLOCKED) 219 220#define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ 221 "unp_mtx", "unp_mtx", \ 222 MTX_DUPOK|MTX_DEF|MTX_RECURSE) 223#define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) 224#define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) 225#define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) 226#define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) 227 228static int uipc_connect2(struct socket *, struct socket *); 229static int uipc_ctloutput(struct socket *, struct sockopt *); 230static int unp_connect(struct socket *, struct sockaddr *, 231 struct thread *); 232static int unp_connect2(struct socket *so, struct socket *so2, int); 233static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); 234static void unp_dispose(struct mbuf *); 235static void unp_shutdown(struct unpcb *); 236static void unp_drop(struct unpcb *, int); 237static void unp_gc(__unused void *, int); 238static void unp_scan(struct 
mbuf *, void (*)(struct file *)); 239static void unp_discard(struct file *); 240static void unp_freerights(struct file **, int); 241static void unp_init(void); 242static int unp_internalize(struct mbuf **, struct thread *); 243static void unp_internalize_fp(struct file *); 244static int unp_externalize(struct mbuf *, struct mbuf **); 245static void unp_externalize_fp(struct file *); 246static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); 247 248/* 249 * Definitions of protocols supported in the LOCAL domain. 250 */ 251static struct domain localdomain; 252static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream; 253static struct protosw localsw[] = { 254{ 255 .pr_type = SOCK_STREAM, 256 .pr_domain = &localdomain, 257 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, 258 .pr_ctloutput = &uipc_ctloutput, 259 .pr_usrreqs = &uipc_usrreqs_stream 260}, 261{ 262 .pr_type = SOCK_DGRAM, 263 .pr_domain = &localdomain, 264 .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, 265 .pr_usrreqs = &uipc_usrreqs_dgram 266}, 267}; 268 269static struct domain localdomain = { 270 .dom_family = AF_LOCAL, 271 .dom_name = "local", 272 .dom_init = unp_init, 273 .dom_externalize = unp_externalize, 274 .dom_dispose = unp_dispose, 275 .dom_protosw = localsw, 276 .dom_protoswNPROTOSW = &localsw[sizeof(localsw)/sizeof(localsw[0])] 277}; 278DOMAIN_SET(local); 279 280static void 281uipc_abort(struct socket *so) 282{ 283 struct unpcb *unp, *unp2; 284 285 unp = sotounpcb(so); 286 KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); 287 288 UNP_GLOBAL_WLOCK(); 289 UNP_PCB_LOCK(unp); 290 unp2 = unp->unp_conn; 291 if (unp2 != NULL) { 292 UNP_PCB_LOCK(unp2); 293 unp_drop(unp2, ECONNABORTED); 294 UNP_PCB_UNLOCK(unp2); 295 } 296 UNP_PCB_UNLOCK(unp); 297 UNP_GLOBAL_WUNLOCK(); 298} 299 300static int 301uipc_accept(struct socket *so, struct sockaddr **nam) 302{ 303 struct unpcb *unp, *unp2; 304 const struct sockaddr *sa; 305 306 /* 307 * Pass back name of connected socket, if it was bound and we 
are 308 * still connected (our peer may have closed already!). 309 */ 310 unp = sotounpcb(so); 311 KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); 312 313 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 314 UNP_GLOBAL_RLOCK(); 315 unp2 = unp->unp_conn; 316 if (unp2 != NULL && unp2->unp_addr != NULL) { 317 UNP_PCB_LOCK(unp2); 318 sa = (struct sockaddr *) unp2->unp_addr; 319 bcopy(sa, *nam, sa->sa_len); 320 UNP_PCB_UNLOCK(unp2); 321 } else { 322 sa = &sun_noname; 323 bcopy(sa, *nam, sa->sa_len); 324 } 325 UNP_GLOBAL_RUNLOCK(); 326 return (0); 327} 328 329static int 330uipc_attach(struct socket *so, int proto, struct thread *td) 331{ 332 u_long sendspace, recvspace; 333 struct unpcb *unp; 334 int error, locked; 335 336 KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); 337 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 338 switch (so->so_type) { 339 case SOCK_STREAM: 340 sendspace = unpst_sendspace; 341 recvspace = unpst_recvspace; 342 break; 343 344 case SOCK_DGRAM: 345 sendspace = unpdg_sendspace; 346 recvspace = unpdg_recvspace; 347 break; 348 349 default: 350 panic("uipc_attach"); 351 } 352 error = soreserve(so, sendspace, recvspace); 353 if (error) 354 return (error); 355 } 356 unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO); 357 if (unp == NULL) 358 return (ENOBUFS); 359 LIST_INIT(&unp->unp_refs); 360 UNP_PCB_LOCK_INIT(unp); 361 unp->unp_socket = so; 362 so->so_pcb = unp; 363 unp->unp_refcount = 1; 364 365 /* 366 * uipc_attach() may be called indirectly from within the UNIX domain 367 * socket code via sonewconn() in unp_connect(). Since rwlocks can 368 * not be recursed, we do the closest thing. 369 */ 370 locked = 0; 371 if (!UNP_GLOBAL_WOWNED()) { 372 UNP_GLOBAL_WLOCK(); 373 locked = 1; 374 } 375 unp->unp_gencnt = ++unp_gencnt; 376 unp_count++; 377 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead : &unp_shead, 378 unp, unp_link); 379 if (locked) 380 UNP_GLOBAL_WUNLOCK(); 381 382 return (0); 383} 384 385static int 386uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 387{ 388 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 389 struct vattr vattr; 390 int error, namelen, vfslocked; 391 struct nameidata nd; 392 struct unpcb *unp; 393 struct vnode *vp; 394 struct mount *mp; 395 char *buf; 396 397 unp = sotounpcb(so); 398 KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); 399 400 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 401 if (namelen <= 0) 402 return (EINVAL); 403 404 /* 405 * We don't allow simultaneous bind() calls on a single UNIX domain 406 * socket, so flag in-progress operations, and return an error if an 407 * operation is already in progress. 408 * 409 * Historically, we have not allowed a socket to be rebound, so this 410 * also returns an error. Not allowing re-binding simplifies the 411 * implementation and avoids a great many possible failure modes. 
412 */ 413 UNP_PCB_LOCK(unp); 414 if (unp->unp_vnode != NULL) { 415 UNP_PCB_UNLOCK(unp); 416 return (EINVAL); 417 } 418 if (unp->unp_flags & UNP_BINDING) { 419 UNP_PCB_UNLOCK(unp); 420 return (EALREADY); 421 } 422 unp->unp_flags |= UNP_BINDING; 423 UNP_PCB_UNLOCK(unp); 424 425 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 426 bcopy(soun->sun_path, buf, namelen); 427 buf[namelen] = 0; 428 429restart: 430 vfslocked = 0; 431 NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME, 432 UIO_SYSSPACE, buf, td); 433/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 434 error = namei(&nd); 435 if (error) 436 goto error; 437 vp = nd.ni_vp; 438 vfslocked = NDHASGIANT(&nd); 439 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 440 NDFREE(&nd, NDF_ONLY_PNBUF); 441 if (nd.ni_dvp == vp) 442 vrele(nd.ni_dvp); 443 else 444 vput(nd.ni_dvp); 445 if (vp != NULL) { 446 vrele(vp); 447 error = EADDRINUSE; 448 goto error; 449 } 450 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 451 if (error) 452 goto error; 453 VFS_UNLOCK_GIANT(vfslocked); 454 goto restart; 455 } 456 VATTR_NULL(&vattr); 457 vattr.va_type = VSOCK; 458 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 459#ifdef MAC 460 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 461 &vattr); 462#endif 463 if (error == 0) { 464 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 465 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 466 } 467 NDFREE(&nd, NDF_ONLY_PNBUF); 468 vput(nd.ni_dvp); 469 if (error) { 470 vn_finished_write(mp); 471 goto error; 472 } 473 vp = nd.ni_vp; 474 ASSERT_VOP_ELOCKED(vp, "uipc_bind"); 475 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 476 477 UNP_GLOBAL_WLOCK(); 478 UNP_PCB_LOCK(unp); 479 vp->v_socket = unp->unp_socket; 480 unp->unp_vnode = vp; 481 unp->unp_addr = soun; 482 unp->unp_flags &= ~UNP_BINDING; 483 UNP_PCB_UNLOCK(unp); 484 UNP_GLOBAL_WUNLOCK(); 485 VOP_UNLOCK(vp, 0); 486 vn_finished_write(mp); 487 
VFS_UNLOCK_GIANT(vfslocked); 488 free(buf, M_TEMP); 489 return (0); 490 491error: 492 VFS_UNLOCK_GIANT(vfslocked); 493 UNP_PCB_LOCK(unp); 494 unp->unp_flags &= ~UNP_BINDING; 495 UNP_PCB_UNLOCK(unp); 496 free(buf, M_TEMP); 497 return (error); 498} 499 500static int 501uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 502{ 503 int error; 504 505 KASSERT(td == curthread, ("uipc_connect: td != curthread")); 506 UNP_GLOBAL_WLOCK(); 507 error = unp_connect(so, nam, td); 508 UNP_GLOBAL_WUNLOCK(); 509 return (error); 510} 511 512static void 513uipc_close(struct socket *so) 514{ 515 struct unpcb *unp, *unp2; 516 517 unp = sotounpcb(so); 518 KASSERT(unp != NULL, ("uipc_close: unp == NULL")); 519 520 UNP_GLOBAL_WLOCK(); 521 UNP_PCB_LOCK(unp); 522 unp2 = unp->unp_conn; 523 if (unp2 != NULL) { 524 UNP_PCB_LOCK(unp2); 525 unp_disconnect(unp, unp2); 526 UNP_PCB_UNLOCK(unp2); 527 } 528 UNP_PCB_UNLOCK(unp); 529 UNP_GLOBAL_WUNLOCK(); 530} 531 532/* 533 * uipc_connect2() is not static as it is invoked directly by fifofs. 
534 */ 535static int 536uipc_connect2(struct socket *so1, struct socket *so2) 537{ 538 struct unpcb *unp, *unp2; 539 int error; 540 541 UNP_GLOBAL_WLOCK(); 542 unp = so1->so_pcb; 543 KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); 544 UNP_PCB_LOCK(unp); 545 unp2 = so2->so_pcb; 546 KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); 547 UNP_PCB_LOCK(unp2); 548 error = unp_connect2(so1, so2, PRU_CONNECT2); 549 UNP_PCB_UNLOCK(unp2); 550 UNP_PCB_UNLOCK(unp); 551 UNP_GLOBAL_WUNLOCK(); 552 return (error); 553} 554 555static void 556uipc_detach(struct socket *so) 557{ 558 struct unpcb *unp, *unp2; 559 struct sockaddr_un *saved_unp_addr; 560 struct vnode *vp; 561 int freeunp, local_unp_rights; 562 563 unp = sotounpcb(so); 564 KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); 565 566 UNP_GLOBAL_WLOCK(); 567 UNP_PCB_LOCK(unp); 568 569 LIST_REMOVE(unp, unp_link); 570 unp->unp_gencnt = ++unp_gencnt; 571 --unp_count; 572 573 /* 574 * XXXRW: Should assert vp->v_socket == so. 575 */ 576 if ((vp = unp->unp_vnode) != NULL) { 577 unp->unp_vnode->v_socket = NULL; 578 unp->unp_vnode = NULL; 579 } 580 unp2 = unp->unp_conn; 581 if (unp2 != NULL) { 582 UNP_PCB_LOCK(unp2); 583 unp_disconnect(unp, unp2); 584 UNP_PCB_UNLOCK(unp2); 585 } 586 587 /* 588 * We hold the global lock exclusively, so it's OK to acquire 589 * multiple pcb locks at a time. 
590 */ 591 while (!LIST_EMPTY(&unp->unp_refs)) { 592 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 593 594 UNP_PCB_LOCK(ref); 595 unp_drop(ref, ECONNRESET); 596 UNP_PCB_UNLOCK(ref); 597 } 598 local_unp_rights = unp_rights; 599 UNP_GLOBAL_WUNLOCK(); 600 unp->unp_socket->so_pcb = NULL; 601 saved_unp_addr = unp->unp_addr; 602 unp->unp_addr = NULL; 603 unp->unp_refcount--; 604 freeunp = (unp->unp_refcount == 0); 605 if (saved_unp_addr != NULL) 606 FREE(saved_unp_addr, M_SONAME); 607 if (freeunp) { 608 UNP_PCB_LOCK_DESTROY(unp); 609 uma_zfree(unp_zone, unp); 610 } else 611 UNP_PCB_UNLOCK(unp); 612 if (vp) { 613 int vfslocked; 614 615 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 616 vrele(vp); 617 VFS_UNLOCK_GIANT(vfslocked); 618 } 619 if (local_unp_rights) 620 taskqueue_enqueue(taskqueue_thread, &unp_gc_task); 621} 622 623static int 624uipc_disconnect(struct socket *so) 625{ 626 struct unpcb *unp, *unp2; 627 628 unp = sotounpcb(so); 629 KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); 630 631 UNP_GLOBAL_WLOCK(); 632 UNP_PCB_LOCK(unp); 633 unp2 = unp->unp_conn; 634 if (unp2 != NULL) { 635 UNP_PCB_LOCK(unp2); 636 unp_disconnect(unp, unp2); 637 UNP_PCB_UNLOCK(unp2); 638 } 639 UNP_PCB_UNLOCK(unp); 640 UNP_GLOBAL_WUNLOCK(); 641 return (0); 642} 643 644static int 645uipc_listen(struct socket *so, int backlog, struct thread *td) 646{ 647 struct unpcb *unp; 648 int error; 649 650 unp = sotounpcb(so); 651 KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); 652 653 UNP_PCB_LOCK(unp); 654 if (unp->unp_vnode == NULL) { 655 UNP_PCB_UNLOCK(unp); 656 return (EINVAL); 657 } 658 659 SOCK_LOCK(so); 660 error = solisten_proto_check(so); 661 if (error == 0) { 662 cru2x(td->td_ucred, &unp->unp_peercred); 663 unp->unp_flags |= UNP_HAVEPCCACHED; 664 solisten_proto(so, backlog); 665 } 666 SOCK_UNLOCK(so); 667 UNP_PCB_UNLOCK(unp); 668 return (error); 669} 670 671static int 672uipc_peeraddr(struct socket *so, struct sockaddr **nam) 673{ 674 struct unpcb *unp, *unp2; 675 const struct 
sockaddr *sa; 676 677 unp = sotounpcb(so); 678 KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); 679 680 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 681 UNP_PCB_LOCK(unp); 682 /* 683 * XXX: It seems that this test always fails even when connection is 684 * established. So, this else clause is added as workaround to 685 * return PF_LOCAL sockaddr. 686 */ 687 unp2 = unp->unp_conn; 688 if (unp2 != NULL) { 689 UNP_PCB_LOCK(unp2); 690 if (unp2->unp_addr != NULL) 691 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 692 else 693 sa = &sun_noname; 694 bcopy(sa, *nam, sa->sa_len); 695 UNP_PCB_UNLOCK(unp2); 696 } else { 697 sa = &sun_noname; 698 bcopy(sa, *nam, sa->sa_len); 699 } 700 UNP_PCB_UNLOCK(unp); 701 return (0); 702} 703 704static int 705uipc_rcvd(struct socket *so, int flags) 706{ 707 struct unpcb *unp, *unp2; 708 struct socket *so2; 709 u_int mbcnt, sbcc; 710 u_long newhiwat; 711 712 unp = sotounpcb(so); 713 KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); 714 715 if (so->so_type == SOCK_DGRAM) 716 panic("uipc_rcvd DGRAM?"); 717 718 if (so->so_type != SOCK_STREAM) 719 panic("uipc_rcvd unknown socktype"); 720 721 /* 722 * Adjust backpressure on sender and wakeup any waiting to write. 723 * 724 * The unp lock is acquired to maintain the validity of the unp_conn 725 * pointer; no lock on unp2 is required as unp2->unp_socket will be 726 * static as long as we don't permit unp2 to disconnect from unp, 727 * which is prevented by the lock on unp. We cache values from 728 * so_rcv to avoid holding the so_rcv lock over the entire 729 * transaction on the remote so_snd. 
730 */ 731 SOCKBUF_LOCK(&so->so_rcv); 732 mbcnt = so->so_rcv.sb_mbcnt; 733 sbcc = so->so_rcv.sb_cc; 734 SOCKBUF_UNLOCK(&so->so_rcv); 735 UNP_PCB_LOCK(unp); 736 unp2 = unp->unp_conn; 737 if (unp2 == NULL) { 738 UNP_PCB_UNLOCK(unp); 739 return (0); 740 } 741 so2 = unp2->unp_socket; 742 SOCKBUF_LOCK(&so2->so_snd); 743 so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt; 744 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc; 745 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 746 newhiwat, RLIM_INFINITY); 747 sowwakeup_locked(so2); 748 unp->unp_mbcnt = mbcnt; 749 unp->unp_cc = sbcc; 750 UNP_PCB_UNLOCK(unp); 751 return (0); 752} 753 754static int 755uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 756 struct mbuf *control, struct thread *td) 757{ 758 struct unpcb *unp, *unp2; 759 struct socket *so2; 760 u_int mbcnt, sbcc; 761 u_long newhiwat; 762 int error = 0; 763 764 unp = sotounpcb(so); 765 KASSERT(unp != NULL, ("uipc_send: unp == NULL")); 766 767 if (flags & PRUS_OOB) { 768 error = EOPNOTSUPP; 769 goto release; 770 } 771 if (control != NULL && (error = unp_internalize(&control, td))) 772 goto release; 773 if ((nam != NULL) || (flags & PRUS_EOF)) 774 UNP_GLOBAL_WLOCK(); 775 else 776 UNP_GLOBAL_RLOCK(); 777 switch (so->so_type) { 778 case SOCK_DGRAM: 779 { 780 const struct sockaddr *from; 781 782 unp2 = unp->unp_conn; 783 if (nam != NULL) { 784 UNP_GLOBAL_WLOCK_ASSERT(); 785 if (unp2 != NULL) { 786 error = EISCONN; 787 break; 788 } 789 error = unp_connect(so, nam, td); 790 if (error) 791 break; 792 unp2 = unp->unp_conn; 793 } 794 795 /* 796 * Because connect() and send() are non-atomic in a sendto() 797 * with a target address, it's possible that the socket will 798 * have disconnected before the send() can run. In that case 799 * return the slightly counter-intuitive but otherwise 800 * correct error that the socket is not connected. 801 */ 802 if (unp2 == NULL) { 803 error = ENOTCONN; 804 break; 805 } 806 /* Lockless read. 
*/ 807 if (unp2->unp_flags & UNP_WANTCRED) 808 control = unp_addsockcred(td, control); 809 UNP_PCB_LOCK(unp); 810 if (unp->unp_addr != NULL) 811 from = (struct sockaddr *)unp->unp_addr; 812 else 813 from = &sun_noname; 814 so2 = unp2->unp_socket; 815 SOCKBUF_LOCK(&so2->so_rcv); 816 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { 817 sorwakeup_locked(so2); 818 m = NULL; 819 control = NULL; 820 } else { 821 SOCKBUF_UNLOCK(&so2->so_rcv); 822 error = ENOBUFS; 823 } 824 if (nam != NULL) { 825 UNP_GLOBAL_WLOCK_ASSERT(); 826 UNP_PCB_LOCK(unp2); 827 unp_disconnect(unp, unp2); 828 UNP_PCB_UNLOCK(unp2); 829 } 830 UNP_PCB_UNLOCK(unp); 831 break; 832 } 833 834 case SOCK_STREAM: 835 if ((so->so_state & SS_ISCONNECTED) == 0) { 836 if (nam != NULL) { 837 UNP_GLOBAL_WLOCK_ASSERT(); 838 error = unp_connect(so, nam, td); 839 if (error) 840 break; /* XXX */ 841 } else { 842 error = ENOTCONN; 843 break; 844 } 845 } 846 847 /* Lockless read. */ 848 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 849 error = EPIPE; 850 break; 851 } 852 853 /* 854 * Because connect() and send() are non-atomic in a sendto() 855 * with a target address, it's possible that the socket will 856 * have disconnected before the send() can run. In that case 857 * return the slightly counter-intuitive but otherwise 858 * correct error that the socket is not connected. 859 * 860 * Locking here must be done carefully: the global lock 861 * prevents interconnections between unpcbs from changing, so 862 * we can traverse from unp to unp2 without acquiring unp's 863 * lock. Socket buffer locks follow unpcb locks, so we can 864 * acquire both remote and lock socket buffer locks. 865 */ 866 unp2 = unp->unp_conn; 867 if (unp2 == NULL) { 868 error = ENOTCONN; 869 break; 870 } 871 so2 = unp2->unp_socket; 872 UNP_PCB_LOCK(unp2); 873 SOCKBUF_LOCK(&so2->so_rcv); 874 if (unp2->unp_flags & UNP_WANTCRED) { 875 /* 876 * Credentials are passed only once on SOCK_STREAM. 
877 */ 878 unp2->unp_flags &= ~UNP_WANTCRED; 879 control = unp_addsockcred(td, control); 880 } 881 /* 882 * Send to paired receive port, and then reduce send buffer 883 * hiwater marks to maintain backpressure. Wake up readers. 884 */ 885 if (control != NULL) { 886 if (sbappendcontrol_locked(&so2->so_rcv, m, control)) 887 control = NULL; 888 } else 889 sbappend_locked(&so2->so_rcv, m); 890 mbcnt = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt; 891 unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt; 892 sbcc = so2->so_rcv.sb_cc; 893 sorwakeup_locked(so2); 894 895 SOCKBUF_LOCK(&so->so_snd); 896 newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc); 897 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 898 newhiwat, RLIM_INFINITY); 899 so->so_snd.sb_mbmax -= mbcnt; 900 SOCKBUF_UNLOCK(&so->so_snd); 901 unp2->unp_cc = sbcc; 902 UNP_PCB_UNLOCK(unp2); 903 m = NULL; 904 break; 905 906 default: 907 panic("uipc_send unknown socktype"); 908 } 909 910 /* 911 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown. 
912 */ 913 if (flags & PRUS_EOF) { 914 UNP_PCB_LOCK(unp); 915 socantsendmore(so); 916 unp_shutdown(unp); 917 UNP_PCB_UNLOCK(unp); 918 } 919 920 if ((nam != NULL) || (flags & PRUS_EOF)) 921 UNP_GLOBAL_WUNLOCK(); 922 else 923 UNP_GLOBAL_RUNLOCK(); 924 925 if (control != NULL && error != 0) 926 unp_dispose(control); 927 928release: 929 if (control != NULL) 930 m_freem(control); 931 if (m != NULL) 932 m_freem(m); 933 return (error); 934} 935 936static int 937uipc_sense(struct socket *so, struct stat *sb) 938{ 939 struct unpcb *unp, *unp2; 940 struct socket *so2; 941 942 unp = sotounpcb(so); 943 KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); 944 945 sb->st_blksize = so->so_snd.sb_hiwat; 946 UNP_GLOBAL_RLOCK(); 947 UNP_PCB_LOCK(unp); 948 unp2 = unp->unp_conn; 949 if (so->so_type == SOCK_STREAM && unp2 != NULL) { 950 so2 = unp2->unp_socket; 951 sb->st_blksize += so2->so_rcv.sb_cc; 952 } 953 sb->st_dev = NODEV; 954 if (unp->unp_ino == 0) 955 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; 956 sb->st_ino = unp->unp_ino; 957 UNP_PCB_UNLOCK(unp); 958 UNP_GLOBAL_RUNLOCK(); 959 return (0); 960} 961 962static int 963uipc_shutdown(struct socket *so) 964{ 965 struct unpcb *unp; 966 967 unp = sotounpcb(so); 968 KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); 969 970 UNP_GLOBAL_WLOCK(); 971 UNP_PCB_LOCK(unp); 972 socantsendmore(so); 973 unp_shutdown(unp); 974 UNP_PCB_UNLOCK(unp); 975 UNP_GLOBAL_WUNLOCK(); 976 return (0); 977} 978 979static int 980uipc_sockaddr(struct socket *so, struct sockaddr **nam) 981{ 982 struct unpcb *unp; 983 const struct sockaddr *sa; 984 985 unp = sotounpcb(so); 986 KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); 987 988 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 989 UNP_PCB_LOCK(unp); 990 if (unp->unp_addr != NULL) 991 sa = (struct sockaddr *) unp->unp_addr; 992 else 993 sa = &sun_noname; 994 bcopy(sa, *nam, sa->sa_len); 995 UNP_PCB_UNLOCK(unp); 996 return (0); 997} 998 999static struct pr_usrreqs 
uipc_usrreqs_dgram = { 1000 .pru_abort = uipc_abort, 1001 .pru_accept = uipc_accept, 1002 .pru_attach = uipc_attach, 1003 .pru_bind = uipc_bind, 1004 .pru_connect = uipc_connect, 1005 .pru_connect2 = uipc_connect2, 1006 .pru_detach = uipc_detach, 1007 .pru_disconnect = uipc_disconnect, 1008 .pru_listen = uipc_listen, 1009 .pru_peeraddr = uipc_peeraddr, 1010 .pru_rcvd = uipc_rcvd, 1011 .pru_send = uipc_send, 1012 .pru_sense = uipc_sense, 1013 .pru_shutdown = uipc_shutdown, 1014 .pru_sockaddr = uipc_sockaddr, 1015 .pru_soreceive = soreceive_dgram, 1016 .pru_close = uipc_close, 1017}; 1018 1019static struct pr_usrreqs uipc_usrreqs_stream = { 1020 .pru_abort = uipc_abort, 1021 .pru_accept = uipc_accept, 1022 .pru_attach = uipc_attach, 1023 .pru_bind = uipc_bind, 1024 .pru_connect = uipc_connect, 1025 .pru_connect2 = uipc_connect2, 1026 .pru_detach = uipc_detach, 1027 .pru_disconnect = uipc_disconnect, 1028 .pru_listen = uipc_listen, 1029 .pru_peeraddr = uipc_peeraddr, 1030 .pru_rcvd = uipc_rcvd, 1031 .pru_send = uipc_send, 1032 .pru_sense = uipc_sense, 1033 .pru_shutdown = uipc_shutdown, 1034 .pru_sockaddr = uipc_sockaddr, 1035 .pru_soreceive = soreceive_generic, 1036 .pru_close = uipc_close, 1037}; 1038 1039static int 1040uipc_ctloutput(struct socket *so, struct sockopt *sopt) 1041{ 1042 struct unpcb *unp; 1043 struct xucred xu; 1044 int error, optval; 1045 1046 if (sopt->sopt_level != 0) 1047 return (EINVAL); 1048 1049 unp = sotounpcb(so); 1050 KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); 1051 error = 0; 1052 switch (sopt->sopt_dir) { 1053 case SOPT_GET: 1054 switch (sopt->sopt_name) { 1055 case LOCAL_PEERCRED: 1056 UNP_PCB_LOCK(unp); 1057 if (unp->unp_flags & UNP_HAVEPC) 1058 xu = unp->unp_peercred; 1059 else { 1060 if (so->so_type == SOCK_STREAM) 1061 error = ENOTCONN; 1062 else 1063 error = EINVAL; 1064 } 1065 UNP_PCB_UNLOCK(unp); 1066 if (error == 0) 1067 error = sooptcopyout(sopt, &xu, sizeof(xu)); 1068 break; 1069 1070 case LOCAL_CREDS: 1071 /* 
Unlocked read. */ 1072 optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0; 1073 error = sooptcopyout(sopt, &optval, sizeof(optval)); 1074 break; 1075 1076 case LOCAL_CONNWAIT: 1077 /* Unlocked read. */ 1078 optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0; 1079 error = sooptcopyout(sopt, &optval, sizeof(optval)); 1080 break; 1081 1082 default: 1083 error = EOPNOTSUPP; 1084 break; 1085 } 1086 break; 1087 1088 case SOPT_SET: 1089 switch (sopt->sopt_name) { 1090 case LOCAL_CREDS: 1091 case LOCAL_CONNWAIT: 1092 error = sooptcopyin(sopt, &optval, sizeof(optval), 1093 sizeof(optval)); 1094 if (error) 1095 break; 1096 1097#define OPTSET(bit) do { \ 1098 UNP_PCB_LOCK(unp); \ 1099 if (optval) \ 1100 unp->unp_flags |= bit; \ 1101 else \ 1102 unp->unp_flags &= ~bit; \ 1103 UNP_PCB_UNLOCK(unp); \ 1104} while (0) 1105 1106 switch (sopt->sopt_name) { 1107 case LOCAL_CREDS: 1108 OPTSET(UNP_WANTCRED); 1109 break; 1110 1111 case LOCAL_CONNWAIT: 1112 OPTSET(UNP_CONNWAIT); 1113 break; 1114 1115 default: 1116 break; 1117 } 1118 break; 1119#undef OPTSET 1120 default: 1121 error = ENOPROTOOPT; 1122 break; 1123 } 1124 break; 1125 1126 default: 1127 error = EOPNOTSUPP; 1128 break; 1129 } 1130 return (error); 1131} 1132 1133static int 1134unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 1135{ 1136 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 1137 struct vnode *vp; 1138 struct socket *so2, *so3; 1139 struct unpcb *unp, *unp2, *unp3; 1140 int error, len, vfslocked; 1141 struct nameidata nd; 1142 char buf[SOCK_MAXADDRLEN]; 1143 struct sockaddr *sa; 1144 1145 UNP_GLOBAL_WLOCK_ASSERT(); 1146 1147 unp = sotounpcb(so); 1148 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1149 1150 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 1151 if (len <= 0) 1152 return (EINVAL); 1153 bcopy(soun->sun_path, buf, len); 1154 buf[len] = 0; 1155 1156 UNP_PCB_LOCK(unp); 1157 if (unp->unp_flags & UNP_CONNECTING) { 1158 UNP_PCB_UNLOCK(unp); 1159 return (EALREADY); 1160 } 
1161 UNP_GLOBAL_WUNLOCK(); 1162 unp->unp_flags |= UNP_CONNECTING; 1163 UNP_PCB_UNLOCK(unp); 1164 1165 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 1166 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, 1167 td); 1168 error = namei(&nd); 1169 if (error) 1170 vp = NULL; 1171 else 1172 vp = nd.ni_vp; 1173 ASSERT_VOP_LOCKED(vp, "unp_connect"); 1174 vfslocked = NDHASGIANT(&nd); 1175 NDFREE(&nd, NDF_ONLY_PNBUF); 1176 if (error) 1177 goto bad; 1178 1179 if (vp->v_type != VSOCK) { 1180 error = ENOTSOCK; 1181 goto bad; 1182 } 1183#ifdef MAC 1184 error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); 1185 if (error) 1186 goto bad; 1187#endif 1188 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 1189 if (error) 1190 goto bad; 1191 VFS_UNLOCK_GIANT(vfslocked); 1192 1193 unp = sotounpcb(so); 1194 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1195 1196 /* 1197 * Lock global lock for two reasons: make sure v_socket is stable, 1198 * and to protect simultaneous locking of multiple pcbs. 1199 */ 1200 UNP_GLOBAL_WLOCK(); 1201 so2 = vp->v_socket; 1202 if (so2 == NULL) { 1203 error = ECONNREFUSED; 1204 goto bad2; 1205 } 1206 if (so->so_type != so2->so_type) { 1207 error = EPROTOTYPE; 1208 goto bad2; 1209 } 1210 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 1211 if (so2->so_options & SO_ACCEPTCONN) { 1212 /* 1213 * We can't drop the global lock here or 'so2' may 1214 * become invalid. As a result, we need to handle 1215 * possibly lock recursion in uipc_attach. 
1216 */ 1217 so3 = sonewconn(so2, 0); 1218 } else 1219 so3 = NULL; 1220 if (so3 == NULL) { 1221 error = ECONNREFUSED; 1222 goto bad2; 1223 } 1224 unp = sotounpcb(so); 1225 unp2 = sotounpcb(so2); 1226 unp3 = sotounpcb(so3); 1227 UNP_PCB_LOCK(unp); 1228 UNP_PCB_LOCK(unp2); 1229 UNP_PCB_LOCK(unp3); 1230 if (unp2->unp_addr != NULL) { 1231 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 1232 unp3->unp_addr = (struct sockaddr_un *) sa; 1233 sa = NULL; 1234 } 1235 /* 1236 * unp_peercred management: 1237 * 1238 * The connecter's (client's) credentials are copied from its 1239 * process structure at the time of connect() (which is now). 1240 */ 1241 cru2x(td->td_ucred, &unp3->unp_peercred); 1242 unp3->unp_flags |= UNP_HAVEPC; 1243 /* 1244 * The receiver's (server's) credentials are copied from the 1245 * unp_peercred member of socket on which the former called 1246 * listen(); uipc_listen() cached that process's credentials 1247 * at that time so we can use them now. 1248 */ 1249 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 1250 ("unp_connect: listener without cached peercred")); 1251 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 1252 sizeof(unp->unp_peercred)); 1253 unp->unp_flags |= UNP_HAVEPC; 1254 if (unp2->unp_flags & UNP_WANTCRED) 1255 unp3->unp_flags |= UNP_WANTCRED; 1256 UNP_PCB_UNLOCK(unp3); 1257 UNP_PCB_UNLOCK(unp2); 1258 UNP_PCB_UNLOCK(unp); 1259#ifdef MAC 1260 SOCK_LOCK(so); 1261 mac_socketpeer_set_from_socket(so, so3); 1262 mac_socketpeer_set_from_socket(so3, so); 1263 SOCK_UNLOCK(so); 1264#endif 1265 1266 so2 = so3; 1267 } 1268 unp = sotounpcb(so); 1269 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1270 unp2 = sotounpcb(so2); 1271 KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); 1272 UNP_PCB_LOCK(unp); 1273 UNP_PCB_LOCK(unp2); 1274 error = unp_connect2(so, so2, PRU_CONNECT); 1275 UNP_PCB_UNLOCK(unp2); 1276 UNP_PCB_UNLOCK(unp); 1277bad2: 1278 UNP_GLOBAL_WUNLOCK(); 1279 if (vfslocked) 1280 /* 1281 * Giant has been previously acquired. 
This means filesystem 1282 * isn't MPSAFE. Do it once again. 1283 */ 1284 mtx_lock(&Giant); 1285bad: 1286 if (vp != NULL) 1287 vput(vp); 1288 VFS_UNLOCK_GIANT(vfslocked); 1289 free(sa, M_SONAME); 1290 UNP_GLOBAL_WLOCK(); 1291 UNP_PCB_LOCK(unp); 1292 unp->unp_flags &= ~UNP_CONNECTING; 1293 UNP_PCB_UNLOCK(unp); 1294 return (error); 1295} 1296 1297static int 1298unp_connect2(struct socket *so, struct socket *so2, int req) 1299{ 1300 struct unpcb *unp; 1301 struct unpcb *unp2; 1302 1303 unp = sotounpcb(so); 1304 KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); 1305 unp2 = sotounpcb(so2); 1306 KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); 1307 1308 UNP_GLOBAL_WLOCK_ASSERT(); 1309 UNP_PCB_LOCK_ASSERT(unp); 1310 UNP_PCB_LOCK_ASSERT(unp2); 1311 1312 if (so2->so_type != so->so_type) 1313 return (EPROTOTYPE); 1314 unp->unp_conn = unp2; 1315 1316 switch (so->so_type) { 1317 case SOCK_DGRAM: 1318 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1319 soisconnected(so); 1320 break; 1321 1322 case SOCK_STREAM: 1323 unp2->unp_conn = unp; 1324 if (req == PRU_CONNECT && 1325 ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) 1326 soisconnecting(so); 1327 else 1328 soisconnected(so); 1329 soisconnected(so2); 1330 break; 1331 1332 default: 1333 panic("unp_connect2"); 1334 } 1335 return (0); 1336} 1337 1338static void 1339unp_disconnect(struct unpcb *unp, struct unpcb *unp2) 1340{ 1341 struct socket *so; 1342 1343 KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); 1344 1345 UNP_GLOBAL_WLOCK_ASSERT(); 1346 UNP_PCB_LOCK_ASSERT(unp); 1347 UNP_PCB_LOCK_ASSERT(unp2); 1348 1349 unp->unp_conn = NULL; 1350 switch (unp->unp_socket->so_type) { 1351 case SOCK_DGRAM: 1352 LIST_REMOVE(unp, unp_reflink); 1353 so = unp->unp_socket; 1354 SOCK_LOCK(so); 1355 so->so_state &= ~SS_ISCONNECTED; 1356 SOCK_UNLOCK(so); 1357 break; 1358 1359 case SOCK_STREAM: 1360 soisdisconnected(unp->unp_socket); 1361 unp2->unp_conn = NULL; 1362 soisdisconnected(unp2->unp_socket); 1363 break; 1364 } 
1365} 1366 1367/* 1368 * unp_pcblist() walks the global list of struct unpcb's to generate a 1369 * pointer list, bumping the refcount on each unpcb. It then copies them out 1370 * sequentially, validating the generation number on each to see if it has 1371 * been detached. All of this is necessary because copyout() may sleep on 1372 * disk I/O. 1373 */ 1374static int 1375unp_pcblist(SYSCTL_HANDLER_ARGS) 1376{ 1377 int error, i, n; 1378 int freeunp; 1379 struct unpcb *unp, **unp_list; 1380 unp_gen_t gencnt; 1381 struct xunpgen *xug; 1382 struct unp_head *head; 1383 struct xunpcb *xu; 1384 1385 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1386 1387 /* 1388 * The process of preparing the PCB list is too time-consuming and 1389 * resource-intensive to repeat twice on every request. 1390 */ 1391 if (req->oldptr == NULL) { 1392 n = unp_count; 1393 req->oldidx = 2 * (sizeof *xug) 1394 + (n + n/8) * sizeof(struct xunpcb); 1395 return (0); 1396 } 1397 1398 if (req->newptr != NULL) 1399 return (EPERM); 1400 1401 /* 1402 * OK, now we're committed to doing something. 
1403 */ 1404 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 1405 UNP_GLOBAL_RLOCK(); 1406 gencnt = unp_gencnt; 1407 n = unp_count; 1408 UNP_GLOBAL_RUNLOCK(); 1409 1410 xug->xug_len = sizeof *xug; 1411 xug->xug_count = n; 1412 xug->xug_gen = gencnt; 1413 xug->xug_sogen = so_gencnt; 1414 error = SYSCTL_OUT(req, xug, sizeof *xug); 1415 if (error) { 1416 free(xug, M_TEMP); 1417 return (error); 1418 } 1419 1420 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1421 1422 UNP_GLOBAL_RLOCK(); 1423 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1424 unp = LIST_NEXT(unp, unp_link)) { 1425 UNP_PCB_LOCK(unp); 1426 if (unp->unp_gencnt <= gencnt) { 1427 if (cr_cansee(req->td->td_ucred, 1428 unp->unp_socket->so_cred)) { 1429 UNP_PCB_UNLOCK(unp); 1430 continue; 1431 } 1432 unp_list[i++] = unp; 1433 unp->unp_refcount++; 1434 } 1435 UNP_PCB_UNLOCK(unp); 1436 } 1437 UNP_GLOBAL_RUNLOCK(); 1438 n = i; /* In case we lost some during malloc. */ 1439 1440 error = 0; 1441 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); 1442 for (i = 0; i < n; i++) { 1443 unp = unp_list[i]; 1444 UNP_PCB_LOCK(unp); 1445 unp->unp_refcount--; 1446 if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { 1447 xu->xu_len = sizeof *xu; 1448 xu->xu_unpp = unp; 1449 /* 1450 * XXX - need more locking here to protect against 1451 * connect/disconnect races for SMP. 
1452 */ 1453 if (unp->unp_addr != NULL) 1454 bcopy(unp->unp_addr, &xu->xu_addr, 1455 unp->unp_addr->sun_len); 1456 if (unp->unp_conn != NULL && 1457 unp->unp_conn->unp_addr != NULL) 1458 bcopy(unp->unp_conn->unp_addr, 1459 &xu->xu_caddr, 1460 unp->unp_conn->unp_addr->sun_len); 1461 bcopy(unp, &xu->xu_unp, sizeof *unp); 1462 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1463 UNP_PCB_UNLOCK(unp); 1464 error = SYSCTL_OUT(req, xu, sizeof *xu); 1465 } else { 1466 freeunp = (unp->unp_refcount == 0); 1467 UNP_PCB_UNLOCK(unp); 1468 if (freeunp) { 1469 UNP_PCB_LOCK_DESTROY(unp); 1470 uma_zfree(unp_zone, unp); 1471 } 1472 } 1473 } 1474 free(xu, M_TEMP); 1475 if (!error) { 1476 /* 1477 * Give the user an updated idea of our state. If the 1478 * generation differs from what we told her before, she knows 1479 * that something happened while we were processing this 1480 * request, and it might be necessary to retry. 1481 */ 1482 xug->xug_gen = unp_gencnt; 1483 xug->xug_sogen = so_gencnt; 1484 xug->xug_count = unp_count; 1485 error = SYSCTL_OUT(req, xug, sizeof *xug); 1486 } 1487 free(unp_list, M_TEMP); 1488 free(xug, M_TEMP); 1489 return (error); 1490} 1491 1492SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1493 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1494 "List of active local datagram sockets"); 1495SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1496 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1497 "List of active local stream sockets"); 1498 1499static void 1500unp_shutdown(struct unpcb *unp) 1501{ 1502 struct unpcb *unp2; 1503 struct socket *so; 1504 1505 UNP_GLOBAL_WLOCK_ASSERT(); 1506 UNP_PCB_LOCK_ASSERT(unp); 1507 1508 unp2 = unp->unp_conn; 1509 if (unp->unp_socket->so_type == SOCK_STREAM && unp2 != NULL) { 1510 so = unp2->unp_socket; 1511 if (so != NULL) 1512 socantrcvmore(so); 1513 } 1514} 1515 1516static void 1517unp_drop(struct unpcb *unp, int errno) 1518{ 1519 struct socket *so = unp->unp_socket; 1520 struct 
unpcb *unp2; 1521 1522 UNP_GLOBAL_WLOCK_ASSERT(); 1523 UNP_PCB_LOCK_ASSERT(unp); 1524 1525 so->so_error = errno; 1526 unp2 = unp->unp_conn; 1527 if (unp2 == NULL) 1528 return; 1529 UNP_PCB_LOCK(unp2); 1530 unp_disconnect(unp, unp2); 1531 UNP_PCB_UNLOCK(unp2); 1532} 1533 1534static void 1535unp_freerights(struct file **rp, int fdcount) 1536{ 1537 int i; 1538 struct file *fp; 1539 1540 for (i = 0; i < fdcount; i++) { 1541 fp = *rp; 1542 *rp++ = NULL; 1543 unp_discard(fp); 1544 } 1545} 1546 1547static int 1548unp_externalize(struct mbuf *control, struct mbuf **controlp) 1549{ 1550 struct thread *td = curthread; /* XXX */ 1551 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1552 int i; 1553 int *fdp; 1554 struct file **rp; 1555 struct file *fp; 1556 void *data; 1557 socklen_t clen = control->m_len, datalen; 1558 int error, newfds; 1559 int f; 1560 u_int newlen; 1561 1562 UNP_GLOBAL_UNLOCK_ASSERT(); 1563 1564 error = 0; 1565 if (controlp != NULL) /* controlp == NULL => free control messages */ 1566 *controlp = NULL; 1567 while (cm != NULL) { 1568 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1569 error = EINVAL; 1570 break; 1571 } 1572 data = CMSG_DATA(cm); 1573 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1574 if (cm->cmsg_level == SOL_SOCKET 1575 && cm->cmsg_type == SCM_RIGHTS) { 1576 newfds = datalen / sizeof(struct file *); 1577 rp = data; 1578 1579 /* If we're not outputting the descriptors free them. */ 1580 if (error || controlp == NULL) { 1581 unp_freerights(rp, newfds); 1582 goto next; 1583 } 1584 FILEDESC_XLOCK(td->td_proc->p_fd); 1585 /* if the new FD's will not fit free them. 
*/ 1586 if (!fdavail(td, newfds)) { 1587 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1588 error = EMSGSIZE; 1589 unp_freerights(rp, newfds); 1590 goto next; 1591 } 1592 1593 /* 1594 * Now change each pointer to an fd in the global 1595 * table to an integer that is the index to the local 1596 * fd table entry that we set up to point to the 1597 * global one we are transferring. 1598 */ 1599 newlen = newfds * sizeof(int); 1600 *controlp = sbcreatecontrol(NULL, newlen, 1601 SCM_RIGHTS, SOL_SOCKET); 1602 if (*controlp == NULL) { 1603 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1604 error = E2BIG; 1605 unp_freerights(rp, newfds); 1606 goto next; 1607 } 1608 1609 fdp = (int *) 1610 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1611 for (i = 0; i < newfds; i++) { 1612 if (fdalloc(td, 0, &f)) 1613 panic("unp_externalize fdalloc failed"); 1614 fp = *rp++; 1615 td->td_proc->p_fd->fd_ofiles[f] = fp; 1616 unp_externalize_fp(fp); 1617 *fdp++ = f; 1618 } 1619 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1620 } else { 1621 /* We can just copy anything else across. 
*/ 1622 if (error || controlp == NULL) 1623 goto next; 1624 *controlp = sbcreatecontrol(NULL, datalen, 1625 cm->cmsg_type, cm->cmsg_level); 1626 if (*controlp == NULL) { 1627 error = ENOBUFS; 1628 goto next; 1629 } 1630 bcopy(data, 1631 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1632 datalen); 1633 } 1634 controlp = &(*controlp)->m_next; 1635 1636next: 1637 if (CMSG_SPACE(datalen) < clen) { 1638 clen -= CMSG_SPACE(datalen); 1639 cm = (struct cmsghdr *) 1640 ((caddr_t)cm + CMSG_SPACE(datalen)); 1641 } else { 1642 clen = 0; 1643 cm = NULL; 1644 } 1645 } 1646 1647 m_freem(control); 1648 return (error); 1649} 1650 1651static void 1652unp_zone_change(void *tag) 1653{ 1654 1655 uma_zone_set_max(unp_zone, maxsockets); 1656} 1657 1658static void 1659unp_init(void) 1660{ 1661 1662 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1663 NULL, NULL, UMA_ALIGN_PTR, 0); 1664 if (unp_zone == NULL) 1665 panic("unp_init"); 1666 uma_zone_set_max(unp_zone, maxsockets); 1667 EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, 1668 NULL, EVENTHANDLER_PRI_ANY); 1669 LIST_INIT(&unp_dhead); 1670 LIST_INIT(&unp_shead); 1671 TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); 1672 UNP_GLOBAL_LOCK_INIT(); 1673} 1674 1675static int 1676unp_internalize(struct mbuf **controlp, struct thread *td) 1677{ 1678 struct mbuf *control = *controlp; 1679 struct proc *p = td->td_proc; 1680 struct filedesc *fdescp = p->p_fd; 1681 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1682 struct cmsgcred *cmcred; 1683 struct file **rp; 1684 struct file *fp; 1685 struct timeval *tv; 1686 int i, fd, *fdp; 1687 void *data; 1688 socklen_t clen = control->m_len, datalen; 1689 int error, oldfds; 1690 u_int newlen; 1691 1692 UNP_GLOBAL_UNLOCK_ASSERT(); 1693 1694 error = 0; 1695 *controlp = NULL; 1696 while (cm != NULL) { 1697 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1698 || cm->cmsg_len > clen) { 1699 error = EINVAL; 1700 goto out; 1701 } 1702 data = CMSG_DATA(cm); 1703 datalen = 
(caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1704 1705 switch (cm->cmsg_type) { 1706 /* 1707 * Fill in credential information. 1708 */ 1709 case SCM_CREDS: 1710 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1711 SCM_CREDS, SOL_SOCKET); 1712 if (*controlp == NULL) { 1713 error = ENOBUFS; 1714 goto out; 1715 } 1716 cmcred = (struct cmsgcred *) 1717 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1718 cmcred->cmcred_pid = p->p_pid; 1719 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1720 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1721 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1722 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1723 CMGROUP_MAX); 1724 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1725 cmcred->cmcred_groups[i] = 1726 td->td_ucred->cr_groups[i]; 1727 break; 1728 1729 case SCM_RIGHTS: 1730 oldfds = datalen / sizeof (int); 1731 /* 1732 * Check that all the FDs passed in refer to legal 1733 * files. If not, reject the entire operation. 1734 */ 1735 fdp = data; 1736 FILEDESC_SLOCK(fdescp); 1737 for (i = 0; i < oldfds; i++) { 1738 fd = *fdp++; 1739 if ((unsigned)fd >= fdescp->fd_nfiles || 1740 fdescp->fd_ofiles[fd] == NULL) { 1741 FILEDESC_SUNLOCK(fdescp); 1742 error = EBADF; 1743 goto out; 1744 } 1745 fp = fdescp->fd_ofiles[fd]; 1746 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1747 FILEDESC_SUNLOCK(fdescp); 1748 error = EOPNOTSUPP; 1749 goto out; 1750 } 1751 1752 } 1753 1754 /* 1755 * Now replace the integer FDs with pointers to the 1756 * associated global file table entry.. 
1757 */ 1758 newlen = oldfds * sizeof(struct file *); 1759 *controlp = sbcreatecontrol(NULL, newlen, 1760 SCM_RIGHTS, SOL_SOCKET); 1761 if (*controlp == NULL) { 1762 FILEDESC_SUNLOCK(fdescp); 1763 error = E2BIG; 1764 goto out; 1765 } 1766 fdp = data; 1767 rp = (struct file **) 1768 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1769 for (i = 0; i < oldfds; i++) { 1770 fp = fdescp->fd_ofiles[*fdp++]; 1771 *rp++ = fp; 1772 unp_internalize_fp(fp); 1773 } 1774 FILEDESC_SUNLOCK(fdescp); 1775 break; 1776 1777 case SCM_TIMESTAMP: 1778 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1779 SCM_TIMESTAMP, SOL_SOCKET); 1780 if (*controlp == NULL) { 1781 error = ENOBUFS; 1782 goto out; 1783 } 1784 tv = (struct timeval *) 1785 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1786 microtime(tv); 1787 break; 1788 1789 default: 1790 error = EINVAL; 1791 goto out; 1792 } 1793 1794 controlp = &(*controlp)->m_next; 1795 if (CMSG_SPACE(datalen) < clen) { 1796 clen -= CMSG_SPACE(datalen); 1797 cm = (struct cmsghdr *) 1798 ((caddr_t)cm + CMSG_SPACE(datalen)); 1799 } else { 1800 clen = 0; 1801 cm = NULL; 1802 } 1803 } 1804 1805out: 1806 m_freem(control); 1807 return (error); 1808} 1809 1810static struct mbuf * 1811unp_addsockcred(struct thread *td, struct mbuf *control) 1812{ 1813 struct mbuf *m, *n, *n_prev; 1814 struct sockcred *sc; 1815 const struct cmsghdr *cm; 1816 int ngroups; 1817 int i; 1818 1819 ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); 1820 m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); 1821 if (m == NULL) 1822 return (control); 1823 1824 sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); 1825 sc->sc_uid = td->td_ucred->cr_ruid; 1826 sc->sc_euid = td->td_ucred->cr_uid; 1827 sc->sc_gid = td->td_ucred->cr_rgid; 1828 sc->sc_egid = td->td_ucred->cr_gid; 1829 sc->sc_ngroups = ngroups; 1830 for (i = 0; i < sc->sc_ngroups; i++) 1831 sc->sc_groups[i] = td->td_ucred->cr_groups[i]; 1832 1833 /* 1834 * Unlink SCM_CREDS control messages (struct 
cmsgcred), since just 1835 * created SCM_CREDS control message (struct sockcred) has another 1836 * format. 1837 */ 1838 if (control != NULL) 1839 for (n = control, n_prev = NULL; n != NULL;) { 1840 cm = mtod(n, struct cmsghdr *); 1841 if (cm->cmsg_level == SOL_SOCKET && 1842 cm->cmsg_type == SCM_CREDS) { 1843 if (n_prev == NULL) 1844 control = n->m_next; 1845 else 1846 n_prev->m_next = n->m_next; 1847 n = m_free(n); 1848 } else { 1849 n_prev = n; 1850 n = n->m_next; 1851 } 1852 } 1853 1854 /* Prepend it to the head. */ 1855 m->m_next = control; 1856 return (m); 1857} 1858 1859static struct unpcb * 1860fptounp(struct file *fp) 1861{ 1862 struct socket *so; 1863 1864 if (fp->f_type != DTYPE_SOCKET) 1865 return (NULL); 1866 if ((so = fp->f_data) == NULL) 1867 return (NULL); 1868 if (so->so_proto->pr_domain != &localdomain) 1869 return (NULL); 1870 return sotounpcb(so); 1871} 1872 1873static void 1874unp_discard(struct file *fp) 1875{ 1876 1877 unp_externalize_fp(fp); 1878 (void) closef(fp, (struct thread *)NULL); 1879} 1880 1881static void 1882unp_internalize_fp(struct file *fp) 1883{ 1884 struct unpcb *unp; 1885 1886 UNP_GLOBAL_WLOCK(); 1887 if ((unp = fptounp(fp)) != NULL) { 1888 unp->unp_file = fp; 1889 unp->unp_msgcount++; 1890 } 1891 fhold(fp); 1892 unp_rights++; 1893 UNP_GLOBAL_WUNLOCK(); 1894} 1895 1896static void 1897unp_externalize_fp(struct file *fp) 1898{ 1899 struct unpcb *unp; 1900 1901 UNP_GLOBAL_WLOCK(); 1902 if ((unp = fptounp(fp)) != NULL) 1903 unp->unp_msgcount--; 1904 unp_rights--; 1905 UNP_GLOBAL_WUNLOCK(); 1906} 1907 1908/* 1909 * unp_defer indicates whether additional work has been defered for a future 1910 * pass through unp_gc(). It is thread local and does not require explicit 1911 * synchronization. 
1912 */ 1913static int unp_marked; 1914static int unp_unreachable; 1915 1916static void 1917unp_accessable(struct file *fp) 1918{ 1919 struct unpcb *unp; 1920 1921 if ((unp = fptounp(fp)) == NULL) 1922 return; 1923 if (unp->unp_gcflag & UNPGC_REF) 1924 return; 1925 unp->unp_gcflag &= ~UNPGC_DEAD; 1926 unp->unp_gcflag |= UNPGC_REF; 1927 unp_marked++; 1928} 1929 1930static void 1931unp_gc_process(struct unpcb *unp) 1932{ 1933 struct socket *soa; 1934 struct socket *so; 1935 struct file *fp; 1936 1937 /* Already processed. */ 1938 if (unp->unp_gcflag & UNPGC_SCANNED) 1939 return; 1940 fp = unp->unp_file; 1941 1942 /* 1943 * Check for a socket potentially in a cycle. It must be in a 1944 * queue as indicated by msgcount, and this must equal the file 1945 * reference count. Note that when msgcount is 0 the file is NULL. 1946 */ 1947 if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && 1948 unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { 1949 unp->unp_gcflag |= UNPGC_DEAD; 1950 unp_unreachable++; 1951 return; 1952 } 1953 1954 /* 1955 * Mark all sockets we reference with RIGHTS. 1956 */ 1957 so = unp->unp_socket; 1958 SOCKBUF_LOCK(&so->so_rcv); 1959 unp_scan(so->so_rcv.sb_mb, unp_accessable); 1960 SOCKBUF_UNLOCK(&so->so_rcv); 1961 1962 /* 1963 * Mark all sockets in our accept queue. 
1964 */ 1965 ACCEPT_LOCK(); 1966 TAILQ_FOREACH(soa, &so->so_comp, so_list) { 1967 SOCKBUF_LOCK(&soa->so_rcv); 1968 unp_scan(soa->so_rcv.sb_mb, unp_accessable); 1969 SOCKBUF_UNLOCK(&soa->so_rcv); 1970 } 1971 ACCEPT_UNLOCK(); 1972 unp->unp_gcflag |= UNPGC_SCANNED; 1973} 1974 1975static int unp_recycled; 1976SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 1977 "Number of unreachable sockets claimed by the garbage collector."); 1978 1979static int unp_taskcount; 1980SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 1981 "Number of times the garbage collector has run."); 1982 1983static void 1984unp_gc(__unused void *arg, int pending) 1985{ 1986 struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL }; 1987 struct unp_head **head; 1988 struct file **unref; 1989 struct unpcb *unp; 1990 int i; 1991 1992 unp_taskcount++; 1993 UNP_GLOBAL_RLOCK(); 1994 /* 1995 * First clear all gc flags from previous runs. 1996 */ 1997 for (head = heads; *head != NULL; head++) 1998 LIST_FOREACH(unp, *head, unp_link) 1999 unp->unp_gcflag = 0; 2000 2001 /* 2002 * Scan marking all reachable sockets with UNPGC_REF. Once a socket 2003 * is reachable all of the sockets it references are reachable. 2004 * Stop the scan once we do a complete loop without discovering 2005 * a new reachable socket. 2006 */ 2007 do { 2008 unp_unreachable = 0; 2009 unp_marked = 0; 2010 for (head = heads; *head != NULL; head++) 2011 LIST_FOREACH(unp, *head, unp_link) 2012 unp_gc_process(unp); 2013 } while (unp_marked); 2014 UNP_GLOBAL_RUNLOCK(); 2015 if (unp_unreachable == 0) 2016 return; 2017 2018 /* 2019 * Allocate space for a local list of dead unpcbs. 2020 */ 2021 unref = malloc(unp_unreachable * sizeof(struct file *), 2022 M_TEMP, M_WAITOK); 2023 2024 /* 2025 * Iterate looking for sockets which have been specifically marked 2026 * as as unreachable and store them locally. 
2027 */ 2028 UNP_GLOBAL_RLOCK(); 2029 for (i = 0, head = heads; *head != NULL; head++) 2030 LIST_FOREACH(unp, *head, unp_link) 2031 if (unp->unp_gcflag & UNPGC_DEAD) { 2032 unref[i++] = unp->unp_file; 2033 fhold(unp->unp_file); 2034 KASSERT(unp->unp_file != NULL, 2035 ("unp_gc: Invalid unpcb.")); 2036 KASSERT(i <= unp_unreachable, 2037 ("unp_gc: incorrect unreachable count.")); 2038 } 2039 UNP_GLOBAL_RUNLOCK(); 2040 2041 /* 2042 * Now flush all sockets, free'ing rights. This will free the 2043 * struct files associated with these sockets but leave each socket 2044 * with one remaining ref. 2045 */ 2046 for (i = 0; i < unp_unreachable; i++) 2047 sorflush(unref[i]->f_data); 2048 2049 /* 2050 * And finally release the sockets so they can be reclaimed. 2051 */ 2052 for (i = 0; i < unp_unreachable; i++) 2053 fdrop(unref[i], NULL); 2054 unp_recycled += unp_unreachable; 2055 free(unref, M_TEMP); 2056} 2057 2058static void 2059unp_dispose(struct mbuf *m) 2060{ 2061 2062 if (m) 2063 unp_scan(m, unp_discard); 2064} 2065 2066static void 2067unp_scan(struct mbuf *m0, void (*op)(struct file *)) 2068{ 2069 struct mbuf *m; 2070 struct file **rp; 2071 struct cmsghdr *cm; 2072 void *data; 2073 int i; 2074 socklen_t clen, datalen; 2075 int qfds; 2076 2077 while (m0 != NULL) { 2078 for (m = m0; m; m = m->m_next) { 2079 if (m->m_type != MT_CONTROL) 2080 continue; 2081 2082 cm = mtod(m, struct cmsghdr *); 2083 clen = m->m_len; 2084 2085 while (cm != NULL) { 2086 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 2087 break; 2088 2089 data = CMSG_DATA(cm); 2090 datalen = (caddr_t)cm + cm->cmsg_len 2091 - (caddr_t)data; 2092 2093 if (cm->cmsg_level == SOL_SOCKET && 2094 cm->cmsg_type == SCM_RIGHTS) { 2095 qfds = datalen / sizeof (struct file *); 2096 rp = data; 2097 for (i = 0; i < qfds; i++) 2098 (*op)(*rp++); 2099 } 2100 2101 if (CMSG_SPACE(datalen) < clen) { 2102 clen -= CMSG_SPACE(datalen); 2103 cm = (struct cmsghdr *) 2104 ((caddr_t)cm + CMSG_SPACE(datalen)); 2105 } else { 2106 clen = 
0; 2107 cm = NULL; 2108 } 2109 } 2110 } 2111 m0 = m0->m_act; 2112 } 2113} 2114 2115#ifdef DDB 2116static void 2117db_print_indent(int indent) 2118{ 2119 int i; 2120 2121 for (i = 0; i < indent; i++) 2122 db_printf(" "); 2123} 2124 2125static void 2126db_print_unpflags(int unp_flags) 2127{ 2128 int comma; 2129 2130 comma = 0; 2131 if (unp_flags & UNP_HAVEPC) { 2132 db_printf("%sUNP_HAVEPC", comma ? ", " : ""); 2133 comma = 1; 2134 } 2135 if (unp_flags & UNP_HAVEPCCACHED) { 2136 db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : ""); 2137 comma = 1; 2138 } 2139 if (unp_flags & UNP_WANTCRED) { 2140 db_printf("%sUNP_WANTCRED", comma ? ", " : ""); 2141 comma = 1; 2142 } 2143 if (unp_flags & UNP_CONNWAIT) { 2144 db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); 2145 comma = 1; 2146 } 2147 if (unp_flags & UNP_CONNECTING) { 2148 db_printf("%sUNP_CONNECTING", comma ? ", " : ""); 2149 comma = 1; 2150 } 2151 if (unp_flags & UNP_BINDING) { 2152 db_printf("%sUNP_BINDING", comma ? ", " : ""); 2153 comma = 1; 2154 } 2155} 2156 2157static void 2158db_print_xucred(int indent, struct xucred *xu) 2159{ 2160 int comma, i; 2161 2162 db_print_indent(indent); 2163 db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n", 2164 xu->cr_version, xu->cr_uid, xu->cr_ngroups); 2165 db_print_indent(indent); 2166 db_printf("cr_groups: "); 2167 comma = 0; 2168 for (i = 0; i < xu->cr_ngroups; i++) { 2169 db_printf("%s%u", comma ? 
", " : "", xu->cr_groups[i]); 2170 comma = 1; 2171 } 2172 db_printf("\n"); 2173} 2174 2175static void 2176db_print_unprefs(int indent, struct unp_head *uh) 2177{ 2178 struct unpcb *unp; 2179 int counter; 2180 2181 counter = 0; 2182 LIST_FOREACH(unp, uh, unp_reflink) { 2183 if (counter % 4 == 0) 2184 db_print_indent(indent); 2185 db_printf("%p ", unp); 2186 if (counter % 4 == 3) 2187 db_printf("\n"); 2188 counter++; 2189 } 2190 if (counter != 0 && counter % 4 != 0) 2191 db_printf("\n"); 2192} 2193 2194DB_SHOW_COMMAND(unpcb, db_show_unpcb) 2195{ 2196 struct unpcb *unp; 2197 2198 if (!have_addr) { 2199 db_printf("usage: show unpcb <addr>\n"); 2200 return; 2201 } 2202 unp = (struct unpcb *)addr; 2203 2204 db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, 2205 unp->unp_vnode); 2206 2207 db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino, 2208 unp->unp_conn); 2209 2210 db_printf("unp_refs:\n"); 2211 db_print_unprefs(2, &unp->unp_refs); 2212 2213 /* XXXRW: Would be nice to print the full address, if any. */ 2214 db_printf("unp_addr: %p\n", unp->unp_addr); 2215 2216 db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n", 2217 unp->unp_cc, unp->unp_mbcnt, 2218 (unsigned long long)unp->unp_gencnt); 2219 2220 db_printf("unp_flags: %x (", unp->unp_flags); 2221 db_print_unpflags(unp->unp_flags); 2222 db_printf(")\n"); 2223 2224 db_printf("unp_peercred:\n"); 2225 db_print_xucred(2, &unp->unp_peercred); 2226 2227 db_printf("unp_refcount: %u\n", unp->unp_refcount); 2228} 2229#endif 2230