uipc_usrreq.c revision 133709
1/* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 133709 2004-08-14 03:43:49Z rwatson $"); 34 35#include "opt_mac.h" 36 37#include <sys/param.h> 38#include <sys/domain.h> 39#include <sys/fcntl.h> 40#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 41#include <sys/file.h> 42#include <sys/filedesc.h> 43#include <sys/jail.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mbuf.h> 48#include <sys/mutex.h> 49#include <sys/namei.h> 50#include <sys/proc.h> 51#include <sys/protosw.h> 52#include <sys/resourcevar.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55#include <sys/signalvar.h> 56#include <sys/stat.h> 57#include <sys/sx.h> 58#include <sys/sysctl.h> 59#include <sys/systm.h> 60#include <sys/un.h> 61#include <sys/unpcb.h> 62#include <sys/vnode.h> 63 64#include <vm/uma.h> 65 66static uma_zone_t unp_zone; 67static unp_gen_t unp_gencnt; 68static u_int unp_count; 69 70static struct unp_head unp_shead, unp_dhead; 71 72/* 73 * Unix communications domain. 
 *
 * TODO:
 *	SEQPACKET, RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t	unp_ino;		/* prototype for fake inode numbers */

/*
 * Single global subsystem mutex protecting the UNIX domain socket PCB
 * lists, generation counts, and per-PCB connection state (see the
 * "lock pushdown" TODO above).
 */
static struct mtx unp_mtx;
#define	UNP_LOCK_INIT() \
	mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
#define	UNP_LOCK()		mtx_lock(&unp_mtx)
#define	UNP_UNLOCK()		mtx_unlock(&unp_mtx)
#define	UNP_LOCK_ASSERT()	mtx_assert(&unp_mtx, MA_OWNED)

static int	unp_attach(struct socket *);
static void	unp_detach(struct unpcb *);
static int	unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
static int	unp_connect(struct socket *,struct sockaddr *, struct thread *);
static int	unp_connect2(struct socket *so, struct socket *so2);
static void	unp_disconnect(struct unpcb *);
static void	unp_shutdown(struct unpcb *);
static void	unp_drop(struct unpcb *, int);
static void	unp_gc(void);
static void	unp_scan(struct mbuf *, void (*)(struct file *));
static void	unp_mark(struct file *);
static void	unp_discard(struct file *);
static void	unp_freerights(struct file **, int);
static int	unp_internalize(struct mbuf **, struct thread *);
static int	unp_listen(struct unpcb *, struct thread *);

/*
 * pru_abort: drop any connection with ECONNABORTED, detach the pcb
 * (which drops the UNP lock), then free the socket if possible.
 */
static int
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	unp_drop(unp, ECONNABORTED);
	unp_detach(unp);	/* NB: unlocks */
	SOCK_LOCK(so);
	sotryfree(so);
	return (0);
}

static int
uipc_accept(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	const struct sockaddr *sa;

	if (unp == NULL)
		return (EINVAL);

	/*
	 * Pass back name of connected socket,
	 * if it was bound and we are still connected
	 * (our peer may have closed already!).
	 */
	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_LOCK();
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
		sa = (struct sockaddr *) unp->unp_conn->unp_addr;
	else
		sa = &sun_noname;
	bcopy(sa, *nam, sa->sa_len);
	UNP_UNLOCK();
	return (0);
}

/*
 * pru_attach: allocate a fresh pcb for the socket; fails with EISCONN
 * if a pcb is already attached.
 */
static int
uipc_attach(struct socket *so, int proto, struct thread *td)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp != NULL)
		return (EISCONN);
	return (unp_attach(so));
}

static int
uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	return (unp_bind(unp, nam, td));
}

static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct unpcb *unp;
	int error;

	KASSERT(td == curthread, ("uipc_connect: td != curthread"));

	UNP_LOCK();
	unp = sotounpcb(so);
	if (unp == NULL) {
		error = EINVAL;
		goto out;
	}
	error = unp_connect(so, nam, td);
out:
	UNP_UNLOCK();
	return (error);
}

int
uipc_connect2(struct socket *so1, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so1);
	int error;

	if (unp == NULL)
		return (EINVAL);

	UNP_LOCK();
	error = unp_connect2(so1, so2);
	UNP_UNLOCK();
	return (error);
}

/* control is EOPNOTSUPP */

static int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	UNP_LOCK();
	unp_detach(unp);	/* NB: unlocks unp */
	return (0);
}

static int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	unp_disconnect(unp);
	UNP_UNLOCK();
	return (0);
}

/*
 * pru_listen: only a bound socket (one with a vnode) may listen.
 */
static int
uipc_listen(struct socket *so, struct thread *td)
{
	struct unpcb *unp = sotounpcb(so);
	int error;

	if (unp == NULL || unp->unp_vnode == NULL)
		return (EINVAL);
	UNP_LOCK();
	error = unp_listen(unp, td);
	UNP_UNLOCK();
	return (error);
}

static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	const struct sockaddr *sa;

	if (unp == NULL)
		return (EINVAL);
	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_LOCK();
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
		sa = (struct sockaddr *) unp->unp_conn->unp_addr;
	else {
		/*
		 * XXX: It seems that this test always fails even when
		 * connection is established.  So, this else clause is
		 * added as workaround to return PF_LOCAL sockaddr.
		 */
		sa = &sun_noname;
	}
	bcopy(sa, *nam, sa->sa_len);
	UNP_UNLOCK();
	return (0);
}

/*
 * pru_rcvd (stream only): reader has drained data; propagate freed
 * space back to the peer's send buffer and wake blocked writers.
 */
static int
uipc_rcvd(struct socket *so, int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	u_long newhiwat;

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/

	case SOCK_STREAM:
		if (unp->unp_conn == NULL)
			break;
		so2 = unp->unp_conn->unp_socket;
		SOCKBUF_LOCK(&so2->so_snd);
		SOCKBUF_LOCK(&so->so_rcv);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 *
		 * NOTE(review): so2->so_snd is locked above and not
		 * visibly unlocked here; presumably sowwakeup_locked()
		 * releases it — confirm against its definition.
		 */
		so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
		unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
		newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
		    so->so_rcv.sb_cc;
		(void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
		    newhiwat, RLIM_INFINITY);
		unp->unp_cc = so->so_rcv.sb_cc;
		SOCKBUF_UNLOCK(&so->so_rcv);
		sowwakeup_locked(so2);
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	UNP_UNLOCK();
	return (0);
}

/* pru_rcvoob is EOPNOTSUPP */

/*
 * pru_send: deliver data (and any internalized control/rights) directly
 * into the peer socket's receive buffer.  DGRAM may supply a one-shot
 * destination in 'nam'; STREAM maintains backpressure accounting.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
	  struct mbuf *control, struct thread *td)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	u_long newhiwat;

	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	if (control != NULL && (error = unp_internalize(&control, td)))
		goto release;

	UNP_LOCK();
	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		const struct sockaddr *from;

		if (nam != NULL) {
			if (unp->unp_conn != NULL) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, td);
			if (error)
				break;
		} else {
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
		}
		so2 = unp->unp_conn->unp_socket;
		if (unp->unp_addr != NULL)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;
		SOCKBUF_LOCK(&so2->so_rcv);
		if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
			sorwakeup_locked(so2);
			m = NULL;
			control = NULL;
		} else {
			SOCKBUF_UNLOCK(&so2->so_rcv);
			error = ENOBUFS;
		}
		/* An implicit connect made for this send is torn down. */
		if (nam != NULL)
			unp_disconnect(unp);
		break;
	}

	case SOCK_STREAM:
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam != NULL) {
				error = unp_connect(so, nam, td);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == NULL)
			panic("uipc_send connected but no connection?");
		so2 = unp->unp_conn->unp_socket;
		SOCKBUF_LOCK(&so2->so_rcv);
		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		if (control != NULL) {
			if (sbappendcontrol_locked(&so2->so_rcv, m, control))
				control = NULL;
		} else {
			sbappend_locked(&so2->so_rcv, m);
		}
		so->so_snd.sb_mbmax -=
			so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
		newhiwat = so->so_snd.sb_hiwat -
		    (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
		(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
		    newhiwat, RLIM_INFINITY);
		unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
		sorwakeup_locked(so2);
		m = NULL;
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}
	UNP_UNLOCK();

	/* On error, dispose of any rights carried in the control mbufs. */
	if (control != NULL && error != 0)
		unp_dispose(control);

release:
	if (control != NULL)
		m_freem(control);
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * pru_sense: fake up stat(2) results; a stable fake inode number is
 * assigned on first use from the global unp_ino counter.
 */
static int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	sb->st_blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		sb->st_blksize += so2->so_rcv.sb_cc;
	}
	sb->st_dev = NODEV;
	if (unp->unp_ino == 0)
		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
	sb->st_ino = unp->unp_ino;
	UNP_UNLOCK();
	return (0);
}

static int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	socantsendmore(so);
	unp_shutdown(unp);
	UNP_UNLOCK();
	return (0);
}

static int
uipc_sockaddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	const struct sockaddr *sa;

	if (unp == NULL)
		return (EINVAL);
	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_LOCK();
	if (unp->unp_addr != NULL)
		sa = (struct sockaddr *) unp->unp_addr;
	else
		sa = &sun_noname;
	bcopy(sa, *nam, sa->sa_len);
	UNP_UNLOCK();
	return (0);
}

/* Protocol switch user-request vector for the UNIX (local) domain. */
struct pr_usrreqs uipc_usrreqs = {
	uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
	uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
	uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
	uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
	sosend, soreceive, sopoll, pru_sosetlabel_null
};

int
uipc_ctloutput(so, sopt)
	struct socket *so;
	struct
sockopt *sopt;
{
	struct unpcb *unp = sotounpcb(so);
	struct xucred xu;
	int error;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			/*
			 * Return the cached peer credentials (set at
			 * connect/listen time) if present.
			 */
			error = 0;
			UNP_LOCK();
			if (unp->unp_flags & UNP_HAVEPC)
				xu = unp->unp_peercred;
			else {
				if (so->so_type == SOCK_STREAM)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			UNP_UNLOCK();
			if (error == 0)
				error = sooptcopyout(sopt, &xu, sizeof(xu));
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		break;
	case SOPT_SET:
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#ifndef PIPSIZ
#define	PIPSIZ	8192
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

static int	unp_rights;		/* file descriptors in flight */

SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
	   &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpst_recvspace, 0, "");
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
	   &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpdg_recvspace, 0, "");
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");

/*
 * Allocate and initialize a pcb for 'so': reserve default buffer space
 * if the caller has not, zone-allocate the unpcb, and insert it on the
 * global datagram or stream pcb list under the UNP lock.
 */
static int
unp_attach(so)
	struct socket *so;
{
	register struct unpcb *unp;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = uma_zalloc(unp_zone, M_WAITOK);
	if (unp == NULL)
		return (ENOBUFS);
	bzero(unp, sizeof *unp);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;

	UNP_LOCK();
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
			 : &unp_shead, unp, unp_link);
	UNP_UNLOCK();

	so->so_pcb = unp;
	return (0);
}

/*
 * Tear down a pcb: unlink it from the global list, release the bound
 * vnode (if any), disconnect peers and referrers, and free the pcb.
 * Called with the UNP lock held; drops it before returning.
 */
static void
unp_detach(unp)
	register struct unpcb *unp;
{
	struct vnode *vp;

	UNP_LOCK_ASSERT();

	LIST_REMOVE(unp, unp_link);
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;
	if ((vp = unp->unp_vnode) != NULL) {
		/*
		 * XXXRW: should v_socket be frobbed only while holding
		 * Giant?
		 */
		unp->unp_vnode->v_socket = NULL;
		unp->unp_vnode = NULL;
	}
	if (unp->unp_conn != NULL)
		unp_disconnect(unp);
	while (!LIST_EMPTY(&unp->unp_refs)) {
		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
		unp_drop(ref, ECONNRESET);
	}
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = NULL;
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(unp->unp_socket);
		unp_gc();
	}
	UNP_UNLOCK();
	if (unp->unp_addr != NULL)
		FREE(unp->unp_addr, M_SONAME);
	uma_zfree(unp_zone, unp);
	if (vp) {
		mtx_lock(&Giant);
		vrele(vp);
		mtx_unlock(&Giant);
	}
}

/*
 * Bind the pcb to a filesystem name: create a VSOCK vnode at the path
 * in 'nam' (under Giant), then publish vnode<->socket links and the
 * duplicated address under the UNP lock.
 */
static int
unp_bind(unp, nam, td)
	struct unpcb *unp;
	struct sockaddr *nam;
	struct thread *td;
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error, namelen;
	struct nameidata nd;
	char *buf;

	/*
	 * XXXRW: This test-and-set of unp_vnode is non-atomic; the
	 * unlocked read here is fine, but the value of unp_vnode needs
	 * to be tested again after we do all the lookups to see if the
	 * pcb is still unbound?
	 */
	if (unp->unp_vnode != NULL)
		return (EINVAL);

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
	strlcpy(buf, soun->sun_path, namelen + 1);

	mtx_lock(&Giant);
restart:
	mtx_assert(&Giant, MA_OWNED);
	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
	    buf, td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error)
		goto done;
	vp = nd.ni_vp;
	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp != NULL) {
			/* Name already exists: binding fails. */
			vrele(vp);
			error = EADDRINUSE;
			goto done;
		}
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error)
			goto done;
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
#ifdef MAC
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
#endif
	if (error == 0) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (error)
		goto done;
	vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_bind");
	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
	UNP_LOCK();
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = soun;
	UNP_UNLOCK();
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
done:
	mtx_unlock(&Giant);
	free(buf, M_TEMP);
	return (error);
}

/*
 * Connect 'so' to the socket bound at the path in 'nam'.  Called with
 * the UNP lock held; drops and reacquires it (and takes Giant) around
 * the namei lookup, revalidating the pcb afterwards.
 */
static int
unp_connect(so, nam, td)
	struct socket *so;
	struct sockaddr *nam;
	struct thread *td;
{
	register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	register struct
vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;

	UNP_LOCK_ASSERT();
	unp = sotounpcb(so);

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	strlcpy(buf, soun->sun_path, len + 1);
	UNP_UNLOCK();
	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	mtx_lock(&Giant);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
	error = namei(&nd);
	if (error)
		vp = NULL;
	else
		vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_connect");
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error)
		goto bad;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
	if (error)
		goto bad;
	mtx_unlock(&Giant);
	UNP_LOCK();
	unp = sotounpcb(so);
	if (unp == NULL) {
		/*
		 * XXXRW: Temporary debugging printf.
		 */
		printf("unp_connect(): lost race to another thread\n");
		error = EINVAL;
		goto bad2;
	}
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad2;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad2;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if (so2->so_options & SO_ACCEPTCONN) {
			/*
			 * NB: drop locks here so unp_attach is entered
			 *     w/o locks; this avoids a recursive lock
			 *     of the head and holding sleep locks across
			 *     a (potentially) blocking malloc.
			 */
			UNP_UNLOCK();
			so3 = sonewconn(so2, 0);
			UNP_LOCK();
		} else
			so3 = NULL;
		if (so3 == NULL) {
			error = ECONNREFUSED;
			goto bad2;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr != NULL) {
			/* 'sa' ownership passes to unp3->unp_addr. */
			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
			unp3->unp_addr = (struct sockaddr_un *) sa;
			sa = NULL;
		}
		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(td->td_ucred, &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;
#ifdef MAC
		SOCK_LOCK(so);
		mac_set_socket_peer_from_socket(so, so3);
		mac_set_socket_peer_from_socket(so3, so);
		SOCK_UNLOCK(so);
#endif

		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad2:
	UNP_UNLOCK();
	mtx_lock(&Giant);
bad:
	mtx_assert(&Giant, MA_OWNED);
	if (vp != NULL)
		vput(vp);
	mtx_unlock(&Giant);
	free(sa, M_SONAME);
	UNP_LOCK();
	return (error);
}

/*
 * Link two pcbs together: datagram peers keep a one-way reference list;
 * stream peers are cross-linked and both marked connected.
 */
static int
unp_connect2(so, so2)
	register struct socket *so;
	register struct socket *so2;
{
	register struct unpcb *unp = sotounpcb(so);
	register struct unpcb *unp2;

	UNP_LOCK_ASSERT();

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

static void
unp_disconnect(unp)
	struct unpcb *unp;
{
	register struct unpcb *unp2 = unp->unp_conn;
	struct socket *so;

	UNP_LOCK_ASSERT();

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		so = unp->unp_socket;
		SOCK_LOCK(so);
		so->so_state &= ~SS_ISCONNECTED;
		SOCK_UNLOCK(so);
		break;

	case SOCK_STREAM:
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

#ifdef notdef
void
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif

/*
 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
 * are safe to reference.  It first scans the list of struct unpcb's to
 * generate a pointer list, then it rescans its list one entry at a time to
 * externalize and copyout.  It checks the generation number to see if a
 * struct unpcb has been reused, and will skip it if so.
 */
static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen *xug;
	struct unp_head *head;
	struct xunpcb *xu;

	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = unp_count;
		req->oldidx = 2 * (sizeof *xug)
			+ (n + n/8) * sizeof(struct xunpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
	UNP_LOCK();
	gencnt = unp_gencnt;
	n = unp_count;
	UNP_UNLOCK();

	xug->xug_len = sizeof *xug;
	xug->xug_count = n;
	xug->xug_gen = gencnt;
	xug->xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, xug, sizeof *xug);
	if (error) {
		free(xug, M_TEMP);
		return (error);
	}

	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);

	UNP_LOCK();
	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
	     unp = LIST_NEXT(unp, unp_link)) {
		if (unp->unp_gencnt <= gencnt) {
			/* Hide pcbs the requester may not see (jail etc). */
			if (cr_cansee(req->td->td_ucred,
			    unp->unp_socket->so_cred))
				continue;
			unp_list[i++] = unp;
		}
	}
	UNP_UNLOCK();
	n = i;			/* in case we lost some during malloc */

	error = 0;
	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			xu->xu_len = sizeof *xu;
			xu->xu_unpp = unp;
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr != NULL)
				bcopy(unp->unp_addr, &xu->xu_addr,
				      unp->unp_addr->sun_len);
			if (unp->unp_conn != NULL &&
			    unp->unp_conn->unp_addr != NULL)
				bcopy(unp->unp_conn->unp_addr,
				      &xu->xu_caddr,
				      unp->unp_conn->unp_addr->sun_len);
			bcopy(unp, &xu->xu_unp, sizeof *unp);
			sotoxsocket(unp->unp_socket, &xu->xu_socket);
			error = SYSCTL_OUT(req, xu, sizeof *xu);
		}
	}
	free(xu, M_TEMP);
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		xug->xug_gen = unp_gencnt;
		xug->xug_sogen = so_gencnt;
		xug->xug_count = unp_count;
		error = SYSCTL_OUT(req, xug, sizeof *xug);
	}
	free(unp_list, M_TEMP);
	free(xug, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");

/*
 * Half-close: tell a connected stream peer that no more data will
 * arrive.  No-op for datagram sockets or unconnected streams.
 */
static void
unp_shutdown(unp)
	struct unpcb *unp;
{
	struct socket *so;

	UNP_LOCK_ASSERT();

	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
	    (so = unp->unp_conn->unp_socket))
		socantrcvmore(so);
}

/*
 * Record 'errno' on the socket and disconnect it from its peer.
 */
static void
unp_drop(unp, errno)
	struct unpcb *unp;
	int errno;
{
	struct socket *so = unp->unp_socket;

	UNP_LOCK_ASSERT();

	so->so_error = errno;
	unp_disconnect(unp);
}

#ifdef notdef
void
unp_drain()
{

}
#endif

/*
 * Drop 'fdcount' in-flight file references from the array at 'rp'.
 */
static void
unp_freerights(rp, fdcount)
	struct file **rp;
	int fdcount;
{
	int i;
	struct file *fp;

	for (i = 0; i < fdcount; i++) {
		fp = *rp;
		/*
		 * zero the pointer before calling
		 * unp_discard since it may end up
		 * in unp_gc()..
		 */
		*rp++ = 0;
		unp_discard(fp);
	}
}

/*
 * Convert received control data back to userland form: SCM_RIGHTS
 * struct file pointers become newly allocated descriptors in the
 * receiving process; other control messages are copied through.
 * A NULL 'controlp' means discard (free any carried rights).
 */
int
unp_externalize(control, controlp)
	struct mbuf *control, **controlp;
{
	struct thread *td = curthread;		/* XXX */
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int i;
	int *fdp;
	struct file **rp;
	struct file *fp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, newfds;
	int f;
	u_int newlen;

	error = 0;
	if (controlp != NULL) /* controlp == NULL => free control messages */
		*controlp = NULL;

	while (cm != NULL) {
		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
			error = EINVAL;
			break;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		if (cm->cmsg_level == SOL_SOCKET
		    && cm->cmsg_type == SCM_RIGHTS) {
			newfds = datalen / sizeof(struct file *);
			rp = data;

			/* If we're not outputting the descriptors free them. */
			if (error || controlp == NULL) {
				unp_freerights(rp, newfds);
				goto next;
			}
			FILEDESC_LOCK(td->td_proc->p_fd);
			/* if the new FD's will not fit free them.  */
			if (!fdavail(td, newfds)) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = EMSGSIZE;
				unp_freerights(rp, newfds);
				goto next;
			}
			/*
			 * now change each pointer to an fd in the global
			 * table to an integer that is the index to the
			 * local fd table entry that we set up to point
			 * to the global one we are transferring.
			 */
			newlen = newfds * sizeof(int);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = E2BIG;
				unp_freerights(rp, newfds);
				goto next;
			}

			fdp = (int *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			for (i = 0; i < newfds; i++) {
				if (fdalloc(td, 0, &f))
					panic("unp_externalize fdalloc failed");
				fp = *rp++;
				td->td_proc->p_fd->fd_ofiles[f] = fp;
				FILE_LOCK(fp);
				fp->f_msgcount--;
				FILE_UNLOCK(fp);
				unp_rights--;
				*fdp++ = f;
			}
			FILEDESC_UNLOCK(td->td_proc->p_fd);
		} else { /* We can just copy anything else across */
			if (error || controlp == NULL)
				goto next;
			*controlp = sbcreatecontrol(NULL, datalen,
			    cm->cmsg_type, cm->cmsg_level);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto next;
			}
			bcopy(data,
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
			    datalen);
		}

		controlp = &(*controlp)->m_next;

next:
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

	m_freem(control);

	return (error);
}

/*
 * Subsystem initialization: create the unpcb zone (NOFREE so that
 * unp_pcblist() may follow stale pointers safely), initialize the
 * pcb lists, and set up the global lock.
 */
void
unp_init(void)
{
	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	if (unp_zone == NULL)
		panic("unp_init");
	uma_zone_set_max(unp_zone, nmbclusters);
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);

	UNP_LOCK_INIT();
}

/*
 * Convert sender control data to internal form: SCM_CREDS is filled
 * from the sending thread's credentials, SCM_RIGHTS descriptors are
 * converted to referenced struct file pointers, SCM_TIMESTAMP gets the
 * current time.  On return *controlp holds the internalized chain.
 */
static int
unp_internalize(controlp, td)
	struct mbuf **controlp;
	struct thread *td;
{
	struct mbuf *control = *controlp;
	struct proc *p = td->td_proc;
	struct filedesc *fdescp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct cmsgcred *cmcred;
	struct file **rp;
	struct file *fp;
	struct timeval *tv;
	int i, fd, *fdp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, oldfds;
	u_int newlen;

	error = 0;
	*controlp = NULL;

	while (cm != NULL) {
		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
		    || cm->cmsg_len > clen) {
			error = EINVAL;
			goto out;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		switch (cm->cmsg_type) {
		/*
		 * Fill in credential information.
		 */
		case SCM_CREDS:
			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
			    SCM_CREDS, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}

			cmcred = (struct cmsgcred *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			cmcred->cmcred_pid = p->p_pid;
			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
			cmcred->cmcred_euid = td->td_ucred->cr_uid;
			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
			    CMGROUP_MAX);
			for (i = 0; i < cmcred->cmcred_ngroups; i++)
				cmcred->cmcred_groups[i] =
				    td->td_ucred->cr_groups[i];
			break;

		case SCM_RIGHTS:
			oldfds = datalen / sizeof (int);
			/*
			 * check that all the FDs passed in refer to legal files
			 * If not, reject the entire operation.
			 */
			fdp = data;
			FILEDESC_LOCK(fdescp);
			for (i = 0; i < oldfds; i++) {
				fd = *fdp++;
				if ((unsigned)fd >= fdescp->fd_nfiles ||
				    fdescp->fd_ofiles[fd] == NULL) {
					FILEDESC_UNLOCK(fdescp);
					error = EBADF;
					goto out;
				}
				fp = fdescp->fd_ofiles[fd];
				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
					FILEDESC_UNLOCK(fdescp);
					error = EOPNOTSUPP;
					goto out;
				}

			}
			/*
			 * Now replace the integer FDs with pointers to
			 * the associated global file table entry..
1344 */ 1345 newlen = oldfds * sizeof(struct file *); 1346 *controlp = sbcreatecontrol(NULL, newlen, 1347 SCM_RIGHTS, SOL_SOCKET); 1348 if (*controlp == NULL) { 1349 FILEDESC_UNLOCK(fdescp); 1350 error = E2BIG; 1351 goto out; 1352 } 1353 1354 fdp = data; 1355 rp = (struct file **) 1356 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1357 for (i = 0; i < oldfds; i++) { 1358 fp = fdescp->fd_ofiles[*fdp++]; 1359 *rp++ = fp; 1360 FILE_LOCK(fp); 1361 fp->f_count++; 1362 fp->f_msgcount++; 1363 FILE_UNLOCK(fp); 1364 unp_rights++; 1365 } 1366 FILEDESC_UNLOCK(fdescp); 1367 break; 1368 1369 case SCM_TIMESTAMP: 1370 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1371 SCM_TIMESTAMP, SOL_SOCKET); 1372 if (*controlp == NULL) { 1373 error = ENOBUFS; 1374 goto out; 1375 } 1376 tv = (struct timeval *) 1377 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1378 microtime(tv); 1379 break; 1380 1381 default: 1382 error = EINVAL; 1383 goto out; 1384 } 1385 1386 controlp = &(*controlp)->m_next; 1387 1388 if (CMSG_SPACE(datalen) < clen) { 1389 clen -= CMSG_SPACE(datalen); 1390 cm = (struct cmsghdr *) 1391 ((caddr_t)cm + CMSG_SPACE(datalen)); 1392 } else { 1393 clen = 0; 1394 cm = NULL; 1395 } 1396 } 1397 1398out: 1399 m_freem(control); 1400 1401 return (error); 1402} 1403 1404static int unp_defer, unp_gcing; 1405 1406static void 1407unp_gc() 1408{ 1409 register struct file *fp, *nextfp; 1410 register struct socket *so; 1411 struct file **extra_ref, **fpp; 1412 int nunref, i; 1413 int nfiles_snap; 1414 int nfiles_slack = 20; 1415 1416 UNP_LOCK_ASSERT(); 1417 1418 if (unp_gcing) 1419 return; 1420 unp_gcing = 1; 1421 unp_defer = 0; 1422 /* 1423 * before going through all this, set all FDs to 1424 * be NOT defered and NOT externally accessible 1425 */ 1426 /* 1427 * XXXRW: Acquiring a sleep lock while holding UNP 1428 * mutex cannot be a good thing. 
1429 */ 1430 sx_slock(&filelist_lock); 1431 LIST_FOREACH(fp, &filehead, f_list) 1432 fp->f_gcflag &= ~(FMARK|FDEFER); 1433 do { 1434 LIST_FOREACH(fp, &filehead, f_list) { 1435 FILE_LOCK(fp); 1436 /* 1437 * If the file is not open, skip it 1438 */ 1439 if (fp->f_count == 0) { 1440 FILE_UNLOCK(fp); 1441 continue; 1442 } 1443 /* 1444 * If we already marked it as 'defer' in a 1445 * previous pass, then try process it this time 1446 * and un-mark it 1447 */ 1448 if (fp->f_gcflag & FDEFER) { 1449 fp->f_gcflag &= ~FDEFER; 1450 unp_defer--; 1451 } else { 1452 /* 1453 * if it's not defered, then check if it's 1454 * already marked.. if so skip it 1455 */ 1456 if (fp->f_gcflag & FMARK) { 1457 FILE_UNLOCK(fp); 1458 continue; 1459 } 1460 /* 1461 * If all references are from messages 1462 * in transit, then skip it. it's not 1463 * externally accessible. 1464 */ 1465 if (fp->f_count == fp->f_msgcount) { 1466 FILE_UNLOCK(fp); 1467 continue; 1468 } 1469 /* 1470 * If it got this far then it must be 1471 * externally accessible. 1472 */ 1473 fp->f_gcflag |= FMARK; 1474 } 1475 /* 1476 * either it was defered, or it is externally 1477 * accessible and not already marked so. 1478 * Now check if it is possibly one of OUR sockets. 1479 */ 1480 if (fp->f_type != DTYPE_SOCKET || 1481 (so = fp->f_data) == NULL) { 1482 FILE_UNLOCK(fp); 1483 continue; 1484 } 1485 FILE_UNLOCK(fp); 1486 if (so->so_proto->pr_domain != &localdomain || 1487 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1488 continue; 1489#ifdef notdef 1490 if (so->so_rcv.sb_flags & SB_LOCK) { 1491 /* 1492 * This is problematical; it's not clear 1493 * we need to wait for the sockbuf to be 1494 * unlocked (on a uniprocessor, at least), 1495 * and it's also not clear what to do 1496 * if sbwait returns an error due to receipt 1497 * of a signal. If sbwait does return 1498 * an error, we'll go into an infinite 1499 * loop. Delete all of this for now. 
1500 */ 1501 (void) sbwait(&so->so_rcv); 1502 goto restart; 1503 } 1504#endif 1505 /* 1506 * So, Ok, it's one of our sockets and it IS externally 1507 * accessible (or was defered). Now we look 1508 * to see if we hold any file descriptors in its 1509 * message buffers. Follow those links and mark them 1510 * as accessible too. 1511 */ 1512 SOCKBUF_LOCK(&so->so_rcv); 1513 unp_scan(so->so_rcv.sb_mb, unp_mark); 1514 SOCKBUF_UNLOCK(&so->so_rcv); 1515 } 1516 } while (unp_defer); 1517 sx_sunlock(&filelist_lock); 1518 /* 1519 * We grab an extra reference to each of the file table entries 1520 * that are not otherwise accessible and then free the rights 1521 * that are stored in messages on them. 1522 * 1523 * The bug in the orginal code is a little tricky, so I'll describe 1524 * what's wrong with it here. 1525 * 1526 * It is incorrect to simply unp_discard each entry for f_msgcount 1527 * times -- consider the case of sockets A and B that contain 1528 * references to each other. On a last close of some other socket, 1529 * we trigger a gc since the number of outstanding rights (unp_rights) 1530 * is non-zero. If during the sweep phase the gc code un_discards, 1531 * we end up doing a (full) closef on the descriptor. A closef on A 1532 * results in the following chain. Closef calls soo_close, which 1533 * calls soclose. Soclose calls first (through the switch 1534 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1535 * returns because the previous instance had set unp_gcing, and 1536 * we return all the way back to soclose, which marks the socket 1537 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1538 * to free up the rights that are queued in messages on the socket A, 1539 * i.e., the reference on B. The sorflush calls via the dom_dispose 1540 * switch unp_dispose, which unp_scans with unp_discard. This second 1541 * instance of unp_discard just calls closef on B. 
1542 * 1543 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1544 * which results in another closef on A. Unfortunately, A is already 1545 * being closed, and the descriptor has already been marked with 1546 * SS_NOFDREF, and soclose panics at this point. 1547 * 1548 * Here, we first take an extra reference to each inaccessible 1549 * descriptor. Then, we call sorflush ourself, since we know 1550 * it is a Unix domain socket anyhow. After we destroy all the 1551 * rights carried in messages, we do a last closef to get rid 1552 * of our extra reference. This is the last close, and the 1553 * unp_detach etc will shut down the socket. 1554 * 1555 * 91/09/19, bsy@cs.cmu.edu 1556 */ 1557again: 1558 nfiles_snap = nfiles + nfiles_slack; /* some slack */ 1559 extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP, 1560 M_WAITOK); 1561 sx_slock(&filelist_lock); 1562 if (nfiles_snap < nfiles) { 1563 sx_sunlock(&filelist_lock); 1564 free(extra_ref, M_TEMP); 1565 nfiles_slack += 20; 1566 goto again; 1567 } 1568 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 1569 fp != NULL; fp = nextfp) { 1570 nextfp = LIST_NEXT(fp, f_list); 1571 FILE_LOCK(fp); 1572 /* 1573 * If it's not open, skip it 1574 */ 1575 if (fp->f_count == 0) { 1576 FILE_UNLOCK(fp); 1577 continue; 1578 } 1579 /* 1580 * If all refs are from msgs, and it's not marked accessible 1581 * then it must be referenced from some unreachable cycle 1582 * of (shut-down) FDs, so include it in our 1583 * list of FDs to remove 1584 */ 1585 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { 1586 *fpp++ = fp; 1587 nunref++; 1588 fp->f_count++; 1589 } 1590 FILE_UNLOCK(fp); 1591 } 1592 sx_sunlock(&filelist_lock); 1593 /* 1594 * for each FD on our hit list, do the following two things 1595 */ 1596 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1597 struct file *tfp = *fpp; 1598 FILE_LOCK(tfp); 1599 if (tfp->f_type == DTYPE_SOCKET && 1600 tfp->f_data != NULL) { 1601 
FILE_UNLOCK(tfp); 1602 sorflush(tfp->f_data); 1603 } else { 1604 FILE_UNLOCK(tfp); 1605 } 1606 } 1607 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1608 closef(*fpp, (struct thread *) NULL); 1609 free(extra_ref, M_TEMP); 1610 unp_gcing = 0; 1611} 1612 1613void 1614unp_dispose(m) 1615 struct mbuf *m; 1616{ 1617 1618 if (m) 1619 unp_scan(m, unp_discard); 1620} 1621 1622static int 1623unp_listen(unp, td) 1624 struct unpcb *unp; 1625 struct thread *td; 1626{ 1627 UNP_LOCK_ASSERT(); 1628 1629 /* 1630 * XXXRW: Why populate the local peer cred with our own credential? 1631 */ 1632 cru2x(td->td_ucred, &unp->unp_peercred); 1633 unp->unp_flags |= UNP_HAVEPCCACHED; 1634 return (0); 1635} 1636 1637static void 1638unp_scan(m0, op) 1639 register struct mbuf *m0; 1640 void (*op)(struct file *); 1641{ 1642 struct mbuf *m; 1643 struct file **rp; 1644 struct cmsghdr *cm; 1645 void *data; 1646 int i; 1647 socklen_t clen, datalen; 1648 int qfds; 1649 1650 while (m0 != NULL) { 1651 for (m = m0; m; m = m->m_next) { 1652 if (m->m_type != MT_CONTROL) 1653 continue; 1654 1655 cm = mtod(m, struct cmsghdr *); 1656 clen = m->m_len; 1657 1658 while (cm != NULL) { 1659 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 1660 break; 1661 1662 data = CMSG_DATA(cm); 1663 datalen = (caddr_t)cm + cm->cmsg_len 1664 - (caddr_t)data; 1665 1666 if (cm->cmsg_level == SOL_SOCKET && 1667 cm->cmsg_type == SCM_RIGHTS) { 1668 qfds = datalen / sizeof (struct file *); 1669 rp = data; 1670 for (i = 0; i < qfds; i++) 1671 (*op)(*rp++); 1672 } 1673 1674 if (CMSG_SPACE(datalen) < clen) { 1675 clen -= CMSG_SPACE(datalen); 1676 cm = (struct cmsghdr *) 1677 ((caddr_t)cm + CMSG_SPACE(datalen)); 1678 } else { 1679 clen = 0; 1680 cm = NULL; 1681 } 1682 } 1683 } 1684 m0 = m0->m_act; 1685 } 1686} 1687 1688static void 1689unp_mark(fp) 1690 struct file *fp; 1691{ 1692 if (fp->f_gcflag & FMARK) 1693 return; 1694 unp_defer++; 1695 fp->f_gcflag |= (FMARK|FDEFER); 1696} 1697 1698static void 1699unp_discard(fp) 1700 struct file 
*fp; 1701{ 1702 FILE_LOCK(fp); 1703 fp->f_msgcount--; 1704 unp_rights--; 1705 FILE_UNLOCK(fp); 1706 (void) closef(fp, (struct thread *)NULL); 1707} 1708