/* uipc_usrreq.c revision 133792 */
1192904Sbms/* 2192904Sbms * Copyright 2004 Robert N. M. Watson 3192904Sbms * Copyright (c) 1982, 1986, 1989, 1991, 1993 4192904Sbms * The Regents of the University of California. All rights reserved. 5192904Sbms * 6192904Sbms * Redistribution and use in source and binary forms, with or without 7192904Sbms * modification, are permitted provided that the following conditions 8192904Sbms * are met: 9192904Sbms * 1. Redistributions of source code must retain the above copyright 10192904Sbms * notice, this list of conditions and the following disclaimer. 11192904Sbms * 2. Redistributions in binary form must reproduce the above copyright 12192904Sbms * notice, this list of conditions and the following disclaimer in the 13192904Sbms * documentation and/or other materials provided with the distribution. 14192904Sbms * 4. Neither the name of the University nor the names of its contributors 15192904Sbms * may be used to endorse or promote products derived from this software 16192904Sbms * without specific prior written permission. 17192904Sbms * 18192904Sbms * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19192904Sbms * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20192904Sbms * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21192904Sbms * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22192904Sbms * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23192904Sbms * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24192904Sbms * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25192904Sbms * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26192904Sbms * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27192904Sbms * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28192904Sbms * SUCH DAMAGE. 
29192904Sbms * 30249252Sae * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 31192904Sbms */ 32192904Sbms 33192904Sbms#include <sys/cdefs.h> 34192904Sbms__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 133792 2004-08-16 01:52:04Z rwatson $"); 35192904Sbms 36192904Sbms#include "opt_mac.h" 37192904Sbms 38192904Sbms#include <sys/param.h> 39192904Sbms#include <sys/domain.h> 40192904Sbms#include <sys/fcntl.h> 41192904Sbms#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 42192904Sbms#include <sys/file.h> 43192904Sbms#include <sys/filedesc.h> 44192904Sbms#include <sys/jail.h> 45192904Sbms#include <sys/kernel.h> 46192904Sbms#include <sys/lock.h> 47192904Sbms#include <sys/mac.h> 48192904Sbms#include <sys/mbuf.h> 49192904Sbms#include <sys/mutex.h> 50192904Sbms#include <sys/namei.h> 51192904Sbms#include <sys/proc.h> 52192904Sbms#include <sys/protosw.h> 53192904Sbms#include <sys/resourcevar.h> 54192904Sbms#include <sys/socket.h> 55233648Seadler#include <sys/socketvar.h> 56192904Sbms#include <sys/signalvar.h> 57192904Sbms#include <sys/stat.h> 58192904Sbms#include <sys/sx.h> 59192904Sbms#include <sys/sysctl.h> 60192904Sbms#include <sys/systm.h> 61192904Sbms#include <sys/un.h> 62192904Sbms#include <sys/unpcb.h> 63192904Sbms#include <sys/vnode.h> 64192904Sbms 65192904Sbms#include <vm/uma.h> 66192904Sbms 67192904Sbmsstatic uma_zone_t unp_zone; 68192904Sbmsstatic unp_gen_t unp_gencnt; 69192904Sbmsstatic u_int unp_count; 70192904Sbms 71192904Sbmsstatic struct unp_head unp_shead, unp_dhead; 72192904Sbms 73192904Sbms/* 74192904Sbms * Unix communications domain. 
75192904Sbms * 76192904Sbms * TODO: 77192904Sbms * SEQPACKET, RDM 78192904Sbms * rethink name space problems 79192904Sbms * need a proper out-of-band 80192904Sbms * lock pushdown 81192904Sbms */ 82192904Sbmsstatic const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 83192904Sbmsstatic ino_t unp_ino; /* prototype for fake inode numbers */ 84192904Sbms 85192904Sbms/* 86192904Sbms * Currently, UNIX domain sockets are protected by a single subsystem lock, 87192904Sbms * which covers global data structures and variables, the contents of each 88192904Sbms * per-socket unpcb structure, and the so_pcb field in sockets attached to 89192904Sbms * the UNIX domain. This provides for a moderate degree of paralellism, as 90192904Sbms * receive operations on UNIX domain sockets do not need to acquire the 91192904Sbms * subsystem lock. Finer grained locking to permit send() without acquiring 92249253Sjoel * a global lock would be a logical next step. 93249253Sjoel * 94249252Sae * The UNIX domain socket lock preceds all socket layer locks, including the 95192904Sbms * socket lock and socket buffer lock, permitting UNIX domain socket code to 96192904Sbms * call into socket support routines without releasing its locks. 97249253Sjoel * 98192904Sbms * Some caution is required in areas where the UNIX domain socket code enters 99192904Sbms * VFS in order to create or find rendezvous points. This results in 100192904Sbms * dropping of the UNIX domain socket subsystem lock, acquisition of the 101192904Sbms * Giant lock, and potential sleeping. This increases the chances of races, 102192904Sbms * and exposes weaknesses in the socket->protocol API by offering poor 103 * failure modes. 
104 */ 105static struct mtx unp_mtx; 106#define UNP_LOCK_INIT() \ 107 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) 108#define UNP_LOCK() mtx_lock(&unp_mtx) 109#define UNP_UNLOCK() mtx_unlock(&unp_mtx) 110#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) 111 112static int unp_attach(struct socket *); 113static void unp_detach(struct unpcb *); 114static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); 115static int unp_connect(struct socket *,struct sockaddr *, struct thread *); 116static int unp_connect2(struct socket *so, struct socket *so2); 117static void unp_disconnect(struct unpcb *); 118static void unp_shutdown(struct unpcb *); 119static void unp_drop(struct unpcb *, int); 120static void unp_gc(void); 121static void unp_scan(struct mbuf *, void (*)(struct file *)); 122static void unp_mark(struct file *); 123static void unp_discard(struct file *); 124static void unp_freerights(struct file **, int); 125static int unp_internalize(struct mbuf **, struct thread *); 126static int unp_listen(struct unpcb *, struct thread *); 127 128static int 129uipc_abort(struct socket *so) 130{ 131 struct unpcb *unp = sotounpcb(so); 132 133 if (unp == NULL) 134 return (EINVAL); 135 UNP_LOCK(); 136 unp_drop(unp, ECONNABORTED); 137 unp_detach(unp); /* NB: unlocks */ 138 SOCK_LOCK(so); 139 sotryfree(so); 140 return (0); 141} 142 143static int 144uipc_accept(struct socket *so, struct sockaddr **nam) 145{ 146 struct unpcb *unp = sotounpcb(so); 147 const struct sockaddr *sa; 148 149 if (unp == NULL) 150 return (EINVAL); 151 152 /* 153 * Pass back name of connected socket, 154 * if it was bound and we are still connected 155 * (our peer may have closed already!). 
156 */ 157 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 158 UNP_LOCK(); 159 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) 160 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 161 else 162 sa = &sun_noname; 163 bcopy(sa, *nam, sa->sa_len); 164 UNP_UNLOCK(); 165 return (0); 166} 167 168static int 169uipc_attach(struct socket *so, int proto, struct thread *td) 170{ 171 struct unpcb *unp = sotounpcb(so); 172 173 if (unp != NULL) 174 return (EISCONN); 175 return (unp_attach(so)); 176} 177 178static int 179uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 180{ 181 struct unpcb *unp = sotounpcb(so); 182 183 if (unp == NULL) 184 return (EINVAL); 185 186 return (unp_bind(unp, nam, td)); 187} 188 189static int 190uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 191{ 192 struct unpcb *unp; 193 int error; 194 195 KASSERT(td == curthread, ("uipc_connect: td != curthread")); 196 197 UNP_LOCK(); 198 unp = sotounpcb(so); 199 if (unp == NULL) { 200 error = EINVAL; 201 goto out; 202 } 203 error = unp_connect(so, nam, td); 204out: 205 UNP_UNLOCK(); 206 return (error); 207} 208 209int 210uipc_connect2(struct socket *so1, struct socket *so2) 211{ 212 struct unpcb *unp = sotounpcb(so1); 213 int error; 214 215 if (unp == NULL) 216 return (EINVAL); 217 218 UNP_LOCK(); 219 error = unp_connect2(so1, so2); 220 UNP_UNLOCK(); 221 return (error); 222} 223 224/* control is EOPNOTSUPP */ 225 226static int 227uipc_detach(struct socket *so) 228{ 229 struct unpcb *unp = sotounpcb(so); 230 231 if (unp == NULL) 232 return (EINVAL); 233 234 UNP_LOCK(); 235 unp_detach(unp); /* NB: unlocks unp */ 236 return (0); 237} 238 239static int 240uipc_disconnect(struct socket *so) 241{ 242 struct unpcb *unp = sotounpcb(so); 243 244 if (unp == NULL) 245 return (EINVAL); 246 UNP_LOCK(); 247 unp_disconnect(unp); 248 UNP_UNLOCK(); 249 return (0); 250} 251 252static int 253uipc_listen(struct socket *so, struct thread *td) 254{ 255 struct 
unpcb *unp = sotounpcb(so); 256 int error; 257 258 if (unp == NULL || unp->unp_vnode == NULL) 259 return (EINVAL); 260 UNP_LOCK(); 261 error = unp_listen(unp, td); 262 UNP_UNLOCK(); 263 return (error); 264} 265 266static int 267uipc_peeraddr(struct socket *so, struct sockaddr **nam) 268{ 269 struct unpcb *unp = sotounpcb(so); 270 const struct sockaddr *sa; 271 272 if (unp == NULL) 273 return (EINVAL); 274 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 275 UNP_LOCK(); 276 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) 277 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 278 else { 279 /* 280 * XXX: It seems that this test always fails even when 281 * connection is established. So, this else clause is 282 * added as workaround to return PF_LOCAL sockaddr. 283 */ 284 sa = &sun_noname; 285 } 286 bcopy(sa, *nam, sa->sa_len); 287 UNP_UNLOCK(); 288 return (0); 289} 290 291static int 292uipc_rcvd(struct socket *so, int flags) 293{ 294 struct unpcb *unp = sotounpcb(so); 295 struct socket *so2; 296 u_long newhiwat; 297 298 if (unp == NULL) 299 return (EINVAL); 300 UNP_LOCK(); 301 switch (so->so_type) { 302 case SOCK_DGRAM: 303 panic("uipc_rcvd DGRAM?"); 304 /*NOTREACHED*/ 305 306 case SOCK_STREAM: 307 if (unp->unp_conn == NULL) 308 break; 309 so2 = unp->unp_conn->unp_socket; 310 SOCKBUF_LOCK(&so2->so_snd); 311 SOCKBUF_LOCK(&so->so_rcv); 312 /* 313 * Adjust backpressure on sender 314 * and wakeup any waiting to write. 
315 */ 316 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 317 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 318 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - 319 so->so_rcv.sb_cc; 320 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 321 newhiwat, RLIM_INFINITY); 322 unp->unp_cc = so->so_rcv.sb_cc; 323 SOCKBUF_UNLOCK(&so->so_rcv); 324 sowwakeup_locked(so2); 325 break; 326 327 default: 328 panic("uipc_rcvd unknown socktype"); 329 } 330 UNP_UNLOCK(); 331 return (0); 332} 333 334/* pru_rcvoob is EOPNOTSUPP */ 335 336static int 337uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 338 struct mbuf *control, struct thread *td) 339{ 340 int error = 0; 341 struct unpcb *unp = sotounpcb(so); 342 struct socket *so2; 343 u_long newhiwat; 344 345 if (unp == NULL) { 346 error = EINVAL; 347 goto release; 348 } 349 if (flags & PRUS_OOB) { 350 error = EOPNOTSUPP; 351 goto release; 352 } 353 354 if (control != NULL && (error = unp_internalize(&control, td))) 355 goto release; 356 357 UNP_LOCK(); 358 switch (so->so_type) { 359 case SOCK_DGRAM: 360 { 361 const struct sockaddr *from; 362 363 if (nam != NULL) { 364 if (unp->unp_conn != NULL) { 365 error = EISCONN; 366 break; 367 } 368 error = unp_connect(so, nam, td); 369 if (error) 370 break; 371 } else { 372 if (unp->unp_conn == NULL) { 373 error = ENOTCONN; 374 break; 375 } 376 } 377 so2 = unp->unp_conn->unp_socket; 378 if (unp->unp_addr != NULL) 379 from = (struct sockaddr *)unp->unp_addr; 380 else 381 from = &sun_noname; 382 SOCKBUF_LOCK(&so2->so_rcv); 383 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { 384 sorwakeup_locked(so2); 385 m = NULL; 386 control = NULL; 387 } else { 388 SOCKBUF_UNLOCK(&so2->so_rcv); 389 error = ENOBUFS; 390 } 391 if (nam != NULL) 392 unp_disconnect(unp); 393 break; 394 } 395 396 case SOCK_STREAM: 397 /* Connect if not connected yet. */ 398 /* 399 * Note: A better implementation would complain 400 * if not equal to the peer's address. 
401 */ 402 if ((so->so_state & SS_ISCONNECTED) == 0) { 403 if (nam != NULL) { 404 error = unp_connect(so, nam, td); 405 if (error) 406 break; /* XXX */ 407 } else { 408 error = ENOTCONN; 409 break; 410 } 411 } 412 413 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 414 error = EPIPE; 415 break; 416 } 417 if (unp->unp_conn == NULL) 418 panic("uipc_send connected but no connection?"); 419 so2 = unp->unp_conn->unp_socket; 420 SOCKBUF_LOCK(&so2->so_rcv); 421 /* 422 * Send to paired receive port, and then reduce 423 * send buffer hiwater marks to maintain backpressure. 424 * Wake up readers. 425 */ 426 if (control != NULL) { 427 if (sbappendcontrol_locked(&so2->so_rcv, m, control)) 428 control = NULL; 429 } else { 430 sbappend_locked(&so2->so_rcv, m); 431 } 432 so->so_snd.sb_mbmax -= 433 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 434 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 435 newhiwat = so->so_snd.sb_hiwat - 436 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); 437 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 438 newhiwat, RLIM_INFINITY); 439 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 440 sorwakeup_locked(so2); 441 m = NULL; 442 break; 443 444 default: 445 panic("uipc_send unknown socktype"); 446 } 447 448 /* 449 * SEND_EOF is equivalent to a SEND followed by 450 * a SHUTDOWN. 
451 */ 452 if (flags & PRUS_EOF) { 453 socantsendmore(so); 454 unp_shutdown(unp); 455 } 456 UNP_UNLOCK(); 457 458 if (control != NULL && error != 0) 459 unp_dispose(control); 460 461release: 462 if (control != NULL) 463 m_freem(control); 464 if (m != NULL) 465 m_freem(m); 466 return (error); 467} 468 469static int 470uipc_sense(struct socket *so, struct stat *sb) 471{ 472 struct unpcb *unp = sotounpcb(so); 473 struct socket *so2; 474 475 if (unp == NULL) 476 return (EINVAL); 477 UNP_LOCK(); 478 sb->st_blksize = so->so_snd.sb_hiwat; 479 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 480 so2 = unp->unp_conn->unp_socket; 481 sb->st_blksize += so2->so_rcv.sb_cc; 482 } 483 sb->st_dev = NODEV; 484 if (unp->unp_ino == 0) 485 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; 486 sb->st_ino = unp->unp_ino; 487 UNP_UNLOCK(); 488 return (0); 489} 490 491static int 492uipc_shutdown(struct socket *so) 493{ 494 struct unpcb *unp = sotounpcb(so); 495 496 if (unp == NULL) 497 return (EINVAL); 498 UNP_LOCK(); 499 socantsendmore(so); 500 unp_shutdown(unp); 501 UNP_UNLOCK(); 502 return (0); 503} 504 505static int 506uipc_sockaddr(struct socket *so, struct sockaddr **nam) 507{ 508 struct unpcb *unp = sotounpcb(so); 509 const struct sockaddr *sa; 510 511 if (unp == NULL) 512 return (EINVAL); 513 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 514 UNP_LOCK(); 515 if (unp->unp_addr != NULL) 516 sa = (struct sockaddr *) unp->unp_addr; 517 else 518 sa = &sun_noname; 519 bcopy(sa, *nam, sa->sa_len); 520 UNP_UNLOCK(); 521 return (0); 522} 523 524struct pr_usrreqs uipc_usrreqs = { 525 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 526 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 527 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 528 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 529 sosend, soreceive, sopoll, pru_sosetlabel_null 530}; 531 532int 533uipc_ctloutput(so, sopt) 534 struct socket *so; 535 struct 
sockopt *sopt; 536{ 537 struct unpcb *unp = sotounpcb(so); 538 struct xucred xu; 539 int error; 540 541 switch (sopt->sopt_dir) { 542 case SOPT_GET: 543 switch (sopt->sopt_name) { 544 case LOCAL_PEERCRED: 545 error = 0; 546 UNP_LOCK(); 547 if (unp->unp_flags & UNP_HAVEPC) 548 xu = unp->unp_peercred; 549 else { 550 if (so->so_type == SOCK_STREAM) 551 error = ENOTCONN; 552 else 553 error = EINVAL; 554 } 555 UNP_UNLOCK(); 556 if (error == 0) 557 error = sooptcopyout(sopt, &xu, sizeof(xu)); 558 break; 559 default: 560 error = EOPNOTSUPP; 561 break; 562 } 563 break; 564 case SOPT_SET: 565 default: 566 error = EOPNOTSUPP; 567 break; 568 } 569 return (error); 570} 571 572/* 573 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 574 * for stream sockets, although the total for sender and receiver is 575 * actually only PIPSIZ. 576 * Datagram sockets really use the sendspace as the maximum datagram size, 577 * and don't really want to reserve the sendspace. Their recvspace should 578 * be large enough for at least one max-size datagram plus address. 
579 */ 580#ifndef PIPSIZ 581#define PIPSIZ 8192 582#endif 583static u_long unpst_sendspace = PIPSIZ; 584static u_long unpst_recvspace = PIPSIZ; 585static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 586static u_long unpdg_recvspace = 4*1024; 587 588static int unp_rights; /* file descriptors in flight */ 589 590SYSCTL_DECL(_net_local_stream); 591SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 592 &unpst_sendspace, 0, ""); 593SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 594 &unpst_recvspace, 0, ""); 595SYSCTL_DECL(_net_local_dgram); 596SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 597 &unpdg_sendspace, 0, ""); 598SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 599 &unpdg_recvspace, 0, ""); 600SYSCTL_DECL(_net_local); 601SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 602 603static int 604unp_attach(so) 605 struct socket *so; 606{ 607 register struct unpcb *unp; 608 int error; 609 610 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 611 switch (so->so_type) { 612 613 case SOCK_STREAM: 614 error = soreserve(so, unpst_sendspace, unpst_recvspace); 615 break; 616 617 case SOCK_DGRAM: 618 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 619 break; 620 621 default: 622 panic("unp_attach"); 623 } 624 if (error) 625 return (error); 626 } 627 unp = uma_zalloc(unp_zone, M_WAITOK); 628 if (unp == NULL) 629 return (ENOBUFS); 630 bzero(unp, sizeof *unp); 631 LIST_INIT(&unp->unp_refs); 632 unp->unp_socket = so; 633 634 UNP_LOCK(); 635 unp->unp_gencnt = ++unp_gencnt; 636 unp_count++; 637 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead 638 : &unp_shead, unp, unp_link); 639 UNP_UNLOCK(); 640 641 so->so_pcb = unp; 642 return (0); 643} 644 645static void 646unp_detach(unp) 647 register struct unpcb *unp; 648{ 649 struct vnode *vp; 650 651 UNP_LOCK_ASSERT(); 652 653 LIST_REMOVE(unp, unp_link); 654 unp->unp_gencnt = ++unp_gencnt; 655 --unp_count; 656 if ((vp = unp->unp_vnode) != NULL) { 657 /* 658 * XXXRW: should v_socket be frobbed only while holding 659 * Giant? 660 */ 661 unp->unp_vnode->v_socket = NULL; 662 unp->unp_vnode = NULL; 663 } 664 if (unp->unp_conn != NULL) 665 unp_disconnect(unp); 666 while (!LIST_EMPTY(&unp->unp_refs)) { 667 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 668 unp_drop(ref, ECONNRESET); 669 } 670 soisdisconnected(unp->unp_socket); 671 unp->unp_socket->so_pcb = NULL; 672 if (unp_rights) { 673 /* 674 * Normally the receive buffer is flushed later, 675 * in sofree, but if our receive buffer holds references 676 * to descriptors that are now garbage, we will dispose 677 * of those descriptor references after the garbage collector 678 * gets them (resulting in a "panic: closef: count < 0"). 679 */ 680 sorflush(unp->unp_socket); 681 unp_gc(); 682 } 683 UNP_UNLOCK(); 684 if (unp->unp_addr != NULL) 685 FREE(unp->unp_addr, M_SONAME); 686 uma_zfree(unp_zone, unp); 687 if (vp) { 688 mtx_lock(&Giant); 689 vrele(vp); 690 mtx_unlock(&Giant); 691 } 692} 693 694static int 695unp_bind(unp, nam, td) 696 struct unpcb *unp; 697 struct sockaddr *nam; 698 struct thread *td; 699{ 700 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 701 struct vnode *vp; 702 struct mount *mp; 703 struct vattr vattr; 704 int error, namelen; 705 struct nameidata nd; 706 char *buf; 707 708 /* 709 * XXXRW: This test-and-set of unp_vnode is non-atomic; the 710 * unlocked read here is fine, but the value of unp_vnode needs 711 * to be tested again after we do all the lookups to see if the 712 * pcb is still unbound? 
713 */ 714 if (unp->unp_vnode != NULL) 715 return (EINVAL); 716 717 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 718 if (namelen <= 0) 719 return (EINVAL); 720 721 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 722 strlcpy(buf, soun->sun_path, namelen + 1); 723 724 mtx_lock(&Giant); 725restart: 726 mtx_assert(&Giant, MA_OWNED); 727 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, 728 buf, td); 729/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 730 error = namei(&nd); 731 if (error) 732 goto done; 733 vp = nd.ni_vp; 734 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 735 NDFREE(&nd, NDF_ONLY_PNBUF); 736 if (nd.ni_dvp == vp) 737 vrele(nd.ni_dvp); 738 else 739 vput(nd.ni_dvp); 740 if (vp != NULL) { 741 vrele(vp); 742 error = EADDRINUSE; 743 goto done; 744 } 745 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 746 if (error) 747 goto done; 748 goto restart; 749 } 750 VATTR_NULL(&vattr); 751 vattr.va_type = VSOCK; 752 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 753#ifdef MAC 754 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 755 &vattr); 756#endif 757 if (error == 0) { 758 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 759 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 760 } 761 NDFREE(&nd, NDF_ONLY_PNBUF); 762 vput(nd.ni_dvp); 763 if (error) 764 goto done; 765 vp = nd.ni_vp; 766 ASSERT_VOP_LOCKED(vp, "unp_bind"); 767 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 768 UNP_LOCK(); 769 vp->v_socket = unp->unp_socket; 770 unp->unp_vnode = vp; 771 unp->unp_addr = soun; 772 UNP_UNLOCK(); 773 VOP_UNLOCK(vp, 0, td); 774 vn_finished_write(mp); 775done: 776 mtx_unlock(&Giant); 777 free(buf, M_TEMP); 778 return (error); 779} 780 781static int 782unp_connect(so, nam, td) 783 struct socket *so; 784 struct sockaddr *nam; 785 struct thread *td; 786{ 787 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 788 register struct 
vnode *vp; 789 register struct socket *so2, *so3; 790 struct unpcb *unp, *unp2, *unp3; 791 int error, len; 792 struct nameidata nd; 793 char buf[SOCK_MAXADDRLEN]; 794 struct sockaddr *sa; 795 796 UNP_LOCK_ASSERT(); 797 unp = sotounpcb(so); 798 799 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 800 if (len <= 0) 801 return (EINVAL); 802 strlcpy(buf, soun->sun_path, len + 1); 803 UNP_UNLOCK(); 804 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 805 mtx_lock(&Giant); 806 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); 807 error = namei(&nd); 808 if (error) 809 vp = NULL; 810 else 811 vp = nd.ni_vp; 812 ASSERT_VOP_LOCKED(vp, "unp_connect"); 813 NDFREE(&nd, NDF_ONLY_PNBUF); 814 if (error) 815 goto bad; 816 817 if (vp->v_type != VSOCK) { 818 error = ENOTSOCK; 819 goto bad; 820 } 821 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 822 if (error) 823 goto bad; 824 mtx_unlock(&Giant); 825 UNP_LOCK(); 826 unp = sotounpcb(so); 827 if (unp == NULL) { 828 /* 829 * XXXRW: Temporary debugging printf. 830 */ 831 printf("unp_connect(): lost race to another thread\n"); 832 error = EINVAL; 833 goto bad2; 834 } 835 so2 = vp->v_socket; 836 if (so2 == NULL) { 837 error = ECONNREFUSED; 838 goto bad2; 839 } 840 if (so->so_type != so2->so_type) { 841 error = EPROTOTYPE; 842 goto bad2; 843 } 844 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 845 if (so2->so_options & SO_ACCEPTCONN) { 846 /* 847 * NB: drop locks here so unp_attach is entered 848 * w/o locks; this avoids a recursive lock 849 * of the head and holding sleep locks across 850 * a (potentially) blocking malloc. 
851 */ 852 UNP_UNLOCK(); 853 so3 = sonewconn(so2, 0); 854 UNP_LOCK(); 855 } else 856 so3 = NULL; 857 if (so3 == NULL) { 858 error = ECONNREFUSED; 859 goto bad2; 860 } 861 unp = sotounpcb(so); 862 unp2 = sotounpcb(so2); 863 unp3 = sotounpcb(so3); 864 if (unp2->unp_addr != NULL) { 865 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 866 unp3->unp_addr = (struct sockaddr_un *) sa; 867 sa = NULL; 868 } 869 /* 870 * unp_peercred management: 871 * 872 * The connecter's (client's) credentials are copied 873 * from its process structure at the time of connect() 874 * (which is now). 875 */ 876 cru2x(td->td_ucred, &unp3->unp_peercred); 877 unp3->unp_flags |= UNP_HAVEPC; 878 /* 879 * The receiver's (server's) credentials are copied 880 * from the unp_peercred member of socket on which the 881 * former called listen(); unp_listen() cached that 882 * process's credentials at that time so we can use 883 * them now. 884 */ 885 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 886 ("unp_connect: listener without cached peercred")); 887 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 888 sizeof(unp->unp_peercred)); 889 unp->unp_flags |= UNP_HAVEPC; 890#ifdef MAC 891 SOCK_LOCK(so); 892 mac_set_socket_peer_from_socket(so, so3); 893 mac_set_socket_peer_from_socket(so3, so); 894 SOCK_UNLOCK(so); 895#endif 896 897 so2 = so3; 898 } 899 error = unp_connect2(so, so2); 900bad2: 901 UNP_UNLOCK(); 902 mtx_lock(&Giant); 903bad: 904 mtx_assert(&Giant, MA_OWNED); 905 if (vp != NULL) 906 vput(vp); 907 mtx_unlock(&Giant); 908 free(sa, M_SONAME); 909 UNP_LOCK(); 910 return (error); 911} 912 913static int 914unp_connect2(so, so2) 915 register struct socket *so; 916 register struct socket *so2; 917{ 918 register struct unpcb *unp = sotounpcb(so); 919 register struct unpcb *unp2; 920 921 UNP_LOCK_ASSERT(); 922 923 if (so2->so_type != so->so_type) 924 return (EPROTOTYPE); 925 unp2 = sotounpcb(so2); 926 unp->unp_conn = unp2; 927 switch (so->so_type) { 928 929 case SOCK_DGRAM: 930 
LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 931 soisconnected(so); 932 break; 933 934 case SOCK_STREAM: 935 unp2->unp_conn = unp; 936 soisconnected(so); 937 soisconnected(so2); 938 break; 939 940 default: 941 panic("unp_connect2"); 942 } 943 return (0); 944} 945 946static void 947unp_disconnect(unp) 948 struct unpcb *unp; 949{ 950 register struct unpcb *unp2 = unp->unp_conn; 951 struct socket *so; 952 953 UNP_LOCK_ASSERT(); 954 955 if (unp2 == NULL) 956 return; 957 unp->unp_conn = NULL; 958 switch (unp->unp_socket->so_type) { 959 960 case SOCK_DGRAM: 961 LIST_REMOVE(unp, unp_reflink); 962 so = unp->unp_socket; 963 SOCK_LOCK(so); 964 so->so_state &= ~SS_ISCONNECTED; 965 SOCK_UNLOCK(so); 966 break; 967 968 case SOCK_STREAM: 969 soisdisconnected(unp->unp_socket); 970 unp2->unp_conn = NULL; 971 soisdisconnected(unp2->unp_socket); 972 break; 973 } 974} 975 976#ifdef notdef 977void 978unp_abort(unp) 979 struct unpcb *unp; 980{ 981 982 unp_detach(unp); 983} 984#endif 985 986/* 987 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed 988 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers 989 * are safe to reference. It first scans the list of struct unpcb's to 990 * generate a pointer list, then it rescans its list one entry at a time to 991 * externalize and copyout. It checks the generation number to see if a 992 * struct unpcb has been reused, and will skip it if so. 993 */ 994static int 995unp_pcblist(SYSCTL_HANDLER_ARGS) 996{ 997 int error, i, n; 998 struct unpcb *unp, **unp_list; 999 unp_gen_t gencnt; 1000 struct xunpgen *xug; 1001 struct unp_head *head; 1002 struct xunpcb *xu; 1003 1004 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1005 1006 /* 1007 * The process of preparing the PCB list is too time-consuming and 1008 * resource-intensive to repeat twice on every request. 
1009 */ 1010 if (req->oldptr == NULL) { 1011 n = unp_count; 1012 req->oldidx = 2 * (sizeof *xug) 1013 + (n + n/8) * sizeof(struct xunpcb); 1014 return (0); 1015 } 1016 1017 if (req->newptr != NULL) 1018 return (EPERM); 1019 1020 /* 1021 * OK, now we're committed to doing something. 1022 */ 1023 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 1024 UNP_LOCK(); 1025 gencnt = unp_gencnt; 1026 n = unp_count; 1027 UNP_UNLOCK(); 1028 1029 xug->xug_len = sizeof *xug; 1030 xug->xug_count = n; 1031 xug->xug_gen = gencnt; 1032 xug->xug_sogen = so_gencnt; 1033 error = SYSCTL_OUT(req, xug, sizeof *xug); 1034 if (error) { 1035 free(xug, M_TEMP); 1036 return (error); 1037 } 1038 1039 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1040 1041 UNP_LOCK(); 1042 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1043 unp = LIST_NEXT(unp, unp_link)) { 1044 if (unp->unp_gencnt <= gencnt) { 1045 if (cr_cansee(req->td->td_ucred, 1046 unp->unp_socket->so_cred)) 1047 continue; 1048 unp_list[i++] = unp; 1049 } 1050 } 1051 UNP_UNLOCK(); 1052 n = i; /* in case we lost some during malloc */ 1053 1054 error = 0; 1055 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK); 1056 for (i = 0; i < n; i++) { 1057 unp = unp_list[i]; 1058 if (unp->unp_gencnt <= gencnt) { 1059 xu->xu_len = sizeof *xu; 1060 xu->xu_unpp = unp; 1061 /* 1062 * XXX - need more locking here to protect against 1063 * connect/disconnect races for SMP. 1064 */ 1065 if (unp->unp_addr != NULL) 1066 bcopy(unp->unp_addr, &xu->xu_addr, 1067 unp->unp_addr->sun_len); 1068 if (unp->unp_conn != NULL && 1069 unp->unp_conn->unp_addr != NULL) 1070 bcopy(unp->unp_conn->unp_addr, 1071 &xu->xu_caddr, 1072 unp->unp_conn->unp_addr->sun_len); 1073 bcopy(unp, &xu->xu_unp, sizeof *unp); 1074 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1075 error = SYSCTL_OUT(req, xu, sizeof *xu); 1076 } 1077 } 1078 free(xu, M_TEMP); 1079 if (!error) { 1080 /* 1081 * Give the user an updated idea of our state. 
		/*
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		xug->xug_gen = unp_gencnt;
		xug->xug_sogen = so_gencnt;
		xug->xug_count = unp_count;
		error = SYSCTL_OUT(req, xug, sizeof *xug);
	}
	free(unp_list, M_TEMP);
	free(xug, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");

/*
 * For a connected SOCK_STREAM pcb, mark the peer's socket as unable to
 * receive any more data.  Datagram sockets are left untouched.
 */
static void
unp_shutdown(unp)
	struct unpcb *unp;
{
	struct socket *so;

	UNP_LOCK_ASSERT();

	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
	    (so = unp->unp_conn->unp_socket))
		socantrcvmore(so);
}

/*
 * Record an error on the pcb's socket and disconnect it from its peer.
 */
static void
unp_drop(unp, errno)
	struct unpcb *unp;
	int errno;
{
	struct socket *so = unp->unp_socket;

	UNP_LOCK_ASSERT();

	so->so_error = errno;
	unp_disconnect(unp);
}

#ifdef notdef
void
unp_drain()
{

}
#endif

/*
 * Release the in-flight references held by an array of fdcount file
 * pointers by running unp_discard on each entry.
 */
static void
unp_freerights(rp, fdcount)
	struct file **rp;
	int fdcount;
{
	int i;
	struct file *fp;

	for (i = 0; i < fdcount; i++) {
		fp = *rp;
		/*
		 * zero the pointer before calling
		 * unp_discard since it may end up
		 * in unp_gc()..
		 */
		*rp++ = 0;
		unp_discard(fp);
	}
}

/*
 * Convert received SCM_RIGHTS control data from in-kernel struct file
 * pointers into file descriptors in the receiving process, rebuilding the
 * control mbuf chain in *controlp.  Non-rights control messages are copied
 * through unchanged.  A NULL controlp means the caller just wants the
 * rights released.  Returns 0 or an errno; the input chain is always freed.
 */
int
unp_externalize(control, controlp)
	struct mbuf *control, **controlp;
{
	struct thread *td = curthread;		/* XXX */
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int i;
	int *fdp;
	struct file **rp;
	struct file *fp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, newfds;
	int f;
	u_int newlen;

	error = 0;
	if (controlp != NULL) /* controlp == NULL => free control messages */
		*controlp = NULL;

	while (cm != NULL) {
		/* Reject headers that overrun the remaining control data. */
		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
			error = EINVAL;
			break;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		if (cm->cmsg_level == SOL_SOCKET
		    && cm->cmsg_type == SCM_RIGHTS) {
			newfds = datalen / sizeof(struct file *);
			rp = data;

			/* If we're not outputting the descriptors free them. */
			if (error || controlp == NULL) {
				unp_freerights(rp, newfds);
				goto next;
			}
			FILEDESC_LOCK(td->td_proc->p_fd);
			/* if the new FD's will not fit free them.  */
			if (!fdavail(td, newfds)) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = EMSGSIZE;
				unp_freerights(rp, newfds);
				goto next;
			}
			/*
			 * now change each pointer to an fd in the global
			 * table to an integer that is the index to the
			 * local fd table entry that we set up to point
			 * to the global one we are transferring.
			 */
			newlen = newfds * sizeof(int);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = E2BIG;
				unp_freerights(rp, newfds);
				goto next;
			}

			fdp = (int *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			for (i = 0; i < newfds; i++) {
				if (fdalloc(td, 0, &f))
					panic("unp_externalize fdalloc failed");
				fp = *rp++;
				td->td_proc->p_fd->fd_ofiles[f] = fp;
				FILE_LOCK(fp);
				/* Reference moves from the message to the fd table. */
				fp->f_msgcount--;
				FILE_UNLOCK(fp);
				unp_rights--;
				*fdp++ = f;
			}
			FILEDESC_UNLOCK(td->td_proc->p_fd);
		} else { /* We can just copy anything else across */
			if (error || controlp == NULL)
				goto next;
			*controlp = sbcreatecontrol(NULL, datalen,
			    cm->cmsg_type, cm->cmsg_level);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto next;
			}
			bcopy(data,
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
			    datalen);
		}

		controlp = &(*controlp)->m_next;

next:
		/* Advance to the next cmsghdr in this mbuf, if any remains. */
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

	m_freem(control);

	return (error);
}

/*
 * One-time initialization of the UNIX domain socket subsystem: the unpcb
 * UMA zone, the datagram/stream pcb lists and the global UNP lock.
 */
void
unp_init(void)
{
	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	if (unp_zone == NULL)
		panic("unp_init");
	uma_zone_set_max(unp_zone, nmbclusters);
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);

	UNP_LOCK_INIT();
}

/*
 * Convert a sender's control mbuf chain into its in-kernel form, rebuilding
 * it in *controlp: SCM_CREDS is filled with the sender's credentials,
 * SCM_RIGHTS has its integer fds replaced by referenced struct file
 * pointers, and SCM_TIMESTAMP gets the current time.  Any other message
 * type is rejected with EINVAL.  The input chain is always freed.
 */
static int
unp_internalize(controlp, td)
	struct mbuf **controlp;
	struct thread *td;
{
	struct mbuf *control = *controlp;
	struct proc *p = td->td_proc;
	struct filedesc *fdescp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct cmsgcred *cmcred;
	struct file **rp;
	struct file *fp;
	struct timeval *tv;
	int i, fd, *fdp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, oldfds;
	u_int newlen;

	error = 0;
	*controlp = NULL;

	while (cm != NULL) {
		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
		    || cm->cmsg_len > clen) {
			error = EINVAL;
			goto out;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		switch (cm->cmsg_type) {
		/*
		 * Fill in credential information.
		 */
		case SCM_CREDS:
			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
			    SCM_CREDS, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}

			cmcred = (struct cmsgcred *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			cmcred->cmcred_pid = p->p_pid;
			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
			cmcred->cmcred_euid = td->td_ucred->cr_uid;
			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
			    CMGROUP_MAX);
			for (i = 0; i < cmcred->cmcred_ngroups; i++)
				cmcred->cmcred_groups[i] =
				    td->td_ucred->cr_groups[i];
			break;

		case SCM_RIGHTS:
			oldfds = datalen / sizeof (int);
			/*
			 * check that all the FDs passed in refer to legal files
			 * If not, reject the entire operation.
			 */
			fdp = data;
			FILEDESC_LOCK(fdescp);
			for (i = 0; i < oldfds; i++) {
				fd = *fdp++;
				if ((unsigned)fd >= fdescp->fd_nfiles ||
				    fdescp->fd_ofiles[fd] == NULL) {
					FILEDESC_UNLOCK(fdescp);
					error = EBADF;
					goto out;
				}
				fp = fdescp->fd_ofiles[fd];
				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
					FILEDESC_UNLOCK(fdescp);
					error = EOPNOTSUPP;
					goto out;
				}

			}
			/*
			 * Now replace the integer FDs with pointers to
			 * the associated global file table entry..
			 */
			newlen = oldfds * sizeof(struct file *);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_UNLOCK(fdescp);
				error = E2BIG;
				goto out;
			}

			fdp = data;
			rp = (struct file **)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			for (i = 0; i < oldfds; i++) {
				fp = fdescp->fd_ofiles[*fdp++];
				*rp++ = fp;
				FILE_LOCK(fp);
				/* One extra hold, accounted as in-flight. */
				fp->f_count++;
				fp->f_msgcount++;
				FILE_UNLOCK(fp);
				unp_rights++;
			}
			FILEDESC_UNLOCK(fdescp);
			break;

		case SCM_TIMESTAMP:
			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
			    SCM_TIMESTAMP, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}
			tv = (struct timeval *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			microtime(tv);
			break;

		default:
			error = EINVAL;
			goto out;
		}

		controlp = &(*controlp)->m_next;

		/* Advance to the next cmsghdr in this mbuf, if any remains. */
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

out:
	m_freem(control);

	return (error);
}

static int	unp_defer, unp_gcing;

/*
 * Garbage collect descriptors that are unreachable because every reference
 * to them is held in in-flight SCM_RIGHTS messages (possibly forming
 * cycles).  Mark-and-sweep: first mark every externally reachable file and
 * everything reachable from the receive buffers of marked UNIX domain
 * sockets, then harvest the unmarked ones.  Reentrancy is prevented via
 * the unp_gcing flag.
 */
static void
unp_gc()
{
	register struct file *fp, *nextfp;
	register struct socket *so;
	struct file **extra_ref, **fpp;
	int nunref, i;
	int nfiles_snap;
	int nfiles_slack = 20;

	UNP_LOCK_ASSERT();

	if (unp_gcing)
		return;
	unp_gcing = 1;
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	/*
	 * XXXRW: Acquiring a sleep lock while holding UNP
	 * mutex cannot be a good thing.
	 */
	sx_slock(&filelist_lock);
	LIST_FOREACH(fp, &filehead, f_list)
		fp->f_gcflag &= ~(FMARK|FDEFER);
	do {
		LIST_FOREACH(fp, &filehead, f_list) {
			FILE_LOCK(fp);
			/*
			 * If the file is not open, skip it
			 */
			if (fp->f_count == 0) {
				FILE_UNLOCK(fp);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fp->f_gcflag & FDEFER) {
				fp->f_gcflag &= ~FDEFER;
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fp->f_gcflag & FMARK) {
					FILE_UNLOCK(fp);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (fp->f_count == fp->f_msgcount) {
					FILE_UNLOCK(fp);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				fp->f_gcflag |= FMARK;
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (fp->f_type != DTYPE_SOCKET ||
			    (so = fp->f_data) == NULL) {
				FILE_UNLOCK(fp);
				continue;
			}
			FILE_UNLOCK(fp);
			if (so->so_proto->pr_domain != &localdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
				continue;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal. If sbwait does return
				 * an error, we'll go into an infinite
				 * loop. Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 */
			SOCKBUF_LOCK(&so->so_rcv);
			unp_scan(so->so_rcv.sb_mb, unp_mark);
			SOCKBUF_UNLOCK(&so->so_rcv);
		}
	} while (unp_defer);
	sx_sunlock(&filelist_lock);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the orginal code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
again:
	/*
	 * Size the candidate array from a snapshot of nfiles plus slack;
	 * if the file count grows past the snapshot before we relock,
	 * grow the slack and retry.
	 */
	nfiles_snap = nfiles + nfiles_slack;	/* some slack */
	extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
	    M_WAITOK);
	sx_slock(&filelist_lock);
	if (nfiles_snap < nfiles) {
		sx_sunlock(&filelist_lock);
		free(extra_ref, M_TEMP);
		nfiles_slack += 20;
		goto again;
	}
	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
	    fp != NULL; fp = nextfp) {
		nextfp = LIST_NEXT(fp, f_list);
		FILE_LOCK(fp);
		/*
		 * If it's not open, skip it
		 */
		if (fp->f_count == 0) {
			FILE_UNLOCK(fp);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
			*fpp++ = fp;
			nunref++;
			fp->f_count++;
		}
		FILE_UNLOCK(fp);
	}
	sx_sunlock(&filelist_lock);
	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct file *tfp = *fpp;
		FILE_LOCK(tfp);
		if (tfp->f_type == DTYPE_SOCKET &&
		    tfp->f_data != NULL) {
			FILE_UNLOCK(tfp);
			/* Flush queued rights before the final close. */
			sorflush(tfp->f_data);
		} else {
			FILE_UNLOCK(tfp);
		}
	}
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		closef(*fpp, (struct thread *) NULL);
	free(extra_ref, M_TEMP);
	unp_gcing = 0;
}

/*
 * Domain dispose hook: release any rights still queued in an mbuf chain
 * that is about to be thrown away.
 */
void
unp_dispose(m)
	struct mbuf *m;
{

	if (m)
		unp_scan(m, unp_discard);
}

/*
 * Prepare a pcb for listen(2) by caching the listener's credentials as
 * the peer credential template.  Always succeeds.
 */
static int
unp_listen(unp, td)
	struct unpcb *unp;
	struct thread *td;
{
	UNP_LOCK_ASSERT();

	/*
	 * XXXRW: Why populate the local peer cred with our own credential?
	 */
	cru2x(td->td_ucred, &unp->unp_peercred);
	unp->unp_flags |= UNP_HAVEPCCACHED;
	return (0);
}

/*
 * Walk every control mbuf in a record chain and apply op to each struct
 * file pointer carried in SCM_RIGHTS messages.  Malformed cmsg headers
 * terminate the scan of that mbuf.
 */
static void
unp_scan(m0, op)
	register struct mbuf *m0;
	void (*op)(struct file *);
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	void *data;
	int i;
	socklen_t clen, datalen;
	int qfds;

	while (m0 != NULL) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;

			cm = mtod(m, struct cmsghdr *);
			clen = m->m_len;

			while (cm != NULL) {
				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
					break;

				data = CMSG_DATA(cm);
				datalen = (caddr_t)cm + cm->cmsg_len
				    - (caddr_t)data;

				if (cm->cmsg_level == SOL_SOCKET &&
				    cm->cmsg_type == SCM_RIGHTS) {
					qfds = datalen / sizeof (struct file *);
					rp = data;
					for (i = 0; i < qfds; i++)
						(*op)(*rp++);
				}

				if (CMSG_SPACE(datalen) < clen) {
					clen -= CMSG_SPACE(datalen);
					cm = (struct cmsghdr *)
					    ((caddr_t)cm + CMSG_SPACE(datalen));
				} else {
					clen = 0;
					cm = NULL;
				}
			}
		}
		/* Next record in the queue. */
		m0 = m0->m_act;
	}
}

/*
 * unp_scan callback for the gc mark phase: flag the file as reachable and
 * defer it so the next sweep visits its own message buffers.
 */
static void
unp_mark(fp)
	struct file *fp;
{
	if (fp->f_gcflag & FMARK)
		return;
	unp_defer++;
	fp->f_gcflag |= (FMARK|FDEFER);
}

/*
 * Drop one in-flight message reference on fp and release the hold taken
 * by unp_internalize via closef.
 */
static void
unp_discard(fp)
	struct file *fp;
{
	FILE_LOCK(fp);
	fp->f_msgcount--;
	unp_rights--;
	FILE_UNLOCK(fp);
	(void) closef(fp, (struct thread *)NULL);
}