uipc_usrreq.c revision 52070
1/* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD: head/sys/kern/uipc_usrreq.c 52070 1999-10-09 20:42:17Z green $ 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/domain.h> 41#include <sys/fcntl.h> 42#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 43#include <sys/file.h> 44#include <sys/filedesc.h> 45#include <sys/lock.h> 46#include <sys/mbuf.h> 47#include <sys/namei.h> 48#include <sys/proc.h> 49#include <sys/protosw.h> 50#include <sys/socket.h> 51#include <sys/socketvar.h> 52#include <sys/stat.h> 53#include <sys/sysctl.h> 54#include <sys/un.h> 55#include <sys/unpcb.h> 56#include <sys/vnode.h> 57 58#include <vm/vm_zone.h> 59 60static struct vm_zone *unp_zone; 61static unp_gen_t unp_gencnt; 62static u_int unp_count; 63 64static struct unp_head unp_shead, unp_dhead; 65 66/* 67 * Unix communications domain. 68 * 69 * TODO: 70 * SEQPACKET, RDM 71 * rethink name space problems 72 * need a proper out-of-band 73 * lock pushdown 74 */ 75static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 76static ino_t unp_ino; /* prototype for fake inode numbers */ 77 78static int unp_attach __P((struct socket *)); 79static void unp_detach __P((struct unpcb *)); 80static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); 81static int unp_connect __P((struct socket *,struct sockaddr *, 82 struct proc *)); 83static void unp_disconnect __P((struct unpcb *)); 84static void unp_shutdown __P((struct unpcb *)); 85static void unp_drop __P((struct unpcb *, int)); 86static void unp_gc __P((void)); 87static void unp_scan __P((struct mbuf *, void (*)(struct file *))); 88static void unp_mark __P((struct file *)); 89static void unp_discard __P((struct file *)); 90static int unp_internalize __P((struct mbuf *, struct proc *)); 91 92static int 93uipc_abort(struct socket *so) 94{ 95 struct unpcb *unp = sotounpcb(so); 96 97 if (unp == 0) 98 return EINVAL; 99 unp_drop(unp, ECONNABORTED); 100 return 0; 101} 102 103static int 104uipc_accept(struct socket *so, struct sockaddr **nam) 105{ 106 struct unpcb *unp = sotounpcb(so); 107 108 if (unp == 0) 109 return EINVAL; 110 111 /* 112 * Pass back name of connected socket, 113 * if it was bound and we are still connected 114 * (our peer may have closed already!). 115 */ 116 if (unp->unp_conn && unp->unp_conn->unp_addr) { 117 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 118 1); 119 } else { 120 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 121 } 122 return 0; 123} 124 125static int 126uipc_attach(struct socket *so, int proto, struct proc *p) 127{ 128 struct unpcb *unp = sotounpcb(so); 129 130 if (unp != 0) 131 return EISCONN; 132 return unp_attach(so); 133} 134 135static int 136uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 137{ 138 struct unpcb *unp = sotounpcb(so); 139 140 if (unp == 0) 141 return EINVAL; 142 143 return unp_bind(unp, nam, p); 144} 145 146static int 147uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 148{ 149 struct unpcb *unp = sotounpcb(so); 150 151 if (unp == 0) 152 return EINVAL; 153 return unp_connect(so, nam, curproc); 154} 155 156static int 157uipc_connect2(struct socket *so1, struct socket *so2) 158{ 159 struct unpcb *unp = sotounpcb(so1); 160 161 if (unp == 0) 162 return EINVAL; 163 164 return unp_connect2(so1, so2); 165} 166 167/* control is EOPNOTSUPP */ 168 169static int 170uipc_detach(struct socket *so) 171{ 172 struct unpcb *unp = sotounpcb(so); 173 174 if (unp == 0) 175 return EINVAL; 176 177 unp_detach(unp); 178 return 0; 179} 180 181static int 182uipc_disconnect(struct socket *so) 183{ 184 struct unpcb *unp = sotounpcb(so); 185 186 if (unp == 0) 187 return EINVAL; 188 unp_disconnect(unp); 189 return 0; 190} 191 192static int 193uipc_listen(struct socket *so, struct proc *p) 194{ 195 struct unpcb *unp = sotounpcb(so); 196 197 if (unp == 0 || unp->unp_vnode == 0) 198 return EINVAL; 199 return 0; 200} 201 202static int 203uipc_peeraddr(struct socket *so, struct sockaddr **nam) 204{ 205 struct unpcb *unp = sotounpcb(so); 206 207 if (unp == 0) 208 return EINVAL; 209 if (unp->unp_conn && unp->unp_conn->unp_addr) 210 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 211 1); 212 return 0; 213} 214 215static int 216uipc_rcvd(struct socket *so, int flags) 217{ 218 struct unpcb *unp = sotounpcb(so); 219 struct socket *so2; 220 221 if (unp == 0) 222 return EINVAL; 223 switch (so->so_type) { 224 case SOCK_DGRAM: 225 panic("uipc_rcvd DGRAM?"); 226 /*NOTREACHED*/ 227 228 case SOCK_STREAM: 229 if (unp->unp_conn == 0) 230 break; 231 so2 = unp->unp_conn->unp_socket; 232 /* 233 * Adjust backpressure on sender 234 * and wakeup any waiting to write. 235 */ 236 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 237 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 238 so2->so_snd.sb_hiwat += unp->unp_cc - so->so_rcv.sb_cc; 239 (void)chgsbsize(so2->so_cred->cr_uid, 240 (rlim_t)unp->unp_cc - so->so_rcv.sb_cc); 241 unp->unp_cc = so->so_rcv.sb_cc; 242 sowwakeup(so2); 243 break; 244 245 default: 246 panic("uipc_rcvd unknown socktype"); 247 } 248 return 0; 249} 250 251/* pru_rcvoob is EOPNOTSUPP */ 252 253static int 254uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 255 struct mbuf *control, struct proc *p) 256{ 257 int error = 0; 258 struct unpcb *unp = sotounpcb(so); 259 struct socket *so2; 260 261 if (unp == 0) { 262 error = EINVAL; 263 goto release; 264 } 265 if (flags & PRUS_OOB) { 266 error = EOPNOTSUPP; 267 goto release; 268 } 269 270 if (control && (error = unp_internalize(control, p))) 271 goto release; 272 273 switch (so->so_type) { 274 case SOCK_DGRAM: 275 { 276 struct sockaddr *from; 277 278 if (nam) { 279 if (unp->unp_conn) { 280 error = EISCONN; 281 break; 282 } 283 error = unp_connect(so, nam, p); 284 if (error) 285 break; 286 } else { 287 if (unp->unp_conn == 0) { 288 error = ENOTCONN; 289 break; 290 } 291 } 292 so2 = unp->unp_conn->unp_socket; 293 if (unp->unp_addr) 294 from = (struct sockaddr *)unp->unp_addr; 295 else 296 from = &sun_noname; 297 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 298 sorwakeup(so2); 299 m = 0; 300 control = 0; 301 } else 302 error = ENOBUFS; 303 if (nam) 304 unp_disconnect(unp); 305 break; 306 } 307 308 case SOCK_STREAM: 309 /* Connect if not connected yet. */ 310 /* 311 * Note: A better implementation would complain 312 * if not equal to the peer's address. 313 */ 314 if ((so->so_state & SS_ISCONNECTED) == 0) { 315 if (nam) { 316 error = unp_connect(so, nam, p); 317 if (error) 318 break; /* XXX */ 319 } else { 320 error = ENOTCONN; 321 break; 322 } 323 } 324 325 if (so->so_state & SS_CANTSENDMORE) { 326 error = EPIPE; 327 break; 328 } 329 if (unp->unp_conn == 0) 330 panic("uipc_send connected but no connection?"); 331 so2 = unp->unp_conn->unp_socket; 332 /* 333 * Send to paired receive port, and then reduce 334 * send buffer hiwater marks to maintain backpressure. 335 * Wake up readers. 336 */ 337 if (control) { 338 if (sbappendcontrol(&so2->so_rcv, m, control)) 339 control = 0; 340 } else 341 sbappend(&so2->so_rcv, m); 342 so->so_snd.sb_mbmax -= 343 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 344 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 345 so->so_snd.sb_hiwat -= 346 so2->so_rcv.sb_cc - unp->unp_conn->unp_cc; 347 (void)chgsbsize(so->so_cred->cr_uid, 348 (rlim_t)unp->unp_conn->unp_cc - so2->so_rcv.sb_cc); 349 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 350 sorwakeup(so2); 351 m = 0; 352 break; 353 354 default: 355 panic("uipc_send unknown socktype"); 356 } 357 358 /* 359 * SEND_EOF is equivalent to a SEND followed by 360 * a SHUTDOWN. 361 */ 362 if (flags & PRUS_EOF) { 363 socantsendmore(so); 364 unp_shutdown(unp); 365 } 366 367 if (control && error != 0) 368 unp_dispose(control); 369 370release: 371 if (control) 372 m_freem(control); 373 if (m) 374 m_freem(m); 375 return error; 376} 377 378static int 379uipc_sense(struct socket *so, struct stat *sb) 380{ 381 struct unpcb *unp = sotounpcb(so); 382 struct socket *so2; 383 384 if (unp == 0) 385 return EINVAL; 386 sb->st_blksize = so->so_snd.sb_hiwat; 387 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 388 so2 = unp->unp_conn->unp_socket; 389 sb->st_blksize += so2->so_rcv.sb_cc; 390 } 391 sb->st_dev = NOUDEV; 392 if (unp->unp_ino == 0) 393 unp->unp_ino = unp_ino++; 394 sb->st_ino = unp->unp_ino; 395 return (0); 396} 397 398static int 399uipc_shutdown(struct socket *so) 400{ 401 struct unpcb *unp = sotounpcb(so); 402 403 if (unp == 0) 404 return EINVAL; 405 socantsendmore(so); 406 unp_shutdown(unp); 407 return 0; 408} 409 410static int 411uipc_sockaddr(struct socket *so, struct sockaddr **nam) 412{ 413 struct unpcb *unp = sotounpcb(so); 414 415 if (unp == 0) 416 return EINVAL; 417 if (unp->unp_addr) 418 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); 419 return 0; 420} 421 422struct pr_usrreqs uipc_usrreqs = { 423 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 424 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 425 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 426 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 427 sosend, soreceive, sopoll 428}; 429 430/* 431 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 432 * for stream sockets, although the total for sender and receiver is 433 * actually only PIPSIZ. 434 * Datagram sockets really use the sendspace as the maximum datagram size, 435 * and don't really want to reserve the sendspace. Their recvspace should 436 * be large enough for at least one max-size datagram plus address. 437 */ 438#ifndef PIPSIZ 439#define PIPSIZ 8192 440#endif 441static u_long unpst_sendspace = PIPSIZ; 442static u_long unpst_recvspace = PIPSIZ; 443static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 444static u_long unpdg_recvspace = 4*1024; 445 446static int unp_rights; /* file descriptors in flight */ 447 448SYSCTL_DECL(_net_local_stream); 449SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 450 &unpst_sendspace, 0, ""); 451SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 452 &unpst_recvspace, 0, ""); 453SYSCTL_DECL(_net_local_dgram); 454SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 455 &unpdg_sendspace, 0, ""); 456SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 457 &unpdg_recvspace, 0, ""); 458SYSCTL_DECL(_net_local); 459SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 460 461static int 462unp_attach(so) 463 struct socket *so; 464{ 465 register struct unpcb *unp; 466 int error; 467 468 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 469 switch (so->so_type) { 470 471 case SOCK_STREAM: 472 error = soreserve(so, unpst_sendspace, unpst_recvspace); 473 break; 474 475 case SOCK_DGRAM: 476 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 477 break; 478 479 default: 480 panic("unp_attach"); 481 } 482 if (error) 483 return (error); 484 } 485 unp = zalloc(unp_zone); 486 if (unp == NULL) 487 return (ENOBUFS); 488 bzero(unp, sizeof *unp); 489 unp->unp_gencnt = ++unp_gencnt; 490 unp_count++; 491 LIST_INIT(&unp->unp_refs); 492 unp->unp_socket = so; 493 unp->unp_rvnode = curproc->p_fd->fd_rdir; 494 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 495 : &unp_shead, unp, unp_link); 496 so->so_pcb = (caddr_t)unp; 497 return (0); 498} 499 500static void 501unp_detach(unp) 502 register struct unpcb *unp; 503{ 504 LIST_REMOVE(unp, unp_link); 505 unp->unp_gencnt = ++unp_gencnt; 506 --unp_count; 507 if (unp->unp_vnode) { 508 unp->unp_vnode->v_socket = 0; 509 vrele(unp->unp_vnode); 510 unp->unp_vnode = 0; 511 } 512 if (unp->unp_conn) 513 unp_disconnect(unp); 514 while (unp->unp_refs.lh_first) 515 unp_drop(unp->unp_refs.lh_first, ECONNRESET); 516 soisdisconnected(unp->unp_socket); 517 unp->unp_socket->so_pcb = 0; 518 if (unp_rights) { 519 /* 520 * Normally the receive buffer is flushed later, 521 * in sofree, but if our receive buffer holds references 522 * to descriptors that are now garbage, we will dispose 523 * of those descriptor references after the garbage collector 524 * gets them (resulting in a "panic: closef: count < 0"). 525 */ 526 sorflush(unp->unp_socket); 527 unp_gc(); 528 } 529 if (unp->unp_addr) 530 FREE(unp->unp_addr, M_SONAME); 531 zfree(unp_zone, unp); 532} 533 534static int 535unp_bind(unp, nam, p) 536 struct unpcb *unp; 537 struct sockaddr *nam; 538 struct proc *p; 539{ 540 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 541 register struct vnode *vp; 542 struct vattr vattr; 543 int error, namelen; 544 struct nameidata nd; 545 char buf[SOCK_MAXADDRLEN]; 546 547 if (unp->unp_vnode != NULL) 548 return (EINVAL); 549#define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0)) 550 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 551 if (namelen <= 0) 552 return EINVAL; 553 strncpy(buf, soun->sun_path, namelen); 554 buf[namelen] = 0; /* null-terminate the string */ 555 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, 556 buf, p); 557/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 558 error = namei(&nd); 559 if (error) 560 return (error); 561 vp = nd.ni_vp; 562 if (vp != NULL) { 563 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 564 if (nd.ni_dvp == vp) 565 vrele(nd.ni_dvp); 566 else 567 vput(nd.ni_dvp); 568 vrele(vp); 569 return (EADDRINUSE); 570 } 571 VATTR_NULL(&vattr); 572 vattr.va_type = VSOCK; 573 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 574 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 575 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 576 vput(nd.ni_dvp); 577 if (error) 578 return (error); 579 vp = nd.ni_vp; 580 vp->v_socket = unp->unp_socket; 581 unp->unp_vnode = vp; 582 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); 583 VOP_UNLOCK(vp, 0, p); 584 return (0); 585} 586 587static int 588unp_connect(so, nam, p) 589 struct socket *so; 590 struct sockaddr *nam; 591 struct proc *p; 592{ 593 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 594 register struct vnode *vp; 595 register struct socket *so2, *so3; 596 struct unpcb *unp2, *unp3; 597 int error, len; 598 struct nameidata nd; 599 char buf[SOCK_MAXADDRLEN]; 600 601 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 602 if (len <= 0) 603 return EINVAL; 604 strncpy(buf, soun->sun_path, len); 605 buf[len] = 0; 606 607 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); 608 error = namei(&nd); 609 if (error) 610 return (error); 611 vp = nd.ni_vp; 612 if (vp->v_type != VSOCK) { 613 error = ENOTSOCK; 614 goto bad; 615 } 616 error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); 617 if (error) 618 goto bad; 619 so2 = vp->v_socket; 620 if (so2 == 0) { 621 error = ECONNREFUSED; 622 goto bad; 623 } 624 if (so->so_type != so2->so_type) { 625 error = EPROTOTYPE; 626 goto bad; 627 } 628 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 629 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 630 (so3 = sonewconn3(so2, 0, p)) == 0) { 631 error = ECONNREFUSED; 632 goto bad; 633 } 634 unp2 = sotounpcb(so2); 635 unp3 = sotounpcb(so3); 636 if (unp2->unp_addr) 637 unp3->unp_addr = (struct sockaddr_un *) 638 dup_sockaddr((struct sockaddr *) 639 unp2->unp_addr, 1); 640 so2 = so3; 641 } 642 error = unp_connect2(so, so2); 643bad: 644 vput(vp); 645 return (error); 646} 647 648int 649unp_connect2(so, so2) 650 register struct socket *so; 651 register struct socket *so2; 652{ 653 register struct unpcb *unp = sotounpcb(so); 654 register struct unpcb *unp2; 655 656 if (so2->so_type != so->so_type) 657 return (EPROTOTYPE); 658 unp2 = sotounpcb(so2); 659 unp->unp_conn = unp2; 660 switch (so->so_type) { 661 662 case SOCK_DGRAM: 663 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 664 soisconnected(so); 665 break; 666 667 case SOCK_STREAM: 668 unp2->unp_conn = unp; 669 soisconnected(so); 670 soisconnected(so2); 671 break; 672 673 default: 674 panic("unp_connect2"); 675 } 676 return (0); 677} 678 679static void 680unp_disconnect(unp) 681 struct unpcb *unp; 682{ 683 register struct unpcb *unp2 = unp->unp_conn; 684 685 if (unp2 == 0) 686 return; 687 unp->unp_conn = 0; 688 switch (unp->unp_socket->so_type) { 689 690 case SOCK_DGRAM: 691 LIST_REMOVE(unp, unp_reflink); 692 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 693 break; 694 695 case SOCK_STREAM: 696 soisdisconnected(unp->unp_socket); 697 unp2->unp_conn = 0; 698 soisdisconnected(unp2->unp_socket); 699 break; 700 } 701} 702 703#ifdef notdef 704void 705unp_abort(unp) 706 struct unpcb *unp; 707{ 708 709 unp_detach(unp); 710} 711#endif 712 713static int 714prison_unpcb(struct proc *p, struct unpcb *unp) 715{ 716 if (!p->p_prison) 717 return (0); 718 if (p->p_fd->fd_rdir == unp->unp_rvnode) 719 return (0); 720 return (1); 721} 722 723static int 724unp_pcblist SYSCTL_HANDLER_ARGS 725{ 726 int error, i, n; 727 struct unpcb *unp, **unp_list; 728 unp_gen_t gencnt; 729 struct xunpgen xug; 730 struct unp_head *head; 731 732 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 733 734 /* 735 * The process of preparing the PCB list is too time-consuming and 736 * resource-intensive to repeat twice on every request. 737 */ 738 if (req->oldptr == 0) { 739 n = unp_count; 740 req->oldidx = 2 * (sizeof xug) 741 + (n + n/8) * sizeof(struct xunpcb); 742 return 0; 743 } 744 745 if (req->newptr != 0) 746 return EPERM; 747 748 /* 749 * OK, now we're committed to doing something. 750 */ 751 gencnt = unp_gencnt; 752 n = unp_count; 753 754 xug.xug_len = sizeof xug; 755 xug.xug_count = n; 756 xug.xug_gen = gencnt; 757 xug.xug_sogen = so_gencnt; 758 error = SYSCTL_OUT(req, &xug, sizeof xug); 759 if (error) 760 return error; 761 762 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 763 if (unp_list == 0) 764 return ENOMEM; 765 766 for (unp = head->lh_first, i = 0; unp && i < n; 767 unp = unp->unp_link.le_next) { 768 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->p, unp)) 769 unp_list[i++] = unp; 770 } 771 n = i; /* in case we lost some during malloc */ 772 773 error = 0; 774 for (i = 0; i < n; i++) { 775 unp = unp_list[i]; 776 if (unp->unp_gencnt <= gencnt) { 777 struct xunpcb xu; 778 xu.xu_len = sizeof xu; 779 xu.xu_unpp = unp; 780 /* 781 * XXX - need more locking here to protect against 782 * connect/disconnect races for SMP. 783 */ 784 if (unp->unp_addr) 785 bcopy(unp->unp_addr, &xu.xu_addr, 786 unp->unp_addr->sun_len); 787 if (unp->unp_conn && unp->unp_conn->unp_addr) 788 bcopy(unp->unp_conn->unp_addr, 789 &xu.xu_caddr, 790 unp->unp_conn->unp_addr->sun_len); 791 bcopy(unp, &xu.xu_unp, sizeof *unp); 792 sotoxsocket(unp->unp_socket, &xu.xu_socket); 793 error = SYSCTL_OUT(req, &xu, sizeof xu); 794 } 795 } 796 if (!error) { 797 /* 798 * Give the user an updated idea of our state. 799 * If the generation differs from what we told 800 * her before, she knows that something happened 801 * while we were processing this request, and it 802 * might be necessary to retry. 803 */ 804 xug.xug_gen = unp_gencnt; 805 xug.xug_sogen = so_gencnt; 806 xug.xug_count = unp_count; 807 error = SYSCTL_OUT(req, &xug, sizeof xug); 808 } 809 free(unp_list, M_TEMP); 810 return error; 811} 812 813SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 814 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 815 "List of active local datagram sockets"); 816SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 817 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 818 "List of active local stream sockets"); 819 820static void 821unp_shutdown(unp) 822 struct unpcb *unp; 823{ 824 struct socket *so; 825 826 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 827 (so = unp->unp_conn->unp_socket)) 828 socantrcvmore(so); 829} 830 831static void 832unp_drop(unp, errno) 833 struct unpcb *unp; 834 int errno; 835{ 836 struct socket *so = unp->unp_socket; 837 838 so->so_error = errno; 839 unp_disconnect(unp); 840 if (so->so_head) { 841 LIST_REMOVE(unp, unp_link); 842 unp->unp_gencnt = ++unp_gencnt; 843 unp_count--; 844 so->so_pcb = (caddr_t) 0; 845 if (unp->unp_addr) 846 FREE(unp->unp_addr, M_SONAME); 847 zfree(unp_zone, unp); 848 sofree(so); 849 } 850} 851 852#ifdef notdef 853void 854unp_drain() 855{ 856 857} 858#endif 859 860int 861unp_externalize(rights) 862 struct mbuf *rights; 863{ 864 struct proc *p = curproc; /* XXX */ 865 register int i; 866 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 867 register struct file **rp = (struct file **)(cm + 1); 868 register struct file *fp; 869 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); 870 int f; 871 872 /* 873 * if the new FD's will not fit, then we free them all 874 */ 875 if (!fdavail(p, newfds)) { 876 for (i = 0; i < newfds; i++) { 877 fp = *rp; 878 unp_discard(fp); 879 *rp++ = 0; 880 } 881 return (EMSGSIZE); 882 } 883 /* 884 * now change each pointer to an fd in the global table to 885 * an integer that is the index to the local fd table entry 886 * that we set up to point to the global one we are transferring. 887 * XXX this assumes a pointer and int are the same size...! 888 */ 889 for (i = 0; i < newfds; i++) { 890 if (fdalloc(p, 0, &f)) 891 panic("unp_externalize"); 892 fp = *rp; 893 p->p_fd->fd_ofiles[f] = fp; 894 fp->f_msgcount--; 895 unp_rights--; 896 *(int *)rp++ = f; 897 } 898 return (0); 899} 900 901void 902unp_init(void) 903{ 904 unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); 905 if (unp_zone == 0) 906 panic("unp_init"); 907 LIST_INIT(&unp_dhead); 908 LIST_INIT(&unp_shead); 909} 910 911#ifndef MIN 912#define MIN(a,b) (((a)<(b))?(a):(b)) 913#endif 914 915static int 916unp_internalize(control, p) 917 struct mbuf *control; 918 struct proc *p; 919{ 920 struct filedesc *fdp = p->p_fd; 921 register struct cmsghdr *cm = mtod(control, struct cmsghdr *); 922 register struct file **rp; 923 register struct file *fp; 924 register int i, fd; 925 register struct cmsgcred *cmcred; 926 int oldfds; 927 928 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 929 cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) 930 return (EINVAL); 931 932 /* 933 * Fill in credential information. 934 */ 935 if (cm->cmsg_type == SCM_CREDS) { 936 cmcred = (struct cmsgcred *)(cm + 1); 937 cmcred->cmcred_pid = p->p_pid; 938 cmcred->cmcred_uid = p->p_cred->p_ruid; 939 cmcred->cmcred_gid = p->p_cred->p_rgid; 940 cmcred->cmcred_euid = p->p_ucred->cr_uid; 941 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 942 CMGROUP_MAX); 943 for (i = 0; i < cmcred->cmcred_ngroups; i++) 944 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 945 return(0); 946 } 947 948 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 949 /* 950 * check that all the FDs passed in refer to legal OPEN files 951 * If not, reject the entire operation. 952 */ 953 rp = (struct file **)(cm + 1); 954 for (i = 0; i < oldfds; i++) { 955 fd = *(int *)rp++; 956 if ((unsigned)fd >= fdp->fd_nfiles || 957 fdp->fd_ofiles[fd] == NULL) 958 return (EBADF); 959 } 960 /* 961 * Now replace the integer FDs with pointers to 962 * the associated global file table entry.. 963 * XXX this assumes a pointer and an int are the same size! 964 */ 965 rp = (struct file **)(cm + 1); 966 for (i = 0; i < oldfds; i++) { 967 fp = fdp->fd_ofiles[*(int *)rp]; 968 *rp++ = fp; 969 fp->f_count++; 970 fp->f_msgcount++; 971 unp_rights++; 972 } 973 return (0); 974} 975 976static int unp_defer, unp_gcing; 977 978static void 979unp_gc() 980{ 981 register struct file *fp, *nextfp; 982 register struct socket *so; 983 struct file **extra_ref, **fpp; 984 int nunref, i; 985 986 if (unp_gcing) 987 return; 988 unp_gcing = 1; 989 unp_defer = 0; 990 /* 991 * before going through all this, set all FDs to 992 * be NOT defered and NOT externally accessible 993 */ 994 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 995 fp->f_flag &= ~(FMARK|FDEFER); 996 do { 997 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 998 /* 999 * If the file is not open, skip it 1000 */ 1001 if (fp->f_count == 0) 1002 continue; 1003 /* 1004 * If we already marked it as 'defer' in a 1005 * previous pass, then try process it this time 1006 * and un-mark it 1007 */ 1008 if (fp->f_flag & FDEFER) { 1009 fp->f_flag &= ~FDEFER; 1010 unp_defer--; 1011 } else { 1012 /* 1013 * if it's not defered, then check if it's 1014 * already marked.. if so skip it 1015 */ 1016 if (fp->f_flag & FMARK) 1017 continue; 1018 /* 1019 * If all references are from messages 1020 * in transit, then skip it. it's not 1021 * externally accessible. 1022 */ 1023 if (fp->f_count == fp->f_msgcount) 1024 continue; 1025 /* 1026 * If it got this far then it must be 1027 * externally accessible. 1028 */ 1029 fp->f_flag |= FMARK; 1030 } 1031 /* 1032 * either it was defered, or it is externally 1033 * accessible and not already marked so. 1034 * Now check if it is possibly one of OUR sockets. 1035 */ 1036 if (fp->f_type != DTYPE_SOCKET || 1037 (so = (struct socket *)fp->f_data) == 0) 1038 continue; 1039 if (so->so_proto->pr_domain != &localdomain || 1040 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1041 continue; 1042#ifdef notdef 1043 if (so->so_rcv.sb_flags & SB_LOCK) { 1044 /* 1045 * This is problematical; it's not clear 1046 * we need to wait for the sockbuf to be 1047 * unlocked (on a uniprocessor, at least), 1048 * and it's also not clear what to do 1049 * if sbwait returns an error due to receipt 1050 * of a signal. If sbwait does return 1051 * an error, we'll go into an infinite 1052 * loop. Delete all of this for now. 1053 */ 1054 (void) sbwait(&so->so_rcv); 1055 goto restart; 1056 } 1057#endif 1058 /* 1059 * So, Ok, it's one of our sockets and it IS externally 1060 * accessible (or was defered). Now we look 1061 * to see if we hold any file descriptors in its 1062 * message buffers. Follow those links and mark them 1063 * as accessible too. 1064 */ 1065 unp_scan(so->so_rcv.sb_mb, unp_mark); 1066 } 1067 } while (unp_defer); 1068 /* 1069 * We grab an extra reference to each of the file table entries 1070 * that are not otherwise accessible and then free the rights 1071 * that are stored in messages on them. 1072 * 1073 * The bug in the orginal code is a little tricky, so I'll describe 1074 * what's wrong with it here. 1075 * 1076 * It is incorrect to simply unp_discard each entry for f_msgcount 1077 * times -- consider the case of sockets A and B that contain 1078 * references to each other. On a last close of some other socket, 1079 * we trigger a gc since the number of outstanding rights (unp_rights) 1080 * is non-zero. If during the sweep phase the gc code un_discards, 1081 * we end up doing a (full) closef on the descriptor. A closef on A 1082 * results in the following chain. Closef calls soo_close, which 1083 * calls soclose. Soclose calls first (through the switch 1084 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1085 * returns because the previous instance had set unp_gcing, and 1086 * we return all the way back to soclose, which marks the socket 1087 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1088 * to free up the rights that are queued in messages on the socket A, 1089 * i.e., the reference on B. The sorflush calls via the dom_dispose 1090 * switch unp_dispose, which unp_scans with unp_discard. This second 1091 * instance of unp_discard just calls closef on B. 1092 * 1093 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1094 * which results in another closef on A. Unfortunately, A is already 1095 * being closed, and the descriptor has already been marked with 1096 * SS_NOFDREF, and soclose panics at this point. 1097 * 1098 * Here, we first take an extra reference to each inaccessible 1099 * descriptor. Then, we call sorflush ourself, since we know 1100 * it is a Unix domain socket anyhow. After we destroy all the 1101 * rights carried in messages, we do a last closef to get rid 1102 * of our extra reference. This is the last close, and the 1103 * unp_detach etc will shut down the socket. 1104 * 1105 * 91/09/19, bsy@cs.cmu.edu 1106 */ 1107 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1108 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 1109 fp = nextfp) { 1110 nextfp = fp->f_list.le_next; 1111 /* 1112 * If it's not open, skip it 1113 */ 1114 if (fp->f_count == 0) 1115 continue; 1116 /* 1117 * If all refs are from msgs, and it's not marked accessible 1118 * then it must be referenced from some unreachable cycle 1119 * of (shut-down) FDs, so include it in our 1120 * list of FDs to remove 1121 */ 1122 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1123 *fpp++ = fp; 1124 nunref++; 1125 fp->f_count++; 1126 } 1127 } 1128 /* 1129 * for each FD on our hit list, do the following two things 1130 */ 1131 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1132 struct file *tfp = *fpp; 1133 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) 1134 sorflush((struct socket *)(tfp->f_data)); 1135 } 1136 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1137 closef(*fpp, (struct proc *) NULL); 1138 free((caddr_t)extra_ref, M_FILE); 1139 unp_gcing = 0; 1140} 1141 1142void 1143unp_dispose(m) 1144 struct mbuf *m; 1145{ 1146 1147 if (m) 1148 unp_scan(m, unp_discard); 1149} 1150 1151static void 1152unp_scan(m0, op) 1153 register struct mbuf *m0; 1154 void (*op) __P((struct file *)); 1155{ 1156 register struct mbuf *m; 1157 register struct file **rp; 1158 register struct cmsghdr *cm; 1159 register int i; 1160 int qfds; 1161 1162 while (m0) { 1163 for (m = m0; m; m = m->m_next) 1164 if (m->m_type == MT_CONTROL && 1165 m->m_len >= sizeof(*cm)) { 1166 cm = mtod(m, struct cmsghdr *); 1167 if (cm->cmsg_level != SOL_SOCKET || 1168 cm->cmsg_type != SCM_RIGHTS) 1169 continue; 1170 qfds = (cm->cmsg_len - sizeof *cm) 1171 / sizeof (struct file *); 1172 rp = (struct file **)(cm + 1); 1173 for (i = 0; i < qfds; i++) 1174 (*op)(*rp++); 1175 break; /* XXX, but saves time */ 1176 } 1177 m0 = m0->m_act; 1178 } 1179} 1180 1181static void 1182unp_mark(fp) 1183 struct file *fp; 1184{ 1185 1186 if (fp->f_flag & FMARK) 1187 return; 1188 unp_defer++; 1189 fp->f_flag |= (FMARK|FDEFER); 1190} 1191 1192static void 1193unp_discard(fp) 1194 struct file *fp; 1195{ 1196 1197 fp->f_msgcount--; 1198 unp_rights--; 1199 (void) closef(fp, (struct proc *)NULL); 1200} 1201