65static unp_gen_t unp_gencnt; 66static u_int unp_count; 67 68static struct unp_head unp_shead, unp_dhead; 69 70/* 71 * Unix communications domain. 72 * 73 * TODO: 74 * SEQPACKET, RDM 75 * rethink name space problems 76 * need a proper out-of-band 77 * lock pushdown 78 */ 79static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 80static ino_t unp_ino; /* prototype for fake inode numbers */ 81 82static int unp_attach __P((struct socket *)); 83static void unp_detach __P((struct unpcb *)); 84static int unp_bind __P((struct unpcb *,struct sockaddr *, struct thread *)); 85static int unp_connect __P((struct socket *,struct sockaddr *, 86 struct thread *)); 87static void unp_disconnect __P((struct unpcb *)); 88static void unp_shutdown __P((struct unpcb *)); 89static void unp_drop __P((struct unpcb *, int)); 90static void unp_gc __P((void)); 91static void unp_scan __P((struct mbuf *, void (*)(struct file *))); 92static void unp_mark __P((struct file *)); 93static void unp_discard __P((struct file *)); 94static void unp_freerights __P((struct file **, int)); 95static int unp_internalize __P((struct mbuf **, struct thread *)); 96static int unp_listen __P((struct unpcb *, struct thread *)); 97 98static int 99uipc_abort(struct socket *so) 100{ 101 struct unpcb *unp = sotounpcb(so); 102 103 if (unp == 0) 104 return EINVAL; 105 unp_drop(unp, ECONNABORTED); 106 unp_detach(unp); 107 sotryfree(so); 108 return 0; 109} 110 111static int 112uipc_accept(struct socket *so, struct sockaddr **nam) 113{ 114 struct unpcb *unp = sotounpcb(so); 115 116 if (unp == 0) 117 return EINVAL; 118 119 /* 120 * Pass back name of connected socket, 121 * if it was bound and we are still connected 122 * (our peer may have closed already!). 123 */ 124 if (unp->unp_conn && unp->unp_conn->unp_addr) { 125 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 126 1); 127 } else { 128 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 129 } 130 return 0; 131} 132 133static int 134uipc_attach(struct socket *so, int proto, struct thread *td) 135{ 136 struct unpcb *unp = sotounpcb(so); 137 138 if (unp != 0) 139 return EISCONN; 140 return unp_attach(so); 141} 142 143static int 144uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 145{ 146 struct unpcb *unp = sotounpcb(so); 147 148 if (unp == 0) 149 return EINVAL; 150 151 return unp_bind(unp, nam, td); 152} 153 154static int 155uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 156{ 157 struct unpcb *unp = sotounpcb(so); 158 159 if (unp == 0) 160 return EINVAL; 161 return unp_connect(so, nam, curthread); 162} 163 164static int 165uipc_connect2(struct socket *so1, struct socket *so2) 166{ 167 struct unpcb *unp = sotounpcb(so1); 168 169 if (unp == 0) 170 return EINVAL; 171 172 return unp_connect2(so1, so2); 173} 174 175/* control is EOPNOTSUPP */ 176 177static int 178uipc_detach(struct socket *so) 179{ 180 struct unpcb *unp = sotounpcb(so); 181 182 if (unp == 0) 183 return EINVAL; 184 185 unp_detach(unp); 186 return 0; 187} 188 189static int 190uipc_disconnect(struct socket *so) 191{ 192 struct unpcb *unp = sotounpcb(so); 193 194 if (unp == 0) 195 return EINVAL; 196 unp_disconnect(unp); 197 return 0; 198} 199 200static int 201uipc_listen(struct socket *so, struct thread *td) 202{ 203 struct unpcb *unp = sotounpcb(so); 204 205 if (unp == 0 || unp->unp_vnode == 0) 206 return EINVAL; 207 return unp_listen(unp, td); 208} 209 210static int 211uipc_peeraddr(struct socket *so, struct sockaddr **nam) 212{ 213 struct unpcb *unp = sotounpcb(so); 214 215 if (unp == 0) 216 return EINVAL; 217 if (unp->unp_conn && unp->unp_conn->unp_addr) 218 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 219 1); 220 return 0; 221} 222 223static int 224uipc_rcvd(struct socket *so, int flags) 225{ 226 struct unpcb *unp = sotounpcb(so); 227 struct socket *so2; 228 u_long newhiwat; 229 230 if (unp == 0) 231 return EINVAL; 232 switch (so->so_type) { 233 case SOCK_DGRAM: 234 panic("uipc_rcvd DGRAM?"); 235 /*NOTREACHED*/ 236 237 case SOCK_STREAM: 238 if (unp->unp_conn == 0) 239 break; 240 so2 = unp->unp_conn->unp_socket; 241 /* 242 * Adjust backpressure on sender 243 * and wakeup any waiting to write. 244 */ 245 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 246 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 247 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - 248 so->so_rcv.sb_cc; 249 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 250 newhiwat, RLIM_INFINITY); 251 unp->unp_cc = so->so_rcv.sb_cc; 252 sowwakeup(so2); 253 break; 254 255 default: 256 panic("uipc_rcvd unknown socktype"); 257 } 258 return 0; 259} 260 261/* pru_rcvoob is EOPNOTSUPP */ 262 263static int 264uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 265 struct mbuf *control, struct thread *td) 266{ 267 int error = 0; 268 struct unpcb *unp = sotounpcb(so); 269 struct socket *so2; 270 u_long newhiwat; 271 272 if (unp == 0) { 273 error = EINVAL; 274 goto release; 275 } 276 if (flags & PRUS_OOB) { 277 error = EOPNOTSUPP; 278 goto release; 279 } 280 281 if (control && (error = unp_internalize(&control, td))) 282 goto release; 283 284 switch (so->so_type) { 285 case SOCK_DGRAM: 286 { 287 struct sockaddr *from; 288 289 if (nam) { 290 if (unp->unp_conn) { 291 error = EISCONN; 292 break; 293 } 294 error = unp_connect(so, nam, td); 295 if (error) 296 break; 297 } else { 298 if (unp->unp_conn == 0) { 299 error = ENOTCONN; 300 break; 301 } 302 } 303 so2 = unp->unp_conn->unp_socket; 304 if (unp->unp_addr) 305 from = (struct sockaddr *)unp->unp_addr; 306 else 307 from = &sun_noname; 308 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 309 sorwakeup(so2); 310 m = 0; 311 control = 0; 312 } else 313 error = ENOBUFS; 314 if (nam) 315 unp_disconnect(unp); 316 break; 317 } 318 319 case SOCK_STREAM: 320 /* Connect if not connected yet. */ 321 /* 322 * Note: A better implementation would complain 323 * if not equal to the peer's address. 324 */ 325 if ((so->so_state & SS_ISCONNECTED) == 0) { 326 if (nam) { 327 error = unp_connect(so, nam, td); 328 if (error) 329 break; /* XXX */ 330 } else { 331 error = ENOTCONN; 332 break; 333 } 334 } 335 336 if (so->so_state & SS_CANTSENDMORE) { 337 error = EPIPE; 338 break; 339 } 340 if (unp->unp_conn == 0) 341 panic("uipc_send connected but no connection?"); 342 so2 = unp->unp_conn->unp_socket; 343 /* 344 * Send to paired receive port, and then reduce 345 * send buffer hiwater marks to maintain backpressure. 346 * Wake up readers. 347 */ 348 if (control) { 349 if (sbappendcontrol(&so2->so_rcv, m, control)) 350 control = 0; 351 } else 352 sbappend(&so2->so_rcv, m); 353 so->so_snd.sb_mbmax -= 354 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 355 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 356 newhiwat = so->so_snd.sb_hiwat - 357 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); 358 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 359 newhiwat, RLIM_INFINITY); 360 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 361 sorwakeup(so2); 362 m = 0; 363 break; 364 365 default: 366 panic("uipc_send unknown socktype"); 367 } 368 369 /* 370 * SEND_EOF is equivalent to a SEND followed by 371 * a SHUTDOWN. 372 */ 373 if (flags & PRUS_EOF) { 374 socantsendmore(so); 375 unp_shutdown(unp); 376 } 377 378 if (control && error != 0) 379 unp_dispose(control); 380 381release: 382 if (control) 383 m_freem(control); 384 if (m) 385 m_freem(m); 386 return error; 387} 388 389static int 390uipc_sense(struct socket *so, struct stat *sb) 391{ 392 struct unpcb *unp = sotounpcb(so); 393 struct socket *so2; 394 395 if (unp == 0) 396 return EINVAL; 397 sb->st_blksize = so->so_snd.sb_hiwat; 398 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 399 so2 = unp->unp_conn->unp_socket; 400 sb->st_blksize += so2->so_rcv.sb_cc; 401 } 402 sb->st_dev = NOUDEV; 403 if (unp->unp_ino == 0) 404 unp->unp_ino = unp_ino++; 405 sb->st_ino = unp->unp_ino; 406 return (0); 407} 408 409static int 410uipc_shutdown(struct socket *so) 411{ 412 struct unpcb *unp = sotounpcb(so); 413 414 if (unp == 0) 415 return EINVAL; 416 socantsendmore(so); 417 unp_shutdown(unp); 418 return 0; 419} 420 421static int 422uipc_sockaddr(struct socket *so, struct sockaddr **nam) 423{ 424 struct unpcb *unp = sotounpcb(so); 425 426 if (unp == 0) 427 return EINVAL; 428 if (unp->unp_addr) 429 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); 430 else 431 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 432 return 0; 433} 434 435struct pr_usrreqs uipc_usrreqs = { 436 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 437 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 438 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 439 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 440 sosend, soreceive, sopoll 441}; 442 443int 444uipc_ctloutput(so, sopt) 445 struct socket *so; 446 struct sockopt *sopt; 447{ 448 struct unpcb *unp = sotounpcb(so); 449 int error; 450 451 switch (sopt->sopt_dir) { 452 case SOPT_GET: 453 switch (sopt->sopt_name) { 454 case LOCAL_PEERCRED: 455 if (unp->unp_flags & UNP_HAVEPC) 456 error = sooptcopyout(sopt, &unp->unp_peercred, 457 sizeof(unp->unp_peercred)); 458 else { 459 if (so->so_type == SOCK_STREAM) 460 error = ENOTCONN; 461 else 462 error = EINVAL; 463 } 464 break; 465 default: 466 error = EOPNOTSUPP; 467 break; 468 } 469 break; 470 case SOPT_SET: 471 default: 472 error = EOPNOTSUPP; 473 break; 474 } 475 return (error); 476} 477 478/* 479 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 480 * for stream sockets, although the total for sender and receiver is 481 * actually only PIPSIZ. 482 * Datagram sockets really use the sendspace as the maximum datagram size, 483 * and don't really want to reserve the sendspace. Their recvspace should 484 * be large enough for at least one max-size datagram plus address. 485 */ 486#ifndef PIPSIZ 487#define PIPSIZ 8192 488#endif 489static u_long unpst_sendspace = PIPSIZ; 490static u_long unpst_recvspace = PIPSIZ; 491static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 492static u_long unpdg_recvspace = 4*1024; 493 494static int unp_rights; /* file descriptors in flight */ 495 496SYSCTL_DECL(_net_local_stream); 497SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 498 &unpst_sendspace, 0, ""); 499SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 500 &unpst_recvspace, 0, ""); 501SYSCTL_DECL(_net_local_dgram); 502SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 503 &unpdg_sendspace, 0, ""); 504SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 505 &unpdg_recvspace, 0, ""); 506SYSCTL_DECL(_net_local); 507SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 508 509static int 510unp_attach(so) 511 struct socket *so; 512{ 513 register struct unpcb *unp; 514 int error; 515 516 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 517 switch (so->so_type) { 518 519 case SOCK_STREAM: 520 error = soreserve(so, unpst_sendspace, unpst_recvspace); 521 break; 522 523 case SOCK_DGRAM: 524 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 525 break; 526 527 default: 528 panic("unp_attach"); 529 } 530 if (error) 531 return (error); 532 } 533 unp = zalloc(unp_zone); 534 if (unp == NULL) 535 return (ENOBUFS); 536 bzero(unp, sizeof *unp); 537 unp->unp_gencnt = ++unp_gencnt; 538 unp_count++; 539 LIST_INIT(&unp->unp_refs); 540 unp->unp_socket = so; 541 FILEDESC_LOCK(curproc->p_fd); 542 unp->unp_rvnode = curthread->td_proc->p_fd->fd_rdir; 543 FILEDESC_UNLOCK(curproc->p_fd); 544 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 545 : &unp_shead, unp, unp_link); 546 so->so_pcb = (caddr_t)unp; 547 return (0); 548} 549 550static void 551unp_detach(unp) 552 register struct unpcb *unp; 553{ 554 LIST_REMOVE(unp, unp_link); 555 unp->unp_gencnt = ++unp_gencnt; 556 --unp_count; 557 if (unp->unp_vnode) { 558 unp->unp_vnode->v_socket = 0; 559 vrele(unp->unp_vnode); 560 unp->unp_vnode = 0; 561 } 562 if (unp->unp_conn) 563 unp_disconnect(unp); 564 while (!LIST_EMPTY(&unp->unp_refs)) 565 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); 566 soisdisconnected(unp->unp_socket); 567 unp->unp_socket->so_pcb = 0; 568 if (unp_rights) { 569 /* 570 * Normally the receive buffer is flushed later, 571 * in sofree, but if our receive buffer holds references 572 * to descriptors that are now garbage, we will dispose 573 * of those descriptor references after the garbage collector 574 * gets them (resulting in a "panic: closef: count < 0"). 575 */ 576 sorflush(unp->unp_socket); 577 unp_gc(); 578 } 579 if (unp->unp_addr) 580 FREE(unp->unp_addr, M_SONAME); 581 zfree(unp_zone, unp); 582} 583 584static int 585unp_bind(unp, nam, td) 586 struct unpcb *unp; 587 struct sockaddr *nam; 588 struct thread *td; 589{ 590 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 591 struct vnode *vp; 592 struct mount *mp; 593 struct vattr vattr; 594 int error, namelen; 595 struct nameidata nd; 596 char *buf; 597 598 if (unp->unp_vnode != NULL) 599 return (EINVAL); 600 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 601 if (namelen <= 0) 602 return EINVAL; 603 buf = malloc(SOCK_MAXADDRLEN, M_TEMP, M_WAITOK); 604 strncpy(buf, soun->sun_path, namelen); 605 buf[namelen] = 0; /* null-terminate the string */ 606restart: 607 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, 608 buf, td); 609/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 610 error = namei(&nd); 611 if (error) { 612 free(buf, M_TEMP); 613 return (error); 614 } 615 vp = nd.ni_vp; 616 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 617 NDFREE(&nd, NDF_ONLY_PNBUF); 618 if (nd.ni_dvp == vp) 619 vrele(nd.ni_dvp); 620 else 621 vput(nd.ni_dvp); 622 if (vp != NULL) { 623 vrele(vp); 624 free(buf, M_TEMP); 625 return (EADDRINUSE); 626 } 627 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 628 if (error) { 629 free(buf, M_TEMP); 630 return (error); 631 } 632 goto restart; 633 } 634 VATTR_NULL(&vattr); 635 vattr.va_type = VSOCK; 636 FILEDESC_LOCK(td->td_proc->p_fd); 637 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 638 FILEDESC_UNLOCK(td->td_proc->p_fd); 639 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 640 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 641 NDFREE(&nd, NDF_ONLY_PNBUF); 642 vput(nd.ni_dvp); 643 if (error) { 644 free(buf, M_TEMP); 645 return (error); 646 } 647 vp = nd.ni_vp; 648 vp->v_socket = unp->unp_socket; 649 unp->unp_vnode = vp; 650 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); 651 VOP_UNLOCK(vp, 0, td); 652 vn_finished_write(mp); 653 free(buf, M_TEMP); 654 return (0); 655} 656 657static int 658unp_connect(so, nam, td) 659 struct socket *so; 660 struct sockaddr *nam; 661 struct thread *td; 662{ 663 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 664 register struct vnode *vp; 665 register struct socket *so2, *so3; 666 struct unpcb *unp, *unp2, *unp3; 667 int error, len; 668 struct nameidata nd; 669 char buf[SOCK_MAXADDRLEN]; 670 671 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 672 if (len <= 0) 673 return EINVAL; 674 strncpy(buf, soun->sun_path, len); 675 buf[len] = 0; 676 677 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); 678 error = namei(&nd); 679 if (error) 680 return (error); 681 vp = nd.ni_vp; 682 NDFREE(&nd, NDF_ONLY_PNBUF); 683 if (vp->v_type != VSOCK) { 684 error = ENOTSOCK; 685 goto bad; 686 } 687 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 688 if (error) 689 goto bad; 690 so2 = vp->v_socket; 691 if (so2 == 0) { 692 error = ECONNREFUSED; 693 goto bad; 694 } 695 if (so->so_type != so2->so_type) { 696 error = EPROTOTYPE; 697 goto bad; 698 } 699 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 700 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 701 (so3 = sonewconn(so2, 0)) == 0) { 702 error = ECONNREFUSED; 703 goto bad; 704 } 705 unp = sotounpcb(so); 706 unp2 = sotounpcb(so2); 707 unp3 = sotounpcb(so3); 708 if (unp2->unp_addr) 709 unp3->unp_addr = (struct sockaddr_un *) 710 dup_sockaddr((struct sockaddr *) 711 unp2->unp_addr, 1); 712 713 /* 714 * unp_peercred management: 715 * 716 * The connecter's (client's) credentials are copied 717 * from its process structure at the time of connect() 718 * (which is now). 719 */ 720 cru2x(td->td_ucred, &unp3->unp_peercred); 721 unp3->unp_flags |= UNP_HAVEPC; 722 /* 723 * The receiver's (server's) credentials are copied 724 * from the unp_peercred member of socket on which the 725 * former called listen(); unp_listen() cached that 726 * process's credentials at that time so we can use 727 * them now. 728 */ 729 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 730 ("unp_connect: listener without cached peercred")); 731 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 732 sizeof(unp->unp_peercred)); 733 unp->unp_flags |= UNP_HAVEPC; 734 735 so2 = so3; 736 } 737 error = unp_connect2(so, so2); 738bad: 739 vput(vp); 740 return (error); 741} 742 743int 744unp_connect2(so, so2) 745 register struct socket *so; 746 register struct socket *so2; 747{ 748 register struct unpcb *unp = sotounpcb(so); 749 register struct unpcb *unp2; 750 751 if (so2->so_type != so->so_type) 752 return (EPROTOTYPE); 753 unp2 = sotounpcb(so2); 754 unp->unp_conn = unp2; 755 switch (so->so_type) { 756 757 case SOCK_DGRAM: 758 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 759 soisconnected(so); 760 break; 761 762 case SOCK_STREAM: 763 unp2->unp_conn = unp; 764 soisconnected(so); 765 soisconnected(so2); 766 break; 767 768 default: 769 panic("unp_connect2"); 770 } 771 return (0); 772} 773 774static void 775unp_disconnect(unp) 776 struct unpcb *unp; 777{ 778 register struct unpcb *unp2 = unp->unp_conn; 779 780 if (unp2 == 0) 781 return; 782 unp->unp_conn = 0; 783 switch (unp->unp_socket->so_type) { 784 785 case SOCK_DGRAM: 786 LIST_REMOVE(unp, unp_reflink); 787 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 788 break; 789 790 case SOCK_STREAM: 791 soisdisconnected(unp->unp_socket); 792 unp2->unp_conn = 0; 793 soisdisconnected(unp2->unp_socket); 794 break; 795 } 796} 797 798#ifdef notdef 799void 800unp_abort(unp) 801 struct unpcb *unp; 802{ 803 804 unp_detach(unp); 805} 806#endif 807 808static int 809unp_pcblist(SYSCTL_HANDLER_ARGS) 810{ 811 int error, i, n; 812 struct unpcb *unp, **unp_list; 813 unp_gen_t gencnt; 814 struct xunpgen *xug; 815 struct unp_head *head; 816 struct xunpcb *xu; 817 818 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 819 820 /* 821 * The process of preparing the PCB list is too time-consuming and 822 * resource-intensive to repeat twice on every request. 823 */ 824 if (req->oldptr == 0) { 825 n = unp_count; 826 req->oldidx = 2 * (sizeof *xug) 827 + (n + n/8) * sizeof(struct xunpcb); 828 return 0; 829 } 830 831 if (req->newptr != 0) 832 return EPERM; 833 834 /* 835 * OK, now we're committed to doing something. 836 */ 837 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 838 gencnt = unp_gencnt; 839 n = unp_count; 840 841 xug->xug_len = sizeof *xug; 842 xug->xug_count = n; 843 xug->xug_gen = gencnt; 844 xug->xug_sogen = so_gencnt; 845 error = SYSCTL_OUT(req, xug, sizeof *xug); 846 if (error) { 847 free(xug, M_TEMP); 848 return error; 849 } 850 851 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 852 853 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 854 unp = LIST_NEXT(unp, unp_link)) { 855 if (unp->unp_gencnt <= gencnt) { 856 if (cr_cansee(req->td->td_ucred, 857 unp->unp_socket->so_cred)) 858 continue; 859 unp_list[i++] = unp; 860 } 861 } 862 n = i; /* in case we lost some during malloc */ 863 864 error = 0; 865 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK); 866 for (i = 0; i < n; i++) { 867 unp = unp_list[i]; 868 if (unp->unp_gencnt <= gencnt) { 869 xu->xu_len = sizeof *xu; 870 xu->xu_unpp = unp; 871 /* 872 * XXX - need more locking here to protect against 873 * connect/disconnect races for SMP. 874 */ 875 if (unp->unp_addr) 876 bcopy(unp->unp_addr, &xu->xu_addr, 877 unp->unp_addr->sun_len); 878 if (unp->unp_conn && unp->unp_conn->unp_addr) 879 bcopy(unp->unp_conn->unp_addr, 880 &xu->xu_caddr, 881 unp->unp_conn->unp_addr->sun_len); 882 bcopy(unp, &xu->xu_unp, sizeof *unp); 883 sotoxsocket(unp->unp_socket, &xu->xu_socket); 884 error = SYSCTL_OUT(req, xu, sizeof *xu); 885 } 886 } 887 free(xu, M_TEMP); 888 if (!error) { 889 /* 890 * Give the user an updated idea of our state. 891 * If the generation differs from what we told 892 * her before, she knows that something happened 893 * while we were processing this request, and it 894 * might be necessary to retry. 895 */ 896 xug->xug_gen = unp_gencnt; 897 xug->xug_sogen = so_gencnt; 898 xug->xug_count = unp_count; 899 error = SYSCTL_OUT(req, xug, sizeof *xug); 900 } 901 free(unp_list, M_TEMP); 902 free(xug, M_TEMP); 903 return error; 904} 905 906SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 907 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 908 "List of active local datagram sockets"); 909SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 910 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 911 "List of active local stream sockets"); 912 913static void 914unp_shutdown(unp) 915 struct unpcb *unp; 916{ 917 struct socket *so; 918 919 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 920 (so = unp->unp_conn->unp_socket)) 921 socantrcvmore(so); 922} 923 924static void 925unp_drop(unp, errno) 926 struct unpcb *unp; 927 int errno; 928{ 929 struct socket *so = unp->unp_socket; 930 931 so->so_error = errno; 932 unp_disconnect(unp); 933} 934 935#ifdef notdef 936void 937unp_drain() 938{ 939 940} 941#endif 942 943static void 944unp_freerights(rp, fdcount) 945 struct file **rp; 946 int fdcount; 947{ 948 int i; 949 struct file *fp; 950 951 for (i = 0; i < fdcount; i++) { 952 fp = *rp; 953 /* 954 * zero the pointer before calling 955 * unp_discard since it may end up 956 * in unp_gc().. 957 */ 958 *rp++ = 0; 959 unp_discard(fp); 960 } 961} 962 963int 964unp_externalize(control, controlp) 965 struct mbuf *control, **controlp; 966{ 967 struct thread *td = curthread; /* XXX */ 968 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 969 int i; 970 int *fdp; 971 struct file **rp; 972 struct file *fp; 973 void *data; 974 socklen_t clen = control->m_len, datalen; 975 int error, newfds; 976 int f; 977 u_int newlen; 978 979 error = 0; 980 if (controlp != NULL) /* controlp == NULL => free control messages */ 981 *controlp = NULL; 982 983 while (cm != NULL) { 984 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 985 error = EINVAL; 986 break; 987 } 988 989 data = CMSG_DATA(cm); 990 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 991 992 if (cm->cmsg_level == SOL_SOCKET 993 && cm->cmsg_type == SCM_RIGHTS) { 994 newfds = datalen / sizeof(struct file *); 995 rp = data; 996 997 /* If we're not outputting the discriptors free them. */ 998 if (error || controlp == NULL) { 999 unp_freerights(rp, newfds); 1000 goto next; 1001 } 1002 FILEDESC_LOCK(td->td_proc->p_fd); 1003 /* if the new FD's will not fit free them. */ 1004 if (!fdavail(td, newfds)) { 1005 FILEDESC_UNLOCK(td->td_proc->p_fd); 1006 error = EMSGSIZE; 1007 unp_freerights(rp, newfds); 1008 goto next; 1009 } 1010 /* 1011 * now change each pointer to an fd in the global 1012 * table to an integer that is the index to the 1013 * local fd table entry that we set up to point 1014 * to the global one we are transferring. 1015 */ 1016 newlen = newfds * sizeof(int); 1017 *controlp = sbcreatecontrol(NULL, newlen, 1018 SCM_RIGHTS, SOL_SOCKET); 1019 if (*controlp == NULL) { 1020 FILEDESC_UNLOCK(td->td_proc->p_fd); 1021 error = E2BIG; 1022 unp_freerights(rp, newfds); 1023 goto next; 1024 } 1025 1026 fdp = (int *) 1027 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1028 for (i = 0; i < newfds; i++) { 1029 if (fdalloc(td, 0, &f)) 1030 panic("unp_externalize fdalloc failed"); 1031 fp = *rp++; 1032 td->td_proc->p_fd->fd_ofiles[f] = fp; 1033 FILE_LOCK(fp); 1034 fp->f_msgcount--; 1035 FILE_UNLOCK(fp); 1036 unp_rights--; 1037 *fdp++ = f; 1038 } 1039 FILEDESC_UNLOCK(td->td_proc->p_fd); 1040 } else { /* We can just copy anything else across */ 1041 if (error || controlp == NULL) 1042 goto next; 1043 *controlp = sbcreatecontrol(NULL, datalen, 1044 cm->cmsg_type, cm->cmsg_level); 1045 if (*controlp == NULL) { 1046 error = ENOBUFS; 1047 goto next; 1048 } 1049 bcopy(data, 1050 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1051 datalen); 1052 } 1053 1054 controlp = &(*controlp)->m_next; 1055 1056next: 1057 if (CMSG_SPACE(datalen) < clen) { 1058 clen -= CMSG_SPACE(datalen); 1059 cm = (struct cmsghdr *) 1060 ((caddr_t)cm + CMSG_SPACE(datalen)); 1061 } else { 1062 clen = 0; 1063 cm = NULL; 1064 } 1065 } 1066 1067 m_freem(control); 1068 1069 return (error); 1070} 1071 1072void 1073unp_init(void) 1074{ 1075 unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); 1076 if (unp_zone == 0) 1077 panic("unp_init"); 1078 LIST_INIT(&unp_dhead); 1079 LIST_INIT(&unp_shead); 1080} 1081 1082#ifndef MIN 1083#define MIN(a,b) (((a)<(b))?(a):(b)) 1084#endif 1085 1086static int 1087unp_internalize(controlp, td) 1088 struct mbuf **controlp; 1089 struct thread *td; 1090{ 1091 struct mbuf *control = *controlp; 1092 struct proc *p = td->td_proc; 1093 struct filedesc *fdescp = p->p_fd; 1094 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1095 struct cmsgcred *cmcred; 1096 struct file **rp; 1097 struct file *fp; 1098 struct timeval *tv; 1099 int i, fd, *fdp; 1100 void *data; 1101 socklen_t clen = control->m_len, datalen; 1102 int error, oldfds; 1103 u_int newlen; 1104 1105 error = 0; 1106 *controlp = NULL; 1107 1108 while (cm != NULL) { 1109 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1110 || cm->cmsg_len > clen) { 1111 error = EINVAL; 1112 goto out; 1113 } 1114 1115 data = CMSG_DATA(cm); 1116 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1117 1118 switch (cm->cmsg_type) { 1119 /* 1120 * Fill in credential information. 1121 */ 1122 case SCM_CREDS: 1123 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1124 SCM_CREDS, SOL_SOCKET); 1125 if (*controlp == NULL) { 1126 error = ENOBUFS; 1127 goto out; 1128 } 1129 1130 cmcred = (struct cmsgcred *) 1131 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1132 cmcred->cmcred_pid = p->p_pid; 1133 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1134 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1135 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1136 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1137 CMGROUP_MAX); 1138 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1139 cmcred->cmcred_groups[i] = 1140 td->td_ucred->cr_groups[i]; 1141 break; 1142 1143 case SCM_RIGHTS: 1144 oldfds = datalen / sizeof (int); 1145 /* 1146 * check that all the FDs passed in refer to legal files 1147 * If not, reject the entire operation. 1148 */ 1149 fdp = data; 1150 FILEDESC_LOCK(fdescp); 1151 for (i = 0; i < oldfds; i++) { 1152 fd = *fdp++; 1153 if ((unsigned)fd >= fdescp->fd_nfiles || 1154 fdescp->fd_ofiles[fd] == NULL) { 1155 FILEDESC_UNLOCK(fdescp); 1156 error = EBADF; 1157 goto out; 1158 } 1159 } 1160 /* 1161 * Now replace the integer FDs with pointers to 1162 * the associated global file table entry.. 1163 */ 1164 newlen = oldfds * sizeof(struct file *); 1165 *controlp = sbcreatecontrol(NULL, newlen, 1166 SCM_RIGHTS, SOL_SOCKET); 1167 if (*controlp == NULL) { 1168 FILEDESC_UNLOCK(fdescp); 1169 error = E2BIG; 1170 goto out; 1171 } 1172 1173 fdp = data; 1174 rp = (struct file **) 1175 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1176 for (i = 0; i < oldfds; i++) { 1177 fp = fdescp->fd_ofiles[*fdp++]; 1178 *rp++ = fp; 1179 FILE_LOCK(fp); 1180 fp->f_count++; 1181 fp->f_msgcount++; 1182 FILE_UNLOCK(fp); 1183 unp_rights++; 1184 } 1185 FILEDESC_UNLOCK(fdescp); 1186 break; 1187 1188 case SCM_TIMESTAMP: 1189 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1190 SCM_TIMESTAMP, SOL_SOCKET); 1191 if (*controlp == NULL) { 1192 error = ENOBUFS; 1193 goto out; 1194 } 1195 tv = (struct timeval *) 1196 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1197 microtime(tv); 1198 break; 1199 1200 default: 1201 error = EINVAL; 1202 goto out; 1203 } 1204 1205 controlp = &(*controlp)->m_next; 1206 1207 if (CMSG_SPACE(datalen) < clen) { 1208 clen -= CMSG_SPACE(datalen); 1209 cm = (struct cmsghdr *) 1210 ((caddr_t)cm + CMSG_SPACE(datalen)); 1211 } else { 1212 clen = 0; 1213 cm = NULL; 1214 } 1215 } 1216 1217out: 1218 m_freem(control); 1219 1220 return (error); 1221} 1222 1223static int unp_defer, unp_gcing; 1224 1225static void 1226unp_gc() 1227{ 1228 register struct file *fp, *nextfp; 1229 register struct socket *so; 1230 struct file **extra_ref, **fpp; 1231 int nunref, i; 1232 1233 if (unp_gcing) 1234 return; 1235 unp_gcing = 1; 1236 unp_defer = 0; 1237 /* 1238 * before going through all this, set all FDs to 1239 * be NOT defered and NOT externally accessible 1240 */ 1241 sx_slock(&filelist_lock); 1242 LIST_FOREACH(fp, &filehead, f_list) 1243 fp->f_gcflag &= ~(FMARK|FDEFER); 1244 do { 1245 LIST_FOREACH(fp, &filehead, f_list) { 1246 FILE_LOCK(fp); 1247 /* 1248 * If the file is not open, skip it 1249 */ 1250 if (fp->f_count == 0) { 1251 FILE_UNLOCK(fp); 1252 continue; 1253 } 1254 /* 1255 * If we already marked it as 'defer' in a 1256 * previous pass, then try process it this time 1257 * and un-mark it 1258 */ 1259 if (fp->f_gcflag & FDEFER) { 1260 fp->f_gcflag &= ~FDEFER; 1261 unp_defer--; 1262 } else { 1263 /* 1264 * if it's not defered, then check if it's 1265 * already marked.. if so skip it 1266 */ 1267 if (fp->f_gcflag & FMARK) { 1268 FILE_UNLOCK(fp); 1269 continue; 1270 } 1271 /* 1272 * If all references are from messages 1273 * in transit, then skip it. it's not 1274 * externally accessible. 1275 */ 1276 if (fp->f_count == fp->f_msgcount) { 1277 FILE_UNLOCK(fp); 1278 continue; 1279 } 1280 /* 1281 * If it got this far then it must be 1282 * externally accessible. 1283 */ 1284 fp->f_gcflag |= FMARK; 1285 } 1286 /* 1287 * either it was defered, or it is externally 1288 * accessible and not already marked so. 1289 * Now check if it is possibly one of OUR sockets. 1290 */ 1291 if (fp->f_type != DTYPE_SOCKET || 1292 (so = (struct socket *)fp->f_data) == 0) { 1293 FILE_UNLOCK(fp); 1294 continue; 1295 } 1296 FILE_UNLOCK(fp); 1297 if (so->so_proto->pr_domain != &localdomain || 1298 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1299 continue; 1300#ifdef notdef 1301 if (so->so_rcv.sb_flags & SB_LOCK) { 1302 /* 1303 * This is problematical; it's not clear 1304 * we need to wait for the sockbuf to be 1305 * unlocked (on a uniprocessor, at least), 1306 * and it's also not clear what to do 1307 * if sbwait returns an error due to receipt 1308 * of a signal. If sbwait does return 1309 * an error, we'll go into an infinite 1310 * loop. Delete all of this for now. 1311 */ 1312 (void) sbwait(&so->so_rcv); 1313 goto restart; 1314 } 1315#endif 1316 /* 1317 * So, Ok, it's one of our sockets and it IS externally 1318 * accessible (or was defered). Now we look 1319 * to see if we hold any file descriptors in its 1320 * message buffers. Follow those links and mark them 1321 * as accessible too. 1322 */ 1323 unp_scan(so->so_rcv.sb_mb, unp_mark); 1324 } 1325 } while (unp_defer); 1326 sx_sunlock(&filelist_lock); 1327 /* 1328 * We grab an extra reference to each of the file table entries 1329 * that are not otherwise accessible and then free the rights 1330 * that are stored in messages on them. 1331 * 1332 * The bug in the orginal code is a little tricky, so I'll describe 1333 * what's wrong with it here. 1334 * 1335 * It is incorrect to simply unp_discard each entry for f_msgcount 1336 * times -- consider the case of sockets A and B that contain 1337 * references to each other. On a last close of some other socket, 1338 * we trigger a gc since the number of outstanding rights (unp_rights) 1339 * is non-zero. If during the sweep phase the gc code un_discards, 1340 * we end up doing a (full) closef on the descriptor. A closef on A 1341 * results in the following chain. Closef calls soo_close, which 1342 * calls soclose. Soclose calls first (through the switch 1343 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1344 * returns because the previous instance had set unp_gcing, and 1345 * we return all the way back to soclose, which marks the socket 1346 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1347 * to free up the rights that are queued in messages on the socket A, 1348 * i.e., the reference on B. The sorflush calls via the dom_dispose 1349 * switch unp_dispose, which unp_scans with unp_discard. This second 1350 * instance of unp_discard just calls closef on B. 1351 * 1352 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1353 * which results in another closef on A. Unfortunately, A is already 1354 * being closed, and the descriptor has already been marked with 1355 * SS_NOFDREF, and soclose panics at this point. 1356 * 1357 * Here, we first take an extra reference to each inaccessible 1358 * descriptor. Then, we call sorflush ourself, since we know 1359 * it is a Unix domain socket anyhow. After we destroy all the 1360 * rights carried in messages, we do a last closef to get rid 1361 * of our extra reference. This is the last close, and the 1362 * unp_detach etc will shut down the socket. 1363 * 1364 * 91/09/19, bsy@cs.cmu.edu 1365 */
|