1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1989, 1991, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 61 */ 62/* 63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 64 * support for mandatory and extensible security protections. This notice 65 * is included in support of clause 2.2 (b) of the Apple Public License, 66 * Version 2.0. 67 */ 68 69#include <sys/param.h> 70#include <sys/systm.h> 71#include <sys/kernel.h> 72#include <sys/domain.h> 73#include <sys/fcntl.h> 74#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 75#include <sys/file_internal.h> 76#include <sys/filedesc.h> 77#include <sys/lock.h> 78#include <sys/mbuf.h> 79#include <sys/namei.h> 80#include <sys/proc_internal.h> 81#include <sys/kauth.h> 82#include <sys/protosw.h> 83#include <sys/socket.h> 84#include <sys/socketvar.h> 85#include <sys/stat.h> 86#include <sys/sysctl.h> 87#include <sys/un.h> 88#include <sys/unpcb.h> 89#include <sys/vnode_internal.h> 90#include <sys/kdebug.h> 91 92#include <kern/zalloc.h> 93#include <kern/locks.h> 94 95#if CONFIG_MACF 96#include <security/mac_framework.h> 97#endif /* CONFIG_MACF */ 98 99#include <mach/vm_param.h> 100 101#define f_msgcount f_fglob->fg_msgcount 102#define f_cred f_fglob->fg_cred 103#define f_ops f_fglob->fg_ops 104#define f_offset f_fglob->fg_offset 105#define f_data f_fglob->fg_data 106struct zone *unp_zone; 107static unp_gen_t unp_gencnt; 108static u_int unp_count; 109 110static lck_attr_t *unp_mtx_attr; 111static lck_grp_t *unp_mtx_grp; 112static lck_grp_attr_t *unp_mtx_grp_attr; 113static lck_rw_t *unp_list_mtx; 114 115static lck_mtx_t *unp_disconnect_lock; 116static lck_mtx_t *unp_connect_lock; 117static u_int disconnect_in_progress; 118 119extern lck_mtx_t *uipc_lock; 120static struct unp_head unp_shead, unp_dhead; 121 122/* 123 * mDNSResponder tracing. When enabled, endpoints connected to 124 * /var/run/mDNSResponder will be traced; during each send on 125 * the traced socket, we log the PID and process name of the 126 * sending process. We also print out a bit of info related 127 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h 128 * of mDNSResponder stays the same. 129 */ 130#define MDNSRESPONDER_PATH "/var/run/mDNSResponder" 131 132static int unpst_tracemdns; /* enable tracing */ 133 134#define MDNS_IPC_MSG_HDR_VERSION_1 1 135 136struct mdns_ipc_msg_hdr { 137 uint32_t version; 138 uint32_t datalen; 139 uint32_t ipc_flags; 140 uint32_t op; 141 union { 142 void *context; 143 uint32_t u32[2]; 144 } __attribute__((packed)); 145 uint32_t reg_index; 146} __attribute__((packed)); 147 148/* 149 * Unix communications domain. 150 * 151 * TODO: 152 * SEQPACKET, RDM 153 * rethink name space problems 154 * need a proper out-of-band 155 * lock pushdown 156 */ 157static struct sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } }; 158static ino_t unp_ino; /* prototype for fake inode numbers */ 159 160static int unp_attach(struct socket *); 161static void unp_detach(struct unpcb *); 162static int unp_bind(struct unpcb *, struct sockaddr *, proc_t); 163static int unp_connect(struct socket *, struct sockaddr *, proc_t); 164static void unp_disconnect(struct unpcb *); 165static void unp_shutdown(struct unpcb *); 166static void unp_drop(struct unpcb *, int); 167__private_extern__ void unp_gc(void); 168static void unp_scan(struct mbuf *, void (*)(struct fileglob *)); 169static void unp_mark(struct fileglob *); 170static void unp_discard(struct fileglob *); 171static void unp_discard_fdlocked(struct fileglob *, proc_t); 172static int unp_internalize(struct mbuf *, proc_t); 173static int unp_listen(struct unpcb *, proc_t); 174static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *); 175static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so); 176 177static void 178unp_get_locks_in_order(struct socket *so, struct socket *conn_so) 179{ 180 if (so < conn_so) { 181 socket_lock(conn_so, 1); 182 } else { 183 struct unpcb *unp = sotounpcb(so); 184 unp->unp_flags |= UNP_DONTDISCONNECT; 185 unp->rw_thrcount++; 186 socket_unlock(so, 0); 187 188 /* Get the locks in the correct order */ 189 socket_lock(conn_so, 1); 190 socket_lock(so, 0); 191 unp->rw_thrcount--; 192 if (unp->rw_thrcount == 0) { 193 unp->unp_flags &= ~UNP_DONTDISCONNECT; 194 wakeup(unp); 195 } 196 } 197} 198 199static int 200uipc_abort(struct socket *so) 201{ 202 struct unpcb *unp = sotounpcb(so); 203 204 if (unp == 0) 205 return (EINVAL); 206 unp_drop(unp, ECONNABORTED); 207 unp_detach(unp); 208 sofree(so); 209 return (0); 210} 211 212static int 213uipc_accept(struct socket *so, struct sockaddr **nam) 214{ 215 struct unpcb *unp = sotounpcb(so); 216 217 if (unp == 0) 218 return (EINVAL); 219 220 /* 221 * Pass back name of connected socket, 222 * if it was bound and we are still connected 223 * (our peer may have closed already!). 224 */ 225 if (unp->unp_conn && unp->unp_conn->unp_addr) { 226 *nam = dup_sockaddr((struct sockaddr *) 227 unp->unp_conn->unp_addr, 1); 228 } else { 229 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 230 } 231 return (0); 232} 233 234/* 235 * Returns: 0 Success 236 * EISCONN 237 * unp_attach: 238 */ 239static int 240uipc_attach(struct socket *so, __unused int proto, __unused proc_t p) 241{ 242 struct unpcb *unp = sotounpcb(so); 243 244 if (unp != 0) 245 return (EISCONN); 246 return (unp_attach(so)); 247} 248 249static int 250uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p) 251{ 252 struct unpcb *unp = sotounpcb(so); 253 254 if (unp == 0) 255 return (EINVAL); 256 257 return (unp_bind(unp, nam, p)); 258} 259 260/* 261 * Returns: 0 Success 262 * EINVAL 263 * unp_connect:??? [See elsewhere in this file] 264 */ 265static int 266uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p) 267{ 268 struct unpcb *unp = sotounpcb(so); 269 270 if (unp == 0) 271 return (EINVAL); 272 return (unp_connect(so, nam, p)); 273} 274 275/* 276 * Returns: 0 Success 277 * EINVAL 278 * unp_connect2:EPROTOTYPE Protocol wrong type for socket 279 * unp_connect2:EINVAL Invalid argument 280 */ 281static int 282uipc_connect2(struct socket *so1, struct socket *so2) 283{ 284 struct unpcb *unp = sotounpcb(so1); 285 286 if (unp == 0) 287 return (EINVAL); 288 289 return (unp_connect2(so1, so2)); 290} 291 292/* control is EOPNOTSUPP */ 293 294static int 295uipc_detach(struct socket *so) 296{ 297 struct unpcb *unp = sotounpcb(so); 298 299 if (unp == 0) 300 return (EINVAL); 301 302 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED); 303 unp_detach(unp); 304 return (0); 305} 306 307static int 308uipc_disconnect(struct socket *so) 309{ 310 struct unpcb *unp = sotounpcb(so); 311 312 if (unp == 0) 313 return (EINVAL); 314 unp_disconnect(unp); 315 return (0); 316} 317 318/* 319 * Returns: 0 Success 320 * EINVAL 321 */ 322static int 323uipc_listen(struct socket *so, __unused proc_t p) 324{ 325 struct unpcb *unp = sotounpcb(so); 326 327 if (unp == 0 || unp->unp_vnode == 0) 328 return (EINVAL); 329 return (unp_listen(unp, p)); 330} 331 332static int 333uipc_peeraddr(struct socket *so, struct sockaddr **nam) 334{ 335 struct unpcb *unp = sotounpcb(so); 336 337 if (unp == NULL) 338 return (EINVAL); 339 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) { 340 *nam = dup_sockaddr((struct sockaddr *) 341 unp->unp_conn->unp_addr, 1); 342 } else { 343 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 344 } 345 return (0); 346} 347 348static int 349uipc_rcvd(struct socket *so, __unused int flags) 350{ 351 struct unpcb *unp = sotounpcb(so); 352 struct socket *so2; 353 354 if (unp == 0) 355 return (EINVAL); 356 switch (so->so_type) { 357 case SOCK_DGRAM: 358 panic("uipc_rcvd DGRAM?"); 359 /*NOTREACHED*/ 360 361 case SOCK_STREAM: 362#define rcv (&so->so_rcv) 363#define snd (&so2->so_snd) 364 if (unp->unp_conn == 0) 365 break; 366 367 so2 = unp->unp_conn->unp_socket; 368 unp_get_locks_in_order(so, so2); 369 /* 370 * Adjust backpressure on sender 371 * and wakeup any waiting to write. 372 */ 373 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 374 unp->unp_mbcnt = rcv->sb_mbcnt; 375 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 376 unp->unp_cc = rcv->sb_cc; 377 sowwakeup(so2); 378 379 socket_unlock(so2, 1); 380 381#undef snd 382#undef rcv 383 break; 384 385 default: 386 panic("uipc_rcvd unknown socktype"); 387 } 388 return (0); 389} 390 391/* pru_rcvoob is EOPNOTSUPP */ 392 393/* 394 * Returns: 0 Success 395 * EINVAL 396 * EOPNOTSUPP 397 * EPIPE 398 * ENOTCONN 399 * EISCONN 400 * unp_internalize:EINVAL 401 * unp_internalize:EBADF 402 * unp_connect:EAFNOSUPPORT Address family not supported 403 * unp_connect:EINVAL Invalid argument 404 * unp_connect:ENOTSOCK Not a socket 405 * unp_connect:ECONNREFUSED Connection refused 406 * unp_connect:EISCONN Socket is connected 407 * unp_connect:EPROTOTYPE Protocol wrong type for socket 408 * unp_connect:??? 409 * sbappendaddr:ENOBUFS [5th argument, contents modified] 410 * sbappendaddr:??? [whatever a filter author chooses] 411 */ 412static int 413uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 414 struct mbuf *control, proc_t p) 415{ 416 int error = 0; 417 struct unpcb *unp = sotounpcb(so); 418 struct socket *so2; 419 420 if (unp == 0) { 421 error = EINVAL; 422 goto release; 423 } 424 if (flags & PRUS_OOB) { 425 error = EOPNOTSUPP; 426 goto release; 427 } 428 429 if (control) { 430 /* release lock to avoid deadlock (4436174) */ 431 socket_unlock(so, 0); 432 error = unp_internalize(control, p); 433 socket_lock(so, 0); 434 if (error) 435 goto release; 436 } 437 438 switch (so->so_type) { 439 case SOCK_DGRAM: 440 { 441 struct sockaddr *from; 442 443 if (nam) { 444 if (unp->unp_conn) { 445 error = EISCONN; 446 break; 447 } 448 error = unp_connect(so, nam, p); 449 if (error) 450 break; 451 } else { 452 if (unp->unp_conn == 0) { 453 error = ENOTCONN; 454 break; 455 } 456 } 457 458 so2 = unp->unp_conn->unp_socket; 459 if (so != so2) 460 unp_get_locks_in_order(so, so2); 461 462 if (unp->unp_addr) 463 from = (struct sockaddr *)unp->unp_addr; 464 else 465 from = &sun_noname; 466 /* 467 * sbappendaddr() will fail when the receiver runs out of 468 * space; in contrast to SOCK_STREAM, we will lose messages 469 * for the SOCK_DGRAM case when the receiver's queue overflows. 470 * SB_UNIX on the socket buffer implies that the callee will 471 * not free the control message, if any, because we would need 472 * to call unp_dispose() on it. 473 */ 474 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) { 475 control = NULL; 476 sorwakeup(so2); 477 } else if (control != NULL && error == 0) { 478 /* A socket filter took control; don't touch it */ 479 control = NULL; 480 } 481 482 if (so != so2) 483 socket_unlock(so2, 1); 484 485 m = NULL; 486 if (nam) 487 unp_disconnect(unp); 488 break; 489 } 490 491 case SOCK_STREAM: { 492 int didreceive = 0; 493#define rcv (&so2->so_rcv) 494#define snd (&so->so_snd) 495 /* Connect if not connected yet. */ 496 /* 497 * Note: A better implementation would complain 498 * if not equal to the peer's address. 499 */ 500 if ((so->so_state & SS_ISCONNECTED) == 0) { 501 if (nam) { 502 error = unp_connect(so, nam, p); 503 if (error) 504 break; /* XXX */ 505 } else { 506 error = ENOTCONN; 507 break; 508 } 509 } 510 511 if (so->so_state & SS_CANTSENDMORE) { 512 error = EPIPE; 513 break; 514 } 515 if (unp->unp_conn == 0) 516 panic("uipc_send connected but no connection?"); 517 518 so2 = unp->unp_conn->unp_socket; 519 unp_get_locks_in_order(so, so2); 520 521 /* Check socket state again as we might have unlocked the socket 522 * while trying to get the locks in order 523 */ 524 525 if ((so->so_state & SS_CANTSENDMORE)) { 526 error = EPIPE; 527 socket_unlock(so2, 1); 528 break; 529 } 530 531 if (unp->unp_flags & UNP_TRACE_MDNS) { 532 struct mdns_ipc_msg_hdr hdr; 533 534 if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 && 535 hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) { 536 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n", 537 __func__, p->p_pid, p->p_comm, ntohl(hdr.op)); 538 } 539 } 540 541 /* 542 * Send to paired receive port, and then reduce send buffer 543 * hiwater marks to maintain backpressure. Wake up readers. 544 * SB_UNIX flag will allow new record to be appended to the 545 * receiver's queue even when it is already full. It is 546 * possible, however, that append might fail. In that case, 547 * we will need to call unp_dispose() on the control message; 548 * the callee will not free it since SB_UNIX is set. 549 */ 550 didreceive = control ? 551 sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m); 552 553 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 554 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 555 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 556 unp->unp_conn->unp_cc = rcv->sb_cc; 557 if (didreceive) { 558 control = NULL; 559 sorwakeup(so2); 560 } else if (control != NULL && error == 0) { 561 /* A socket filter took control; don't touch it */ 562 control = NULL; 563 } 564 565 socket_unlock(so2, 1); 566 m = NULL; 567#undef snd 568#undef rcv 569 } 570 break; 571 572 default: 573 panic("uipc_send unknown socktype"); 574 } 575 576 /* 577 * SEND_EOF is equivalent to a SEND followed by 578 * a SHUTDOWN. 579 */ 580 if (flags & PRUS_EOF) { 581 socantsendmore(so); 582 unp_shutdown(unp); 583 } 584 585 if (control && error != 0) { 586 socket_unlock(so, 0); 587 unp_dispose(control); 588 socket_lock(so, 0); 589 } 590 591release: 592 if (control) 593 m_freem(control); 594 if (m) 595 m_freem(m); 596 return (error); 597} 598 599static int 600uipc_sense(struct socket *so, void *ub, int isstat64) 601{ 602 struct unpcb *unp = sotounpcb(so); 603 struct socket *so2; 604 blksize_t blksize; 605 606 if (unp == 0) 607 return (EINVAL); 608 609 blksize = so->so_snd.sb_hiwat; 610 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 611 so2 = unp->unp_conn->unp_socket; 612 blksize += so2->so_rcv.sb_cc; 613 } 614 if (unp->unp_ino == 0) 615 unp->unp_ino = unp_ino++; 616 617 if (isstat64 != 0) { 618 struct stat64 *sb64; 619 620 sb64 = (struct stat64 *)ub; 621 sb64->st_blksize = blksize; 622 sb64->st_dev = NODEV; 623 sb64->st_ino = (ino64_t)unp->unp_ino; 624 } else { 625 struct stat *sb; 626 627 sb = (struct stat *)ub; 628 sb->st_blksize = blksize; 629 sb->st_dev = NODEV; 630 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino; 631 } 632 633 return (0); 634} 635 636/* 637 * Returns: 0 Success 638 * EINVAL 639 * 640 * Notes: This is not strictly correct, as unp_shutdown() also calls 641 * socantrcvmore(). These should maybe both be conditionalized 642 * on the 'how' argument in soshutdown() as called from the 643 * shutdown() system call. 644 */ 645static int 646uipc_shutdown(struct socket *so) 647{ 648 struct unpcb *unp = sotounpcb(so); 649 650 if (unp == 0) 651 return (EINVAL); 652 socantsendmore(so); 653 unp_shutdown(unp); 654 return (0); 655} 656 657/* 658 * Returns: 0 Success 659 * EINVAL Invalid argument 660 */ 661static int 662uipc_sockaddr(struct socket *so, struct sockaddr **nam) 663{ 664 struct unpcb *unp = sotounpcb(so); 665 666 if (unp == NULL) 667 return (EINVAL); 668 if (unp->unp_addr != NULL) { 669 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); 670 } else { 671 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 672 } 673 return (0); 674} 675 676struct pr_usrreqs uipc_usrreqs = { 677 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 678 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 679 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 680 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 681 sosend, soreceive, pru_sopoll_notsupp 682}; 683 684int 685uipc_ctloutput(struct socket *so, struct sockopt *sopt) 686{ 687 struct unpcb *unp = sotounpcb(so); 688 int error; 689 690 switch (sopt->sopt_dir) { 691 case SOPT_GET: 692 switch (sopt->sopt_name) { 693 case LOCAL_PEERCRED: 694 if (unp->unp_flags & UNP_HAVEPC) { 695 error = sooptcopyout(sopt, &unp->unp_peercred, 696 sizeof (unp->unp_peercred)); 697 } else { 698 if (so->so_type == SOCK_STREAM) 699 error = ENOTCONN; 700 else 701 error = EINVAL; 702 } 703 break; 704 case LOCAL_PEERPID: 705 if (unp->unp_conn != NULL) { 706 if (unp->unp_conn->unp_socket != NULL) { 707 pid_t peerpid = unp->unp_conn->unp_socket->last_pid; 708 error = sooptcopyout(sopt, &peerpid, sizeof (peerpid)); 709 } else { 710 panic("peer is connected but has no socket?"); 711 } 712 } else { 713 error = ENOTCONN; 714 } 715 break; 716 default: 717 error = EOPNOTSUPP; 718 break; 719 } 720 break; 721 case SOPT_SET: 722 default: 723 error = EOPNOTSUPP; 724 break; 725 } 726 return (error); 727} 728 729/* 730 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 731 * for stream sockets, although the total for sender and receiver is 732 * actually only PIPSIZ. 733 * Datagram sockets really use the sendspace as the maximum datagram size, 734 * and don't really want to reserve the sendspace. Their recvspace should 735 * be large enough for at least one max-size datagram plus address. 736 */ 737#ifndef PIPSIZ 738#define PIPSIZ 8192 739#endif 740static u_int32_t unpst_sendspace = PIPSIZ; 741static u_int32_t unpst_recvspace = PIPSIZ; 742static u_int32_t unpdg_sendspace = 2*1024; /* really max datagram size */ 743static u_int32_t unpdg_recvspace = 4*1024; 744 745static int unp_rights; /* file descriptors in flight */ 746static int unp_disposed; /* discarded file descriptors */ 747 748SYSCTL_DECL(_net_local_stream); 749SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED, 750 &unpst_sendspace, 0, ""); 751SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED, 752 &unpst_recvspace, 0, ""); 753SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED, 754 &unpst_tracemdns, 0, ""); 755SYSCTL_DECL(_net_local_dgram); 756SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED, 757 &unpdg_sendspace, 0, ""); 758SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED, 759 &unpdg_recvspace, 0, ""); 760SYSCTL_DECL(_net_local); 761SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, ""); 762 763/* 764 * Returns: 0 Success 765 * ENOBUFS 766 * soreserve:ENOBUFS 767 */ 768static int 769unp_attach(struct socket *so) 770{ 771 struct unpcb *unp; 772 int error = 0; 773 774 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 775 switch (so->so_type) { 776 777 case SOCK_STREAM: 778 error = soreserve(so, unpst_sendspace, unpst_recvspace); 779 break; 780 781 case SOCK_DGRAM: 782 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 783 break; 784 785 default: 786 panic("unp_attach"); 787 } 788 if (error) 789 return (error); 790 } 791 unp = (struct unpcb *)zalloc(unp_zone); 792 if (unp == NULL) 793 return (ENOBUFS); 794 bzero(unp, sizeof (*unp)); 795 796 lck_mtx_init(&unp->unp_mtx, 797 unp_mtx_grp, unp_mtx_attr); 798 799 lck_rw_lock_exclusive(unp_list_mtx); 800 LIST_INIT(&unp->unp_refs); 801 unp->unp_socket = so; 802 unp->unp_gencnt = ++unp_gencnt; 803 unp_count++; 804 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 805 &unp_dhead : &unp_shead, unp, unp_link); 806 lck_rw_done(unp_list_mtx); 807 so->so_pcb = (caddr_t)unp; 808 /* 809 * Mark AF_UNIX socket buffers accordingly so that: 810 * 811 * a. In the SOCK_STREAM case, socket buffer append won't fail due to 812 * the lack of space; this essentially loosens the sbspace() check, 813 * since there is disconnect between sosend() and uipc_send() with 814 * respect to flow control that might result in our dropping the 815 * data in uipc_send(). By setting this, we allow for slightly 816 * more records to be appended to the receiving socket to avoid 817 * losing data (which we can't afford in the SOCK_STREAM case). 818 * Flow control still takes place since we adjust the sender's 819 * hiwat during each send. This doesn't affect the SOCK_DGRAM 820 * case and append would still fail when the queue overflows. 821 * 822 * b. In the presence of control messages containing internalized 823 * file descriptors, the append routines will not free them since 824 * we'd need to undo the work first via unp_dispose(). 825 */ 826 so->so_rcv.sb_flags |= SB_UNIX; 827 so->so_snd.sb_flags |= SB_UNIX; 828 return (0); 829} 830 831static void 832unp_detach(struct unpcb *unp) 833{ 834 int so_locked = 1; 835 836 lck_rw_lock_exclusive(unp_list_mtx); 837 LIST_REMOVE(unp, unp_link); 838 --unp_count; 839 ++unp_gencnt; 840 lck_rw_done(unp_list_mtx); 841 if (unp->unp_vnode) { 842 struct vnode *tvp = NULL; 843 socket_unlock(unp->unp_socket, 0); 844 845 /* Holding unp_connect_lock will avoid a race between 846 * a thread closing the listening socket and a thread 847 * connecting to it. 848 */ 849 lck_mtx_lock(unp_connect_lock); 850 socket_lock(unp->unp_socket, 0); 851 if (unp->unp_vnode) { 852 tvp = unp->unp_vnode; 853 unp->unp_vnode->v_socket = NULL; 854 unp->unp_vnode = NULL; 855 } 856 lck_mtx_unlock(unp_connect_lock); 857 if (tvp != NULL) 858 vnode_rele(tvp); /* drop the usecount */ 859 } 860 if (unp->unp_conn) 861 unp_disconnect(unp); 862 while (unp->unp_refs.lh_first) { 863 struct unpcb *unp2 = NULL; 864 865 /* This datagram socket is connected to one or more 866 * sockets. In order to avoid a race condition between removing 867 * this reference and closing the connected socket, we need 868 * to check disconnect_in_progress 869 */ 870 if (so_locked == 1) { 871 socket_unlock(unp->unp_socket, 0); 872 so_locked = 0; 873 } 874 lck_mtx_lock(unp_disconnect_lock); 875 while (disconnect_in_progress != 0) { 876 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock, 877 PSOCK, "disconnect", NULL); 878 } 879 disconnect_in_progress = 1; 880 lck_mtx_unlock(unp_disconnect_lock); 881 882 /* Now we are sure that any unpcb socket disconnect is not happening */ 883 if (unp->unp_refs.lh_first != NULL) { 884 unp2 = unp->unp_refs.lh_first; 885 socket_lock(unp2->unp_socket, 1); 886 } 887 888 lck_mtx_lock(unp_disconnect_lock); 889 disconnect_in_progress = 0; 890 wakeup(&disconnect_in_progress); 891 lck_mtx_unlock(unp_disconnect_lock); 892 893 if (unp2 != NULL) { 894 /* We already locked this socket and have a reference on it */ 895 unp_drop(unp2, ECONNRESET); 896 socket_unlock(unp2->unp_socket, 1); 897 } 898 } 899 900 if (so_locked == 0) { 901 socket_lock(unp->unp_socket, 0); 902 so_locked = 1; 903 } 904 soisdisconnected(unp->unp_socket); 905 /* makes sure we're getting dealloced */ 906 unp->unp_socket->so_flags |= SOF_PCBCLEARING; 907} 908 909/* 910 * Returns: 0 Success 911 * EAFNOSUPPORT 912 * EINVAL 913 * EADDRINUSE 914 * namei:??? [anything namei can return] 915 * vnode_authorize:??? [anything vnode_authorize can return] 916 * 917 * Notes: p at this point is the current process, as this function is 918 * only called by sobind(). 919 */ 920static int 921unp_bind( 922 struct unpcb *unp, 923 struct sockaddr *nam, 924 proc_t p) 925{ 926 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 927 struct vnode *vp, *dvp; 928 struct vnode_attr va; 929 vfs_context_t ctx = vfs_context_current(); 930 int error, namelen; 931 struct nameidata nd; 932 struct socket *so = unp->unp_socket; 933 char buf[SOCK_MAXADDRLEN]; 934 935 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) { 936 return (EAFNOSUPPORT); 937 } 938 939 if (unp->unp_vnode != NULL) 940 return (EINVAL); 941 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 942 if (namelen <= 0) 943 return (EINVAL); 944 945 socket_unlock(so, 0); 946 947 strlcpy(buf, soun->sun_path, namelen+1); 948 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 949 CAST_USER_ADDR_T(buf), ctx); 950 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 951 error = namei(&nd); 952 if (error) { 953 socket_lock(so, 0); 954 return (error); 955 } 956 dvp = nd.ni_dvp; 957 vp = nd.ni_vp; 958 959 if (vp != NULL) { 960 /* 961 * need to do this before the vnode_put of dvp 962 * since we may have to release an fs_nodelock 963 */ 964 nameidone(&nd); 965 966 vnode_put(dvp); 967 vnode_put(vp); 968 969 socket_lock(so, 0); 970 return (EADDRINUSE); 971 } 972 973 VATTR_INIT(&va); 974 VATTR_SET(&va, va_type, VSOCK); 975 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask)); 976 977#if CONFIG_MACF 978 error = mac_vnode_check_create(ctx, 979 nd.ni_dvp, &nd.ni_cnd, &va); 980 981 if (error == 0) 982#endif /* CONFIG_MACF */ 983#if CONFIG_MACF_SOCKET_SUBSET 984 error = mac_vnode_check_uipc_bind(ctx, 985 nd.ni_dvp, &nd.ni_cnd, &va); 986 987 if (error == 0) 988#endif /* MAC_SOCKET_SUBSET */ 989 /* authorize before creating */ 990 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx); 991 992 if (!error) { 993 /* create the socket */ 994 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx); 995 } 996 997 nameidone(&nd); 998 vnode_put(dvp); 999 1000 if (error) { 1001 socket_lock(so, 0); 1002 return (error); 1003 } 1004 vnode_ref(vp); /* gain a longterm reference */ 1005 socket_lock(so, 0); 1006 vp->v_socket = unp->unp_socket; 1007 unp->unp_vnode = vp; 1008 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); 1009 vnode_put(vp); /* drop the iocount */ 1010 1011 return (0); 1012} 1013 1014 1015/* 1016 * Returns: 0 Success 1017 * EAFNOSUPPORT Address family not supported 1018 * EINVAL Invalid argument 1019 * ENOTSOCK Not a socket 1020 * ECONNREFUSED Connection refused 1021 * EPROTOTYPE Protocol wrong type for socket 1022 * EISCONN Socket is connected 1023 * unp_connect2:EPROTOTYPE Protocol wrong type for socket 1024 * unp_connect2:EINVAL Invalid argument 1025 * namei:??? [anything namei can return] 1026 * vnode_authorize:???? [anything vnode_authorize can return] 1027 * 1028 * Notes: p at this point is the current process, as this function is 1029 * only called by sosend(), sendfile(), and soconnectlock(). 1030 */ 1031static int 1032unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p) 1033{ 1034 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 1035 struct vnode *vp; 1036 struct socket *so2, *so3, *list_so=NULL; 1037 struct unpcb *unp, *unp2, *unp3; 1038 vfs_context_t ctx = vfs_context_current(); 1039 int error, len; 1040 struct nameidata nd; 1041 char buf[SOCK_MAXADDRLEN]; 1042 1043 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) { 1044 return (EAFNOSUPPORT); 1045 } 1046 1047 unp = sotounpcb(so); 1048 so2 = so3 = NULL; 1049 1050 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 1051 if (len <= 0) 1052 return (EINVAL); 1053 1054 strlcpy(buf, soun->sun_path, len+1); 1055 socket_unlock(so, 0); 1056 1057 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, 1058 CAST_USER_ADDR_T(buf), ctx); 1059 error = namei(&nd); 1060 if (error) { 1061 socket_lock(so, 0); 1062 return (error); 1063 } 1064 nameidone(&nd); 1065 vp = nd.ni_vp; 1066 if (vp->v_type != VSOCK) { 1067 error = ENOTSOCK; 1068 socket_lock(so, 0); 1069 goto out; 1070 } 1071 1072#if CONFIG_MACF_SOCKET_SUBSET 1073 error = mac_vnode_check_uipc_connect(ctx, vp); 1074 if (error) { 1075 socket_lock(so, 0); 1076 goto out; 1077 } 1078#endif /* MAC_SOCKET_SUBSET */ 1079 1080 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx); 1081 if (error) { 1082 socket_lock(so, 0); 1083 goto out; 1084 } 1085 1086 lck_mtx_lock(unp_connect_lock); 1087 1088 if (vp->v_socket == 0) { 1089 lck_mtx_unlock(unp_connect_lock); 1090 error = ECONNREFUSED; 1091 socket_lock(so, 0); 1092 goto out; 1093 } 1094 1095 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */ 1096 so2 = vp->v_socket; 1097 lck_mtx_unlock(unp_connect_lock); 1098 1099 1100 if (so2->so_pcb == NULL) { 1101 error = ECONNREFUSED; 1102 if (so != so2) { 1103 socket_unlock(so2, 1); 1104 socket_lock(so, 0); 1105 } else { 1106 /* Release the reference held for the listen socket */ 1107 so2->so_usecount--; 1108 } 1109 goto out; 1110 } 1111 1112 if (so < so2) { 1113 socket_unlock(so2, 0); 1114 socket_lock(so, 0); 1115 socket_lock(so2, 0); 1116 } else if (so > so2) { 1117 socket_lock(so, 0); 1118 } 1119 /* 1120 * Check if socket was connected while we were trying to 1121 * get the socket locks in order. 1122 * XXX - probably shouldn't return an error for SOCK_DGRAM 1123 */ 1124 if ((so->so_state & SS_ISCONNECTED) != 0) { 1125 error = EISCONN; 1126 goto decref_out; 1127 } 1128 1129 if (so->so_type != so2->so_type) { 1130 error = EPROTOTYPE; 1131 goto decref_out; 1132 } 1133 1134 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 1135 /* Release the incoming socket but keep a reference */ 1136 socket_unlock(so, 0); 1137 1138 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 1139 (so3 = sonewconn(so2, 0, nam)) == 0) { 1140 error = ECONNREFUSED; 1141 if (so != so2) { 1142 socket_unlock(so2, 1); 1143 socket_lock(so, 0); 1144 } else { 1145 socket_lock(so, 0); 1146 /* Release the reference held for 1147 * listen socket. 1148 */ 1149 so2->so_usecount--; 1150 } 1151 goto out; 1152 } 1153 unp2 = sotounpcb(so2); 1154 unp3 = sotounpcb(so3); 1155 if (unp2->unp_addr) 1156 unp3->unp_addr = (struct sockaddr_un *) 1157 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1); 1158 1159 /* 1160 * unp_peercred management: 1161 * 1162 * The connecter's (client's) credentials are copied 1163 * from its process structure at the time of connect() 1164 * (which is now). 1165 */ 1166 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred); 1167 unp3->unp_flags |= UNP_HAVEPC; 1168 /* 1169 * The receiver's (server's) credentials are copied 1170 * from the unp_peercred member of socket on which the 1171 * former called listen(); unp_listen() cached that 1172 * process's credentials at that time so we can use 1173 * them now. 1174 */ 1175 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 1176 ("unp_connect: listener without cached peercred")); 1177 1178 /* Here we need to have both so and so2 locks and so2 1179 * is already locked. Lock ordering is required. 1180 */ 1181 if (so < so2) { 1182 socket_unlock(so2, 0); 1183 socket_lock(so, 0); 1184 socket_lock(so2, 0); 1185 } else { 1186 socket_lock(so, 0); 1187 } 1188 1189 /* Check again if the socket state changed when its lock was released */ 1190 if ((so->so_state & SS_ISCONNECTED) != 0) { 1191 error = EISCONN; 1192 socket_unlock(so2, 1); 1193 socket_lock(so3, 0); 1194 sofreelastref(so3, 1); 1195 goto out; 1196 } 1197 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 1198 sizeof (unp->unp_peercred)); 1199 unp->unp_flags |= UNP_HAVEPC; 1200 1201#if CONFIG_MACF_SOCKET 1202 /* XXXMAC: recursive lock: SOCK_LOCK(so); */ 1203 mac_socketpeer_label_associate_socket(so, so3); 1204 mac_socketpeer_label_associate_socket(so3, so); 1205 /* XXXMAC: SOCK_UNLOCK(so); */ 1206#endif /* MAC_SOCKET */ 1207 1208 /* Hold the reference on listening socket until the end */ 1209 socket_unlock(so2, 0); 1210 list_so = so2; 1211 1212 /* Lock ordering doesn't matter because so3 was just created */ 1213 socket_lock(so3, 1); 1214 so2 = so3; 1215 1216 /* 1217 * Enable tracing for mDNSResponder endpoints. (The use 1218 * of sizeof instead of strlen below takes the null 1219 * terminating character into account.) 1220 */ 1221 if (unpst_tracemdns && 1222 !strncmp(soun->sun_path, MDNSRESPONDER_PATH, 1223 sizeof (MDNSRESPONDER_PATH))) { 1224 unp->unp_flags |= UNP_TRACE_MDNS; 1225 unp2->unp_flags |= UNP_TRACE_MDNS; 1226 } 1227 } 1228 1229 error = unp_connect2(so, so2); 1230 1231decref_out: 1232 if (so2 != NULL) { 1233 if (so != so2) { 1234 socket_unlock(so2, 1); 1235 } else { 1236 /* Release the extra reference held for the listen socket. 1237 * This is possible only for SOCK_DGRAM sockets. We refuse 1238 * connecting to the same socket for SOCK_STREAM sockets. 1239 */ 1240 so2->so_usecount--; 1241 } 1242 } 1243 1244 if (list_so != NULL) { 1245 socket_lock(list_so, 0); 1246 socket_unlock(list_so, 1); 1247 } 1248 1249out: 1250 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED); 1251 vnode_put(vp); 1252 return (error); 1253} 1254 1255/* 1256 * Returns: 0 Success 1257 * EPROTOTYPE Protocol wrong type for socket 1258 * EINVAL Invalid argument 1259 */ 1260int 1261unp_connect2(struct socket *so, struct socket *so2) 1262{ 1263 struct unpcb *unp = sotounpcb(so); 1264 struct unpcb *unp2; 1265 1266 if (so2->so_type != so->so_type) 1267 return (EPROTOTYPE); 1268 1269 unp2 = sotounpcb(so2); 1270 1271 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED); 1272 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED); 1273 1274 /* Verify both sockets are still opened */ 1275 if (unp == 0 || unp2 == 0) 1276 return (EINVAL); 1277 1278 unp->unp_conn = unp2; 1279 so2->so_usecount++; 1280 1281 switch (so->so_type) { 1282 1283 case SOCK_DGRAM: 1284 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1285 1286 if (so != so2) { 1287 /* Avoid lock order reversals due to drop/acquire in soisconnected. */ 1288 /* Keep an extra reference on so2 that will be dropped 1289 * soon after getting the locks in order 1290 */ 1291 socket_unlock(so2, 0); 1292 soisconnected(so); 1293 unp_get_locks_in_order(so, so2); 1294 so2->so_usecount--; 1295 } else { 1296 soisconnected(so); 1297 } 1298 1299 break; 1300 1301 case SOCK_STREAM: 1302 /* This takes care of socketpair */ 1303 if (!(unp->unp_flags & UNP_HAVEPC) && 1304 !(unp2->unp_flags & UNP_HAVEPC)) { 1305 cru2x(kauth_cred_get(), &unp->unp_peercred); 1306 unp->unp_flags |= UNP_HAVEPC; 1307 1308 cru2x(kauth_cred_get(), &unp2->unp_peercred); 1309 unp2->unp_flags |= UNP_HAVEPC; 1310 } 1311 unp2->unp_conn = unp; 1312 so->so_usecount++; 1313 1314 /* Avoid lock order reversals due to drop/acquire in soisconnected. */ 1315 socket_unlock(so, 0); 1316 soisconnected(so2); 1317 1318 /* Keep an extra reference on so2, that will be dropped soon after 1319 * getting the locks in order again. 1320 */ 1321 socket_unlock(so2, 0); 1322 1323 socket_lock(so, 0); 1324 soisconnected(so); 1325 1326 unp_get_locks_in_order(so, so2); 1327 /* Decrement the extra reference left before */ 1328 so2->so_usecount--; 1329 break; 1330 1331 default: 1332 panic("unknown socket type %d in unp_connect2", so->so_type); 1333 } 1334 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED); 1335 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED); 1336 return (0); 1337} 1338 1339static void 1340unp_disconnect(struct unpcb *unp) 1341{ 1342 struct unpcb *unp2 = NULL; 1343 struct socket *so2 = NULL, *so; 1344 struct socket *waitso; 1345 int so_locked = 1, strdisconn = 0; 1346 1347 so = unp->unp_socket; 1348 if (unp->unp_conn == NULL) { 1349 return; 1350 } 1351 lck_mtx_lock(unp_disconnect_lock); 1352 while (disconnect_in_progress != 0) { 1353 if (so_locked == 1) { 1354 socket_unlock(so, 0); 1355 so_locked = 0; 1356 } 1357 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock, 1358 PSOCK, "disconnect", NULL); 1359 } 1360 disconnect_in_progress = 1; 1361 lck_mtx_unlock(unp_disconnect_lock); 1362 1363 if (so_locked == 0) { 1364 socket_lock(so, 0); 1365 so_locked = 1; 1366 } 1367 1368 unp2 = unp->unp_conn; 1369 1370 if (unp2 == 0 || unp2->unp_socket == NULL) { 1371 goto out; 1372 } 1373 so2 = unp2->unp_socket; 1374 1375try_again: 1376 if (so == so2) { 1377 if (so_locked == 0) { 1378 socket_lock(so, 0); 1379 } 1380 waitso = so; 1381 } else if (so < so2) { 1382 if (so_locked == 0) { 1383 socket_lock(so, 0); 1384 } 1385 socket_lock(so2, 1); 1386 waitso = so2; 1387 } else { 1388 if (so_locked == 1) { 1389 socket_unlock(so, 0); 1390 } 1391 socket_lock(so2, 1); 1392 socket_lock(so, 0); 1393 waitso = so; 1394 } 1395 so_locked = 1; 1396 1397 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED); 1398 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED); 1399 1400 /* Check for the UNP_DONTDISCONNECT flag, if it 1401 * is set, release both sockets and go to sleep 1402 */ 1403 1404 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) { 1405 if (so != so2) { 1406 socket_unlock(so2, 1); 1407 } 1408 so_locked = 0; 1409 1410 (void)msleep(waitso->so_pcb, &unp->unp_mtx, 1411 PSOCK | PDROP, "unpdisconnect", NULL); 1412 goto try_again; 1413 } 1414 1415 if (unp->unp_conn == NULL) { 1416 panic("unp_conn became NULL after sleep"); 1417 } 1418 1419 unp->unp_conn = NULL; 1420 so2->so_usecount--; 1421 1422 if (unp->unp_flags & UNP_TRACE_MDNS) 1423 unp->unp_flags &= ~UNP_TRACE_MDNS; 1424 1425 switch (unp->unp_socket->so_type) { 1426 1427 case SOCK_DGRAM: 1428 LIST_REMOVE(unp, unp_reflink); 1429 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 1430 if (so != so2) 1431 socket_unlock(so2, 1); 1432 break; 1433 1434 case SOCK_STREAM: 1435 unp2->unp_conn = NULL; 1436 so->so_usecount--; 1437 1438 /* Set the socket state correctly but do a wakeup later when 1439 * we release all locks except the socket lock, this will avoid 1440 * a deadlock. 1441 */ 1442 unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 1443 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 1444 1445 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 1446 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 1447 1448 if (unp2->unp_flags & UNP_TRACE_MDNS) 1449 unp2->unp_flags &= ~UNP_TRACE_MDNS; 1450 1451 strdisconn = 1; 1452 break; 1453 default: 1454 panic("unknown socket type %d", so->so_type); 1455 } 1456out: 1457 lck_mtx_lock(unp_disconnect_lock); 1458 disconnect_in_progress = 0; 1459 wakeup(&disconnect_in_progress); 1460 lck_mtx_unlock(unp_disconnect_lock); 1461 1462 if (strdisconn) { 1463 socket_unlock(so, 0); 1464 soisdisconnected(so2); 1465 socket_unlock(so2, 1); 1466 1467 socket_lock(so,0); 1468 soisdisconnected(so); 1469 } 1470 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED); 1471 return; 1472} 1473 1474/* 1475 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format. 1476 * The unpcb_compat data structure is passed to user space and must not change. 1477 */ 1478static void 1479unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp) 1480{ 1481#if defined(__LP64__) 1482 cp->unp_link.le_next = (u_int32_t) 1483 VM_KERNEL_ADDRPERM(up->unp_link.le_next); 1484 cp->unp_link.le_prev = (u_int32_t) 1485 VM_KERNEL_ADDRPERM(up->unp_link.le_prev); 1486#else 1487 cp->unp_link.le_next = (struct unpcb_compat *) 1488 VM_KERNEL_ADDRPERM(up->unp_link.le_next); 1489 cp->unp_link.le_prev = (struct unpcb_compat **) 1490 VM_KERNEL_ADDRPERM(up->unp_link.le_prev); 1491#endif 1492 cp->unp_socket = (_UNPCB_PTR(struct socket *)) 1493 VM_KERNEL_ADDRPERM(up->unp_socket); 1494 cp->unp_vnode = (_UNPCB_PTR(struct vnode *)) 1495 VM_KERNEL_ADDRPERM(up->unp_vnode); 1496 cp->unp_ino = up->unp_ino; 1497 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *)) 1498 VM_KERNEL_ADDRPERM(up->unp_conn); 1499 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first); 1500#if defined(__LP64__) 1501 cp->unp_reflink.le_next = 1502 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next); 1503 cp->unp_reflink.le_prev = 1504 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev); 1505#else 1506 cp->unp_reflink.le_next = 1507 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next); 1508 cp->unp_reflink.le_prev = 1509 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev); 1510#endif 1511 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *)) 1512 VM_KERNEL_ADDRPERM(up->unp_addr); 1513 cp->unp_cc = up->unp_cc; 1514 cp->unp_mbcnt = up->unp_mbcnt; 1515 cp->unp_gencnt = up->unp_gencnt; 1516} 1517 1518static int 1519unp_pcblist SYSCTL_HANDLER_ARGS 1520{ 1521#pragma unused(oidp,arg2) 1522 int error, i, n; 1523 struct unpcb *unp, **unp_list; 1524 unp_gen_t gencnt; 1525 struct xunpgen xug; 1526 struct unp_head *head; 1527 1528 lck_rw_lock_shared(unp_list_mtx); 1529 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1530 1531 /* 1532 * The process of preparing the PCB list is too time-consuming and 1533 * resource-intensive to repeat twice on every request. 1534 */ 1535 if (req->oldptr == USER_ADDR_NULL) { 1536 n = unp_count; 1537 req->oldidx = 2 * sizeof (xug) + (n + n / 8) * 1538 sizeof (struct xunpcb); 1539 lck_rw_done(unp_list_mtx); 1540 return (0); 1541 } 1542 1543 if (req->newptr != USER_ADDR_NULL) { 1544 lck_rw_done(unp_list_mtx); 1545 return (EPERM); 1546 } 1547 1548 /* 1549 * OK, now we're committed to doing something. 1550 */ 1551 gencnt = unp_gencnt; 1552 n = unp_count; 1553 1554 bzero(&xug, sizeof (xug)); 1555 xug.xug_len = sizeof (xug); 1556 xug.xug_count = n; 1557 xug.xug_gen = gencnt; 1558 xug.xug_sogen = so_gencnt; 1559 error = SYSCTL_OUT(req, &xug, sizeof (xug)); 1560 if (error) { 1561 lck_rw_done(unp_list_mtx); 1562 return (error); 1563 } 1564 1565 /* 1566 * We are done if there is no pcb 1567 */ 1568 if (n == 0) { 1569 lck_rw_done(unp_list_mtx); 1570 return (0); 1571 } 1572 1573 MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list), 1574 M_TEMP, M_WAITOK); 1575 if (unp_list == 0) { 1576 lck_rw_done(unp_list_mtx); 1577 return (ENOMEM); 1578 } 1579 1580 for (unp = head->lh_first, i = 0; unp && i < n; 1581 unp = unp->unp_link.le_next) { 1582 if (unp->unp_gencnt <= gencnt) 1583 unp_list[i++] = unp; 1584 } 1585 n = i; /* in case we lost some during malloc */ 1586 1587 error = 0; 1588 for (i = 0; i < n; i++) { 1589 unp = unp_list[i]; 1590 if (unp->unp_gencnt <= gencnt) { 1591 struct xunpcb xu; 1592 1593 bzero(&xu, sizeof (xu)); 1594 xu.xu_len = sizeof (xu); 1595 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *)) 1596 VM_KERNEL_ADDRPERM(unp); 1597 /* 1598 * XXX - need more locking here to protect against 1599 * connect/disconnect races for SMP. 1600 */ 1601 if (unp->unp_addr) 1602 bcopy(unp->unp_addr, &xu.xu_addr, 1603 unp->unp_addr->sun_len); 1604 if (unp->unp_conn && unp->unp_conn->unp_addr) 1605 bcopy(unp->unp_conn->unp_addr, 1606 &xu.xu_caddr, 1607 unp->unp_conn->unp_addr->sun_len); 1608 unpcb_to_compat(unp, &xu.xu_unp); 1609 sotoxsocket(unp->unp_socket, &xu.xu_socket); 1610 error = SYSCTL_OUT(req, &xu, sizeof (xu)); 1611 } 1612 } 1613 if (!error) { 1614 /* 1615 * Give the user an updated idea of our state. 1616 * If the generation differs from what we told 1617 * them before, they know that something happened 1618 * while we were processing this request, and it 1619 * might be necessary to retry. 1620 */ 1621 bzero(&xug, sizeof (xug)); 1622 xug.xug_len = sizeof (xug); 1623 xug.xug_gen = unp_gencnt; 1624 xug.xug_sogen = so_gencnt; 1625 xug.xug_count = unp_count; 1626 error = SYSCTL_OUT(req, &xug, sizeof (xug)); 1627 } 1628 FREE(unp_list, M_TEMP); 1629 lck_rw_done(unp_list_mtx); 1630 return (error); 1631} 1632 1633SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 1634 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1635 "List of active local datagram sockets"); 1636SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 1637 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1638 "List of active local stream sockets"); 1639 1640#if !CONFIG_EMBEDDED 1641 1642static int 1643unp_pcblist64 SYSCTL_HANDLER_ARGS 1644{ 1645#pragma unused(oidp,arg2) 1646 int error, i, n; 1647 struct unpcb *unp, **unp_list; 1648 unp_gen_t gencnt; 1649 struct xunpgen xug; 1650 struct unp_head *head; 1651 1652 lck_rw_lock_shared(unp_list_mtx); 1653 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1654 1655 /* 1656 * The process of preparing the PCB list is too time-consuming and 1657 * resource-intensive to repeat twice on every request. 1658 */ 1659 if (req->oldptr == USER_ADDR_NULL) { 1660 n = unp_count; 1661 req->oldidx = 2 * sizeof (xug) + (n + n / 8) * 1662 (sizeof (struct xunpcb64)); 1663 lck_rw_done(unp_list_mtx); 1664 return (0); 1665 } 1666 1667 if (req->newptr != USER_ADDR_NULL) { 1668 lck_rw_done(unp_list_mtx); 1669 return (EPERM); 1670 } 1671 1672 /* 1673 * OK, now we're committed to doing something. 1674 */ 1675 gencnt = unp_gencnt; 1676 n = unp_count; 1677 1678 bzero(&xug, sizeof (xug)); 1679 xug.xug_len = sizeof (xug); 1680 xug.xug_count = n; 1681 xug.xug_gen = gencnt; 1682 xug.xug_sogen = so_gencnt; 1683 error = SYSCTL_OUT(req, &xug, sizeof (xug)); 1684 if (error) { 1685 lck_rw_done(unp_list_mtx); 1686 return (error); 1687 } 1688 1689 /* 1690 * We are done if there is no pcb 1691 */ 1692 if (n == 0) { 1693 lck_rw_done(unp_list_mtx); 1694 return (0); 1695 } 1696 1697 MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list), 1698 M_TEMP, M_WAITOK); 1699 if (unp_list == 0) { 1700 lck_rw_done(unp_list_mtx); 1701 return (ENOMEM); 1702 } 1703 1704 for (unp = head->lh_first, i = 0; unp && i < n; 1705 unp = unp->unp_link.le_next) { 1706 if (unp->unp_gencnt <= gencnt) 1707 unp_list[i++] = unp; 1708 } 1709 n = i; /* in case we lost some during malloc */ 1710 1711 error = 0; 1712 for (i = 0; i < n; i++) { 1713 unp = unp_list[i]; 1714 if (unp->unp_gencnt <= gencnt) { 1715 struct xunpcb64 xu; 1716 size_t xu_len = sizeof(struct xunpcb64); 1717 1718 bzero(&xu, xu_len); 1719 xu.xu_len = xu_len; 1720 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp); 1721 xu.xunp_link.le_next = (u_int64_t) 1722 VM_KERNEL_ADDRPERM(unp->unp_link.le_next); 1723 xu.xunp_link.le_prev = (u_int64_t) 1724 VM_KERNEL_ADDRPERM(unp->unp_link.le_prev); 1725 xu.xunp_socket = (u_int64_t) 1726 VM_KERNEL_ADDRPERM(unp->unp_socket); 1727 xu.xunp_vnode = (u_int64_t) 1728 VM_KERNEL_ADDRPERM(unp->unp_vnode); 1729 xu.xunp_ino = unp->unp_ino; 1730 xu.xunp_conn = (u_int64_t) 1731 VM_KERNEL_ADDRPERM(unp->unp_conn); 1732 xu.xunp_refs = (u_int64_t) 1733 VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first); 1734 xu.xunp_reflink.le_next = (u_int64_t) 1735 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next); 1736 xu.xunp_reflink.le_prev = (u_int64_t) 1737 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev); 1738 xu.xunp_cc = unp->unp_cc; 1739 xu.xunp_mbcnt = unp->unp_mbcnt; 1740 xu.xunp_gencnt = unp->unp_gencnt; 1741 1742 if (unp->unp_socket) 1743 sotoxsocket64(unp->unp_socket, &xu.xu_socket); 1744 1745 /* 1746 * XXX - need more locking here to protect against 1747 * connect/disconnect races for SMP. 1748 */ 1749 if (unp->unp_addr) 1750 bcopy(unp->unp_addr, &xu.xunp_addr, 1751 unp->unp_addr->sun_len); 1752 if (unp->unp_conn && unp->unp_conn->unp_addr) 1753 bcopy(unp->unp_conn->unp_addr, 1754 &xu.xunp_caddr, 1755 unp->unp_conn->unp_addr->sun_len); 1756 1757 error = SYSCTL_OUT(req, &xu, xu_len); 1758 } 1759 } 1760 if (!error) { 1761 /* 1762 * Give the user an updated idea of our state. 1763 * If the generation differs from what we told 1764 * her before, she knows that something happened 1765 * while we were processing this request, and it 1766 * might be necessary to retry. 1767 */ 1768 bzero(&xug, sizeof (xug)); 1769 xug.xug_len = sizeof (xug); 1770 xug.xug_gen = unp_gencnt; 1771 xug.xug_sogen = so_gencnt; 1772 xug.xug_count = unp_count; 1773 error = SYSCTL_OUT(req, &xug, sizeof (xug)); 1774 } 1775 FREE(unp_list, M_TEMP); 1776 lck_rw_done(unp_list_mtx); 1777 return (error); 1778} 1779 1780SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 1781 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64", 1782 "List of active local datagram sockets 64 bit"); 1783SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 1784 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64", 1785 "List of active local stream sockets 64 bit"); 1786 1787#endif /* !CONFIG_EMBEDDED */ 1788 1789static void 1790unp_shutdown(struct unpcb *unp) 1791{ 1792 struct socket *so = unp->unp_socket; 1793 struct socket *so2; 1794 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) { 1795 so2 = unp->unp_conn->unp_socket; 1796 unp_get_locks_in_order(so, so2); 1797 socantrcvmore(so2); 1798 socket_unlock(so2, 1); 1799 } 1800} 1801 1802static void 1803unp_drop(struct unpcb *unp, int errno) 1804{ 1805 struct socket *so = unp->unp_socket; 1806 1807 so->so_error = errno; 1808 unp_disconnect(unp); 1809} 1810 1811/* 1812 * Returns: 0 Success 1813 * EMSGSIZE The new fd's will not fit 1814 * ENOBUFS Cannot alloc struct fileproc 1815 */ 1816int 1817unp_externalize(struct mbuf *rights) 1818{ 1819 proc_t p = current_proc(); /* XXX */ 1820 int i; 1821 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 1822 struct fileglob **rp = (struct fileglob **)(cm + 1); 1823 int *fds = (int *)(cm + 1); 1824 struct fileproc *fp; 1825 struct fileglob *fg; 1826 int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 1827 int f; 1828 1829 proc_fdlock(p); 1830 1831 /* 1832 * if the new FD's will not fit, then we free them all 1833 */ 1834 if (!fdavail(p, newfds)) { 1835 for (i = 0; i < newfds; i++) { 1836 fg = *rp; 1837 unp_discard_fdlocked(fg, p); 1838 *rp++ = NULL; 1839 } 1840 proc_fdunlock(p); 1841 1842 return (EMSGSIZE); 1843 } 1844 /* 1845 * now change each pointer to an fd in the global table to 1846 * an integer that is the index to the local fd table entry 1847 * that we set up to point to the global one we are transferring. 1848 * XXX (1) this assumes a pointer and int are the same size, 1849 * XXX or the mbuf can hold the expansion 1850 * XXX (2) allocation failures should be non-fatal 1851 */ 1852 for (i = 0; i < newfds; i++) { 1853#if CONFIG_MACF_SOCKET 1854 /* 1855 * If receive access is denied, don't pass along 1856 * and error message, just discard the descriptor. 1857 */ 1858 if (mac_file_check_receive(kauth_cred_get(), *rp)) { 1859 fg = *rp; 1860 *rp++ = 0; 1861 unp_discard_fdlocked(fg, p); 1862 continue; 1863 } 1864#endif 1865 if (fdalloc(p, 0, &f)) 1866 panic("unp_externalize:fdalloc"); 1867 fg = rp[i]; 1868 MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc), 1869 M_FILEPROC, M_WAITOK); 1870 if (fp == NULL) 1871 panic("unp_externalize: MALLOC_ZONE"); 1872 bzero(fp, sizeof (struct fileproc)); 1873 fp->f_iocount = 0; 1874 fp->f_fglob = fg; 1875 fg_removeuipc(fg); 1876 procfdtbl_releasefd(p, f, fp); 1877 (void) OSAddAtomic(-1, &unp_rights); 1878 fds[i] = f; 1879 } 1880 proc_fdunlock(p); 1881 1882 return (0); 1883} 1884 1885void 1886unp_init(void) 1887{ 1888 unp_zone = zinit(sizeof (struct unpcb), 1889 (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone"); 1890 1891 if (unp_zone == 0) 1892 panic("unp_init"); 1893 LIST_INIT(&unp_dhead); 1894 LIST_INIT(&unp_shead); 1895 1896 /* 1897 * allocate lock group attribute and group for udp pcb mutexes 1898 */ 1899 unp_mtx_grp_attr = lck_grp_attr_alloc_init(); 1900 1901 unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr); 1902 1903 unp_mtx_attr = lck_attr_alloc_init(); 1904 1905 if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp, 1906 unp_mtx_attr)) == NULL) 1907 return; /* pretty much dead if this fails... */ 1908 1909 if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp, 1910 unp_mtx_attr)) == NULL) 1911 return; 1912 1913 if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp, 1914 unp_mtx_attr)) == NULL) 1915 return; 1916} 1917 1918#ifndef MIN 1919#define MIN(a, b) (((a) < (b)) ? (a) : (b)) 1920#endif 1921 1922/* 1923 * Returns: 0 Success 1924 * EINVAL 1925 * fdgetf_noref:EBADF 1926 */ 1927static int 1928unp_internalize(struct mbuf *control, proc_t p) 1929{ 1930 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1931 int *fds; 1932 struct fileglob **rp; 1933 struct fileproc *fp; 1934 int i, error; 1935 int oldfds; 1936 1937 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */ 1938 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 1939 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) { 1940 return (EINVAL); 1941 } 1942 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 1943 1944 proc_fdlock(p); 1945 fds = (int *)(cm + 1); 1946 1947 for (i = 0; i < oldfds; i++) { 1948 struct fileproc *tmpfp; 1949 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) { 1950 proc_fdunlock(p); 1951 return (error); 1952 } else if (!filetype_issendable(tmpfp->f_fglob->fg_type)) { 1953 proc_fdunlock(p); 1954 return (EINVAL); 1955 } 1956 } 1957 rp = (struct fileglob **)(cm + 1); 1958 1959 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd 1960 * and doing them in-order would result in stomping over unprocessed fd's 1961 */ 1962 for (i = (oldfds - 1); i >= 0; i--) { 1963 (void) fdgetf_noref(p, fds[i], &fp); 1964 fg_insertuipc(fp->f_fglob); 1965 rp[i] = fp->f_fglob; 1966 (void) OSAddAtomic(1, &unp_rights); 1967 } 1968 proc_fdunlock(p); 1969 1970 return (0); 1971} 1972 1973static int unp_defer, unp_gcing, unp_gcwait; 1974static thread_t unp_gcthread = NULL; 1975 1976/* always called under uipc_lock */ 1977void 1978unp_gc_wait(void) 1979{ 1980 if (unp_gcthread == current_thread()) 1981 return; 1982 1983 while (unp_gcing != 0) { 1984 unp_gcwait = 1; 1985 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL); 1986 } 1987} 1988 1989 1990__private_extern__ void 1991unp_gc(void) 1992{ 1993 struct fileglob *fg, *nextfg; 1994 struct socket *so; 1995 static struct fileglob **extra_ref; 1996 struct fileglob **fpp; 1997 int nunref, i; 1998 int need_gcwakeup = 0; 1999 2000 lck_mtx_lock(uipc_lock); 2001 if (unp_gcing) { 2002 lck_mtx_unlock(uipc_lock); 2003 return; 2004 } 2005 unp_gcing = 1; 2006 unp_defer = 0; 2007 unp_gcthread = current_thread(); 2008 lck_mtx_unlock(uipc_lock); 2009 /* 2010 * before going through all this, set all FDs to 2011 * be NOT defered and NOT externally accessible 2012 */ 2013 for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) { 2014 lck_mtx_lock(&fg->fg_lock); 2015 fg->fg_flag &= ~(FMARK|FDEFER); 2016 lck_mtx_unlock(&fg->fg_lock); 2017 } 2018 do { 2019 for (fg = fmsghead.lh_first; fg != 0; 2020 fg = fg->f_msglist.le_next) { 2021 lck_mtx_lock(&fg->fg_lock); 2022 /* 2023 * If the file is not open, skip it 2024 */ 2025 if (fg->fg_count == 0) { 2026 lck_mtx_unlock(&fg->fg_lock); 2027 continue; 2028 } 2029 /* 2030 * If we already marked it as 'defer' in a 2031 * previous pass, then try process it this time 2032 * and un-mark it 2033 */ 2034 if (fg->fg_flag & FDEFER) { 2035 fg->fg_flag &= ~FDEFER; 2036 unp_defer--; 2037 } else { 2038 /* 2039 * if it's not defered, then check if it's 2040 * already marked.. if so skip it 2041 */ 2042 if (fg->fg_flag & FMARK) { 2043 lck_mtx_unlock(&fg->fg_lock); 2044 continue; 2045 } 2046 /* 2047 * If all references are from messages 2048 * in transit, then skip it. it's not 2049 * externally accessible. 2050 */ 2051 if (fg->fg_count == fg->fg_msgcount) { 2052 lck_mtx_unlock(&fg->fg_lock); 2053 continue; 2054 } 2055 /* 2056 * If it got this far then it must be 2057 * externally accessible. 2058 */ 2059 fg->fg_flag |= FMARK; 2060 } 2061 /* 2062 * either it was defered, or it is externally 2063 * accessible and not already marked so. 2064 * Now check if it is possibly one of OUR sockets. 2065 */ 2066 if (fg->fg_type != DTYPE_SOCKET || 2067 (so = (struct socket *)fg->fg_data) == 0) { 2068 lck_mtx_unlock(&fg->fg_lock); 2069 continue; 2070 } 2071 if (so->so_proto->pr_domain != &localdomain || 2072 (so->so_proto->pr_flags&PR_RIGHTS) == 0) { 2073 lck_mtx_unlock(&fg->fg_lock); 2074 continue; 2075 } 2076#ifdef notdef 2077 /* 2078 * if this code is enabled need to run 2079 * under network funnel 2080 */ 2081 if (so->so_rcv.sb_flags & SB_LOCK) { 2082 /* 2083 * This is problematical; it's not clear 2084 * we need to wait for the sockbuf to be 2085 * unlocked (on a uniprocessor, at least), 2086 * and it's also not clear what to do 2087 * if sbwait returns an error due to receipt 2088 * of a signal. If sbwait does return 2089 * an error, we'll go into an infinite 2090 * loop. Delete all of this for now. 2091 */ 2092 (void) sbwait(&so->so_rcv); 2093 goto restart; 2094 } 2095#endif 2096 /* 2097 * So, Ok, it's one of our sockets and it IS externally 2098 * accessible (or was defered). Now we look 2099 * to see if we hold any file descriptors in its 2100 * message buffers. Follow those links and mark them 2101 * as accessible too. 2102 * 2103 * In case a file is passed onto itself we need to 2104 * release the file lock. 2105 */ 2106 lck_mtx_unlock(&fg->fg_lock); 2107 2108 unp_scan(so->so_rcv.sb_mb, unp_mark); 2109 } 2110 } while (unp_defer); 2111 /* 2112 * We grab an extra reference to each of the file table entries 2113 * that are not otherwise accessible and then free the rights 2114 * that are stored in messages on them. 2115 * 2116 * The bug in the orginal code is a little tricky, so I'll describe 2117 * what's wrong with it here. 2118 * 2119 * It is incorrect to simply unp_discard each entry for f_msgcount 2120 * times -- consider the case of sockets A and B that contain 2121 * references to each other. On a last close of some other socket, 2122 * we trigger a gc since the number of outstanding rights (unp_rights) 2123 * is non-zero. If during the sweep phase the gc code un_discards, 2124 * we end up doing a (full) closef on the descriptor. A closef on A 2125 * results in the following chain. Closef calls soo_close, which 2126 * calls soclose. Soclose calls first (through the switch 2127 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 2128 * returns because the previous instance had set unp_gcing, and 2129 * we return all the way back to soclose, which marks the socket 2130 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 2131 * to free up the rights that are queued in messages on the socket A, 2132 * i.e., the reference on B. The sorflush calls via the dom_dispose 2133 * switch unp_dispose, which unp_scans with unp_discard. This second 2134 * instance of unp_discard just calls closef on B. 2135 * 2136 * Well, a similar chain occurs on B, resulting in a sorflush on B, 2137 * which results in another closef on A. Unfortunately, A is already 2138 * being closed, and the descriptor has already been marked with 2139 * SS_NOFDREF, and soclose panics at this point. 2140 * 2141 * Here, we first take an extra reference to each inaccessible 2142 * descriptor. Then, we call sorflush ourself, since we know 2143 * it is a Unix domain socket anyhow. After we destroy all the 2144 * rights carried in messages, we do a last closef to get rid 2145 * of our extra reference. This is the last close, and the 2146 * unp_detach etc will shut down the socket. 2147 * 2148 * 91/09/19, bsy@cs.cmu.edu 2149 */ 2150 extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *), 2151 M_FILEGLOB, M_WAITOK); 2152 if (extra_ref == NULL) 2153 goto bail; 2154 for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0; 2155 fg = nextfg) { 2156 lck_mtx_lock(&fg->fg_lock); 2157 2158 nextfg = fg->f_msglist.le_next; 2159 /* 2160 * If it's not open, skip it 2161 */ 2162 if (fg->fg_count == 0) { 2163 lck_mtx_unlock(&fg->fg_lock); 2164 continue; 2165 } 2166 /* 2167 * If all refs are from msgs, and it's not marked accessible 2168 * then it must be referenced from some unreachable cycle 2169 * of (shut-down) FDs, so include it in our 2170 * list of FDs to remove 2171 */ 2172 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) { 2173 fg->fg_count++; 2174 *fpp++ = fg; 2175 nunref++; 2176 } 2177 lck_mtx_unlock(&fg->fg_lock); 2178 } 2179 /* 2180 * for each FD on our hit list, do the following two things 2181 */ 2182 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 2183 struct fileglob *tfg; 2184 2185 tfg = *fpp; 2186 2187 if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) { 2188 so = (struct socket *)(tfg->fg_data); 2189 2190 socket_lock(so, 0); 2191 2192 sorflush(so); 2193 2194 socket_unlock(so, 0); 2195 } 2196 } 2197 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 2198 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL); 2199 2200 FREE((caddr_t)extra_ref, M_FILEGLOB); 2201bail: 2202 lck_mtx_lock(uipc_lock); 2203 unp_gcing = 0; 2204 unp_gcthread = NULL; 2205 2206 if (unp_gcwait != 0) { 2207 unp_gcwait = 0; 2208 need_gcwakeup = 1; 2209 } 2210 lck_mtx_unlock(uipc_lock); 2211 2212 if (need_gcwakeup != 0) 2213 wakeup(&unp_gcing); 2214} 2215 2216void 2217unp_dispose(struct mbuf *m) 2218{ 2219 if (m) { 2220 unp_scan(m, unp_discard); 2221 } 2222} 2223 2224/* 2225 * Returns: 0 Success 2226 */ 2227static int 2228unp_listen(struct unpcb *unp, proc_t p) 2229{ 2230 kauth_cred_t safecred = kauth_cred_proc_ref(p); 2231 cru2x(safecred, &unp->unp_peercred); 2232 kauth_cred_unref(&safecred); 2233 unp->unp_flags |= UNP_HAVEPCCACHED; 2234 return (0); 2235} 2236 2237/* should run under kernel funnel */ 2238static void 2239unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) 2240{ 2241 struct mbuf *m; 2242 struct fileglob **rp; 2243 struct cmsghdr *cm; 2244 int i; 2245 int qfds; 2246 2247 while (m0) { 2248 for (m = m0; m; m = m->m_next) 2249 if (m->m_type == MT_CONTROL && 2250 (size_t)m->m_len >= sizeof (*cm)) { 2251 cm = mtod(m, struct cmsghdr *); 2252 if (cm->cmsg_level != SOL_SOCKET || 2253 cm->cmsg_type != SCM_RIGHTS) 2254 continue; 2255 qfds = (cm->cmsg_len - sizeof (*cm)) / 2256 sizeof (int); 2257 rp = (struct fileglob **)(cm + 1); 2258 for (i = 0; i < qfds; i++) 2259 (*op)(*rp++); 2260 break; /* XXX, but saves time */ 2261 } 2262 m0 = m0->m_act; 2263 } 2264} 2265 2266/* should run under kernel funnel */ 2267static void 2268unp_mark(struct fileglob *fg) 2269{ 2270 lck_mtx_lock(&fg->fg_lock); 2271 2272 if (fg->fg_flag & FMARK) { 2273 lck_mtx_unlock(&fg->fg_lock); 2274 return; 2275 } 2276 fg->fg_flag |= (FMARK|FDEFER); 2277 2278 lck_mtx_unlock(&fg->fg_lock); 2279 2280 unp_defer++; 2281} 2282 2283/* should run under kernel funnel */ 2284static void 2285unp_discard(struct fileglob *fg) 2286{ 2287 proc_t p = current_proc(); /* XXX */ 2288 2289 (void) OSAddAtomic(1, &unp_disposed); 2290 2291 proc_fdlock(p); 2292 unp_discard_fdlocked(fg, p); 2293 proc_fdunlock(p); 2294} 2295static void 2296unp_discard_fdlocked(struct fileglob *fg, proc_t p) 2297{ 2298 fg_removeuipc(fg); 2299 2300 (void) OSAddAtomic(-1, &unp_rights); 2301 (void) closef_locked((struct fileproc *)0, fg, p); 2302} 2303 2304int 2305unp_lock(struct socket *so, int refcount, void * lr) 2306 { 2307 void * lr_saved; 2308 if (lr == 0) 2309 lr_saved = (void *) __builtin_return_address(0); 2310 else lr_saved = lr; 2311 2312 if (so->so_pcb) { 2313 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx); 2314 } else { 2315 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n", 2316 so, lr_saved, so->so_usecount); 2317 } 2318 2319 if (so->so_usecount < 0) 2320 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n", 2321 so, so->so_pcb, lr_saved, so->so_usecount); 2322 2323 if (refcount) 2324 so->so_usecount++; 2325 2326 so->lock_lr[so->next_lock_lr] = lr_saved; 2327 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; 2328 return (0); 2329} 2330 2331int 2332unp_unlock(struct socket *so, int refcount, void * lr) 2333{ 2334 void * lr_saved; 2335 lck_mtx_t * mutex_held = NULL; 2336 struct unpcb *unp = sotounpcb(so); 2337 2338 if (lr == 0) 2339 lr_saved = (void *) __builtin_return_address(0); 2340 else lr_saved = lr; 2341 2342 if (refcount) 2343 so->so_usecount--; 2344 2345 if (so->so_usecount < 0) 2346 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount); 2347 if (so->so_pcb == NULL) { 2348 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount); 2349 } else { 2350 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx; 2351 } 2352 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); 2353 so->unlock_lr[so->next_unlock_lr] = lr_saved; 2354 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; 2355 2356 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) { 2357 sofreelastref(so, 1); 2358 2359 if (unp->unp_addr) 2360 FREE(unp->unp_addr, M_SONAME); 2361 2362 lck_mtx_unlock(mutex_held); 2363 2364 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp); 2365 zfree(unp_zone, unp); 2366 2367 unp_gc(); 2368 } else { 2369 lck_mtx_unlock(mutex_held); 2370 } 2371 2372 return (0); 2373} 2374 2375lck_mtx_t * 2376unp_getlock(struct socket *so, __unused int locktype) 2377{ 2378 struct unpcb *unp = (struct unpcb *)so->so_pcb; 2379 2380 2381 if (so->so_pcb) { 2382 if (so->so_usecount < 0) 2383 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount); 2384 return(&unp->unp_mtx); 2385 } else { 2386 panic("unp_getlock: so=%p NULL so_pcb\n", so); 2387 return (so->so_proto->pr_domain->dom_mtx); 2388 } 2389} 2390 2391