1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1988, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * $FreeBSD: src/sys/netinet/ip_divert.c,v 1.98 2004/08/17 22:05:54 andre Exp $ 61 */ 62 63#if !INET 64#error "IPDIVERT requires INET." 65#endif 66 67#include <sys/param.h> 68#include <sys/kernel.h> 69#include <sys/malloc.h> 70#include <sys/mbuf.h> 71#include <sys/socket.h> 72#include <sys/domain.h> 73#include <sys/protosw.h> 74#include <sys/socketvar.h> 75#include <sys/sysctl.h> 76#include <sys/systm.h> 77#include <sys/proc.h> 78 79#include <machine/endian.h> 80 81#include <net/if.h> 82#include <net/route.h> 83#include <net/kpi_protocol.h> 84 85#include <netinet/in.h> 86#include <netinet/in_systm.h> 87#include <netinet/ip.h> 88#include <netinet/in_pcb.h> 89#include <netinet/in_var.h> 90#include <netinet/ip_var.h> 91#include <netinet/ip_fw.h> 92#include <netinet/ip_divert.h> 93 94#include <kern/zalloc.h> 95#include <libkern/OSAtomic.h> 96 97/* 98 * Divert sockets 99 */ 100 101/* 102 * Allocate enough space to hold a full IP packet 103 */ 104#define DIVSNDQ (65536 + 100) 105#define DIVRCVQ (65536 + 100) 106 107/* 108 * Divert sockets work in conjunction with ipfw, see the divert(4) 109 * manpage for features. 110 * Internally, packets selected by ipfw in ip_input() or ip_output(), 111 * and never diverted before, are passed to the input queue of the 112 * divert socket with a given 'divert_port' number (as specified in 113 * the matching ipfw rule), and they are tagged with a 16 bit cookie 114 * (representing the rule number of the matching ipfw rule), which 115 * is passed to process reading from the socket. 116 * 117 * Packets written to the divert socket are again tagged with a cookie 118 * (usually the same as above) and a destination address. 119 * If the destination address is INADDR_ANY then the packet is 120 * treated as outgoing and sent to ip_output(), otherwise it is 121 * treated as incoming and sent to ip_input(). 122 * In both cases, the packet is tagged with the cookie. 123 * 124 * On reinjection, processing in ip_input() and ip_output() 125 * will be exactly the same as for the original packet, except that 126 * ipfw processing will start at the rule number after the one 127 * written in the cookie (so, tagging a packet with a cookie of 0 128 * will cause it to be effectively considered as a standard packet). 129 */ 130 131/* Internal variables */ 132static struct inpcbhead divcb; 133static struct inpcbinfo divcbinfo; 134 135static u_int32_t div_sendspace = DIVSNDQ; /* XXX sysctl ? */ 136static u_int32_t div_recvspace = DIVRCVQ; /* XXX sysctl ? */ 137 138/* Optimization: have this preinitialized */ 139static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET, 0, { 0 }, { 0,0,0,0,0,0,0,0 } }; 140 141/* Internal functions */ 142static int div_output(struct socket *so, 143 struct mbuf *m, struct sockaddr_in *addr, struct mbuf *control); 144 145extern int load_ipfw(void); 146/* 147 * Initialize divert connection block queue. 148 */ 149void 150div_init(struct protosw *pp, struct domain *dp) 151{ 152#pragma unused(dp) 153 static int div_initialized = 0; 154 struct inpcbinfo *pcbinfo; 155 156 VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); 157 158 if (div_initialized) 159 return; 160 div_initialized = 1; 161 162 LIST_INIT(&divcb); 163 divcbinfo.ipi_listhead = &divcb; 164 /* 165 * XXX We don't use the hash list for divert IP, but it's easier 166 * to allocate a one entry hash list than it is to check all 167 * over the place for ipi_hashbase == NULL. 168 */ 169 divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &divcbinfo.ipi_hashmask); 170 divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &divcbinfo.ipi_porthashmask); 171 divcbinfo.ipi_zone = zinit(sizeof(struct inpcb),(512 * sizeof(struct inpcb)), 172 4096, "divzone"); 173 pcbinfo = &divcbinfo; 174 /* 175 * allocate lock group attribute and group for udp pcb mutexes 176 */ 177 pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init(); 178 179 pcbinfo->ipi_lock_grp = lck_grp_alloc_init("divcb", pcbinfo->ipi_lock_grp_attr); 180 181 /* 182 * allocate the lock attribute for divert pcb mutexes 183 */ 184 pcbinfo->ipi_lock_attr = lck_attr_alloc_init(); 185 186 if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp, 187 pcbinfo->ipi_lock_attr)) == NULL) { 188 panic("%s: unable to allocate PCB lock\n", __func__); 189 /* NOTREACHED */ 190 } 191 192 in_pcbinfo_attach(&divcbinfo); 193 194#if IPFIREWALL 195 if (!IPFW_LOADED) { 196 load_ipfw(); 197 } 198#endif 199} 200 201/* 202 * IPPROTO_DIVERT is not a real IP protocol; don't allow any packets 203 * with that protocol number to enter the system from the outside. 204 */ 205void 206div_input(struct mbuf *m, __unused int off) 207{ 208 OSAddAtomic(1, &ipstat.ips_noproto); 209 m_freem(m); 210} 211 212/* 213 * Divert a packet by passing it up to the divert socket at port 'port'. 214 * 215 * Setup generic address and protocol structures for div_input routine, 216 * then pass them along with mbuf chain. 217 */ 218void 219divert_packet(struct mbuf *m, int incoming, int port, int rule) 220{ 221 struct ip *ip; 222 struct inpcb *inp; 223 struct socket *sa; 224 u_int16_t nport; 225 226 /* Sanity check */ 227 KASSERT(port != 0, ("%s: port=0", __FUNCTION__)); 228 229 divsrc.sin_port = rule; /* record matching rule */ 230 231 /* Assure header */ 232 if (m->m_len < sizeof(struct ip) && 233 (m = m_pullup(m, sizeof(struct ip))) == 0) { 234 return; 235 } 236 ip = mtod(m, struct ip *); 237 238 /* 239 * Record receive interface address, if any. 240 * But only for incoming packets. 241 */ 242 divsrc.sin_addr.s_addr = 0; 243 if (incoming) { 244 struct ifaddr *ifa; 245 246 /* Sanity check */ 247 KASSERT((m->m_flags & M_PKTHDR), ("%s: !PKTHDR", __FUNCTION__)); 248 249 /* Find IP address for receive interface */ 250 ifnet_lock_shared(m->m_pkthdr.rcvif); 251 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 252 IFA_LOCK(ifa); 253 if (ifa->ifa_addr->sa_family != AF_INET) { 254 IFA_UNLOCK(ifa); 255 continue; 256 } 257 divsrc.sin_addr = 258 ((struct sockaddr_in *)(void *) ifa->ifa_addr)->sin_addr; 259 IFA_UNLOCK(ifa); 260 break; 261 } 262 ifnet_lock_done(m->m_pkthdr.rcvif); 263 } 264 /* 265 * Record the incoming interface name whenever we have one. 266 */ 267 bzero(&divsrc.sin_zero, sizeof(divsrc.sin_zero)); 268 if (m->m_pkthdr.rcvif) { 269 /* 270 * Hide the actual interface name in there in the 271 * sin_zero array. XXX This needs to be moved to a 272 * different sockaddr type for divert, e.g. 273 * sockaddr_div with multiple fields like 274 * sockaddr_dl. Presently we have only 7 bytes 275 * but that will do for now as most interfaces 276 * are 4 or less + 2 or less bytes for unit. 277 * There is probably a faster way of doing this, 278 * possibly taking it from the sockaddr_dl on the iface. 279 * This solves the problem of a P2P link and a LAN interface 280 * having the same address, which can result in the wrong 281 * interface being assigned to the packet when fed back 282 * into the divert socket. Theoretically if the daemon saves 283 * and re-uses the sockaddr_in as suggested in the man pages, 284 * this iface name will come along for the ride. 285 * (see div_output for the other half of this.) 286 */ 287 snprintf(divsrc.sin_zero, sizeof(divsrc.sin_zero), 288 "%s", if_name(m->m_pkthdr.rcvif)); 289 } 290 291 /* Put packet on socket queue, if any */ 292 sa = NULL; 293 nport = htons((u_int16_t)port); 294 lck_rw_lock_shared(divcbinfo.ipi_lock); 295 LIST_FOREACH(inp, &divcb, inp_list) { 296 if (inp->inp_lport == nport) 297 sa = inp->inp_socket; 298 } 299 if (sa) { 300 int error = 0; 301 302 socket_lock(sa, 1); 303 if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, 304 m, (struct mbuf *)0, &error) != 0) 305 sorwakeup(sa); 306 socket_unlock(sa, 1); 307 } else { 308 m_freem(m); 309 OSAddAtomic(1, &ipstat.ips_noproto); 310 OSAddAtomic(-1, &ipstat.ips_delivered); 311 } 312 lck_rw_done(divcbinfo.ipi_lock); 313} 314 315/* 316 * Deliver packet back into the IP processing machinery. 317 * 318 * If no address specified, or address is 0.0.0.0, send to ip_output(); 319 * otherwise, send to ip_input() and mark as having been received on 320 * the interface with that address. 321 * ###LOCK called in inet_proto mutex when from div_send. 322 */ 323static int 324div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, 325 struct mbuf *control) 326{ 327 struct inpcb *const inp = sotoinpcb(so); 328 struct ip *const ip = mtod(m, struct ip *); 329 int error = 0; 330 mbuf_svc_class_t msc = MBUF_SC_UNSPEC; 331 332 if (control != NULL) { 333 msc = mbuf_service_class_from_control(control); 334 335 m_freem(control); /* XXX */ 336 control = NULL; 337 } 338 /* Loopback avoidance and state recovery */ 339 if (sin) { 340 struct m_tag *mtag; 341 struct divert_tag *dt; 342 int len = 0; 343 char *c = sin->sin_zero; 344 345 mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, 346 sizeof(struct divert_tag), M_NOWAIT, m); 347 if (mtag == NULL) { 348 error = ENOBUFS; 349 goto cantsend; 350 } 351 dt = (struct divert_tag *)(mtag+1); 352 dt->info = 0; 353 dt->cookie = sin->sin_port; 354 m_tag_prepend(m, mtag); 355 356 /* 357 * Find receive interface with the given name or IP address. 358 * The name is user supplied data so don't trust it's size or 359 * that it is zero terminated. The name has priority. 360 * We are presently assuming that the sockaddr_in 361 * has not been replaced by a sockaddr_div, so we limit it 362 * to 16 bytes in total. the name is stuffed (if it exists) 363 * in the sin_zero[] field. 364 */ 365 while (*c++ && (len++ < sizeof(sin->sin_zero))); 366 if ((len > 0) && (len < sizeof(sin->sin_zero))) 367 m->m_pkthdr.rcvif = ifunit(sin->sin_zero); 368 } 369 370 /* Reinject packet into the system as incoming or outgoing */ 371 if (!sin || sin->sin_addr.s_addr == 0) { 372 struct ip_out_args ipoa = 373 { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; 374 struct route ro; 375 struct ip_moptions *imo; 376 377 /* 378 * Don't allow both user specified and setsockopt options, 379 * and don't allow packet length sizes that will crash 380 */ 381 if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || 382 ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) { 383 error = EINVAL; 384 goto cantsend; 385 } 386 387 /* Convert fields to host order for ip_output() */ 388#if BYTE_ORDER != BIG_ENDIAN 389 NTOHS(ip->ip_len); 390 NTOHS(ip->ip_off); 391#endif 392 393 OSAddAtomic(1, &ipstat.ips_rawout); 394 /* Copy the cached route and take an extra reference */ 395 inp_route_copyout(inp, &ro); 396 397 set_packet_service_class(m, so, msc, 0); 398 399 imo = inp->inp_moptions; 400 if (imo != NULL) 401 IMO_ADDREF(imo); 402 socket_unlock(so, 0); 403#if CONFIG_MACF_NET 404 mac_mbuf_label_associate_inpcb(inp, m); 405#endif 406 /* Send packet to output processing */ 407 error = ip_output(m, inp->inp_options, &ro, 408 (so->so_options & SO_DONTROUTE) | 409 IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS, 410 imo, &ipoa); 411 412 socket_lock(so, 0); 413 if (imo != NULL) 414 IMO_REMREF(imo); 415 /* Synchronize cached PCB route */ 416 inp_route_copyin(inp, &ro); 417 } else { 418 struct ifaddr *ifa; 419 420 /* If no luck with the name above. check by IP address. */ 421 if (m->m_pkthdr.rcvif == NULL) { 422 struct sockaddr_in _sin; 423 /* 424 * Make sure there are no distractions for 425 * ifa_ifwithaddr; use sanitized version. 426 */ 427 bzero(&_sin, sizeof (_sin)); 428 _sin.sin_family = AF_INET; 429 _sin.sin_len = sizeof (struct sockaddr_in); 430 _sin.sin_addr.s_addr = sin->sin_addr.s_addr; 431 if (!(ifa = ifa_ifwithaddr(SA(&_sin)))) { 432 error = EADDRNOTAVAIL; 433 goto cantsend; 434 } 435 m->m_pkthdr.rcvif = ifa->ifa_ifp; 436 IFA_REMREF(ifa); 437 } 438#if CONFIG_MACF_NET 439 mac_mbuf_label_associate_socket(so, m); 440#endif 441 /* Send packet to input processing */ 442 proto_inject(PF_INET, m); 443 } 444 445 return error; 446 447cantsend: 448 m_freem(m); 449 return error; 450} 451 452static int 453div_attach(struct socket *so, int proto, struct proc *p) 454{ 455 struct inpcb *inp; 456 int error; 457 458 459 inp = sotoinpcb(so); 460 if (inp) 461 panic("div_attach"); 462 if ((error = proc_suser(p)) != 0) 463 return error; 464 465 error = soreserve(so, div_sendspace, div_recvspace); 466 if (error) 467 return error; 468 error = in_pcballoc(so, &divcbinfo, p); 469 if (error) 470 return error; 471 inp = (struct inpcb *)so->so_pcb; 472 inp->inp_ip_p = proto; 473 inp->inp_vflag |= INP_IPV4; 474 inp->inp_flags |= INP_HDRINCL; 475 /* The socket is always "connected" because 476 we always know "where" to send the packet */ 477 so->so_state |= SS_ISCONNECTED; 478 479#ifdef MORE_DICVLOCK_DEBUG 480 printf("div_attach: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x\n", 481 (uint64_t)VM_KERNEL_ADDRPERM(so), 482 (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), 483 (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)), 484 so->so_usecount); 485#endif 486 return 0; 487} 488 489static int 490div_detach(struct socket *so) 491{ 492 struct inpcb *inp; 493 494#ifdef MORE_DICVLOCK_DEBUG 495 printf("div_detach: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x\n", 496 (uint64_t)VM_KERNEL_ADDRPERM(so), 497 (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), 498 (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)), 499 so->so_usecount); 500#endif 501 inp = sotoinpcb(so); 502 if (inp == 0) 503 panic("div_detach: so=%p null inp\n", so); 504 in_pcbdetach(inp); 505 inp->inp_state = INPCB_STATE_DEAD; 506 return 0; 507} 508 509static int 510div_abort(struct socket *so) 511{ 512 soisdisconnected(so); 513 return div_detach(so); 514} 515 516static int 517div_disconnect(struct socket *so) 518{ 519 if ((so->so_state & SS_ISCONNECTED) == 0) 520 return ENOTCONN; 521 return div_abort(so); 522} 523 524static int 525div_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 526{ 527 struct inpcb *inp; 528 int error; 529 530 inp = sotoinpcb(so); 531 /* in_pcbbind assumes that the socket is a sockaddr_in 532 * and in_pcbbind requires a valid address. Since divert 533 * sockets don't we need to make sure the address is 534 * filled in properly. 535 * XXX -- divert should not be abusing in_pcbind 536 * and should probably have its own family. 537 */ 538 if (nam->sa_family != AF_INET) { 539 error = EAFNOSUPPORT; 540 } else { 541 ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr = INADDR_ANY; 542 error = in_pcbbind(inp, nam, p); 543 } 544 return error; 545} 546 547static int 548div_shutdown(struct socket *so) 549{ 550 socantsendmore(so); 551 return 0; 552} 553 554static int 555div_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam, 556 struct mbuf *control, __unused struct proc *p) 557{ 558 /* Packet must have a header (but that's about it) */ 559 if (m->m_len < sizeof (struct ip) && 560 (m = m_pullup(m, sizeof (struct ip))) == 0) { 561 OSAddAtomic(1, &ipstat.ips_toosmall); 562 m_freem(m); 563 return EINVAL; 564 } 565 566 /* Send packet */ 567 return div_output(so, m, SIN(nam), control); 568} 569 570#if 0 571static int 572div_pcblist SYSCTL_HANDLER_ARGS 573{ 574#pragma unused(oidp, arg1, arg2) 575 int error, i, n; 576 struct inpcb *inp, **inp_list; 577 inp_gen_t gencnt; 578 struct xinpgen xig; 579 580 /* 581 * The process of preparing the TCB list is too time-consuming and 582 * resource-intensive to repeat twice on every request. 583 */ 584 lck_rw_lock_exclusive(divcbinfo.ipi_lock); 585 if (req->oldptr == USER_ADDR_NULL) { 586 n = divcbinfo.ipi_count; 587 req->oldidx = 2 * (sizeof xig) 588 + (n + n/8) * sizeof(struct xinpcb); 589 lck_rw_done(divcbinfo.ipi_lock); 590 return 0; 591 } 592 593 if (req->newptr != USER_ADDR_NULL) { 594 lck_rw_done(divcbinfo.ipi_lock); 595 return EPERM; 596 } 597 598 /* 599 * OK, now we're committed to doing something. 600 */ 601 gencnt = divcbinfo.ipi_gencnt; 602 n = divcbinfo.ipi_count; 603 604 bzero(&xig, sizeof(xig)); 605 xig.xig_len = sizeof xig; 606 xig.xig_count = n; 607 xig.xig_gen = gencnt; 608 xig.xig_sogen = so_gencnt; 609 error = SYSCTL_OUT(req, &xig, sizeof xig); 610 if (error) { 611 lck_rw_done(divcbinfo.ipi_lock); 612 return error; 613 } 614 615 inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); 616 if (inp_list == 0) { 617 lck_rw_done(divcbinfo.ipi_lock); 618 return ENOMEM; 619 } 620 621 for (inp = LIST_FIRST(divcbinfo.ipi_listhead), i = 0; inp && i < n; 622 inp = LIST_NEXT(inp, inp_list)) { 623#ifdef __APPLE__ 624 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) 625#else 626 if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) 627#endif 628 inp_list[i++] = inp; 629 } 630 n = i; 631 632 error = 0; 633 for (i = 0; i < n; i++) { 634 inp = inp_list[i]; 635 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { 636 struct xinpcb xi; 637 638 bzero(&xi, sizeof(xi)); 639 xi.xi_len = sizeof xi; 640 /* XXX should avoid extra copy */ 641 inpcb_to_compat(inp, &xi.xi_inp); 642 if (inp->inp_socket) 643 sotoxsocket(inp->inp_socket, &xi.xi_socket); 644 error = SYSCTL_OUT(req, &xi, sizeof xi); 645 } 646 } 647 if (!error) { 648 /* 649 * Give the user an updated idea of our state. 650 * If the generation differs from what we told 651 * her before, she knows that something happened 652 * while we were processing this request, and it 653 * might be necessary to retry. 654 */ 655 bzero(&xig, sizeof(xig)); 656 xig.xig_len = sizeof xig; 657 xig.xig_gen = divcbinfo.ipi_gencnt; 658 xig.xig_sogen = so_gencnt; 659 xig.xig_count = divcbinfo.ipi_count; 660 error = SYSCTL_OUT(req, &xig, sizeof xig); 661 } 662 FREE(inp_list, M_TEMP); 663 lck_rw_done(divcbinfo.ipi_lock); 664 return error; 665} 666#endif 667 668__private_extern__ int 669div_lock(struct socket *so, int refcount, void *lr) 670{ 671 void *lr_saved; 672 673 if (lr == NULL) 674 lr_saved = __builtin_return_address(0); 675 else 676 lr_saved = lr; 677 678#ifdef MORE_DICVLOCK_DEBUG 679 printf("div_lock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x " 680 "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so), 681 (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), so->so_pcb ? 682 (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)) : NULL, 683 so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved)); 684#endif 685 if (so->so_pcb) { 686 lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx); 687 } else { 688 panic("div_lock: so=%p NO PCB! lr=%p lrh= lrh= %s\n", 689 so, lr_saved, solockhistory_nr(so)); 690 /* NOTREACHED */ 691 } 692 693 if (so->so_usecount < 0) { 694 panic("div_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", 695 so, so->so_pcb, lr_saved, so->so_usecount, 696 solockhistory_nr(so)); 697 /* NOTREACHED */ 698 } 699 700 if (refcount) 701 so->so_usecount++; 702 so->lock_lr[so->next_lock_lr] = lr_saved; 703 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; 704 705 return (0); 706} 707 708__private_extern__ int 709div_unlock(struct socket *so, int refcount, void *lr) 710{ 711 void *lr_saved; 712 lck_mtx_t * mutex_held; 713 struct inpcb *inp = sotoinpcb(so); 714 715 if (lr == NULL) 716 lr_saved = __builtin_return_address(0); 717 else 718 lr_saved = lr; 719 720#ifdef MORE_DICVLOCK_DEBUG 721 printf("div_unlock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x " 722 "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so), 723 (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), so->so_pcb ? 724 (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)) : NULL, 725 so->so_usecount, lr_saved); 726#endif 727 if (refcount) 728 so->so_usecount--; 729 730 if (so->so_usecount < 0) { 731 panic("div_unlock: so=%p usecount=%x lrh= %s\n", 732 so, so->so_usecount, solockhistory_nr(so)); 733 /* NOTREACHED */ 734 } 735 if (so->so_pcb == NULL) { 736 panic("div_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", 737 so, so->so_usecount, lr_saved, solockhistory_nr(so)); 738 /* NOTREACHED */ 739 } 740 mutex_held = &((struct inpcb *)so->so_pcb)->inpcb_mtx; 741 742 if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { 743 lck_rw_lock_exclusive(divcbinfo.ipi_lock); 744 if (inp->inp_state != INPCB_STATE_DEAD) 745 in_pcbdetach(inp); 746 in_pcbdispose(inp); 747 lck_rw_done(divcbinfo.ipi_lock); 748 return (0); 749 } 750 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); 751 so->unlock_lr[so->next_unlock_lr] = lr_saved; 752 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; 753 lck_mtx_unlock(mutex_held); 754 return (0); 755} 756 757__private_extern__ lck_mtx_t * 758div_getlock(struct socket *so, __unused int locktype) 759{ 760 struct inpcb *inpcb = (struct inpcb *)so->so_pcb; 761 762 if (so->so_pcb) { 763 if (so->so_usecount < 0) 764 panic("div_getlock: so=%p usecount=%x lrh= %s\n", 765 so, so->so_usecount, solockhistory_nr(so)); 766 return(&inpcb->inpcb_mtx); 767 } else { 768 panic("div_getlock: so=%p NULL NO PCB lrh= %s\n", 769 so, solockhistory_nr(so)); 770 return (so->so_proto->pr_domain->dom_mtx); 771 } 772} 773 774struct pr_usrreqs div_usrreqs = { 775 .pru_abort = div_abort, 776 .pru_attach = div_attach, 777 .pru_bind = div_bind, 778 .pru_control = in_control, 779 .pru_detach = div_detach, 780 .pru_disconnect = div_disconnect, 781 .pru_peeraddr = in_getpeeraddr, 782 .pru_send = div_send, 783 .pru_shutdown = div_shutdown, 784 .pru_sockaddr = in_getsockaddr, 785 .pru_sosend = sosend, 786 .pru_soreceive = soreceive, 787}; 788 789