1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1991, 1993, 1995 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ 62 */ 63 64#include <sys/param.h> 65#include <sys/systm.h> 66#include <sys/malloc.h> 67#include <sys/mbuf.h> 68#include <sys/domain.h> 69#include <sys/protosw.h> 70#include <sys/socket.h> 71#include <sys/socketvar.h> 72#include <sys/proc.h> 73#ifndef __APPLE__ 74#include <sys/jail.h> 75#endif 76#include <sys/kernel.h> 77#include <sys/sysctl.h> 78#include <sys/mcache.h> 79#include <sys/kauth.h> 80#include <sys/priv.h> 81#include <libkern/OSAtomic.h> 82#include <kern/locks.h> 83 84#include <machine/limits.h> 85 86#ifdef __APPLE__ 87#include <kern/zalloc.h> 88#endif 89 90#include <net/if.h> 91#include <net/if_types.h> 92#include <net/route.h> 93#include <net/flowhash.h> 94#include <net/flowadv.h> 95 96#include <netinet/in.h> 97#include <netinet/in_pcb.h> 98#include <netinet/in_var.h> 99#include <netinet/ip_var.h> 100#if INET6 101#include <netinet/ip6.h> 102#include <netinet6/ip6_var.h> 103#endif /* INET6 */ 104 105#if IPSEC 106#include <netinet6/ipsec.h> 107#include <netkey/key.h> 108#endif /* IPSEC */ 109 110#include <sys/kdebug.h> 111#include <sys/random.h> 112#include <dev/random/randomdev.h> 113 114#if IPSEC 115extern int ipsec_bypass; 116#endif 117 118#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) 119#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) 120 121struct in_addr zeroin_addr; 122 123/* 124 * These configure the range of local port addresses assigned to 125 * "unspecified" outgoing connections/packets/whatever. 126 */ 127int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 128int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 129#ifndef __APPLE__ 130int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 131int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 132#else 133int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 134int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 135#endif 136int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 137int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 138 139#define RANGECHK(var, min, max) \ 140 if ((var) < (min)) { (var) = (min); } \ 141 else if ((var) > (max)) { (var) = (max); } 142 143static int 144sysctl_net_ipport_check SYSCTL_HANDLER_ARGS 145{ 146#pragma unused(arg1, arg2) 147 int error = sysctl_handle_int(oidp, 148 oidp->oid_arg1, oidp->oid_arg2, req); 149 if (!error) { 150 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 151 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 152 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 153 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 154 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 155 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 156 } 157 return error; 158} 159 160#undef RANGECHK 161 162SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); 163 164SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 165 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 166SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 167 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 168SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 169 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 170SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 171 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 172SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 173 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 174SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 175 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 176 177extern int udp_use_randomport; 178extern int tcp_use_randomport; 179 180/* Structs used for flowhash computation */ 181struct inp_flowhash_key_addr { 182 union { 183 struct in_addr v4; 184 struct in6_addr v6; 185 u_int8_t addr8[16]; 186 u_int16_t addr16[8]; 187 u_int32_t addr32[4]; 188 } infha; 189}; 190 191struct inp_flowhash_key { 192 struct inp_flowhash_key_addr infh_laddr; 193 struct inp_flowhash_key_addr infh_faddr; 194 u_int32_t infh_lport; 195 u_int32_t infh_fport; 196 u_int32_t infh_af; 197 u_int32_t infh_proto; 198 u_int32_t infh_rand1; 199 u_int32_t infh_rand2; 200}; 201 202u_int32_t inp_hash_seed = 0; 203 204static __inline int infc_cmp(const struct inp_fc_entry *, 205 const struct inp_fc_entry *); 206lck_grp_t *inp_lck_grp; 207lck_grp_attr_t *inp_lck_grp_attr; 208lck_attr_t *inp_lck_attr; 209decl_lck_mtx_data(, inp_fc_lck); 210 211RB_HEAD(inp_fc_tree, inp_fc_entry) inp_fc_tree; 212RB_PROTOTYPE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp); 213 214RB_GENERATE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp); 215 216static unsigned int inp_fcezone_size; 217static struct zone *inp_fcezone; 218#define INP_FCEZONE_NAME "inp_fcezone" 219#define INP_FCEZONE_MAX 32 220 221/* 222 * in_pcb.c: manage the Protocol Control Blocks. 223 */ 224 225/* 226 * Initialize data structures required to deliver 227 * flow advisories. 228 */ 229void 230socket_flowadv_init(void) 231{ 232 inp_lck_grp_attr = lck_grp_attr_alloc_init(); 233 inp_lck_grp = lck_grp_alloc_init("inp_lck_grp", inp_lck_grp_attr); 234 235 inp_lck_attr = lck_attr_alloc_init(); 236 lck_mtx_init(&inp_fc_lck, inp_lck_grp, inp_lck_attr); 237 238 RB_INIT(&inp_fc_tree); 239 240 inp_fcezone_size = P2ROUNDUP(sizeof (struct inp_fc_entry), 241 sizeof (u_int64_t)); 242 inp_fcezone = zinit(inp_fcezone_size, 243 INP_FCEZONE_MAX * inp_fcezone_size, 0, INP_FCEZONE_NAME); 244 if (inp_fcezone == NULL) { 245 panic("%s: failed allocating %s", __func__, 246 INP_FCEZONE_NAME); 247 /* NOTREACHED */ 248 } 249 zone_change(inp_fcezone, Z_EXPAND, TRUE); 250 zone_change(inp_fcezone, Z_CALLERACCT, FALSE); 251} 252 253/* 254 * Allocate a PCB and associate it with the socket. 255 * 256 * Returns: 0 Success 257 * ENOBUFS 258 * ENOMEM 259 * ipsec_init_policy:??? [IPSEC] 260 */ 261int 262in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p) 263{ 264 struct inpcb *inp; 265 caddr_t temp; 266#if IPSEC 267#ifndef __APPLE__ 268 int error; 269#endif 270#endif 271#if CONFIG_MACF_NET 272 int mac_error; 273#endif 274 275 if (so->cached_in_sock_layer == 0) { 276#if TEMPDEBUG 277 printf("PCBALLOC calling zalloc for socket %x\n", so); 278#endif 279 inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone); 280 if (inp == NULL) 281 return (ENOBUFS); 282 bzero((caddr_t)inp, sizeof(*inp)); 283 } 284 else { 285#if TEMPDEBUG 286 printf("PCBALLOC reusing PCB for socket %x\n", so); 287#endif 288 inp = (struct inpcb *)(void *)so->so_saved_pcb; 289 temp = inp->inp_saved_ppcb; 290 bzero((caddr_t) inp, sizeof(*inp)); 291 inp->inp_saved_ppcb = temp; 292 } 293 294 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 295 inp->inp_pcbinfo = pcbinfo; 296 inp->inp_socket = so; 297#if CONFIG_MACF_NET 298 mac_error = mac_inpcb_label_init(inp, M_WAITOK); 299 if (mac_error != 0) { 300 if (so->cached_in_sock_layer == 0) 301 zfree(pcbinfo->ipi_zone, inp); 302 return (mac_error); 303 } 304 mac_inpcb_label_associate(so, inp); 305#endif 306 // make sure inp_stat is always 64bit aligned 307 inp->inp_stat = (struct inp_stat*)P2ROUNDUP(inp->inp_stat_store, sizeof(u_int64_t)); 308 if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) 309 + sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) { 310 panic("insufficient space to align inp_stat"); 311 } 312 313 so->so_pcb = (caddr_t)inp; 314 315 if (so->so_proto->pr_flags & PR_PCBLOCK) { 316 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->mtx_grp, pcbinfo->mtx_attr); 317 } 318 319#if IPSEC 320#ifndef __APPLE__ 321 if (ipsec_bypass == 0) { 322 error = ipsec_init_policy(so, &inp->inp_sp); 323 if (error != 0) { 324 zfree(pcbinfo->ipi_zone, inp); 325 return error; 326 } 327 } 328#endif 329#endif /*IPSEC*/ 330#if INET6 331 if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on) 332 inp->inp_flags |= IN6P_IPV6_V6ONLY; 333#endif 334 335#if INET6 336 if (ip6_auto_flowlabel) 337 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 338#endif 339 lck_rw_lock_exclusive(pcbinfo->mtx); 340 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 341 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 342 pcbinfo->ipi_count++; 343 lck_rw_done(pcbinfo->mtx); 344 return (0); 345} 346 347 348/* 349 in_pcblookup_local_and_cleanup does everything 350 in_pcblookup_local does but it checks for a socket 351 that's going away. Since we know that the lock is 352 held read+write when this funciton is called, we 353 can safely dispose of this socket like the slow 354 timer would usually do and return NULL. This is 355 great for bind. 356*/ 357struct inpcb* 358in_pcblookup_local_and_cleanup( 359 struct inpcbinfo *pcbinfo, 360 struct in_addr laddr, 361 u_int lport_arg, 362 int wild_okay) 363{ 364 struct inpcb *inp; 365 366 /* Perform normal lookup */ 367 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); 368 369 /* Check if we found a match but it's waiting to be disposed */ 370 if (inp && inp->inp_wantcnt == WNT_STOPUSING) { 371 struct socket *so = inp->inp_socket; 372 373 lck_mtx_lock(&inp->inpcb_mtx); 374 375 if (so->so_usecount == 0) { 376 if (inp->inp_state != INPCB_STATE_DEAD) 377 in_pcbdetach(inp); 378 in_pcbdispose(inp); 379 inp = NULL; 380 } 381 else { 382 lck_mtx_unlock(&inp->inpcb_mtx); 383 } 384 } 385 386 return inp; 387} 388 389#ifdef __APPLE_API_PRIVATE 390static void 391in_pcb_conflict_post_msg(u_int16_t port) 392{ 393 /* 394 * Radar 5523020 send a kernel event notification if a non-participating socket tries to bind 395 * the port a socket who has set SOF_NOTIFYCONFLICT owns. 396 */ 397 struct kev_msg ev_msg; 398 struct kev_in_portinuse in_portinuse; 399 400 bzero(&in_portinuse, sizeof(struct kev_in_portinuse)); 401 bzero(&ev_msg, sizeof(struct kev_msg)); 402 in_portinuse.port = ntohs(port); /* port in host order */ 403 in_portinuse.req_pid = proc_selfpid(); 404 ev_msg.vendor_code = KEV_VENDOR_APPLE; 405 ev_msg.kev_class = KEV_NETWORK_CLASS; 406 ev_msg.kev_subclass = KEV_INET_SUBCLASS; 407 ev_msg.event_code = KEV_INET_PORTINUSE; 408 ev_msg.dv[0].data_ptr = &in_portinuse; 409 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse); 410 ev_msg.dv[1].data_length = 0; 411 kev_post_msg(&ev_msg); 412} 413#endif 414/* 415 * Returns: 0 Success 416 * EADDRNOTAVAIL Address not available. 417 * EINVAL Invalid argument 418 * EAFNOSUPPORT Address family not supported [notdef] 419 * EACCES Permission denied 420 * EADDRINUSE Address in use 421 * EAGAIN Resource unavailable, try again 422 * priv_check_cred:EPERM Operation not permitted 423 */ 424int 425in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) 426{ 427 struct socket *so = inp->inp_socket; 428 unsigned short *lastport; 429 struct sockaddr_in *sin; 430 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 431 u_short lport = 0, rand_port = 0; 432 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 433 int error, randomport, conflict = 0; 434 kauth_cred_t cred; 435 436 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 437 return (EADDRNOTAVAIL); 438 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 439 return (EINVAL); 440 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 441 wild = 1; 442 socket_unlock(so, 0); /* keep reference on socket */ 443 lck_rw_lock_exclusive(pcbinfo->mtx); 444 if (nam) { 445 struct ifnet *outif = NULL; 446 447 sin = (struct sockaddr_in *)(void *)nam; 448 if (nam->sa_len != sizeof (*sin)) { 449 lck_rw_done(pcbinfo->mtx); 450 socket_lock(so, 0); 451 return (EINVAL); 452 } 453#ifdef notdef 454 /* 455 * We should check the family, but old programs 456 * incorrectly fail to initialize it. 457 */ 458 if (sin->sin_family != AF_INET) { 459 lck_rw_done(pcbinfo->mtx); 460 socket_lock(so, 0); 461 return (EAFNOSUPPORT); 462 } 463#endif 464 lport = sin->sin_port; 465 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 466 /* 467 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 468 * allow complete duplication of binding if 469 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 470 * and a multicast address is bound on both 471 * new and duplicated sockets. 472 */ 473 if (so->so_options & SO_REUSEADDR) 474 reuseport = SO_REUSEADDR|SO_REUSEPORT; 475 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 476 struct ifaddr *ifa; 477 sin->sin_port = 0; /* yech... */ 478 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) { 479 lck_rw_done(pcbinfo->mtx); 480 socket_lock(so, 0); 481 return (EADDRNOTAVAIL); 482 } 483 else { 484 IFA_LOCK(ifa); 485 outif = ifa->ifa_ifp; 486 IFA_UNLOCK(ifa); 487 IFA_REMREF(ifa); 488 } 489 } 490 if (lport) { 491 struct inpcb *t; 492 493 /* GROSS */ 494#if !CONFIG_EMBEDDED 495 if (ntohs(lport) < IPPORT_RESERVED) { 496 cred = kauth_cred_proc_ref(p); 497 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 498 kauth_cred_unref(&cred); 499 if (error != 0) { 500 lck_rw_done(pcbinfo->mtx); 501 socket_lock(so, 0); 502 return (EACCES); 503 } 504 } 505#endif 506 if (kauth_cred_getuid(so->so_cred) && 507 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 508 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo, 509 sin->sin_addr, lport, INPLOOKUP_WILDCARD); 510 if (t && 511 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 512 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 513 (t->inp_socket->so_options & 514 SO_REUSEPORT) == 0) && 515 (kauth_cred_getuid(so->so_cred) != 516 kauth_cred_getuid(t->inp_socket->so_cred)) && 517 ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0) && 518 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 519 ntohl(t->inp_laddr.s_addr) != INADDR_ANY)) 520 { 521#ifdef __APPLE_API_PRIVATE 522 523 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) 524 conflict = 1; 525 526 lck_rw_done(pcbinfo->mtx); 527 528 if (conflict) 529 in_pcb_conflict_post_msg(lport); 530#else 531 lck_rw_done(pcbinfo->mtx); 532#endif /* __APPLE_API_PRIVATE */ 533 534 socket_lock(so, 0); 535 return (EADDRINUSE); 536 } 537 } 538 t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr, 539 lport, wild); 540 if (t && 541 (reuseport & t->inp_socket->so_options) == 0) { 542#if INET6 543 if (ntohl(sin->sin_addr.s_addr) != 544 INADDR_ANY || 545 ntohl(t->inp_laddr.s_addr) != 546 INADDR_ANY || 547 INP_SOCKAF(so) != AF_INET6 || 548 INP_SOCKAF(t->inp_socket) != AF_INET6) 549#endif /* INET6 */ 550 { 551#ifdef __APPLE_API_PRIVATE 552 553 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) 554 conflict = 1; 555 556 lck_rw_done(pcbinfo->mtx); 557 558 if (conflict) 559 in_pcb_conflict_post_msg(lport); 560#else 561 lck_rw_done(pcbinfo->mtx); 562#endif /* __APPLE_API_PRIVATE */ 563 socket_lock(so, 0); 564 return (EADDRINUSE); 565 } 566 } 567 } 568 inp->inp_laddr = sin->sin_addr; 569 inp->inp_last_outifp = outif; 570 } 571 if (lport == 0) { 572 u_short first, last; 573 int count; 574 575 randomport = (so->so_flags & SOF_BINDRANDOMPORT) || 576 (so->so_type == SOCK_STREAM ? tcp_use_randomport : udp_use_randomport); 577 578 inp->inp_flags |= INP_ANONPORT; 579 580 if (inp->inp_flags & INP_HIGHPORT) { 581 first = ipport_hifirstauto; /* sysctl */ 582 last = ipport_hilastauto; 583 lastport = &pcbinfo->lasthi; 584 } else if (inp->inp_flags & INP_LOWPORT) { 585 cred = kauth_cred_proc_ref(p); 586 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 587 kauth_cred_unref(&cred); 588 if (error != 0) { 589 lck_rw_done(pcbinfo->mtx); 590 socket_lock(so, 0); 591 return error; 592 } 593 first = ipport_lowfirstauto; /* 1023 */ 594 last = ipport_lowlastauto; /* 600 */ 595 lastport = &pcbinfo->lastlow; 596 } else { 597 first = ipport_firstauto; /* sysctl */ 598 last = ipport_lastauto; 599 lastport = &pcbinfo->lastport; 600 } 601 /* No point in randomizing if only one port is available */ 602 603 if (first == last) 604 randomport = 0; 605 /* 606 * Simple check to ensure all ports are not used up causing 607 * a deadlock here. 608 * 609 * We split the two cases (up and down) so that the direction 610 * is not being tested on each round of the loop. 611 */ 612 if (first > last) { 613 /* 614 * counting down 615 */ 616 if (randomport) { 617 read_random(&rand_port, sizeof(rand_port)); 618 *lastport = first - (rand_port % (first - last)); 619 } 620 count = first - last; 621 622 do { 623 if (count-- < 0) { /* completely used? */ 624 lck_rw_done(pcbinfo->mtx); 625 socket_lock(so, 0); 626 inp->inp_laddr.s_addr = INADDR_ANY; 627 inp->inp_last_outifp = NULL; 628 return (EADDRNOTAVAIL); 629 } 630 --*lastport; 631 if (*lastport > first || *lastport < last) 632 *lastport = first; 633 lport = htons(*lastport); 634 } while (in_pcblookup_local_and_cleanup(pcbinfo, 635 inp->inp_laddr, lport, wild)); 636 } else { 637 /* 638 * counting up 639 */ 640 if (randomport) { 641 read_random(&rand_port, sizeof(rand_port)); 642 *lastport = first + (rand_port % (first - last)); 643 } 644 count = last - first; 645 646 do { 647 if (count-- < 0) { /* completely used? */ 648 lck_rw_done(pcbinfo->mtx); 649 socket_lock(so, 0); 650 inp->inp_laddr.s_addr = INADDR_ANY; 651 inp->inp_last_outifp = NULL; 652 return (EADDRNOTAVAIL); 653 } 654 ++*lastport; 655 if (*lastport < first || *lastport > last) 656 *lastport = first; 657 lport = htons(*lastport); 658 } while (in_pcblookup_local_and_cleanup(pcbinfo, 659 inp->inp_laddr, lport, wild)); 660 } 661 } 662 socket_lock(so, 0); 663 inp->inp_lport = lport; 664 if (in_pcbinshash(inp, 1) != 0) { 665 inp->inp_laddr.s_addr = INADDR_ANY; 666 inp->inp_lport = 0; 667 inp->inp_last_outifp = NULL; 668 lck_rw_done(pcbinfo->mtx); 669 return (EAGAIN); 670 } 671 lck_rw_done(pcbinfo->mtx); 672 sflt_notify(so, sock_evt_bound, NULL); 673 return (0); 674} 675 676/* 677 * Transform old in_pcbconnect() into an inner subroutine for new 678 * in_pcbconnect(): Do some validity-checking on the remote 679 * address (in mbuf 'nam') and then determine local host address 680 * (i.e., which interface) to use to access that remote host. 681 * 682 * This preserves definition of in_pcbconnect(), while supporting a 683 * slightly different version for T/TCP. (This is more than 684 * a bit of a kludge, but cleaning up the internal interfaces would 685 * have forced minor changes in every protocol). 686 * 687 * Returns: 0 Success 688 * EINVAL Invalid argument 689 * EAFNOSUPPORT Address family not supported 690 * EADDRNOTAVAIL Address not available 691 */ 692int 693in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 694 struct sockaddr_in *plocal_sin, struct ifnet **outif) 695{ 696 struct in_ifaddr *ia; 697 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; 698 699 if (nam->sa_len != sizeof (*sin)) 700 return (EINVAL); 701 if (sin->sin_family != AF_INET) 702 return (EAFNOSUPPORT); 703 if (sin->sin_port == 0) 704 return (EADDRNOTAVAIL); 705 706 lck_rw_lock_shared(in_ifaddr_rwlock); 707 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 708 ia = TAILQ_FIRST(&in_ifaddrhead); 709 /* 710 * If the destination address is INADDR_ANY, 711 * use the primary local address. 712 * If the supplied address is INADDR_BROADCAST, 713 * and the primary interface supports broadcast, 714 * choose the broadcast address for that interface. 715 */ 716 IFA_LOCK_SPIN(&ia->ia_ifa); 717 if (sin->sin_addr.s_addr == INADDR_ANY) 718 sin->sin_addr = IA_SIN(ia)->sin_addr; 719 else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST && 720 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 721 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 722 IFA_UNLOCK(&ia->ia_ifa); 723 ia = NULL; 724 } 725 lck_rw_done(in_ifaddr_rwlock); 726 727 if (inp->inp_laddr.s_addr == INADDR_ANY) { 728 struct route *ro; 729 unsigned int ifscope = IFSCOPE_NONE; 730 unsigned int nocell; 731 /* 732 * If the socket is bound to a specifc interface, the 733 * optional scoped takes precedence over that if it 734 * is set by the caller. 735 */ 736 ia = (struct in_ifaddr *)0; 737 738 if (outif != NULL && *outif != NULL) 739 ifscope = (*outif)->if_index; 740 else if (inp->inp_flags & INP_BOUND_IF) 741 ifscope = inp->inp_boundifp->if_index; 742 743 nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0; 744 /* 745 * If route is known or can be allocated now, 746 * our src addr is taken from the i/f, else punt. 747 * Note that we should check the address family of the cached 748 * destination, in case of sharing the cache with IPv6. 749 */ 750 ro = &inp->inp_route; 751 if (ro->ro_rt != NULL) 752 RT_LOCK_SPIN(ro->ro_rt); 753 if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET || 754 satosin(&ro->ro_dst)->sin_addr.s_addr != 755 sin->sin_addr.s_addr || 756 inp->inp_socket->so_options & SO_DONTROUTE || 757 ro->ro_rt->generation_id != route_generation)) { 758 RT_UNLOCK(ro->ro_rt); 759 rtfree(ro->ro_rt); 760 ro->ro_rt = NULL; 761 } 762 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ 763 (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { 764 if (ro->ro_rt != NULL) 765 RT_UNLOCK(ro->ro_rt); 766 /* No route yet, so try to acquire one */ 767 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 768 ro->ro_dst.sa_family = AF_INET; 769 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 770 ((struct sockaddr_in *)(void *)&ro->ro_dst)->sin_addr = 771 sin->sin_addr; 772 rtalloc_scoped(ro, ifscope); 773 if (ro->ro_rt != NULL) 774 RT_LOCK_SPIN(ro->ro_rt); 775 } 776 /* 777 * If the route points to a cellular interface and the 778 * caller forbids our using interfaces of such type, 779 * pretend that there is no route. 780 */ 781 if (nocell && ro->ro_rt != NULL) { 782 RT_LOCK_ASSERT_HELD(ro->ro_rt); 783 if (ro->ro_rt->rt_ifp->if_type == IFT_CELLULAR) { 784 RT_UNLOCK(ro->ro_rt); 785 rtfree(ro->ro_rt); 786 ro->ro_rt = NULL; 787 soevent(inp->inp_socket, 788 (SO_FILT_HINT_LOCKED | 789 SO_FILT_HINT_IFDENIED)); 790 } 791 } 792 /* 793 * If we found a route, use the address 794 * corresponding to the outgoing interface 795 * unless it is the loopback (in case a route 796 * to our address on another net goes to loopback). 797 */ 798 if (ro->ro_rt != NULL) { 799 /* Become a regular mutex */ 800 RT_CONVERT_LOCK(ro->ro_rt); 801 if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 802 ia = ifatoia(ro->ro_rt->rt_ifa); 803 if (ia) { 804 IFA_ADDREF(&ia->ia_ifa); 805 } 806 } 807 RT_UNLOCK(ro->ro_rt); 808 } 809 if (ia == 0) { 810 u_short fport = sin->sin_port; 811 812 sin->sin_port = 0; 813 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 814 if (ia == 0) { 815 ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin), 816 ifscope)); 817 } 818 sin->sin_port = fport; 819 if (ia == 0) { 820 lck_rw_lock_shared(in_ifaddr_rwlock); 821 ia = TAILQ_FIRST(&in_ifaddrhead); 822 if (ia) 823 IFA_ADDREF(&ia->ia_ifa); 824 lck_rw_done(in_ifaddr_rwlock); 825 } 826 /* 827 * If the source address belongs to a cellular interface 828 * and the socket forbids our using interfaces of such 829 * type, pretend that there is no source address. 830 */ 831 if (nocell && ia != NULL && 832 ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) { 833 IFA_REMREF(&ia->ia_ifa); 834 ia = NULL; 835 soevent(inp->inp_socket, 836 (SO_FILT_HINT_LOCKED | 837 SO_FILT_HINT_IFDENIED)); 838 } 839 if (ia == 0) 840 return (EADDRNOTAVAIL); 841 } 842 /* 843 * If the destination address is multicast and an outgoing 844 * interface has been set as a multicast option, use the 845 * address of that interface as our source address. 846 */ 847 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 848 inp->inp_moptions != NULL) { 849 struct ip_moptions *imo; 850 struct ifnet *ifp; 851 852 imo = inp->inp_moptions; 853 IMO_LOCK(imo); 854 if (imo->imo_multicast_ifp != NULL && (ia == NULL || 855 ia->ia_ifp != imo->imo_multicast_ifp)) { 856 ifp = imo->imo_multicast_ifp; 857 if (ia) 858 IFA_REMREF(&ia->ia_ifa); 859 lck_rw_lock_shared(in_ifaddr_rwlock); 860 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 861 if (ia->ia_ifp == ifp) 862 break; 863 } 864 if (ia) 865 IFA_ADDREF(&ia->ia_ifa); 866 lck_rw_done(in_ifaddr_rwlock); 867 if (ia == 0) { 868 IMO_UNLOCK(imo); 869 return (EADDRNOTAVAIL); 870 } 871 } 872 IMO_UNLOCK(imo); 873 } 874 /* 875 * Don't do pcblookup call here; return interface in plocal_sin 876 * and exit to caller, that will do the lookup. 877 */ 878 IFA_LOCK_SPIN(&ia->ia_ifa); 879 *plocal_sin = ia->ia_addr; 880 if (outif != NULL) 881 *outif = ia->ia_ifp; 882 IFA_UNLOCK(&ia->ia_ifa); 883 IFA_REMREF(&ia->ia_ifa); 884 } 885 return(0); 886} 887 888/* 889 * Outer subroutine: 890 * Connect from a socket to a specified address. 891 * Both address and port must be specified in argument sin. 892 * If don't have a local address for this socket yet, 893 * then pick one. 894 */ 895int 896in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, 897 struct ifnet **outif) 898{ 899 struct sockaddr_in ifaddr; 900 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; 901 struct inpcb *pcb; 902 int error; 903 904 /* 905 * Call inner routine, to assign local interface address. 906 */ 907 if ((error = in_pcbladdr(inp, nam, &ifaddr, outif)) != 0) 908 return(error); 909 910 socket_unlock(inp->inp_socket, 0); 911 pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, 912 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr.sin_addr, 913 inp->inp_lport, 0, NULL); 914 socket_lock(inp->inp_socket, 0); 915 916 /* Check if the socket is still in a valid state. When we unlock this 917 * embryonic socket, it can get aborted if another thread is closing 918 * the listener (radar 7947600). 919 */ 920 if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) { 921 return ECONNREFUSED; 922 } 923 924 if (pcb != NULL) { 925 in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0); 926 return (EADDRINUSE); 927 } 928 if (inp->inp_laddr.s_addr == INADDR_ANY) { 929 if (inp->inp_lport == 0) { 930 error = in_pcbbind(inp, (struct sockaddr *)0, p); 931 if (error) 932 return (error); 933 } 934 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { 935 /*lock inversion issue, mostly with udp multicast packets */ 936 socket_unlock(inp->inp_socket, 0); 937 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); 938 socket_lock(inp->inp_socket, 0); 939 } 940 inp->inp_laddr = ifaddr.sin_addr; 941 inp->inp_last_outifp = (outif != NULL) ? *outif : NULL; 942 inp->inp_flags |= INP_INADDR_ANY; 943 } 944 else { 945 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { 946 /*lock inversion issue, mostly with udp multicast packets */ 947 socket_unlock(inp->inp_socket, 0); 948 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); 949 socket_lock(inp->inp_socket, 0); 950 } 951 } 952 inp->inp_faddr = sin->sin_addr; 953 inp->inp_fport = sin->sin_port; 954 in_pcbrehash(inp); 955 lck_rw_done(inp->inp_pcbinfo->mtx); 956 return (0); 957} 958 959void 960in_pcbdisconnect(struct inpcb *inp) 961{ 962 963 inp->inp_faddr.s_addr = INADDR_ANY; 964 inp->inp_fport = 0; 965 966 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { 967 /*lock inversion issue, mostly with udp multicast packets */ 968 socket_unlock(inp->inp_socket, 0); 969 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); 970 socket_lock(inp->inp_socket, 0); 971 } 972 973 in_pcbrehash(inp); 974 lck_rw_done(inp->inp_pcbinfo->mtx); 975 976 if (inp->inp_socket->so_state & SS_NOFDREF) 977 in_pcbdetach(inp); 978} 979 980void 981in_pcbdetach(struct inpcb *inp) 982{ 983 struct socket *so = inp->inp_socket; 984 985 if (so->so_pcb == 0) { /* we've been called twice */ 986 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n", 987 inp, so, so->so_proto->pr_protocol); 988 } 989 990#if IPSEC 991 if (ipsec_bypass == 0) { 992 ipsec4_delete_pcbpolicy(inp); 993 } 994#endif /*IPSEC*/ 995 996 /* mark socket state as dead */ 997 if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) 998 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol); 999 1000#if TEMPDEBUG 1001 if (so->cached_in_sock_layer) 1002 printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags); 1003 else 1004 printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags); 1005#endif 1006 if ((so->so_flags & SOF_PCBCLEARING) == 0) { 1007 struct rtentry *rt; 1008 struct ip_moptions *imo; 1009 1010 inp->inp_vflag = 0; 1011 if (inp->inp_options) 1012 (void)m_free(inp->inp_options); 1013 if ((rt = inp->inp_route.ro_rt) != NULL) { 1014 inp->inp_route.ro_rt = NULL; 1015 rtfree(rt); 1016 } 1017 imo = inp->inp_moptions; 1018 inp->inp_moptions = NULL; 1019 if (imo != NULL) 1020 IMO_REMREF(imo); 1021 sofreelastref(so, 0); 1022 inp->inp_state = INPCB_STATE_DEAD; 1023 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */ 1024 } 1025} 1026 1027 1028void 1029in_pcbdispose(struct inpcb *inp) 1030{ 1031 struct socket *so = inp->inp_socket; 1032 struct inpcbinfo *ipi = inp->inp_pcbinfo; 1033 1034#if TEMPDEBUG 1035 if (inp->inp_state != INPCB_STATE_DEAD) { 1036 printf("in_pcbdispose: not dead yet? so=%p\n", so); 1037 } 1038#endif 1039 if (so && so->so_usecount != 0) 1040 panic("%s: so %p so_usecount %d so_lockhistory %s\n", 1041 __func__, so, so->so_usecount, 1042 (so != NULL) ? solockhistory_nr(so) : "--"); 1043 1044 lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE); 1045 1046 inp->inp_gencnt = ++ipi->ipi_gencnt; 1047 /* access ipi in in_pcbremlists */ 1048 in_pcbremlists(inp); 1049 1050 if (so) { 1051 if (so->so_proto->pr_flags & PR_PCBLOCK) { 1052 sofreelastref(so, 0); 1053 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) { 1054#if TEMPDEBUG 1055 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n", 1056 so, so->so_rcv.sb_cc, so->so_snd.sb_cc); 1057#endif 1058 sbrelease(&so->so_rcv); 1059 sbrelease(&so->so_snd); 1060 } 1061 if (so->so_head != NULL) 1062 panic("in_pcbdispose, so=%p head still exist\n", so); 1063 lck_mtx_unlock(&inp->inpcb_mtx); 1064 lck_mtx_destroy(&inp->inpcb_mtx, ipi->mtx_grp); 1065 } 1066 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */ 1067 so->so_saved_pcb = (caddr_t) inp; 1068 so->so_pcb = 0; 1069 inp->inp_socket = 0; 1070#if CONFIG_MACF_NET 1071 mac_inpcb_label_destroy(inp); 1072#endif 1073 /* 1074 * In case there a route cached after a detach (possible 1075 * in the tcp case), make sure that it is freed before 1076 * we deallocate the structure. 1077 */ 1078 if (inp->inp_route.ro_rt != NULL) { 1079 rtfree(inp->inp_route.ro_rt); 1080 inp->inp_route.ro_rt = NULL; 1081 } 1082 if (so->cached_in_sock_layer == 0) { 1083 zfree(ipi->ipi_zone, inp); 1084 } 1085 sodealloc(so); 1086 } 1087#if TEMPDEBUG 1088 else 1089 printf("in_pcbdispose: no socket for inp=%p\n", inp); 1090#endif 1091} 1092 1093/* 1094 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 1095 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 1096 * in struct pr_usrreqs, so that protocols can just reference then directly 1097 * without the need for a wrapper function. The socket must have a valid 1098 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 1099 * except through a kernel programming error, so it is acceptable to panic 1100 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 1101 * because there actually /is/ a programming error somewhere... XXX) 1102 * 1103 * Returns: 0 Success 1104 * ENOBUFS No buffer space available 1105 * ECONNRESET Connection reset 1106 */ 1107int 1108in_setsockaddr(struct socket *so, struct sockaddr **nam) 1109{ 1110 struct inpcb *inp; 1111 struct sockaddr_in *sin; 1112 1113 /* 1114 * Do the malloc first in case it blocks. 1115 */ 1116 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); 1117 if (sin == NULL) 1118 return ENOBUFS; 1119 bzero(sin, sizeof *sin); 1120 sin->sin_family = AF_INET; 1121 sin->sin_len = sizeof(*sin); 1122 1123 inp = sotoinpcb(so); 1124 if (!inp) { 1125 FREE(sin, M_SONAME); 1126 return ECONNRESET; 1127 } 1128 sin->sin_port = inp->inp_lport; 1129 sin->sin_addr = inp->inp_laddr; 1130 1131 *nam = (struct sockaddr *)sin; 1132 return 0; 1133} 1134 1135int 1136in_setpeeraddr(struct socket *so, struct sockaddr **nam) 1137{ 1138 struct inpcb *inp; 1139 struct sockaddr_in *sin; 1140 1141 /* 1142 * Do the malloc first in case it blocks. 1143 */ 1144 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); 1145 if (sin == NULL) 1146 return ENOBUFS; 1147 bzero((caddr_t)sin, sizeof (*sin)); 1148 sin->sin_family = AF_INET; 1149 sin->sin_len = sizeof(*sin); 1150 1151 inp = sotoinpcb(so); 1152 if (!inp) { 1153 FREE(sin, M_SONAME); 1154 return ECONNRESET; 1155 } 1156 sin->sin_port = inp->inp_fport; 1157 sin->sin_addr = inp->inp_faddr; 1158 1159 *nam = (struct sockaddr *)sin; 1160 return 0; 1161} 1162 1163void 1164in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1165 int errno, void (*notify)(struct inpcb *, int)) 1166{ 1167 struct inpcb *inp; 1168 1169 lck_rw_lock_shared(pcbinfo->mtx); 1170 1171 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 1172#if INET6 1173 if ((inp->inp_vflag & INP_IPV4) == 0) 1174 continue; 1175#endif 1176 if (inp->inp_faddr.s_addr != faddr.s_addr || 1177 inp->inp_socket == NULL) 1178 continue; 1179 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) 1180 continue; 1181 socket_lock(inp->inp_socket, 1); 1182 (*notify)(inp, errno); 1183 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1); 1184 socket_unlock(inp->inp_socket, 1); 1185 } 1186 lck_rw_done(pcbinfo->mtx); 1187} 1188 1189/* 1190 * Check for alternatives when higher level complains 1191 * about service problems. For now, invalidate cached 1192 * routing information. If the route was created dynamically 1193 * (by a redirect), time to try a default gateway again. 1194 */ 1195void 1196in_losing(struct inpcb *inp) 1197{ 1198 struct rtentry *rt; 1199 struct rt_addrinfo info; 1200 1201 if ((rt = inp->inp_route.ro_rt) != NULL) { 1202 struct in_ifaddr *ia; 1203 1204 bzero((caddr_t)&info, sizeof(info)); 1205 RT_LOCK(rt); 1206 info.rti_info[RTAX_DST] = 1207 (struct sockaddr *)&inp->inp_route.ro_dst; 1208 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1209 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1210 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 1211 if (rt->rt_flags & RTF_DYNAMIC) { 1212 /* 1213 * Prevent another thread from modifying rt_key, 1214 * rt_gateway via rt_setgate() after rt_lock is 1215 * dropped by marking the route as defunct. 1216 */ 1217 rt->rt_flags |= RTF_CONDEMNED; 1218 RT_UNLOCK(rt); 1219 (void) rtrequest(RTM_DELETE, rt_key(rt), 1220 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 1221 (struct rtentry **)0); 1222 } else { 1223 RT_UNLOCK(rt); 1224 } 1225 /* if the address is gone keep the old route in the pcb */ 1226 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { 1227 inp->inp_route.ro_rt = NULL; 1228 rtfree(rt); 1229 IFA_REMREF(&ia->ia_ifa); 1230 } 1231 /* 1232 * A new route can be allocated 1233 * the next time output is attempted. 1234 */ 1235 } 1236} 1237 1238/* 1239 * After a routing change, flush old routing 1240 * and allocate a (hopefully) better one. 1241 */ 1242void 1243in_rtchange(struct inpcb *inp, __unused int errno) 1244{ 1245 struct rtentry *rt; 1246 1247 if ((rt = inp->inp_route.ro_rt) != NULL) { 1248 struct in_ifaddr *ia; 1249 1250 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) { 1251 return; /* we can't remove the route now. not sure if still ok to use src */ 1252 } 1253 IFA_REMREF(&ia->ia_ifa); 1254 rtfree(rt); 1255 inp->inp_route.ro_rt = NULL; 1256 /* 1257 * A new route can be allocated the next time 1258 * output is attempted. 1259 */ 1260 } 1261} 1262 1263/* 1264 * Lookup a PCB based on the local address and port. 1265 */ 1266struct inpcb * 1267in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1268 unsigned int lport_arg, int wild_okay) 1269{ 1270 struct inpcb *inp; 1271 int matchwild = 3, wildcard; 1272 u_short lport = lport_arg; 1273 1274 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0); 1275 1276 if (!wild_okay) { 1277 struct inpcbhead *head; 1278 /* 1279 * Look for an unconnected (wildcard foreign addr) PCB that 1280 * matches the local address and port we're looking for. 1281 */ 1282 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1283 LIST_FOREACH(inp, head, inp_hash) { 1284#if INET6 1285 if ((inp->inp_vflag & INP_IPV4) == 0) 1286 continue; 1287#endif 1288 if (inp->inp_faddr.s_addr == INADDR_ANY && 1289 inp->inp_laddr.s_addr == laddr.s_addr && 1290 inp->inp_lport == lport) { 1291 /* 1292 * Found. 1293 */ 1294 return (inp); 1295 } 1296 } 1297 /* 1298 * Not found. 1299 */ 1300 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0); 1301 return (NULL); 1302 } else { 1303 struct inpcbporthead *porthash; 1304 struct inpcbport *phd; 1305 struct inpcb *match = NULL; 1306 /* 1307 * Best fit PCB lookup. 1308 * 1309 * First see if this local port is in use by looking on the 1310 * port hash list. 1311 */ 1312 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 1313 pcbinfo->porthashmask)]; 1314 LIST_FOREACH(phd, porthash, phd_hash) { 1315 if (phd->phd_port == lport) 1316 break; 1317 } 1318 if (phd != NULL) { 1319 /* 1320 * Port is in use by one or more PCBs. Look for best 1321 * fit. 1322 */ 1323 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1324 wildcard = 0; 1325#if INET6 1326 if ((inp->inp_vflag & INP_IPV4) == 0) 1327 continue; 1328#endif 1329 if (inp->inp_faddr.s_addr != INADDR_ANY) 1330 wildcard++; 1331 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1332 if (laddr.s_addr == INADDR_ANY) 1333 wildcard++; 1334 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1335 continue; 1336 } else { 1337 if (laddr.s_addr != INADDR_ANY) 1338 wildcard++; 1339 } 1340 if (wildcard < matchwild) { 1341 match = inp; 1342 matchwild = wildcard; 1343 if (matchwild == 0) { 1344 break; 1345 } 1346 } 1347 } 1348 } 1349 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0); 1350 return (match); 1351 } 1352} 1353 1354/* 1355 * Check if PCB exists in hash list. 1356 */ 1357int 1358in_pcblookup_hash_exists( 1359 struct inpcbinfo *pcbinfo, 1360 struct in_addr faddr, 1361 u_int fport_arg, 1362 struct in_addr laddr, 1363 u_int lport_arg, 1364 int wildcard, 1365 uid_t *uid, 1366 gid_t *gid, 1367 struct ifnet *ifp) 1368{ 1369 struct inpcbhead *head; 1370 struct inpcb *inp; 1371 u_short fport = fport_arg, lport = lport_arg; 1372 int found; 1373 1374 *uid = UID_MAX; 1375 *gid = GID_MAX; 1376 1377 /* 1378 * We may have found the pcb in the last lookup - check this first. 1379 */ 1380 1381 lck_rw_lock_shared(pcbinfo->mtx); 1382 1383 /* 1384 * First look for an exact match. 1385 */ 1386 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, 1387 pcbinfo->hashmask)]; 1388 LIST_FOREACH(inp, head, inp_hash) { 1389#if INET6 1390 if ((inp->inp_vflag & INP_IPV4) == 0) 1391 continue; 1392#endif 1393 if (ip_restrictrecvif && ifp != NULL && 1394 (ifp->if_eflags & IFEF_RESTRICTED_RECV) && 1395 !(inp->inp_flags & INP_RECV_ANYIF)) 1396 continue; 1397 1398 if (inp->inp_faddr.s_addr == faddr.s_addr && 1399 inp->inp_laddr.s_addr == laddr.s_addr && 1400 inp->inp_fport == fport && 1401 inp->inp_lport == lport) { 1402 if ((found = (inp->inp_socket != NULL))) { 1403 /* 1404 * Found. 1405 */ 1406 *uid = kauth_cred_getuid( 1407 inp->inp_socket->so_cred); 1408 *gid = kauth_cred_getgid( 1409 inp->inp_socket->so_cred); 1410 } 1411 lck_rw_done(pcbinfo->mtx); 1412 return (found); 1413 } 1414 } 1415 if (wildcard) { 1416 struct inpcb *local_wild = NULL; 1417#if INET6 1418 struct inpcb *local_wild_mapped = NULL; 1419#endif 1420 1421 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, 1422 pcbinfo->hashmask)]; 1423 LIST_FOREACH(inp, head, inp_hash) { 1424#if INET6 1425 if ((inp->inp_vflag & INP_IPV4) == 0) 1426 continue; 1427#endif 1428 if (ip_restrictrecvif && ifp != NULL && 1429 (ifp->if_eflags & IFEF_RESTRICTED_RECV) && 1430 !(inp->inp_flags & INP_RECV_ANYIF)) 1431 continue; 1432 1433 if (inp->inp_faddr.s_addr == INADDR_ANY && 1434 inp->inp_lport == lport) { 1435 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1436 if ((found = (inp->inp_socket != NULL))) { 1437 *uid = kauth_cred_getuid( 1438 inp->inp_socket->so_cred); 1439 *gid = kauth_cred_getgid( 1440 inp->inp_socket->so_cred); 1441 } 1442 lck_rw_done(pcbinfo->mtx); 1443 return (found); 1444 } 1445 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1446#if INET6 1447 if (inp->inp_socket && 1448 INP_CHECK_SOCKAF(inp->inp_socket, 1449 AF_INET6)) 1450 local_wild_mapped = inp; 1451 else 1452#endif /* INET6 */ 1453 local_wild = inp; 1454 } 1455 } 1456 } 1457 if (local_wild == NULL) { 1458#if INET6 1459 if (local_wild_mapped != NULL) { 1460 if ((found = (local_wild_mapped->inp_socket != NULL))) { 1461 *uid = kauth_cred_getuid( 1462 local_wild_mapped->inp_socket->so_cred); 1463 *gid = kauth_cred_getgid( 1464 local_wild_mapped->inp_socket->so_cred); 1465 } 1466 lck_rw_done(pcbinfo->mtx); 1467 return (found); 1468 } 1469#endif /* INET6 */ 1470 lck_rw_done(pcbinfo->mtx); 1471 return (0); 1472 } 1473 if (local_wild != NULL) { 1474 if ((found = (local_wild->inp_socket != NULL))) { 1475 *uid = kauth_cred_getuid( 1476 local_wild->inp_socket->so_cred); 1477 *gid = kauth_cred_getgid( 1478 local_wild->inp_socket->so_cred); 1479 } 1480 lck_rw_done(pcbinfo->mtx); 1481 return (found); 1482 } 1483 } 1484 1485 /* 1486 * Not found. 1487 */ 1488 lck_rw_done(pcbinfo->mtx); 1489 return (0); 1490} 1491 1492/* 1493 * Lookup PCB in hash list. 1494 */ 1495struct inpcb * 1496in_pcblookup_hash( 1497 struct inpcbinfo *pcbinfo, 1498 struct in_addr faddr, 1499 u_int fport_arg, 1500 struct in_addr laddr, 1501 u_int lport_arg, 1502 int wildcard, 1503 struct ifnet *ifp) 1504{ 1505 struct inpcbhead *head; 1506 struct inpcb *inp; 1507 u_short fport = fport_arg, lport = lport_arg; 1508 1509 /* 1510 * We may have found the pcb in the last lookup - check this first. 1511 */ 1512 1513 lck_rw_lock_shared(pcbinfo->mtx); 1514 1515 /* 1516 * First look for an exact match. 1517 */ 1518 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 1519 LIST_FOREACH(inp, head, inp_hash) { 1520#if INET6 1521 if ((inp->inp_vflag & INP_IPV4) == 0) 1522 continue; 1523#endif 1524 if (ip_restrictrecvif && ifp != NULL && 1525 (ifp->if_eflags & IFEF_RESTRICTED_RECV) && 1526 !(inp->inp_flags & INP_RECV_ANYIF)) 1527 continue; 1528 1529 if (inp->inp_faddr.s_addr == faddr.s_addr && 1530 inp->inp_laddr.s_addr == laddr.s_addr && 1531 inp->inp_fport == fport && 1532 inp->inp_lport == lport) { 1533 /* 1534 * Found. 1535 */ 1536 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { 1537 lck_rw_done(pcbinfo->mtx); 1538 return (inp); 1539 } 1540 else { /* it's there but dead, say it isn't found */ 1541 lck_rw_done(pcbinfo->mtx); 1542 return (NULL); 1543 } 1544 } 1545 } 1546 if (wildcard) { 1547 struct inpcb *local_wild = NULL; 1548#if INET6 1549 struct inpcb *local_wild_mapped = NULL; 1550#endif 1551 1552 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1553 LIST_FOREACH(inp, head, inp_hash) { 1554#if INET6 1555 if ((inp->inp_vflag & INP_IPV4) == 0) 1556 continue; 1557#endif 1558 if (ip_restrictrecvif && ifp != NULL && 1559 (ifp->if_eflags & IFEF_RESTRICTED_RECV) && 1560 !(inp->inp_flags & INP_RECV_ANYIF)) 1561 continue; 1562 1563 if (inp->inp_faddr.s_addr == INADDR_ANY && 1564 inp->inp_lport == lport) { 1565 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1566 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { 1567 lck_rw_done(pcbinfo->mtx); 1568 return (inp); 1569 } 1570 else { /* it's there but dead, say it isn't found */ 1571 lck_rw_done(pcbinfo->mtx); 1572 return (NULL); 1573 } 1574 } 1575 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1576#if INET6 1577 if (INP_CHECK_SOCKAF(inp->inp_socket, 1578 AF_INET6)) 1579 local_wild_mapped = inp; 1580 else 1581#endif /* INET6 */ 1582 local_wild = inp; 1583 } 1584 } 1585 } 1586 if (local_wild == NULL) { 1587#if INET6 1588 if (local_wild_mapped != NULL) { 1589 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) { 1590 lck_rw_done(pcbinfo->mtx); 1591 return (local_wild_mapped); 1592 } 1593 else { /* it's there but dead, say it isn't found */ 1594 lck_rw_done(pcbinfo->mtx); 1595 return (NULL); 1596 } 1597 } 1598#endif /* INET6 */ 1599 lck_rw_done(pcbinfo->mtx); 1600 return (NULL); 1601 } 1602 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { 1603 lck_rw_done(pcbinfo->mtx); 1604 return (local_wild); 1605 } 1606 else { /* it's there but dead, say it isn't found */ 1607 lck_rw_done(pcbinfo->mtx); 1608 return (NULL); 1609 } 1610 } 1611 1612 /* 1613 * Not found. 1614 */ 1615 lck_rw_done(pcbinfo->mtx); 1616 return (NULL); 1617} 1618 1619/* 1620 * Insert PCB onto various hash lists. 1621 */ 1622int 1623in_pcbinshash(struct inpcb *inp, int locked) 1624{ 1625 struct inpcbhead *pcbhash; 1626 struct inpcbporthead *pcbporthash; 1627 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1628 struct inpcbport *phd; 1629 u_int32_t hashkey_faddr; 1630 1631 if (!locked) { 1632 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { 1633 /*lock inversion issue, mostly with udp multicast packets */ 1634 socket_unlock(inp->inp_socket, 0); 1635 lck_rw_lock_exclusive(pcbinfo->mtx); 1636 socket_lock(inp->inp_socket, 0); 1637 if (inp->inp_state == INPCB_STATE_DEAD) { 1638 /* The socket got dropped when it was unlocked */ 1639 lck_rw_done(pcbinfo->mtx); 1640 return(ECONNABORTED); 1641 } 1642 } 1643 } 1644 1645#if INET6 1646 if (inp->inp_vflag & INP_IPV6) 1647 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1648 else 1649#endif /* INET6 */ 1650 hashkey_faddr = inp->inp_faddr.s_addr; 1651 1652 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask); 1653 1654 pcbhash = &pcbinfo->hashbase[inp->hash_element]; 1655 1656 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1657 pcbinfo->porthashmask)]; 1658 1659 /* 1660 * Go through port list and look for a head for this lport. 1661 */ 1662 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1663 if (phd->phd_port == inp->inp_lport) 1664 break; 1665 } 1666 1667 VERIFY(inp->inp_state != INPCB_STATE_DEAD); 1668 1669 /* 1670 * If none exists, malloc one and tack it on. 1671 */ 1672 if (phd == NULL) { 1673 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK); 1674 if (phd == NULL) { 1675 if (!locked) 1676 lck_rw_done(pcbinfo->mtx); 1677 return (ENOBUFS); /* XXX */ 1678 } 1679 phd->phd_port = inp->inp_lport; 1680 LIST_INIT(&phd->phd_pcblist); 1681 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1682 } 1683 inp->inp_phd = phd; 1684 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1685 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1686 if (!locked) 1687 lck_rw_done(pcbinfo->mtx); 1688 return (0); 1689} 1690 1691/* 1692 * Move PCB to the proper hash bucket when { faddr, fport } have been 1693 * changed. NOTE: This does not handle the case of the lport changing (the 1694 * hashed port list would have to be updated as well), so the lport must 1695 * not change after in_pcbinshash() has been called. 1696 */ 1697void 1698in_pcbrehash(struct inpcb *inp) 1699{ 1700 struct inpcbhead *head; 1701 u_int32_t hashkey_faddr; 1702 1703#if INET6 1704 if (inp->inp_vflag & INP_IPV6) 1705 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1706 else 1707#endif /* INET6 */ 1708 hashkey_faddr = inp->inp_faddr.s_addr; 1709 inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, 1710 inp->inp_fport, inp->inp_pcbinfo->hashmask); 1711 head = &inp->inp_pcbinfo->hashbase[inp->hash_element]; 1712 1713 LIST_REMOVE(inp, inp_hash); 1714 LIST_INSERT_HEAD(head, inp, inp_hash); 1715} 1716 1717/* 1718 * Remove PCB from various lists. 1719 * Must be called pcbinfo lock is held in exclusive mode. 1720 */ 1721void 1722in_pcbremlists(struct inpcb *inp) 1723{ 1724 struct inp_fc_entry *infce; 1725 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; 1726 1727 if (inp->inp_lport) { 1728 struct inpcbport *phd = inp->inp_phd; 1729 1730 LIST_REMOVE(inp, inp_hash); 1731 LIST_REMOVE(inp, inp_portlist); 1732 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) { 1733 LIST_REMOVE(phd, phd_hash); 1734 FREE(phd, M_PCB); 1735 } 1736 } 1737 LIST_REMOVE(inp, inp_list); 1738 1739 infce = inp_fc_getinp(inp->inp_flowhash); 1740 if (infce != NULL) 1741 inp_fc_entry_free(infce); 1742 1743 inp->inp_pcbinfo->ipi_count--; 1744} 1745 1746/* Mechanism used to defer the memory release of PCBs 1747 * The pcb list will contain the pcb until the ripper can clean it up if 1748 * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING 1749 * 3) usecount is null 1750 * This function will be called to either mark the pcb as 1751*/ 1752int 1753in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) 1754{ 1755 1756 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; 1757 UInt32 origwant; 1758 UInt32 newwant; 1759 1760 switch (mode) { 1761 1762 case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */ 1763 1764 /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */ 1765 1766 if (locked == 0) 1767 socket_lock(pcb->inp_socket, 1); 1768 pcb->inp_state = INPCB_STATE_DEAD; 1769 1770stopusing: 1771 if (pcb->inp_socket->so_usecount < 0) 1772 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket); 1773 if (locked == 0) 1774 socket_unlock(pcb->inp_socket, 1); 1775 1776 origwant = *wantcnt; 1777 if ((UInt16) origwant == 0xffff ) /* should stop using */ 1778 return (WNT_STOPUSING); 1779 newwant = 0xffff; 1780 if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */ 1781 OSCompareAndSwap(origwant, newwant, wantcnt) ; 1782 } 1783 return (WNT_STOPUSING); 1784 break; 1785 1786 case WNT_ACQUIRE: /* try to increase reference to pcb */ 1787 /* if WNT_STOPUSING should bail out */ 1788 /* 1789 * if socket state DEAD, try to set count to STOPUSING, return failed 1790 * otherwise increase cnt 1791 */ 1792 do { 1793 origwant = *wantcnt; 1794 if ((UInt16) origwant == 0xffff ) {/* should stop using */ 1795// printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb); 1796 return (WNT_STOPUSING); 1797 } 1798 newwant = origwant + 1; 1799 } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); 1800 return (WNT_ACQUIRE); 1801 break; 1802 1803 case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD, 1804 set wanted bit to STOPUSING 1805 */ 1806 1807 if (locked == 0) 1808 socket_lock(pcb->inp_socket, 1); 1809 1810 do { 1811 origwant = *wantcnt; 1812 if ((UInt16) origwant == 0x0 ) 1813 panic("in_pcb_checkstate pcb=%p release with zero count", pcb); 1814 if ((UInt16) origwant == 0xffff ) {/* should stop using */ 1815#if TEMPDEBUG 1816 printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb); 1817#endif 1818 if (locked == 0) 1819 socket_unlock(pcb->inp_socket, 1); 1820 return (WNT_STOPUSING); 1821 } 1822 newwant = origwant - 1; 1823 } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); 1824 1825 if (pcb->inp_state == INPCB_STATE_DEAD) 1826 goto stopusing; 1827 if (pcb->inp_socket->so_usecount < 0) 1828 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket); 1829 1830 if (locked == 0) 1831 socket_unlock(pcb->inp_socket, 1); 1832 return (WNT_RELEASE); 1833 break; 1834 1835 default: 1836 1837 panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode); 1838 } 1839 1840 /* NOTREACHED */ 1841 return (mode); 1842} 1843 1844/* 1845 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. 1846 * The inpcb_compat data structure is passed to user space and must 1847 * not change. We intentionally avoid copying pointers. 1848 */ 1849void 1850inpcb_to_compat( 1851 struct inpcb *inp, 1852 struct inpcb_compat *inp_compat) 1853{ 1854 bzero(inp_compat, sizeof(*inp_compat)); 1855 inp_compat->inp_fport = inp->inp_fport; 1856 inp_compat->inp_lport = inp->inp_lport; 1857 inp_compat->nat_owner = 0; 1858 inp_compat->nat_cookie = inp->nat_cookie; 1859 inp_compat->inp_gencnt = inp->inp_gencnt; 1860 inp_compat->inp_flags = inp->inp_flags; 1861 inp_compat->inp_flow = inp->inp_flow; 1862 inp_compat->inp_vflag = inp->inp_vflag; 1863 inp_compat->inp_ip_ttl = inp->inp_ip_ttl; 1864 inp_compat->inp_ip_p = inp->inp_ip_p; 1865 inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; 1866 inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; 1867 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; 1868 inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; 1869 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; 1870 inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; 1871 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; 1872} 1873 1874#if !CONFIG_EMBEDDED 1875 1876void 1877inpcb_to_xinpcb64( 1878 struct inpcb *inp, 1879 struct xinpcb64 *xinp) 1880{ 1881 xinp->inp_fport = inp->inp_fport; 1882 xinp->inp_lport = inp->inp_lport; 1883 xinp->inp_gencnt = inp->inp_gencnt; 1884 xinp->inp_flags = inp->inp_flags; 1885 xinp->inp_flow = inp->inp_flow; 1886 xinp->inp_vflag = inp->inp_vflag; 1887 xinp->inp_ip_ttl = inp->inp_ip_ttl; 1888 xinp->inp_ip_p = inp->inp_ip_p; 1889 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; 1890 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; 1891 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; 1892 xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; 1893 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; 1894 xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; 1895 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; 1896} 1897 1898#endif /* !CONFIG_EMBEDDED */ 1899 1900 1901/* 1902 * The following routines implement this scheme: 1903 * 1904 * Callers of ip_output() that intend to cache the route in the inpcb pass 1905 * a local copy of the struct route to ip_output(). Using a local copy of 1906 * the cached route significantly simplifies things as IP no longer has to 1907 * worry about having exclusive access to the passed in struct route, since 1908 * it's defined in the caller's stack; in essence, this allows for a lock- 1909 * less operation when updating the struct route at the IP level and below, 1910 * whenever necessary. The scheme works as follows: 1911 * 1912 * Prior to dropping the socket's lock and calling ip_output(), the caller 1913 * copies the struct route from the inpcb into its stack, and adds a reference 1914 * to the cached route entry, if there was any. The socket's lock is then 1915 * dropped and ip_output() is called with a pointer to the copy of struct 1916 * route defined on the stack (not to the one in the inpcb.) 1917 * 1918 * Upon returning from ip_output(), the caller then acquires the socket's 1919 * lock and synchronizes the cache; if there is no route cached in the inpcb, 1920 * it copies the local copy of struct route (which may or may not contain any 1921 * route) back into the cache; otherwise, if the inpcb has a route cached in 1922 * it, the one in the local copy will be freed, if there's any. Trashing the 1923 * cached route in the inpcb can be avoided because ip_output() is single- 1924 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized 1925 * by the socket/transport layer.) 1926 */ 1927void 1928inp_route_copyout(struct inpcb *inp, struct route *dst) 1929{ 1930 struct route *src = &inp->inp_route; 1931 1932 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); 1933 1934 /* 1935 * If the route in the PCB is not for IPv4, blow it away; 1936 * this is possible in the case of IPv4-mapped address case. 1937 */ 1938 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) { 1939 rtfree(src->ro_rt); 1940 src->ro_rt = NULL; 1941 } 1942 1943 route_copyout(dst, src, sizeof(*dst)); 1944} 1945 1946void 1947inp_route_copyin(struct inpcb *inp, struct route *src) 1948{ 1949 struct route *dst = &inp->inp_route; 1950 1951 lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); 1952 1953 /* Minor sanity check */ 1954 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) 1955 panic("%s: wrong or corrupted route: %p", __func__, src); 1956 1957 route_copyin(src, dst, sizeof(*src)); 1958} 1959 1960/* 1961 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option. 1962 */ 1963int 1964inp_bindif(struct inpcb *inp, unsigned int ifscope) 1965{ 1966 struct ifnet *ifp = NULL; 1967 1968 ifnet_head_lock_shared(); 1969 if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && 1970 (ifp = ifindex2ifnet[ifscope]) == NULL)) { 1971 ifnet_head_done(); 1972 return (ENXIO); 1973 } 1974 ifnet_head_done(); 1975 1976 VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); 1977 1978 /* 1979 * A zero interface scope value indicates an "unbind". 1980 * Otherwise, take in whatever value the app desires; 1981 * the app may already know the scope (or force itself 1982 * to such a scope) ahead of time before the interface 1983 * gets attached. It doesn't matter either way; any 1984 * route lookup from this point on will require an 1985 * exact match for the embedded interface scope. 1986 */ 1987 inp->inp_boundifp = ifp; 1988 if (inp->inp_boundifp == NULL) 1989 inp->inp_flags &= ~INP_BOUND_IF; 1990 else 1991 inp->inp_flags |= INP_BOUND_IF; 1992 1993 /* Blow away any cached route in the PCB */ 1994 if (inp->inp_route.ro_rt != NULL) { 1995 rtfree(inp->inp_route.ro_rt); 1996 inp->inp_route.ro_rt = NULL; 1997 } 1998 1999 return (0); 2000} 2001 2002/* 2003 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option. 2004 */ 2005int 2006inp_nocellular(struct inpcb *inp, unsigned int val) 2007{ 2008 if (val) { 2009 inp->inp_flags |= INP_NO_IFT_CELLULAR; 2010 } else if (inp->inp_flags & INP_NO_IFT_CELLULAR) { 2011 /* once set, it cannot be unset */ 2012 return (EINVAL); 2013 } 2014 2015 /* Blow away any cached route in the PCB */ 2016 if (inp->inp_route.ro_rt != NULL) { 2017 rtfree(inp->inp_route.ro_rt); 2018 inp->inp_route.ro_rt = NULL; 2019 } 2020 2021 return (0); 2022} 2023 2024/* 2025 * Calculate flow hash for an inp, used by an interface to identify a 2026 * flow. When an interface provides flow control advisory, this flow 2027 * hash is used as an identifier. 2028 */ 2029u_int32_t 2030inp_calc_flowhash(struct inpcb *inp) 2031{ 2032 struct inp_flowhash_key fh __attribute__((aligned(8))); 2033 u_int32_t flowhash = 0; 2034 2035 if (inp_hash_seed == 0) 2036 inp_hash_seed = RandomULong(); 2037 2038 bzero(&fh, sizeof (fh)); 2039 2040 bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); 2041 bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); 2042 2043 fh.infh_lport = inp->inp_lport; 2044 fh.infh_fport = inp->inp_fport; 2045 fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; 2046 fh.infh_proto = inp->inp_ip_p; 2047 fh.infh_rand1 = RandomULong(); 2048 fh.infh_rand2 = RandomULong(); 2049 2050try_again: 2051 flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); 2052 if (flowhash == 0) { 2053 /* try to get a non-zero flowhash */ 2054 inp_hash_seed = RandomULong(); 2055 goto try_again; 2056 } 2057 2058 return flowhash; 2059} 2060 2061/* 2062 * Function to compare inp_fc_entries in inp flow control tree 2063 */ 2064static inline int 2065infc_cmp(const struct inp_fc_entry *fc1, const struct inp_fc_entry *fc2) 2066{ 2067 return (fc1->infc_flowhash - fc2->infc_flowhash); 2068} 2069 2070int 2071inp_fc_addinp(struct inpcb *inp) 2072{ 2073 struct inp_fc_entry keyfc, *infc; 2074 u_int32_t flowhash = inp->inp_flowhash; 2075 2076 keyfc.infc_flowhash = flowhash; 2077 2078 lck_mtx_lock_spin(&inp_fc_lck); 2079 infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc); 2080 if (infc != NULL && infc->infc_inp == inp) { 2081 /* Entry is already in inp_fc_tree, return */ 2082 lck_mtx_unlock(&inp_fc_lck); 2083 return (1); 2084 } 2085 2086 if (infc != NULL) { 2087 /* 2088 * There is a different fc entry with the same 2089 * flow hash but different inp pointer. There 2090 * can be a collision on flow hash but the 2091 * probability is low. Let's just avoid 2092 * adding a second one when there is a collision 2093 */ 2094 lck_mtx_unlock(&inp_fc_lck); 2095 return (0); 2096 } 2097 2098 /* become regular mutex */ 2099 lck_mtx_convert_spin(&inp_fc_lck); 2100 2101 infc = zalloc_noblock(inp_fcezone); 2102 if (infc == NULL) { 2103 /* memory allocation failed */ 2104 lck_mtx_unlock(&inp_fc_lck); 2105 return (0); 2106 } 2107 bzero(infc, sizeof (*infc)); 2108 2109 infc->infc_flowhash = flowhash; 2110 infc->infc_inp = inp; 2111 2112 RB_INSERT(inp_fc_tree, &inp_fc_tree, infc); 2113 lck_mtx_unlock(&inp_fc_lck); 2114 return (1); 2115} 2116 2117struct inp_fc_entry* 2118inp_fc_getinp(u_int32_t flowhash) 2119{ 2120 struct inp_fc_entry keyfc, *infc; 2121 2122 keyfc.infc_flowhash = flowhash; 2123 2124 lck_mtx_lock_spin(&inp_fc_lck); 2125 infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc); 2126 if (infc == NULL) { 2127 /* inp is not present, return */ 2128 lck_mtx_unlock(&inp_fc_lck); 2129 return (NULL); 2130 } 2131 2132 RB_REMOVE(inp_fc_tree, &inp_fc_tree, infc); 2133 2134 if (in_pcb_checkstate(infc->infc_inp, WNT_ACQUIRE, 0) == 2135 WNT_STOPUSING) { 2136 /* become regular mutex */ 2137 lck_mtx_convert_spin(&inp_fc_lck); 2138 2139 /* 2140 * This inp is going away, just don't process it. 2141 */ 2142 inp_fc_entry_free(infc); 2143 infc = NULL; 2144 } 2145 lck_mtx_unlock(&inp_fc_lck); 2146 2147 return (infc); 2148} 2149 2150void 2151inp_fc_entry_free(struct inp_fc_entry *infc) 2152{ 2153 zfree(inp_fcezone, infc); 2154} 2155 2156void 2157inp_fc_feedback(struct inpcb *inp) 2158{ 2159 struct socket *so = inp->inp_socket; 2160 2161 /* we already hold a want_cnt on this inp, socket can't be null */ 2162 VERIFY (so != NULL); 2163 socket_lock(so, 1); 2164 2165 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { 2166 socket_unlock(so, 1); 2167 return; 2168 } 2169 2170 /* 2171 * Return if the connection is not in flow-controlled state. 2172 * This can happen if the connection experienced 2173 * loss while it was in flow controlled state 2174 */ 2175 if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { 2176 socket_unlock(so, 1); 2177 return; 2178 } 2179 inp_reset_fc_state(inp); 2180 2181 if (so->so_proto->pr_type == SOCK_STREAM) 2182 inp_fc_unthrottle_tcp(inp); 2183 2184 socket_unlock(so, 1); 2185} 2186 2187void 2188inp_reset_fc_state(struct inpcb *inp) 2189{ 2190 struct socket *so = inp->inp_socket; 2191 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; 2192 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; 2193 2194 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); 2195 2196 if (suspended) { 2197 so->so_flags &= ~(SOF_SUSPENDED); 2198 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); 2199 } 2200 2201 if (inp->inp_sndinprog_cnt > 0) 2202 inp->inp_flags |= INP_FC_FEEDBACK; 2203 2204 /* Give a write wakeup to unblock the socket */ 2205 if (needwakeup) 2206 sowwakeup(so); 2207} 2208 2209int 2210inp_set_fc_state(struct inpcb *inp, int advcode) 2211{ 2212 /* 2213 * If there was a feedback from the interface when 2214 * send operation was in progress, we should ignore 2215 * this flow advisory to avoid a race between setting 2216 * flow controlled state and receiving feedback from 2217 * the interface 2218 */ 2219 if (inp->inp_flags & INP_FC_FEEDBACK) 2220 return(0); 2221 2222 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); 2223 if (inp_fc_addinp(inp)) { 2224 switch (advcode) { 2225 case FADV_FLOW_CONTROLLED: 2226 inp->inp_flags |= INP_FLOW_CONTROLLED; 2227 break; 2228 case FADV_SUSPENDED: 2229 inp->inp_flags |= INP_FLOW_SUSPENDED; 2230 soevent(inp->inp_socket, 2231 (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); 2232 2233 /* Record the fact that suspend event was sent */ 2234 inp->inp_socket->so_flags |= SOF_SUSPENDED; 2235 break; 2236 } 2237 } 2238 return(1); 2239} 2240 2241/* 2242 * Handler for SO_FLUSH socket option. 2243 */ 2244int 2245inp_flush(struct inpcb *inp, int optval) 2246{ 2247 u_int32_t flowhash = inp->inp_flowhash; 2248 struct rtentry *rt; 2249 2250 /* Either all classes or one of the valid ones */ 2251 if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) 2252 return (EINVAL); 2253 2254 /* We need a flow hash for identification */ 2255 if (flowhash == 0) 2256 return (0); 2257 2258 /* We need a cached route for the interface */ 2259 if ((rt = inp->inp_route.ro_rt) != NULL) { 2260 struct ifnet *ifp = rt->rt_ifp; 2261 if_qflush_sc(ifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); 2262 } 2263 2264 return (0); 2265} 2266 2267/* 2268 * Clear the INP_INADDR_ANY flag (special case for PPP only) 2269 */ 2270void inp_clear_INP_INADDR_ANY(struct socket *so) 2271{ 2272 struct inpcb *inp = NULL; 2273 2274 socket_lock(so, 1); 2275 inp = sotoinpcb(so); 2276 if (inp) { 2277 inp->inp_flags &= ~INP_INADDR_ANY; 2278 } 2279 socket_unlock(so, 1); 2280} 2281 2282