/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>
#include <sys/priv.h>
#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#if INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#if IPSEC
#include <netinet6/ipsec.h>
#include <netkey/key.h>
#endif /* IPSEC */

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#if FLOW_DIVERT
#include <netinet/flow_divert.h>
#endif

static lck_grp_t *inpcb_lock_grp;
static lck_attr_t *inpcb_lock_attr;
static lck_grp_attr_t *inpcb_lock_grp_attr;
decl_lck_mtx_data(static, inpcb_lock);		/* global INPCB lock */
decl_lck_mtx_data(static, inpcb_timeout_lock);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0;	/* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;		/* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;
static void inpcb_sched_timeout(struct timeval *);
static void inpcb_timeout(void *);
int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if FLOW_DIVERT
static void inp_update_flow_divert_policy(struct inpcb *, boolean_t);
#endif /* FLOW_DIVERT */
#endif /* CONFIG_PROC_UUID_POLICY */

#if IPSEC
extern int ipsec_bypass;
#endif /* IPSEC */

#define	DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define	DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int	ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int	ipport_firstauto = IPPORT_HIFIRSTAUTO;		/* 49152 */
int	ipport_lastauto = IPPORT_HILASTAUTO;		/* 65535 */
int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int	ipport_hilastauto = IPPORT_HILASTAUTO;		/* 65535 */

#define	RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }

static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
	if (!error) {
		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
	}
	return (error);
}

#undef RANGECHK

SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
	CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");

extern int	udp_use_randomport;
extern int	tcp_use_randomport;
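
/*
 * Illustrative sketch (not part of this file's build): the tunables above
 * are exported through sysctl, so the effective ephemeral range can be
 * inspected from user space with the standard sysctl(3) API.  The names
 * mirror the SYSCTL_PROC entries above.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int first, last;
 *		size_t len = sizeof (first);
 *
 *		sysctlbyname("net.inet.ip.portrange.first", &first, &len,
 *		    NULL, 0);
 *		len = sizeof (last);
 *		sysctlbyname("net.inet.ip.portrange.last", &last, &len,
 *		    NULL, 0);
 *		printf("ephemeral ports: %d-%d\n", first, last);
 *		return (0);
 *	}
 */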

/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr	v4;
		struct in6_addr	v6;
		u_int8_t	addr8[16];
		u_int16_t	addr16[8];
		u_int32_t	addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr	infh_laddr;
	struct inp_flowhash_key_addr	infh_faddr;
	u_int32_t			infh_lport;
	u_int32_t			infh_fport;
	u_int32_t			infh_af;
	u_int32_t			infh_proto;
	u_int32_t			infh_rand1;
	u_int32_t			infh_rand2;
};

static u_int32_t inp_hash_seed = 0;

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define	INPFC_SOLOCKED	0x1
#define	INPFC_REMOVE	0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

decl_lck_mtx_data(static, inp_fc_lck);

RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;

/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */

void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_lock_grp_attr = lck_grp_attr_alloc_init();
	inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr);
	inpcb_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr);
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof (key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}

#define	INPCB_HAVE_TIMER_REQ(req)	(((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
static void
inpcb_timeout(void *arg)
{
#pragma unused(arg)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;
	struct timeval leeway;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;
	bzero(&gccnt, sizeof (gccnt));
	bzero(&tmcnt, sizeof (tmcnt));

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof (ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof (ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting)
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	if (!inpcb_ticking)
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);

	/* re-arm the timer if there's work to do */
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	bzero(&leeway, sizeof (leeway));
	leeway.tv_sec = inpcb_timeout_lazy;
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
		inpcb_sched_timeout(NULL);
	else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
		/* be lazy when idle with little activity */
		inpcb_sched_timeout(&leeway);
	else
		inpcb_sched_timeout(NULL);

	lck_mtx_unlock(&inpcb_timeout_lock);
}

static void
inpcb_sched_timeout(struct timeval *leeway)
{
	lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);

	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (leeway == NULL) {
			inpcb_fast_timer_on = TRUE;
			timeout(inpcb_timeout, NULL, hz);
		} else {
			inpcb_fast_timer_on = FALSE;
			timeout_with_leeway(inpcb_timeout, NULL, hz,
			    tvtohz(leeway));
		}
	} else if (inpcb_timeout_run == 1 &&
	    leeway == NULL && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, also schedule an
		 * instance of the fast timer.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		timeout(inpcb_timeout, NULL, hz);
	}
}

void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	struct timeval leeway;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout(NULL);
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout(NULL);
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		leeway.tv_sec = inpcb_timeout_lazy;
		leeway.tv_usec = 0;
		inpcb_sched_timeout(&leeway);
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	struct timeval leeway;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout(NULL);
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout(NULL);
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		leeway.tv_sec = inpcb_timeout_lazy;
		leeway.tv_usec = 0;
		inpcb_sched_timeout(&leeway);
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
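
/*
 * Illustrative note (an assumption based on the callers in this file, not
 * a new API): a protocol that owns an inpcbinfo requests garbage
 * collection or timer service through the two functions above, e.g.
 *
 *	inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
 *
 * as done by in_pcbdetach() below.  INPCB_TIMER_NODELAY and
 * INPCB_TIMER_FAST re-arm the callout at the base hz period, while the
 * lazy default allows up to inpcb_timeout_lazy (10) seconds of leeway.
 */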

void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list\n",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}

int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi)
			break;
	}
	if (ipi0 != NULL)
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	else
		error = ENXIO;
	lck_mtx_unlock(&inpcb_lock);

	return (error);
}

/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 *	ipsec_init_policy:???		[IPSEC]
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;
#if CONFIG_MACF_NET
	int mac_error;
#endif /* CONFIG_MACF_NET */

	if (!so->cached_in_sock_layer) {
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL)
			return (ENOBUFS);
		bzero((caddr_t)inp, sizeof (*inp));
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof (*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
#if CONFIG_MACF_NET
	mac_error = mac_inpcb_label_init(inp, M_WAITOK);
	if (mac_error != 0) {
		if (!so->cached_in_sock_layer)
			zfree(pcbinfo->ipi_zone, inp);
		return (mac_error);
	}
	mac_inpcb_label_associate(so, inp);
#endif /* CONFIG_MACF_NET */
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
		inp->inp_flags |= IN6P_IPV6_V6ONLY;

	if (ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif /* INET6 */

	(void) inp_update_policy(inp);

	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}
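
/*
 * Worked example for the P2ROUNDUP() alignment above (illustrative
 * numbers, assuming an 8-byte u_int64_t): if inp_stat_store happens to
 * begin at address 0x1004, P2ROUNDUP(0x1004, 8) yields 0x1008, so
 * inp_stat becomes 64-bit aligned at a 4-byte offset into the store;
 * the sanity check then requires sizeof (inp_stat_store) to be at least
 * sizeof (struct inp_stat) + 4 so the aligned object still fits.
 */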

/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away.  Since we know that the lock is
 * held exclusively (read+write) when this function is
 * called, we can safely dispose of this socket like the
 * slow timer would usually do and return NULL.  This is
 * great for bind.
 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		lck_mtx_lock(&inp->inpcb_mtx);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD)
				in_pcbdetach(inp);
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			lck_mtx_unlock(&inp->inpcb_mtx);
		}
	}

	return (inp);
}

static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020: send a kernel event notification if a
	 * non-participating socket tries to bind a port owned by
	 * a socket that has set SOF_NOTIFYCONFLICT.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
	bzero(&ev_msg, sizeof (struct kev_msg));
	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}

/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	kauth_cred_t cred;

	if (TAILQ_EMPTY(&in_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);
	if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
		wild = 1;
	socket_unlock(so, 0);	/* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	if (nam != NULL) {
		struct ifnet *outif = NULL;

		if (nam->sa_len != sizeof (struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EINVAL);
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EAFNOSUPPORT);
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (EADDRNOTAVAIL);
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}
		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

			/* GROSS */
			if (ntohs(lport) < IPPORT_RESERVED) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EACCES);
				}
			}
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
			    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
			    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT))
					conflict = 1;

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict)
					in_pcb_conflict_post_msg(lport);

				socket_lock(so, 0);
				return (EADDRINUSE);
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0) {
#if INET6
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6)
#endif /* INET6 */
				{
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT))
						conflict = 1;

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict)
						in_pcb_conflict_post_msg(lport);
					socket_lock(so, 0);
					return (EADDRINUSE);
				}
			}
		}
		inp->inp_laddr = SIN(nam)->sin_addr;
		inp->inp_last_outifp = outif;
	}
	if (lport == 0) {
		u_short first, last;
		int count;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * TODO:
		 *
		 * The following should be moved into its own routine and
		 * thus can be shared with in6_pcbsetport(); the latter
		 * currently duplicates the logic.
		 */

		inp->inp_flags |= INP_ANONPORT;

		if (inp->inp_flags & INP_HIGHPORT) {
			first = ipport_hifirstauto;	/* sysctl */
			last = ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (error);
			}
			first = ipport_lowfirstauto;	/* 1023 */
			last = ipport_lowlastauto;	/* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = ipport_firstauto;	/* sysctl */
			last = ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */
		if (first == last)
			randomport = 0;
		/*
		 * Simple check to ensure all ports are not used up causing
		 * a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			/*
			 * counting down
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					inp->inp_laddr.s_addr = INADDR_ANY;
					inp->inp_last_outifp = NULL;
					return (EADDRNOTAVAIL);
				}
				--*lastport;
				if (*lastport > first || *lastport < last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    inp->inp_laddr, lport, wild));
		} else {
			/*
			 * counting up
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					inp->inp_laddr.s_addr = INADDR_ANY;
					inp->inp_last_outifp = NULL;
					return (EADDRNOTAVAIL);
				}
				++*lastport;
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    inp->inp_laddr, lport, wild));
		}
	}
	socket_lock(so, 0);
	inp->inp_lport = lport;
	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_lport = 0;
		inp->inp_last_outifp = NULL;
		lck_rw_done(pcbinfo->ipi_lock);
		return (EAGAIN);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return (0);
}
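
/*
 * Illustrative usage sketch for in_pcbbind() (an assumption drawn from
 * the protocol attachment points, not a definitive call site): a
 * datagram protocol's pru_bind entry would typically invoke it as
 *
 *	error = in_pcbbind(sotoinpcb(so), nam, p);
 *
 * with the socket lock held; as seen above, in_pcbbind() temporarily
 * drops the socket lock while it takes the pcbinfo lock exclusively.
 */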

/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif)
{
	boolean_t nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR);
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;

	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);
		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 */
		if (nocell && IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);
			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (nocell && IFNET_IS_CELLULAR(ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (nocell && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}

/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If we don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
		return (error);

	socket_unlock(inp->inp_socket, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(inp->inp_socket, 0);

	/*
	 * Check if the socket is still in a valid state.  When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}
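
/*
 * Illustrative usage sketch for in_pcbconnect() (an assumption, not a
 * definitive call site): a connect path with no scoped-interface
 * override would look like
 *
 *	error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL);
 *
 * passing a non-NULL outif instead when the caller wants the chosen
 * outbound interface back, subject to the reference-release obligation
 * documented above in_pcbladdr().
 */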

void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
		in_pcbdetach(inp);
}

void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		if (imo != NULL)
			IMO_REMREF(imo);
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
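
/*
 * Note on the two-phase teardown above (a summary of this file's own
 * logic, not new behavior): in_pcbdetach() only marks the PCB dead via
 * in_pcb_checkstate(WNT_STOPUSING) and schedules a fast GC pass; the
 * memory itself is reclaimed later by in_pcbdispose() below, once the
 * use count has drained, which is also why
 * in_pcblookup_local_and_cleanup() may dispose of a WNT_STOPUSING pcb
 * it finds during bind.
 */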

void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if (!so->cached_in_sock_layer) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}

/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
int
in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero(sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}

int
in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL ||
	    (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT))
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;
	return (0);
}

int
in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero((caddr_t)sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}

int
in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL ||
	    (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	}

	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;
	return (0);
}

void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL)
			continue;
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
			continue;
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(pcbinfo->ipi_lock);
}

/*
 * Check for alternatives when higher level complains
 * about service problems.  For now, invalidate cached
 * routing information.  If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;
	struct rt_addrinfo info;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		bzero((caddr_t)&info, sizeof (info));
		RT_LOCK(rt);
		info.rti_info[RTAX_DST] =
		    (struct sockaddr *)&inp->inp_route.ro_dst;
		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone, keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}

/*
 * After a routing change, flush old routing
 * and allocate a (hopefully) better one.
 */
void
in_rtchange(struct inpcb *inp, int errno)
{
#pragma unused(errno)
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		/* if address is gone, keep the old route */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}
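
/*
 * Illustrative example of the best-fit scoring used by
 * in_pcblookup_local() below (hypothetical addresses): for a lookup of
 * laddr 10.0.0.1, lport 80, a pcb bound to 10.0.0.1:80 with no foreign
 * address scores wildcard = 0 (exact), a pcb bound to *:80 scores 1,
 * and a connected pcb adds one more point for its non-wildcard foreign
 * address; the candidate with the lowest score wins.
 */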

/*
 * Lookup a PCB based on the local address and port.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
#if INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif /* INET6 */
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return (inp);
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (NULL);
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport)
				break;
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs.  Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
#if INET6
				if (!(inp->inp_vflag & INP_IPV4))
					continue;
#endif /* INET6 */
				if (inp->inp_faddr.s_addr != INADDR_ANY)
					wildcard++;
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY)
						wildcard++;
					else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr)
						continue;
				} else {
					if (laddr.s_addr != INADDR_ANY)
						wildcard++;
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return (match);
	}
}

/*
 * Check if PCB exists in hash list.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted(inp, ifp))
			continue;

		if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
		    (inp->inp_flags & INP_NO_IFT_CELLULAR))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
				    inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    inp->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted(inp, ifp))
			continue;

		if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
		    (inp->inp_flags & INP_NO_IFT_CELLULAR))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
					    inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
					    inp->inp_socket->so_cred);
				}
				lck_rw_done(pcbinfo->ipi_lock);
				return (found);
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
				    local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
#endif /* INET6 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
		    local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
		    local_wild->inp_socket->so_cred);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	return (found);
}

/*
 * Lookup PCB in hash list.
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted(inp, ifp))
			continue;

		if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
		    (inp->inp_flags & INP_NO_IFT_CELLULAR))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return (inp);
			} else {
				/* it's there but dead, say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return (NULL);
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (NULL);
	}

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted(inp, ifp))
			continue;

		if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
		    (inp->inp_flags & INP_NO_IFT_CELLULAR))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					lck_rw_done(pcbinfo->ipi_lock);
					return (inp);
				} else {
					/* it's dead; say it isn't found */
					lck_rw_done(pcbinfo->ipi_lock);
					return (NULL);
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return (local_wild_mapped);
			} else {
				/* it's dead; say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return (NULL);
			}
		}
#endif /* INET6 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (NULL);
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (local_wild);
	}
	/*
	 * It's either not found or is already dead.
	 */
	lck_rw_done(pcbinfo->ipi_lock);
	return (NULL);
}
/*
 * Insert PCB onto various hash lists.
 */
int
in_pcbinshash(struct inpcb *inp, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with UDP
			 * multicast packets.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
			if (inp->inp_state == INPCB_STATE_DEAD) {
				/*
				 * The socket got dropped while
				 * it was unlocked.
				 */
				lck_rw_done(pcbinfo->ipi_lock);
				return (ECONNABORTED);
			}
		}
	}

#if INET6
	if (inp->inp_vflag & INP_IPV6)
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport)
			break;
	}

	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
		    M_PCB, M_WAITOK);
		if (phd == NULL) {
			if (!locked)
				lck_rw_done(pcbinfo->ipi_lock);
			return (ENOBUFS); /* XXX */
		}
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	if (!locked)
		lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}

/*
 * Move PCB to the proper hash bucket when { faddr, fport } have been
 * changed.  NOTE: This does not handle the case of the lport changing (the
 * hashed port list would have to be updated as well), so the lport must
 * not change after in_pcbinshash() has been called.
 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if INET6
	if (inp->inp_vflag & INP_IPV6)
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	LIST_REMOVE(inp, inp_hash);
	LIST_INSERT_HEAD(head, inp, inp_hash);
}
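
/*
 * Example (illustrative sketch only, not part of this file): a
 * connect-time caller is expected to fill in the foreign address and
 * port and then rehash, since only { faddr, fport } may change after
 * in_pcbinshash():
 *
 *	inp->inp_faddr = sin->sin_addr;
 *	inp->inp_fport = sin->sin_port;
 *	in_pcbrehash(inp);	// move to the bucket for the new 4-tuple
 *
 * The local port must have been fixed by the earlier in_pcbinshash();
 * changing inp_lport at this point would leave the port hash stale.
 */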
/*
 * Remove PCB from various lists.
 * Must be called with the pcbinfo lock held in exclusive mode.
 */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	if (inp->inp_lport) {
		struct inpcbport *phd = inp->inp_phd;

		LIST_REMOVE(inp, inp_hash);
		LIST_REMOVE(inp, inp_portlist);
		if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
			LIST_REMOVE(phd, phd_hash);
			FREE(phd, M_PCB);
		}
	}

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}

/*
 * Mechanism used to defer the memory release of PCBs.
 * The pcb list will contain the pcb until the reaper can clean it up,
 * provided the following conditions are met:
 *	1) state is "DEAD",
 *	2) wantcnt is STOPUSING, and
 *	3) usecount is 0.
 * This function is called to either mark the pcb as dead (WNT_STOPUSING),
 * acquire a reference on it (WNT_ACQUIRE), or drop one (WNT_RELEASE).
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling.  CAS the
		 * want count with STOPUSING; if that succeeds we're done.
		 * If the pcb is still in use, it will be marked later,
		 * when the last reference is released.
		 */
		if (locked == 0)
			socket_lock(pcb->inp_socket, 1);
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative\n",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0)
			socket_unlock(pcb->inp_socket, 1);

		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff)	/* should stop using */
			return (WNT_STOPUSING);
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return (WNT_STOPUSING);
		break;

	case WNT_ACQUIRE:
		/*
		 * Try to take a reference on the pcb.  If it is already
		 * marked WNT_STOPUSING, bail out; otherwise atomically
		 * increment the want count.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return (WNT_STOPUSING);
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return (WNT_ACQUIRE);
		break;
	case WNT_RELEASE:
		/*
		 * Drop a reference.  If the pcb state is DEAD, attempt
		 * to mark the want count as STOPUSING; the mark takes
		 * effect once the count has dropped to zero.
		 */
		if (locked == 0)
			socket_lock(pcb->inp_socket, 1);

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0)
					socket_unlock(pcb->inp_socket, 1);
				return (WNT_STOPUSING);
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		if (pcb->inp_state == INPCB_STATE_DEAD)
			goto stopusing;
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative\n",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0)
			socket_unlock(pcb->inp_socket, 1);
		return (WNT_RELEASE);
		break;

	default:
		panic("%s: so=%p not a valid state =%x\n", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return (mode);
}
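
/*
 * Example (illustrative sketch only): the lookup routines above pair a
 * WNT_ACQUIRE with a later WNT_RELEASE.  A typical input-path caller
 * looks roughly like this:
 *
 *	inp = in_pcblookup_hash(&tcbinfo, faddr, fport, laddr, lport,
 *	    1, ifp);			// takes a WNT_ACQUIRE reference
 *	if (inp != NULL) {
 *		...use the pcb...
 *		if (in_pcb_checkstate(inp, WNT_RELEASE, 0) ==
 *		    WNT_STOPUSING) {
 *			// pcb went DEAD; don't touch it again
 *		}
 *	}
 *
 * The reaper frees the pcb only once it is DEAD, its want count is at
 * STOPUSING, and the socket use count has dropped to zero.
 */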

/*
 * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
 * The inpcb_compat data structure is passed to user space and must
 * not change.  We intentionally avoid copying pointers.
 */
void
inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
{
	bzero(inp_compat, sizeof (*inp_compat));
	inp_compat->inp_fport = inp->inp_fport;
	inp_compat->inp_lport = inp->inp_lport;
	inp_compat->nat_owner = 0;
	inp_compat->nat_cookie = 0;
	inp_compat->inp_gencnt = inp->inp_gencnt;
	inp_compat->inp_flags = inp->inp_flags;
	inp_compat->inp_flow = inp->inp_flow;
	inp_compat->inp_vflag = inp->inp_vflag;
	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
	inp_compat->inp_ip_p = inp->inp_ip_p;
	inp_compat->inp_dependfaddr.inp6_foreign =
	    inp->inp_dependfaddr.inp6_foreign;
	inp_compat->inp_dependladdr.inp6_local =
	    inp->inp_dependladdr.inp6_local;
	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	inp_compat->inp_depend6.inp6_hlim = 0;
	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	inp_compat->inp_depend6.inp6_ifindex = 0;
	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}

void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}

/*
 * The following routines implement this scheme:
 *
 * Callers of ip_output() that intend to cache the route in the inpcb pass
 * a local copy of the struct route to ip_output().  Using a local copy of
 * the cached route significantly simplifies things as IP no longer has to
 * worry about having exclusive access to the passed in struct route, since
 * it's defined in the caller's stack; in essence, this allows for a lock-
 * less operation when updating the struct route at the IP level and below,
 * whenever necessary.  The scheme works as follows:
 *
 * Prior to dropping the socket's lock and calling ip_output(), the caller
 * copies the struct route from the inpcb into its stack, and adds a reference
 * to the cached route entry, if there was any.  The socket's lock is then
 * dropped and ip_output() is called with a pointer to the copy of struct
 * route defined on the stack (not to the one in the inpcb.)
 *
 * Upon returning from ip_output(), the caller then acquires the socket's
 * lock and synchronizes the cache; if there is no route cached in the inpcb,
 * it copies the local copy of struct route (which may or may not contain any
 * route) back into the cache; otherwise, if the inpcb has a route cached in
 * it, the one in the local copy will be freed, if there's any.  Trashing the
 * cached route in the inpcb can be avoided because ip_output() is single-
 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
 * by the socket/transport layer.)
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
		ROUTE_RELEASE(src);

	route_copyout(dst, src, sizeof (*dst));
}

void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
		panic("%s: wrong or corrupted route: %p", __func__, src);

	route_copyin(src, dst, sizeof (*src));
}
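
/*
 * Example (illustrative sketch only) of the caller pattern the comment
 * above describes, as seen from a transport-layer send path:
 *
 *	struct route ro;
 *
 *	inp_route_copyout(inp, &ro);	// copy cache + ref, socket locked
 *	socket_unlock(so, 0);
 *	error = ip_output(m, opt, &ro, flags, mopt, &ipoa);
 *	socket_lock(so, 0);
 *	inp_route_copyin(inp, &ro);	// resync cache; frees duplicate ref
 */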

/*
 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF
 * socket options.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return (ENXIO);
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	/*
	 * A zero interface scope value indicates an "unbind".
	 * Otherwise, take in whatever value the app desires;
	 * the app may already know the scope (or force itself
	 * to such a scope) ahead of time before the interface
	 * gets attached.  It doesn't matter either way; any
	 * route lookup from this point on will require an
	 * exact match for the embedded interface scope.
	 */
	inp->inp_boundifp = ifp;
	if (inp->inp_boundifp == NULL)
		inp->inp_flags &= ~INP_BOUND_IF;
	else
		inp->inp_flags |= INP_BOUND_IF;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);

	if (pifp != NULL)
		*pifp = ifp;

	return (0);
}
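
/*
 * Example (illustrative sketch only) of the userland side of this
 * handler; the interface name "en0" is just a placeholder:
 *
 *	unsigned int ifscope = if_nametoindex("en0");
 *
 *	if (setsockopt(s, IPPROTO_IP, IP_BOUND_IF,
 *	    &ifscope, sizeof (ifscope)) == -1)
 *		err(1, "setsockopt(IP_BOUND_IF)");
 *
 * Passing an ifscope of 0 (IFSCOPE_NONE) unbinds the socket.
 */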

/*
 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for setting PROC_UUID_NO_CELLULAR policy.
 */
void
inp_set_nocellular(struct inpcb *inp)
{
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
 */
void
inp_clear_nocellular(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/*
	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
	 * has a higher precedence than INP_NO_IFT_CELLULAR.  Clear the flag
	 * if and only if the socket is unrestricted.
	 */
	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;

		/* Blow away any cached route in the PCB */
		ROUTE_RELEASE(&inp->inp_route);
	}
}

#if FLOW_DIVERT
/*
 * Called when PROC_UUID_FLOW_DIVERT is set.
 */
void
inp_set_flow_divert(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_FLOW_DIVERT;
}

/*
 * Called when PROC_UUID_FLOW_DIVERT is cleared.
 */
void
inp_clear_flow_divert(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_FLOW_DIVERT;
}
#endif /* FLOW_DIVERT */

/*
 * Calculate the flow hash for an inp; this is used by an interface to
 * identify a flow.  When an interface provides a flow control advisory,
 * this flow hash is used as the identifier.
 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	if (inp_hash_seed == 0)
		inp_hash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * Collisions on the flow hash are possible but the
		 * probability is low; recompute the flowhash with
		 * a new seed.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return (flowhash);
}

void
inp_flowadv(uint32_t flowhash)
{
	struct inpcb *inp;

	inp = inp_fc_getinp(flowhash, 0);

	if (inp == NULL)
		return;
	inp_fc_feedback(inp);
}
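
/*
 * Example (illustrative sketch only): a driver or interface queue that
 * previously issued a flow advisory can signal that the flow may resume
 * by handing the saved flow hash back to this layer:
 *
 *	uint32_t flowhash = ...;	// taken from the flow-advisory entry
 *
 *	inp_flowadv(flowhash);		// looks up the inp and unthrottles it
 */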

/*
 * Function to compare inp_fc_entries in the inp flow control tree
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	    sizeof (inp1->inp_flowhash)));
}

static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return (NULL);
	}

	if (flags & INPFC_REMOVE) {
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		lck_mtx_unlock(&inp_fc_lck);

		bzero(&(inp->infc_link), sizeof (inp->infc_link));
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		return (NULL);
	}

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
		inp = NULL;
	lck_mtx_unlock(&inp_fc_lck);

	return (inp);
}

static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/*
	 * Return if the connection is not in the flow-controlled state.
	 * This can happen if the connection experienced losses while
	 * it was in the flow-controlled state.
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM)
		inp_fc_unthrottle_tcp(inp);

	socket_unlock(so, 1);
}

void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	if (inp->inp_sndinprog_cnt > 0)
		inp->inp_flags |= INP_FC_FEEDBACK;

	/* Give a write wakeup to unblock the socket */
	if (needwakeup)
		sowwakeup(so);
}

int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	struct inpcb *tmp_inp = NULL;

	/*
	 * If there was feedback from the interface while a send operation
	 * was in progress, ignore this flow advisory to avoid a race
	 * between setting the flow-controlled state and receiving feedback
	 * from the interface.
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK)
		return (0);

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
			return (0);
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}
		return (1);
	}
	return (0);
}
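
/*
 * Example (illustrative sketch only, assuming the ip_out_args
 * flow-advisory field, ipoa_flowadv, as used by the transports in this
 * kernel): an output path reacts to a flow advisory roughly as follows:
 *
 *	struct flowadv *adv = &ipoa.ipoa_flowadv;
 *
 *	error = ip_output(m, opt, &ro, flags, mopt, &ipoa);
 *	if (adv->code == FADV_FLOW_CONTROLLED ||
 *	    adv->code == FADV_SUSPENDED)
 *		inp_set_fc_state(inp, adv->code);
 */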

/*
 * Handler for SO_FLUSH socket option.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
		return (EINVAL);

	/* We need a flow hash for identification */
	if (flowhash == 0)
		return (0);

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	if (rtifp != NULL)
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	if (oifp != NULL && oifp != rtifp)
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);

	return (0);
}
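
/*
 * Example (illustrative sketch only; SO_FLUSH is a private option):
 * userland can ask for a socket's queued packets of a given traffic
 * class to be flushed from the interface queues:
 *
 *	int optval = SO_TC_ALL;		// or a specific SO_TC_* class
 *
 *	if (setsockopt(s, SOL_SOCKET, SO_FLUSH, &optval,
 *	    sizeof (optval)) == -1)
 *		err(1, "setsockopt(SO_FLUSH)");
 */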

/*
 * Clear the INP_INADDR_ANY flag (special case for PPP only)
 */
void
inp_clear_INP_INADDR_ANY(struct socket *so)
{
	struct inpcb *inp = NULL;

	socket_lock(so, 1);
	inp = sotoinpcb(so);
	if (inp) {
		inp->inp_flags &= ~INP_INADDR_ANY;
	}
	socket_unlock(so, 1);
}

void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	/*
	 * When not delegated, the effective pid is the same as the real pid.
	 */
	if (so->so_flags & SOF_DELEGATED)
		soprocinfo->spi_epid = so->e_pid;
	else
		soprocinfo->spi_epid = so->last_pid;
}

int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof (struct so_procinfo));

	if (!flowhash)
		return (-1);

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	return (found);
}

#if CONFIG_PROC_UUID_POLICY
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags & INP_NO_IFT_CELLULAR);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = (inp->inp_flags & INP_NO_IFT_CELLULAR);
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}

#if FLOW_DIVERT
static void
inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	if (set && !(inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
		set = !flow_divert_is_dns_service(so);
	}

	before = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT);
	if (set) {
		inp_set_flow_divert(inp);
	} else {
		inp_clear_flow_divert(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT);
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
#endif /* FLOW_DIVERT */
#endif /* !CONFIG_PROC_UUID_POLICY */

int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard the cached generation count if the entry is gone (ENOENT),
	 * so that we go through the checks below.
	 */
	if (err == ENOENT && ogencnt != 0)
		so->so_policy_gencnt = 0;

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if FLOW_DIVERT
		/* update flow divert policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_FLOW_DIVERT)) {
			inp_update_flow_divert_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_FLOW_DIVERT)) {
			inp_update_flow_divert_policy(inp, FALSE);
		}
#endif /* FLOW_DIVERT */
	}

	return ((err == ENOENT) ? 0 : err);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}

boolean_t
inp_restricted(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	if (!sorestrictrecv)
		return (FALSE);

	if (ifp == NULL || !(ifp->if_eflags & IFEF_RESTRICTED_RECV))
		return (FALSE);

	if (inp->inp_flags & INP_RECV_ANYIF)
		return (FALSE);

	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
		return (FALSE);

	return (TRUE);
}
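
/*
 * Example (illustrative sketch only): the hash-lookup loops earlier in
 * this file use inp_restricted() to skip pcbs that may not receive on a
 * restricted interface:
 *
 *	LIST_FOREACH(inp, head, inp_hash) {
 *		if (inp_restricted(inp, ifp))
 *			continue;	// not eligible on this interface
 *		...
 *	}
 *
 * A pcb escapes the restriction only if it opted into INP_RECV_ANYIF or
 * is explicitly bound to the restricted interface.
 */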