1/* 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1991, 1993, 1995 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 
59 * 60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ 62 */ 63 64#include <sys/param.h> 65#include <sys/systm.h> 66#include <sys/malloc.h> 67#include <sys/mbuf.h> 68#include <sys/domain.h> 69#include <sys/protosw.h> 70#include <sys/socket.h> 71#include <sys/socketvar.h> 72#include <sys/proc.h> 73#include <sys/kernel.h> 74#include <sys/sysctl.h> 75#include <sys/mcache.h> 76#include <sys/kauth.h> 77#include <sys/priv.h> 78#include <sys/proc_uuid_policy.h> 79#include <sys/syslog.h> 80#include <sys/priv.h> 81 82#include <libkern/OSAtomic.h> 83#include <kern/locks.h> 84 85#include <machine/limits.h> 86 87#include <kern/zalloc.h> 88 89#include <net/if.h> 90#include <net/if_types.h> 91#include <net/route.h> 92#include <net/flowhash.h> 93#include <net/flowadv.h> 94#include <net/ntstat.h> 95 96#include <netinet/in.h> 97#include <netinet/in_pcb.h> 98#include <netinet/in_var.h> 99#include <netinet/ip_var.h> 100#if INET6 101#include <netinet/ip6.h> 102#include <netinet6/ip6_var.h> 103#endif /* INET6 */ 104 105#include <sys/kdebug.h> 106#include <sys/random.h> 107 108#include <dev/random/randomdev.h> 109#include <mach/boolean.h> 110 111#if NECP 112#include <net/necp.h> 113#endif 114 115static lck_grp_t *inpcb_lock_grp; 116static lck_attr_t *inpcb_lock_attr; 117static lck_grp_attr_t *inpcb_lock_grp_attr; 118decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ 119decl_lck_mtx_data(static, inpcb_timeout_lock); 120 121static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); 122 123static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ 124static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ 125static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ 126static boolean_t inpcb_fast_timer_on = FALSE; 127 128/* 129 * If the total number of gc reqs is above a threshold, schedule 130 * garbage collect timer sooner 131 */ 
/* Set when an extra, faster GC callout has been scheduled (see inpcb_gc_sched) */
static boolean_t inpcb_toomany_gcreq = FALSE;

#define INPCB_GCREQ_THRESHOLD 50000
#define INPCB_TOOMANY_GCREQ_TIMER (hz/10) /* 10 times a second */

static void inpcb_sched_timeout(struct timeval *);
static void inpcb_timeout(void *);
int inpcb_timeout_lazy = 10;	/* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* !CONFIG_PROC_UUID_POLICY */

/* kdebug trace codes for PCB hash lookups */
#define DBG_FNC_PCB_LOOKUP	NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP	NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO;	/* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */

/*
 * Clamp a port-range variable into [min, max].  Statement-shaped macro
 * (bare if/else, not do-while) -- only used below, then #undef'd.
 */
#define RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }

/*
 * sysctl handler shared by all net.inet.ip.portrange.* knobs: let
 * sysctl_handle_int store the new value, then clamp every range
 * variable back into its legal bounds so the auto-port allocators
 * can never walk outside 1..USHRT_MAX.
 */
static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
	if (!error) {
		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
	}
	return (error);
}

185#undef RANGECHK 186 187SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, 188 CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); 189 190SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, 191 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 192 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 193SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, 194 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 195 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 196SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, 197 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 198 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 199SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, 200 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 201 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 202SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, 203 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 204 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 205SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, 206 CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, 207 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 208 209extern int udp_use_randomport; 210extern int tcp_use_randomport; 211 212/* Structs used for flowhash computation */ 213struct inp_flowhash_key_addr { 214 union { 215 struct in_addr v4; 216 struct in6_addr v6; 217 u_int8_t addr8[16]; 218 u_int16_t addr16[8]; 219 u_int32_t addr32[4]; 220 } infha; 221}; 222 223struct inp_flowhash_key { 224 struct inp_flowhash_key_addr infh_laddr; 225 struct inp_flowhash_key_addr infh_faddr; 226 u_int32_t infh_lport; 227 u_int32_t infh_fport; 228 u_int32_t infh_af; 229 u_int32_t infh_proto; 230 u_int32_t infh_rand1; 231 u_int32_t infh_rand2; 232}; 233 234static u_int32_t inp_hash_seed = 0; 235 236static int infc_cmp(const struct inpcb *, const struct inpcb *); 237 238/* Flags used by inp_fc_getinp */ 239#define INPFC_SOLOCKED 0x1 240#define INPFC_REMOVE 0x2 241static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); 242 243static 
void inp_fc_feedback(struct inpcb *); 244extern void tcp_remove_from_time_wait(struct inpcb *inp); 245 246decl_lck_mtx_data(static, inp_fc_lck); 247 248RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; 249RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); 250RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp); 251 252/* 253 * Use this inp as a key to find an inp in the flowhash tree. 254 * Accesses to it are protected by inp_fc_lck. 255 */ 256struct inpcb key_inp; 257 258/* 259 * in_pcb.c: manage the Protocol Control Blocks. 260 */ 261 262void 263in_pcbinit(void) 264{ 265 static int inpcb_initialized = 0; 266 267 VERIFY(!inpcb_initialized); 268 inpcb_initialized = 1; 269 270 inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); 271 inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); 272 inpcb_lock_attr = lck_attr_alloc_init(); 273 lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); 274 lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); 275 276 /* 277 * Initialize data structures required to deliver 278 * flow advisories. 279 */ 280 lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); 281 lck_mtx_lock(&inp_fc_lck); 282 RB_INIT(&inp_fc_tree); 283 bzero(&key_inp, sizeof(key_inp)); 284 lck_mtx_unlock(&inp_fc_lck); 285} 286 287#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ 288 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) 289static void 290inpcb_timeout(void *arg) 291{ 292#pragma unused(arg) 293 struct inpcbinfo *ipi; 294 boolean_t t, gc; 295 struct intimercount gccnt, tmcnt; 296 struct timeval leeway; 297 boolean_t toomany_gc = FALSE; 298 299 if (arg != NULL) { 300 VERIFY(arg == &inpcb_toomany_gcreq); 301 toomany_gc = *(boolean_t *)arg; 302 } 303 304 /* 305 * Update coarse-grained networking timestamp (in sec.); the idea 306 * is to piggy-back on the timeout callout to update the counter 307 * returnable via net_uptime(). 
308 */ 309 net_update_uptime(); 310 311 bzero(&gccnt, sizeof(gccnt)); 312 bzero(&tmcnt, sizeof(tmcnt)); 313 314 lck_mtx_lock_spin(&inpcb_timeout_lock); 315 gc = inpcb_garbage_collecting; 316 inpcb_garbage_collecting = FALSE; 317 318 t = inpcb_ticking; 319 inpcb_ticking = FALSE; 320 321 if (gc || t) { 322 lck_mtx_unlock(&inpcb_timeout_lock); 323 324 lck_mtx_lock(&inpcb_lock); 325 TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { 326 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { 327 bzero(&ipi->ipi_gc_req, 328 sizeof(ipi->ipi_gc_req)); 329 if (gc && ipi->ipi_gc != NULL) { 330 ipi->ipi_gc(ipi); 331 gccnt.intimer_lazy += 332 ipi->ipi_gc_req.intimer_lazy; 333 gccnt.intimer_fast += 334 ipi->ipi_gc_req.intimer_fast; 335 gccnt.intimer_nodelay += 336 ipi->ipi_gc_req.intimer_nodelay; 337 } 338 } 339 if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { 340 bzero(&ipi->ipi_timer_req, 341 sizeof(ipi->ipi_timer_req)); 342 if (t && ipi->ipi_timer != NULL) { 343 ipi->ipi_timer(ipi); 344 tmcnt.intimer_lazy += 345 ipi->ipi_timer_req.intimer_lazy; 346 tmcnt.intimer_lazy += 347 ipi->ipi_timer_req.intimer_fast; 348 tmcnt.intimer_nodelay += 349 ipi->ipi_timer_req.intimer_nodelay; 350 } 351 } 352 } 353 lck_mtx_unlock(&inpcb_lock); 354 lck_mtx_lock_spin(&inpcb_timeout_lock); 355 } 356 357 /* lock was dropped above, so check first before overriding */ 358 if (!inpcb_garbage_collecting) 359 inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); 360 if (!inpcb_ticking) 361 inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); 362 363 /* re-arm the timer if there's work to do */ 364 if (toomany_gc) { 365 inpcb_toomany_gcreq = FALSE; 366 } else { 367 inpcb_timeout_run--; 368 VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); 369 } 370 371 bzero(&leeway, sizeof(leeway)); 372 leeway.tv_sec = inpcb_timeout_lazy; 373 if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) 374 inpcb_sched_timeout(NULL); 375 else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) 376 /* be lazy when idle with little 
activity */ 377 inpcb_sched_timeout(&leeway); 378 else 379 inpcb_sched_timeout(NULL); 380 381 lck_mtx_unlock(&inpcb_timeout_lock); 382} 383 384static void 385inpcb_sched_timeout(struct timeval *leeway) 386{ 387 lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); 388 389 if (inpcb_timeout_run == 0 && 390 (inpcb_garbage_collecting || inpcb_ticking)) { 391 lck_mtx_convert_spin(&inpcb_timeout_lock); 392 inpcb_timeout_run++; 393 if (leeway == NULL) { 394 inpcb_fast_timer_on = TRUE; 395 timeout(inpcb_timeout, NULL, hz); 396 } else { 397 inpcb_fast_timer_on = FALSE; 398 timeout_with_leeway(inpcb_timeout, NULL, hz, 399 tvtohz(leeway)); 400 } 401 } else if (inpcb_timeout_run == 1 && 402 leeway == NULL && !inpcb_fast_timer_on) { 403 /* 404 * Since the request was for a fast timer but the 405 * scheduled timer is a lazy timer, try to schedule 406 * another instance of fast timer also 407 */ 408 lck_mtx_convert_spin(&inpcb_timeout_lock); 409 inpcb_timeout_run++; 410 inpcb_fast_timer_on = TRUE; 411 timeout(inpcb_timeout, NULL, hz); 412 } 413} 414 415void 416inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) 417{ 418 struct timeval leeway; 419 u_int32_t gccnt; 420 lck_mtx_lock_spin(&inpcb_timeout_lock); 421 inpcb_garbage_collecting = TRUE; 422 423 gccnt = ipi->ipi_gc_req.intimer_nodelay + 424 ipi->ipi_gc_req.intimer_fast; 425 426 if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) { 427 inpcb_toomany_gcreq = TRUE; 428 429 /* 430 * There are toomany pcbs waiting to be garbage collected, 431 * schedule a much faster timeout in addition to 432 * the caller's request 433 */ 434 lck_mtx_convert_spin(&inpcb_timeout_lock); 435 timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq, 436 INPCB_TOOMANY_GCREQ_TIMER); 437 } 438 439 switch (type) { 440 case INPCB_TIMER_NODELAY: 441 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); 442 inpcb_sched_timeout(NULL); 443 break; 444 case INPCB_TIMER_FAST: 445 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); 446 
inpcb_sched_timeout(NULL); 447 break; 448 default: 449 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); 450 leeway.tv_sec = inpcb_timeout_lazy; 451 leeway.tv_usec = 0; 452 inpcb_sched_timeout(&leeway); 453 break; 454 } 455 lck_mtx_unlock(&inpcb_timeout_lock); 456} 457 458void 459inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) 460{ 461 struct timeval leeway; 462 lck_mtx_lock_spin(&inpcb_timeout_lock); 463 inpcb_ticking = TRUE; 464 switch (type) { 465 case INPCB_TIMER_NODELAY: 466 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); 467 inpcb_sched_timeout(NULL); 468 break; 469 case INPCB_TIMER_FAST: 470 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); 471 inpcb_sched_timeout(NULL); 472 break; 473 default: 474 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); 475 leeway.tv_sec = inpcb_timeout_lazy; 476 leeway.tv_usec = 0; 477 inpcb_sched_timeout(&leeway); 478 break; 479 } 480 lck_mtx_unlock(&inpcb_timeout_lock); 481} 482 483void 484in_pcbinfo_attach(struct inpcbinfo *ipi) 485{ 486 struct inpcbinfo *ipi0; 487 488 lck_mtx_lock(&inpcb_lock); 489 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { 490 if (ipi0 == ipi) { 491 panic("%s: ipi %p already in the list\n", 492 __func__, ipi); 493 /* NOTREACHED */ 494 } 495 } 496 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); 497 lck_mtx_unlock(&inpcb_lock); 498} 499 500int 501in_pcbinfo_detach(struct inpcbinfo *ipi) 502{ 503 struct inpcbinfo *ipi0; 504 int error = 0; 505 506 lck_mtx_lock(&inpcb_lock); 507 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { 508 if (ipi0 == ipi) 509 break; 510 } 511 if (ipi0 != NULL) 512 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); 513 else 514 error = ENXIO; 515 lck_mtx_unlock(&inpcb_lock); 516 517 return (error); 518} 519 520/* 521 * Allocate a PCB and associate it with the socket. 
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;
#if CONFIG_MACF_NET
	int mac_error;
#endif /* CONFIG_MACF_NET */

	if (!so->cached_in_sock_layer) {
		/* Fresh pcb from the protocol's zone, zeroed */
		inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
		if (inp == NULL)
			return (ENOBUFS);
		bzero((caddr_t)inp, sizeof (*inp));
	} else {
		/*
		 * Reuse the pcb cached in the socket layer; preserve the
		 * saved protocol pcb pointer across the bzero.
		 */
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof (*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
#if CONFIG_MACF_NET
	/* MAC label allocation can fail; undo the zone alloc on error */
	mac_error = mac_inpcb_label_init(inp, M_WAITOK);
	if (mac_error != 0) {
		if (!so->cached_in_sock_layer)
			zfree(pcbinfo->ipi_zone, inp);
		return (mac_error);
	}
	mac_inpcb_label_associate(so, inp);
#endif /* CONFIG_MACF_NET */
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof (u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    pcbinfo->ipi_lock_attr);
	}

#if INET6
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
		inp->inp_flags |= IN6P_IPV6_V6ONLY;

	if (ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif /* INET6 */

	(void) inp_update_policy(inp);

	/* Publish the new pcb on the protocol's global list */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(pcbinfo->ipi_lock);
	return (0);
}

/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away. Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL. This is
 * great for bind.
 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		lck_mtx_lock(&inp->inpcb_mtx);

		if (so->so_usecount == 0) {
			/* Nobody else holds the socket: reap it now */
			if (inp->inp_state != INPCB_STATE_DEAD)
				in_pcbdetach(inp);
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			lck_mtx_unlock(&inp->inpcb_mtx);
		}
	}

	return (inp);
}

/*
 * Post a KEV_INET_PORTINUSE kernel event telling interested parties
 * which port (host order) was contended and by which process.
 */
static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020 send a kernel event notification if a
	 * non-participating socket tries to bind the port a socket
	 * who has set SOF_NOTIFYCONFLICT owns.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof (struct kev_in_portinuse));
	bzero(&ev_msg, sizeof (struct kev_msg));
	in_portinuse.port = ntohs(port);	/* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	kev_post_msg(&ev_msg);
}

/*
 * Bind an INPCB to an address and/or port. This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);
	if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
		wild = 1;
	/*
	 * Drop the socket lock while taking the pcbinfo lock exclusively;
	 * every error return below must re-take the socket lock first.
	 */
	socket_unlock(so, 0);	/* keep reference on socket */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);

	bzero(&laddr, sizeof(laddr));

	if (nam != NULL) {

		if (nam->sa_len != sizeof (struct sockaddr_in)) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EINVAL);
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return (EAFNOSUPPORT);
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			/* The requested local address must be configured */
			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (EADDRNOTAVAIL);
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address. This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}
		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

			/* Binding below IPPORT_RESERVED needs privilege */
			if (ntohs(lport) < IPPORT_RESERVED) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EACCES);
				}
			}
			/*
			 * Cross-uid conflict check: a non-root user may not
			 * steal a port owned by a different uid unless the
			 * owner opted in via SO_REUSEPORT/SOF_REUSESHAREUID.
			 */
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
			    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
			    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT))
					conflict = 1;

				lck_rw_done(pcbinfo->ipi_lock);

				if (conflict)
					in_pcb_conflict_post_msg(lport);

				socket_lock(so, 0);
				return (EADDRINUSE);
			}
			/* General in-use check honoring the reuse options */
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0) {
#if INET6
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6)
#endif /* INET6 */
				{

					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT))
						conflict = 1;

					lck_rw_done(pcbinfo->ipi_lock);

					if (conflict)
						in_pcb_conflict_post_msg(lport);
					socket_lock(so, 0);
					return (EADDRINUSE);
				}
			}
		}
		laddr = SIN(nam)->sin_addr;
	}
	if (lport == 0) {
		/* No port requested: pick an ephemeral one */
		u_short first, last;
		int count;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = ipport_hifirstauto;	/* sysctl */
			last = ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return (error);
			}
			first = ipport_lowfirstauto;	/* 1023 */
			last = ipport_lowlastauto;	/* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = ipport_firstauto;	/* sysctl */
			last = ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */

		if (first == last)
			randomport = 0;
		/*
		 * Simple check to ensure all ports are not used up causing
		 * a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			/*
			 * counting down
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				--*lastport;
				if (*lastport > first || *lastport < last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		} else {
			/*
			 * counting up
			 */
			if (randomport) {
				read_random(&rand_port, sizeof (rand_port));
				/*
				 * NOTE(review): (first - last) is negative
				 * here (first < last); C truncation makes
				 * rand_port % (first - last) land in
				 * [0, last - first) so the result is still
				 * inside the range, but (last - first)
				 * would read more clearly -- confirm
				 * before changing.
				 */
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			do {
				if (count-- < 0) {	/* completely used? */
					lck_rw_done(pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return (EADDRNOTAVAIL);
				}
				++*lastport;
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local_and_cleanup(pcbinfo,
			    ((laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr), lport, wild));
		}
	}
	socket_lock(so, 0);
	/*
	 * Re-check after re-acquiring the socket lock: another thread may
	 * have bound this socket while we slept.
	 */
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (EINVAL);
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_lport = lport;
	if (anonport)
		inp->inp_flags |= INP_ANONPORT;

	if (in_pcbinshash(inp, 1) != 0) {
		/* Hash insertion failed: undo the binding */
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

		inp->inp_lport = 0;
		if (anonport)
			inp->inp_flags &= ~INP_ANONPORT;
		lck_rw_done(pcbinfo->ipi_lock);
		return (EAGAIN);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return (0);
}

/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case. The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif)
{
	struct route *ro = &inp->inp_route;	/* PCB's cached route */
	struct in_ifaddr *ia = NULL;		/* candidate source ifaddr */
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;		/* set when iface policy denies */

	if (outif != NULL)
		*outif = NULL;
	/* Validate the remote address before anything else */
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			/* no reference was taken; clear before reuse below */
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	/* Discard a cached route that is stale, wrong family/destination,
	 * or unusable because SO_DONTROUTE is set. */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);
		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			/* ia now holds a reference; dropped near the end */
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);
			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		/* Last resort: fall back to the route's own ifaddr */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			/* Swap the earlier candidate (and its reference)
			 * for an address on the multicast interface. */
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		/* Drop the reference taken on whichever path set ia */
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		/* Tell interested parties the interface was denied by policy */
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}

/*
 * Outer subroutine:
 * Connect from a socket to a specified address.
 * Both address and port must be specified in argument sin.
 * If don't have a local address for this socket yet,
 * then pick one.
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
		return (error);

	/* Drop the socket lock around the PCB hash lookup; re-validate the
	 * socket state afterwards (see SOF_ABORTED check below). */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ?
	    inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0)
		return (ECONNREFUSED);

	if (pcb != NULL) {
		/* The { laddr, lport, faddr, fport } tuple is already taken */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return (EADDRINUSE);
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Not yet bound: pick an ephemeral local port */
			error = in_pcbbind(inp, NULL, p);
			if (error)
				return (error);
		}
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign endpoint and rehash under the exclusive lock */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_invalidate_cache(inp);
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	return (0);
}

/*
 * Disconnect the PCB: clear the foreign address and port, and move it
 * back to the wildcard hash bucket.  May also detach the PCB when the
 * socket no longer holds a file descriptor reference.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
		nstat_pcb_cache(inp);

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
		in_pcbdetach(inp);
}

/*
 * Detach the PCB from its socket: release per-PCB resources (options,
 * cached route, multicast state), mark the PCB dead, and schedule it
 * for garbage collection.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
		nstat_pcb_detach(inp);
	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		inp->inp_moptions = NULL;
		if (imo != NULL)
			IMO_REMREF(imo);
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}


/*
 * Final disposal of a PCB: remove it from all lists, sever it from its
 * socket and free both.  The pcbinfo lock must be held exclusively
 * (asserted below), and the PCB must already be at WNT_STOPUSING with a
 * zero socket usecount — both are enforced by panics.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		/* If the inp was embedded in the socket allocation, it is
		 * freed along with the socket in sodealloc() below */
		if (!so->cached_in_sock_layer) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}

/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
int
in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero(sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	/* Report the bound local endpoint; caller owns (and frees) *nam */
	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}

/*
 * Non-allocating variant of in_getsockaddr(): fill a caller-supplied
 * sockaddr_storage with the local endpoint.
 */
int
in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	sin->sin_port = inp->inp_lport;
	sin->sin_addr = inp->inp_laddr;
	return (0);
}

int
in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp;
	struct sockaddr_in *sin;

	/*
	 * Do the malloc first in case it blocks.
	 */
	MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK);
	if (sin == NULL)
		return (ENOBUFS);
	bzero((caddr_t)sin, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL) {
		FREE(sin, M_SONAME);
		return (EINVAL);
	}
	/* Report the connected foreign endpoint; caller frees *nam */
	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;

	*nam = (struct sockaddr *)sin;
	return (0);
}

/*
 * Non-allocating variant of in_getpeeraddr(): fill a caller-supplied
 * sockaddr_storage with the foreign endpoint.
 */
int
in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
{
	struct sockaddr_in *sin = SIN(ss);
	struct inpcb *inp;

	VERIFY(ss != NULL);
	bzero(ss, sizeof (*ss));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);

	if ((inp = sotoinpcb(so)) == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    ) {
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	}

	sin->sin_port = inp->inp_fport;
	sin->sin_addr = inp->inp_faddr;
	return (0);
}

/*
 * Invoke the notify callback on every IPv4 PCB in pcbinfo whose foreign
 * address matches faddr, passing it errno.  Each PCB is pinned with
 * WNT_ACQUIRE and its socket locked across the callback.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL)
			continue;
		/* Skip PCBs that are already being torn down */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
			continue;
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(pcbinfo->ipi_lock);
}

/*
 * Check for alternatives when higher level complains
 * about service problems. For now, invalidate cached
 * routing information.
 * If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;
	struct rt_addrinfo info;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		/* Announce the loss via the routing socket */
		bzero((caddr_t)&info, sizeof (info));
		RT_LOCK(rt);
		info.rti_info[RTAX_DST] =
		    (struct sockaddr *)&inp->inp_route.ro_dst;
		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route. A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}

/*
 * After a routing change, flush old routing
 * and allocate a (hopefully) better one.
 */
void
in_rtchange(struct inpcb *inp, int errno)
{
#pragma unused(errno)
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		/* if address is gone, keep the old route */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route. A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}
	if (rt == NULL || release)
		ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Lookup a PCB based on the local address and port.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	/* matchwild starts above the worst possible score (2 wildcards) */
	int matchwild = 3, wildcard;
	u_short lport = lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
#if INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif /* INET6 */
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 * NOTE(review): this exit does not emit the
				 * DBG_FUNC_END trace event, unlike the other
				 * return paths — confirm whether intentional.
				 */
				return (inp);
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (NULL);
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport)
				break;
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.  Lower wildcard score == more specific match.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
#if INET6
				if (!(inp->inp_vflag & INP_IPV4))
					continue;
#endif /* INET6 */
				if (inp->inp_faddr.s_addr != INADDR_ANY)
					wildcard++;
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY)
						wildcard++;
					else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr)
						continue;
				} else {
					if (laddr.s_addr != INADDR_ANY)
						wildcard++;
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						/* exact match; stop looking */
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return (match);
	}
}

/*
 * Check if PCB exists in hash list.
 * Like in_pcblookup_hash(), but instead of returning the PCB, reports
 * whether one exists and copies out the owning socket's uid/gid.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	/* Defaults reported when no matching socket is found */
	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
				    inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    inp->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}

	/* Wildcard pass: look in the bucket for unconnected listeners */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
					    inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
					    inp->inp_socket->so_cred);
				}
				lck_rw_done(pcbinfo->ipi_lock);
				return (found);
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				/* Prefer a native INET wildcard over an
				 * INET6 socket accepting mapped addresses */
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
				    local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
				    local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(pcbinfo->ipi_lock);
			return (found);
		}
#endif /* INET6 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (0);
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
		    local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
		    local_wild->inp_socket->so_cred);
	}
	lck_rw_done(pcbinfo->ipi_lock);
	return (found);
}

/*
 * Lookup PCB in hash list.
 * On success the PCB is returned with a WNT_ACQUIRE reference held;
 * the caller must release it via in_pcb_checkstate(WNT_RELEASE).
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = fport_arg, lport = lport_arg;
	struct inpcb *local_wild = NULL;
#if INET6
	struct inpcb *local_wild_mapped = NULL;
#endif /* INET6 */

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return (inp);
			} else {
				/* it's there but dead, say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return (NULL);
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (NULL);
	}

	/* Wildcard pass: look in the bucket for unconnected listeners */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
#if INET6
		if (!(inp->inp_vflag & INP_IPV4))
			continue;
#endif /* INET6 */
		if (inp_restricted_recv(inp, ifp))
			continue;

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					lck_rw_done(pcbinfo->ipi_lock);
					return (inp);
				} else {
					/* it's dead; say it isn't found */
					lck_rw_done(pcbinfo->ipi_lock);
					return (NULL);
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
#if INET6
				/* Prefer a native INET wildcard over an
				 * INET6 socket accepting mapped addresses */
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6))
					local_wild_mapped = inp;
				else
#endif /* INET6 */
					local_wild = inp;
			}
		}
	}
	if (local_wild == NULL) {
#if INET6
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				lck_rw_done(pcbinfo->ipi_lock);
				return (local_wild_mapped);
			} else {
				/* it's dead; say it isn't found */
				lck_rw_done(pcbinfo->ipi_lock);
				return (NULL);
			}
		}
#endif /* INET6 */
		lck_rw_done(pcbinfo->ipi_lock);
		return (NULL);
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (local_wild);
	}
	/*
	 * It's either not found or is already dead.
	 */
	lck_rw_done(pcbinfo->ipi_lock);
	return (NULL);
}

/*
 * Insert PCB onto various hash lists.
 * With locked == 0 the pcbinfo lock is taken (and dropped) here; with
 * locked != 0 the caller already holds it exclusively.
 */
int
in_pcbinshash(struct inpcb *inp, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
			if (inp->inp_state == INPCB_STATE_DEAD) {
				/*
				 * The socket got dropped when
				 * it was unlocked
				 */
				lck_rw_done(pcbinfo->ipi_lock);
				return (ECONNABORTED);
			}
		}
	}

#if INET6
	if (inp->inp_vflag & INP_IPV6)
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport)
			break;
	}

	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport),
		    M_PCB, M_WAITOK);
		if (phd == NULL) {
			if (!locked)
				lck_rw_done(pcbinfo->ipi_lock);
			return (ENOBUFS); /* XXX */
		}
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked)
		lck_rw_done(pcbinfo->ipi_lock);

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */

	return (0);
}

/*
 * Move PCB to the proper hash bucket when { faddr, fport } have been
 * changed. NOTE: This does not handle the case of the lport changing (the
 * hashed port list would have to be updated as well), so the lport must
 * not change after in_pcbinshash() has been called.
 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if INET6
	if (inp->inp_vflag & INP_IPV6)
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	/* Unlink from the old bucket (if hashed) before re-inserting */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}

/*
 * Remove PCB from various lists.
 * Must be called while the pcbinfo lock is held in exclusive mode.
 */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * it's local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		/* Free the per-port head once its last PCB is gone */
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			FREE(phd, M_PCB);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}

/*
 * Mechanism used to defer the memory release of PCBs
 * The pcb list will contain the pcb until the reaper can clean it up if
 * the following conditions are met:
 * 1) state "DEAD",
 * 2) wantcnt is STOPUSING
 * 3) usecount is 0
 * This function will be called to either mark the pcb as
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/* wantcnt is manipulated lock-free via compare-and-swap below */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling.
CAS with 2233 * STOPUSING, if success we're good, if it's in use, will 2234 * be marked later 2235 */ 2236 if (locked == 0) 2237 socket_lock(pcb->inp_socket, 1); 2238 pcb->inp_state = INPCB_STATE_DEAD; 2239 2240stopusing: 2241 if (pcb->inp_socket->so_usecount < 0) { 2242 panic("%s: pcb=%p so=%p usecount is negative\n", 2243 __func__, pcb, pcb->inp_socket); 2244 /* NOTREACHED */ 2245 } 2246 if (locked == 0) 2247 socket_unlock(pcb->inp_socket, 1); 2248 2249 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); 2250 2251 origwant = *wantcnt; 2252 if ((UInt16) origwant == 0xffff) /* should stop using */ 2253 return (WNT_STOPUSING); 2254 newwant = 0xffff; 2255 if ((UInt16) origwant == 0) { 2256 /* try to mark it as unsuable now */ 2257 OSCompareAndSwap(origwant, newwant, wantcnt); 2258 } 2259 return (WNT_STOPUSING); 2260 break; 2261 2262 case WNT_ACQUIRE: 2263 /* 2264 * Try to increase reference to pcb. If WNT_STOPUSING 2265 * should bail out. If socket state DEAD, try to set count 2266 * to STOPUSING, return failed otherwise increase cnt. 2267 */ 2268 do { 2269 origwant = *wantcnt; 2270 if ((UInt16) origwant == 0xffff) { 2271 /* should stop using */ 2272 return (WNT_STOPUSING); 2273 } 2274 newwant = origwant + 1; 2275 } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); 2276 return (WNT_ACQUIRE); 2277 break; 2278 2279 case WNT_RELEASE: 2280 /* 2281 * Release reference. 
If result is null and pcb state 2282 * is DEAD, set wanted bit to STOPUSING 2283 */ 2284 if (locked == 0) 2285 socket_lock(pcb->inp_socket, 1); 2286 2287 do { 2288 origwant = *wantcnt; 2289 if ((UInt16) origwant == 0x0) { 2290 panic("%s: pcb=%p release with zero count", 2291 __func__, pcb); 2292 /* NOTREACHED */ 2293 } 2294 if ((UInt16) origwant == 0xffff) { 2295 /* should stop using */ 2296 if (locked == 0) 2297 socket_unlock(pcb->inp_socket, 1); 2298 return (WNT_STOPUSING); 2299 } 2300 newwant = origwant - 1; 2301 } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); 2302 2303 if (pcb->inp_state == INPCB_STATE_DEAD) 2304 goto stopusing; 2305 if (pcb->inp_socket->so_usecount < 0) { 2306 panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", 2307 __func__, pcb, pcb->inp_socket); 2308 /* NOTREACHED */ 2309 } 2310 2311 if (locked == 0) 2312 socket_unlock(pcb->inp_socket, 1); 2313 return (WNT_RELEASE); 2314 break; 2315 2316 default: 2317 panic("%s: so=%p not a valid state =%x\n", __func__, 2318 pcb->inp_socket, mode); 2319 /* NOTREACHED */ 2320 } 2321 2322 /* NOTREACHED */ 2323 return (mode); 2324} 2325 2326/* 2327 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. 2328 * The inpcb_compat data structure is passed to user space and must 2329 * not change. We intentionally avoid copying pointers. 
 */
void
inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
{
	/* Start from all-zeroes so unset/omitted fields leak nothing */
	bzero(inp_compat, sizeof (*inp_compat));
	inp_compat->inp_fport = inp->inp_fport;
	inp_compat->inp_lport = inp->inp_lport;
	/* NAT fields are defunct; always exported as zero */
	inp_compat->nat_owner = 0;
	inp_compat->nat_cookie = 0;
	inp_compat->inp_gencnt = inp->inp_gencnt;
	inp_compat->inp_flags = inp->inp_flags;
	inp_compat->inp_flow = inp->inp_flow;
	inp_compat->inp_vflag = inp->inp_vflag;
	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
	inp_compat->inp_ip_p = inp->inp_ip_p;
	inp_compat->inp_dependfaddr.inp6_foreign =
	    inp->inp_dependfaddr.inp6_foreign;
	inp_compat->inp_dependladdr.inp6_local =
	    inp->inp_dependladdr.inp6_local;
	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	/* hlim and ifindex are deliberately not exported (always 0) */
	inp_compat->inp_depend6.inp6_hlim = 0;
	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	inp_compat->inp_depend6.inp6_ifindex = 0;
	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}

/*
 * 64-bit variant of the above: copy the user-visible subset of an inpcb
 * into a struct xinpcb64 for sysctl export.  Same field policy as
 * inpcb_to_compat (no pointers; hlim/ifindex zeroed).
 */
void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}

/*
 * The following routines implement this scheme:
 *
 * Callers of ip_output() that intend to cache
 * the route in the inpcb pass
 * a local copy of the struct route to ip_output(). Using a local copy of
 * the cached route significantly simplifies things as IP no longer has to
 * worry about having exclusive access to the passed in struct route, since
 * it's defined in the caller's stack; in essence, this allows for a lock-
 * less operation when updating the struct route at the IP level and below,
 * whenever necessary. The scheme works as follows:
 *
 * Prior to dropping the socket's lock and calling ip_output(), the caller
 * copies the struct route from the inpcb into its stack, and adds a reference
 * to the cached route entry, if there was any. The socket's lock is then
 * dropped and ip_output() is called with a pointer to the copy of struct
 * route defined on the stack (not to the one in the inpcb.)
 *
 * Upon returning from ip_output(), the caller then acquires the socket's
 * lock and synchronizes the cache; if there is no route cached in the inpcb,
 * it copies the local copy of struct route (which may or may not contain any
 * route) back into the cache; otherwise, if the inpcb has a route cached in
 * it, the one in the local copy will be freed, if there's any. Trashing the
 * cached route in the inpcb can be avoided because ip_output() is single-
 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
 * by the socket/transport layer.)
 */

/*
 * Copy the PCB's cached route into the caller-supplied @dst (see the
 * scheme described above).  Caller must hold the per-PCB mutex.
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the case of IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET)
		ROUTE_RELEASE(src);

	route_copyout(dst, src, sizeof (*dst));
}

/*
 * Synchronize the caller's stack copy @src back into the PCB's route
 * cache (counterpart of inp_route_copyout).  Caller must hold the
 * per-PCB mutex.
 */
void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
		panic("%s: wrong or corrupted route: %p", __func__, src);

	route_copyin(src, dst, sizeof (*src));
}

/*
 * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
 *
 * @param ifscope  interface index to bind to, or IFSCOPE_NONE to unbind.
 * @param pifp     if non-NULL, receives the resolved ifnet (or NULL).
 * @return 0 on success, ENXIO if the scope is out of range or names no
 *         attached interface.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return (ENXIO);
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	/*
	 * A zero interface scope value indicates an "unbind".
	 * Otherwise, take in whatever value the app desires;
	 * the app may already know the scope (or force itself
	 * to such a scope) ahead of time before the interface
	 * gets attached. It doesn't matter either way; any
	 * route lookup from this point on will require an
	 * exact match for the embedded interface scope.
	 */
	inp->inp_boundifp = ifp;
	if (inp->inp_boundifp == NULL)
		inp->inp_flags &= ~INP_BOUND_IF;
	else
		inp->inp_flags |= INP_BOUND_IF;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);

	if (pifp != NULL)
		*pifp = ifp;

	return (0);
}

/*
 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for setting PROC_UUID_NO_CELLULAR policy.
 */
void
inp_set_nocellular(struct inpcb *inp)
{
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
 */
void
inp_clear_nocellular(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/*
	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
	 * has a higher precendence than INP_NO_IFT_CELLULAR.  Clear the flag
	 * if and only if the socket is unrestricted.
	 */
	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;

		/* Blow away any cached route in the PCB */
		ROUTE_RELEASE(&inp->inp_route);
	}
}

/*
 * Mark the PCB as unwilling to use "expensive" interfaces; the cached
 * route is dropped so the next lookup honors the new restriction.
 */
void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Allow this PCB to use AWDL-restricted interfaces.
 */
void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

/*
 * Report whether this PCB is allowed to use AWDL-restricted interfaces.
 */
boolean_t
inp_get_awdl_unrestricted(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
}

/*
 * Revoke this PCB's permission to use AWDL-restricted interfaces.
 */
void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}

#if NECP
/*
 * Called when PROC_UUID_NECP_APP_POLICY is set.
 */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}

/*
 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
 */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
#endif /* NECP */

/*
 * Calculate flow hash for an inp, used by an interface to identify a
 * flow. When an interface provides flow control advisory, this flow
 * hash is used as an identifier.
 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	/* Lazily seed the hash; seed is regenerated on collisions below */
	if (inp_hash_seed == 0)
		inp_hash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low.  Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return (flowhash);
}

/*
 * Deliver a flow-control advisory (by flow hash) to the owning PCB,
 * if one is found and still usable.
 */
void
inp_flowadv(uint32_t flowhash)
{
	struct inpcb *inp;

	inp = inp_fc_getinp(flowhash, 0);

	if (inp == NULL)
		return;
	inp_fc_feedback(inp);
}

/*
 * Function to compare inp_fc_entries in inp flow control tree
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	    sizeof(inp1->inp_flowhash)));
}

/*
 * Look up a PCB by flow hash in the flow-control tree.
 *
 * With INPFC_REMOVE, the entry is unlinked from the tree and NULL is
 * returned.  Otherwise a use reference is taken via in_pcb_checkstate()
 * (honoring INPFC_SOLOCKED) and the PCB is returned, or NULL when it is
 * absent or being torn down.
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return (NULL);
	}

	if (flags & INPFC_REMOVE) {
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		lck_mtx_unlock(&inp_fc_lck);

		bzero(&(inp->infc_link), sizeof (inp->infc_link));
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		return (NULL);
	}

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
		inp = NULL;
	lck_mtx_unlock(&inp_fc_lck);

	return (inp);
}

/*
 * Process interface feedback for a flow-controlled PCB: clear the
 * flow-control state and wake up writers.  Consumes the use reference
 * taken by the caller (via WNT_RELEASE).
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/* Note the feedback while a send is in flight (see inp_set_fc_state) */
	if (inp->inp_sndinprog_cnt > 0)
		inp->inp_flags |= INP_FC_FEEDBACK;

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM)
		inp_fc_unthrottle_tcp(inp);

	socket_unlock(so, 1);
}

/*
 * Clear flow-controlled/suspended state on a PCB, emitting a resume
 * event if it was suspended and waking any blocked writer.
 */
void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	/* Give a write wakeup to unblock the socket */
	if (needwakeup)
		sowwakeup(so);
}

/*
 * Apply a flow advisory (@advcode) to the PCB, marking it flow
 * controlled or suspended.  Returns 1 if the state was applied,
 * 0 if the advisory was ignored (stale feedback or PCB going away).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK)
		return (0);

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING)
			return (0);
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}
		return (1);
	}
	return (0);
}

/*
 * Handler for SO_FLUSH socket option.
 *
 * Flushes this PCB's queued packets (identified by flow hash) of the
 * given traffic class from the route's and last-output interfaces.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
		return (EINVAL);

	/* We need a flow hash for identification */
	if (flowhash == 0)
		return (0);

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	if (rtifp != NULL)
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	if (oifp != NULL && oifp != rtifp)
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);

	return (0);
}

/*
 * Clear the INP_INADDR_ANY flag (special case for PPP only)
 */
void
inp_clear_INP_INADDR_ANY(struct socket *so)
{
	struct inpcb *inp = NULL;

	socket_lock(so, 1);
	inp = sotoinpcb(so);
	if (inp) {
		inp->inp_flags &= ~INP_INADDR_ANY;
	}
	socket_unlock(so, 1);
}

/*
 * Fill @soprocinfo with the owning process's pid/uuid (and the
 * effective pid/uuid when the socket is delegated).
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	if (so->last_pid != 0)
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_epid = so->e_pid;
		if (so->e_pid != 0)
			uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_epid = so->last_pid;
	}
}

/*
 * Find the live PCB with the given flow hash and report its owning
 * process info.  Returns 1 when found, 0 when not, -1 on a zero hash.
 */
int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof (struct so_procinfo));

	if (!flowhash)
		return (-1);

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	return (found);
}

#if CONFIG_PROC_UUID_POLICY
/*
 * Set or clear the PCB's cellular-denied policy flag, logging the
 * transition when net.io policy logging is enabled.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}

#if NECP
/*
 * Set or clear the PCB's NECP app-policy interest flag, logging the
 * transition when net.io policy logging is enabled.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

#if NECP
/*
 * Re-evaluate the NECP policy match for this PCB, optionally with
 * overridden addresses/interface, and rescope an as-yet unbound
 * socket to the policy's interface when required.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
	}
}
#endif /* NECP */

/*
 * Refresh per-process UUID policy state (cellular / NECP app policy)
 * for this PCB.  Returns 0 on success or when no policy applies;
 * otherwise the proc_uuid_policy_lookup() error.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0)
		so->so_policy_gencnt = 0;

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	return ((err == ENOENT) ? 0 : err);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}

/*
 * Called when we need to enforce policy restrictions in the input path.
 *
 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
 */
boolean_t
inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv)
		return (FALSE);

	if (ifp == NULL)
		return (FALSE);

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
		return (TRUE);

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
		return (TRUE);

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
		return (TRUE);

	/* Interface not restricted-receive: nothing more to check */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
		return (FALSE);

	if (inp->inp_flags & INP_RECV_ANYIF)
		return (FALSE);

	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
		return (FALSE);

	return (TRUE);
}

/*
 * Called when we need to enforce policy restrictions in the output path.
 *
 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3068 */ 3069boolean_t 3070inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) 3071{ 3072 VERIFY(inp != NULL); 3073 3074 /* 3075 * Outbound restrictions. 3076 */ 3077 if (!sorestrictsend) 3078 return (FALSE); 3079 3080 if (ifp == NULL) 3081 return (FALSE); 3082 3083 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) 3084 return (TRUE); 3085 3086 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) 3087 return (TRUE); 3088 3089 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) 3090 return (TRUE); 3091 3092 return (FALSE); 3093} 3094