1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1980, 1986, 1991, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 
59 * 60 * @(#)route.c 8.2 (Berkeley) 11/15/93 61 * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $ 62 */ 63 64#include <sys/param.h> 65#include <sys/sysctl.h> 66#include <sys/systm.h> 67#include <sys/malloc.h> 68#include <sys/mbuf.h> 69#include <sys/socket.h> 70#include <sys/domain.h> 71#include <sys/syslog.h> 72#include <sys/queue.h> 73#include <sys/mcache.h> 74#include <sys/protosw.h> 75#include <sys/kernel.h> 76#include <kern/locks.h> 77#include <kern/zalloc.h> 78 79#include <net/dlil.h> 80#include <net/if.h> 81#include <net/route.h> 82#include <net/ntstat.h> 83 84#include <netinet/in.h> 85#include <netinet/in_var.h> 86#include <netinet/ip_var.h> 87#include <netinet/ip6.h> 88 89#if INET6 90#include <netinet6/ip6_var.h> 91#include <netinet6/in6_var.h> 92#include <netinet6/nd6.h> 93#endif /* INET6 */ 94 95#include <net/if_dl.h> 96 97#include <libkern/OSAtomic.h> 98#include <libkern/OSDebug.h> 99 100#include <pexpert/pexpert.h> 101 102#if CONFIG_MACF 103#include <sys/kauth.h> 104#endif 105 106/* 107 * Synchronization notes: 108 * 109 * Routing entries fall under two locking domains: the global routing table 110 * lock (rnh_lock) and the per-entry lock (rt_lock); the latter is a mutex that 111 * resides (statically defined) in the rtentry structure. 112 * 113 * The locking domains for routing are defined as follows: 114 * 115 * The global routing lock is used to serialize all accesses to the radix 116 * trees defined by rt_tables[], as well as the tree of masks. This includes 117 * lookups, insertions and removals of nodes to/from the respective tree. 118 * It is also used to protect certain fields in the route entry that aren't 119 * often modified and/or require global serialization (more details below.) 120 * 121 * The per-route entry lock is used to serialize accesses to several routing 122 * entry fields (more details below.) Acquiring and releasing this lock is 123 * done via RT_LOCK() and RT_UNLOCK() routines. 
124 * 125 * In cases where both rnh_lock and rt_lock must be held, the former must be 126 * acquired first in order to maintain lock ordering. It is not a requirement 127 * that rnh_lock be acquired first before rt_lock, but in case both must be 128 * acquired in succession, the correct lock ordering must be followed. 129 * 130 * The fields of the rtentry structure are protected in the following way: 131 * 132 * rt_nodes[] 133 * 134 * - Routing table lock (rnh_lock). 135 * 136 * rt_parent, rt_mask, rt_llinfo_free, rt_tree_genid 137 * 138 * - Set once during creation and never changes; no locks to read. 139 * 140 * rt_flags, rt_genmask, rt_llinfo, rt_rmx, rt_refcnt, rt_gwroute 141 * 142 * - Routing entry lock (rt_lock) for read/write access. 143 * 144 * - Some values of rt_flags are either set once at creation time, 145 * or aren't currently used, and thus checking against them can 146 * be done without rt_lock: RTF_GATEWAY, RTF_HOST, RTF_DYNAMIC, 147 * RTF_DONE, RTF_XRESOLVE, RTF_STATIC, RTF_BLACKHOLE, RTF_ANNOUNCE, 148 * RTF_USETRAILERS, RTF_WASCLONED, RTF_PINNED, RTF_LOCAL, 149 * RTF_BROADCAST, RTF_MULTICAST, RTF_IFSCOPE, RTF_IFREF. 150 * 151 * rt_key, rt_gateway, rt_ifp, rt_ifa 152 * 153 * - Always written/modified with both rnh_lock and rt_lock held. 154 * 155 * - May be read freely with rnh_lock held, else must hold rt_lock 156 * for read access; holding both locks for read is also okay. 157 * 158 * - In the event rnh_lock is not acquired, or is not possible to be 159 * acquired across the operation, setting RTF_CONDEMNED on a route 160 * entry will prevent its rt_key, rt_gateway, rt_ifp and rt_ifa 161 * from being modified. This is typically done on a route that 162 * has been chosen for a removal (from the tree) prior to dropping 163 * the rt_lock, so that those values will remain the same until 164 * the route is freed. 165 * 166 * When rnh_lock is held rt_setgate(), rt_setif(), and rtsetifa() are 167 * single-threaded, thus exclusive. 
This flag will also prevent the 168 * route from being looked up via rt_lookup(). 169 * 170 * rt_genid 171 * 172 * - Assumes that 32-bit writes are atomic; no locks. 173 * 174 * rt_dlt, rt_output 175 * 176 * - Currently unused; no locks. 177 * 178 * Operations on a route entry can be described as follows: 179 * 180 * CREATE an entry with reference count set to 0 as part of RTM_ADD/RESOLVE. 181 * 182 * INSERTION of an entry into the radix tree holds the rnh_lock, checks 183 * for duplicates and then adds the entry. rtrequest returns the entry 184 * after bumping up the reference count to 1 (for the caller). 185 * 186 * LOOKUP of an entry holds the rnh_lock and bumps up the reference count 187 * before returning; it is valid to also bump up the reference count using 188 * RT_ADDREF after the lookup has returned an entry. 189 * 190 * REMOVAL of an entry from the radix tree holds the rnh_lock, removes the 191 * entry but does not decrement the reference count. Removal happens when 192 * the route is explicitly deleted (RTM_DELETE) or when it is in the cached 193 * state and it expires. The route is said to be "down" when it is no 194 * longer present in the tree. Freeing the entry will happen on the last 195 * reference release of such a "down" route. 196 * 197 * RT_ADDREF/RT_REMREF operates on the routing entry which increments/ 198 * decrements the reference count, rt_refcnt, atomically on the rtentry. 199 * rt_refcnt is modified only using this routine. The general rule is to 200 * do RT_ADDREF in the function that is passing the entry as an argument, 201 * in order to prevent the entry from being freed by the callee. 
 */

/* Compare two sockaddrs byte-for-byte over the first one's sa_len */
#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)

extern void kdp_set_gateway_mac(void *gatewaymac);

__private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 };
struct radix_node_head *rt_tables[AF_MAX+1];

decl_lck_mtx_data(, rnh_lock_data);	/* global routing tables mutex */
lck_mtx_t *rnh_lock = &rnh_lock_data;
static lck_attr_t *rnh_lock_attr;
static lck_grp_t *rnh_lock_grp;
static lck_grp_attr_t *rnh_lock_grp_attr;

/* Lock group and attribute for routing entry locks */
static lck_attr_t *rte_mtx_attr;
static lck_grp_t *rte_mtx_grp;
static lck_grp_attr_t *rte_mtx_grp_attr;

int rttrash = 0;	/* routes not in table but not freed */

/* Debug facility knob; set via the "rte_debug" boot-arg (see route_init) */
unsigned int rte_debug;

/* Possible flags for rte_debug */
#define RTD_DEBUG	0x1	/* enable or disable rtentry debug facility */
#define RTD_TRACE	0x2	/* trace alloc, free, refcnt and lock */
#define RTD_NO_FREE	0x4	/* don't free (good to catch corruptions) */

#define RTE_NAME	"rtentry"	/* name for zone and rt_lock */

static struct zone *rte_zone;	/* special zone for rtentry */
#define RTE_ZONE_MAX	65536	/* maximum elements in zone */
#define RTE_ZONE_NAME	RTE_NAME	/* name of rtentry zone */

/* Sentinel patterns stamped into rtd_inuse to catch use-after-free */
#define RTD_INUSE	0xFEEDFACE	/* entry is in use */
#define RTD_FREED	0xDEADBEEF	/* entry is freed */

/* For gdb */
__private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE;
__private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE;

/*
 * Debug variant of rtentry structure.  Allocated instead of the plain
 * rtentry when (rte_debug & RTD_DEBUG) is set; records alloc/free and
 * refcount/lock call history for post-mortem analysis.
 */
struct rtentry_dbg {
	struct rtentry	rtd_entry;		/* rtentry */
	struct rtentry	rtd_entry_saved;	/* saved rtentry */
	uint32_t	rtd_inuse;		/* in use pattern */
	uint16_t	rtd_refhold_cnt;	/* # of rtref */
	uint16_t	rtd_refrele_cnt;	/* # of rtunref */
	uint32_t	rtd_lock_cnt;		/* # of locks */
	uint32_t	rtd_unlock_cnt;		/* # of unlocks */
	/*
	 * Alloc and free callers.
	 */
	ctrace_t	rtd_alloc;
	ctrace_t	rtd_free;
	/*
	 * Circular lists of rtref and rtunref callers.
	 */
	ctrace_t	rtd_refhold[CTRACE_HIST_SIZE];
	ctrace_t	rtd_refrele[CTRACE_HIST_SIZE];
	/*
	 * Circular lists of locks and unlocks.
	 */
	ctrace_t	rtd_lock[CTRACE_HIST_SIZE];
	ctrace_t	rtd_unlock[CTRACE_HIST_SIZE];
	/*
	 * Trash list linkage
	 */
	TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
};

/* List of trash route entries protected by rnh_lock */
static TAILQ_HEAD(, rtentry_dbg) rttrash_head;

/* Forward declarations for the file-local helpers defined below */
static void rte_lock_init(struct rtentry *);
static void rte_lock_destroy(struct rtentry *);
static inline struct rtentry *rte_alloc_debug(void);
static inline void rte_free_debug(struct rtentry *);
static inline void rte_lock_debug(struct rtentry_dbg *);
static inline void rte_unlock_debug(struct rtentry_dbg *);
static void rt_maskedcopy(struct sockaddr *,
    struct sockaddr *, struct sockaddr *);
static void rtable_init(void **);
static inline void rtref_audit(struct rtentry_dbg *);
static inline void rtunref_audit(struct rtentry_dbg *);
static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t,
    unsigned int);
static int rtrequest_common_locked(int, struct sockaddr *,
    struct sockaddr *, struct sockaddr *, int, struct rtentry **,
    unsigned int);
static struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t);
static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int);
static inline void sin6_set_ifscope(struct sockaddr *, unsigned int);
static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int);
static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *);
static struct sockaddr *sa_copy(struct sockaddr *, struct sockaddr_storage *,
    unsigned int *);
static struct sockaddr *ma_copy(int, struct sockaddr *,
    struct sockaddr_storage *, unsigned int);
static struct sockaddr *sa_trim(struct sockaddr *, int);
static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
    unsigned int);
static struct radix_node *node_lookup_default(int);
static struct rtentry *rt_lookup_common(boolean_t, boolean_t, struct sockaddr *,
    struct sockaddr *, struct radix_node_head *, unsigned int);
static int rn_match_ifscope(struct radix_node *, void *);
static struct ifaddr *ifa_ifwithroute_common_locked(int,
    const struct sockaddr *, const struct sockaddr *, unsigned int);
static struct rtentry *rte_alloc(void);
static void rte_free(struct rtentry *);
static void rtfree_common(struct rtentry *, boolean_t);
static void rte_if_ref(struct ifnet *, int);
static void rt_set_idleref(struct rtentry *);
static void rt_clear_idleref(struct rtentry *);
static void rt_str4(struct rtentry *, char *, uint32_t, char *, uint32_t);
#if INET6
static void rt_str6(struct rtentry *, char *, uint32_t, char *, uint32_t);
#endif /* INET6 */

/* Per-AF route generation counters; bumped atomically (see routegenid_*) */
uint32_t route_genid_inet = 0;
#if INET6
uint32_t route_genid_inet6 = 0;
#endif /* INET6 */

#define ASSERT_SINIFSCOPE(sa) {						\
	if ((sa)->sa_family != AF_INET ||				\
	    (sa)->sa_len < sizeof (struct sockaddr_in))			\
		panic("%s: bad sockaddr_in %p\n", __func__, sa);	\
}

#define ASSERT_SIN6IFSCOPE(sa) {					\
	if ((sa)->sa_family != AF_INET6 ||				\
	    (sa)->sa_len < sizeof (struct sockaddr_in6))		\
		panic("%s: bad sockaddr_in6 %p\n", __func__, sa);	\
}

/*
 * Argument to leaf-matching routine; at present it is scoped routing
 * specific but can be expanded in future to include other search filters.
 */
struct matchleaf_arg {
	unsigned int	ifscope;	/* interface scope */
};

/*
 * For looking up the non-scoped default route (sockaddr instead
 * of sockaddr_in for convenience).
 */
static struct sockaddr sin_def = {
	sizeof (struct sockaddr_in), AF_INET, { 0, }
};

static struct sockaddr_in6 sin6_def = {
	sizeof (struct sockaddr_in6), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0
};

/*
 * Interface index (scope) of the primary interface; determined at
 * the time when the default, non-scoped route gets added, changed
 * or deleted.  Protected by rnh_lock.
 */
static unsigned int primary_ifscope = IFSCOPE_NONE;
static unsigned int primary6_ifscope = IFSCOPE_NONE;

/* True if sa is the all-zeroes IPv4 default destination */
#define INET_DEFAULT(sa)	\
	((sa)->sa_family == AF_INET && SIN(sa)->sin_addr.s_addr == 0)

/* True if sa is the unspecified (::) IPv6 default destination */
#define INET6_DEFAULT(sa)						\
	((sa)->sa_family == AF_INET6 &&					\
	IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))

#define SA_DEFAULT(sa)	(INET_DEFAULT(sa) || INET6_DEFAULT(sa))
#define RT(r)		((struct rtentry *)r)
#define RN(r)		((struct radix_node *)r)
#define RT_HOST(r)	(RT(r)->rt_flags & RTF_HOST)

SYSCTL_DECL(_net_route);

unsigned int rt_verbose;	/* verbosity level (0 to disable) */
SYSCTL_UINT(_net_route, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
	&rt_verbose, 0, "");

/*
 * Attach a radix tree for every domain that provides dom_rtattach;
 * caller must hold the domain list lock (asserted below).
 */
static void
rtable_init(void **table)
{
	struct domain *dom;

	domain_proto_mtx_lock_assert_held();

	TAILQ_FOREACH(dom, &domains, dom_entry) {
		if (dom->dom_rtattach != NULL)
			dom->dom_rtattach(&table[dom->dom_family],
			    dom->dom_rtoffset);
	}
}

/*
 * Called by route_dinit().
 */
void
route_init(void)
{
	int size;

#if INET6
	/*
	 * Code elsewhere relies on struct route and struct route_in6
	 * sharing a common layout; verify that at compile time.
	 */
	_CASSERT(offsetof(struct route, ro_rt) ==
	    offsetof(struct route_in6, ro_rt));
	_CASSERT(offsetof(struct route, ro_srcia) ==
	    offsetof(struct route_in6, ro_srcia));
	_CASSERT(offsetof(struct route, ro_flags) ==
	    offsetof(struct route_in6, ro_flags));
	_CASSERT(offsetof(struct route, ro_dst) ==
	    offsetof(struct route_in6, ro_dst));
#endif /* INET6 */

	/* Any non-zero boot-arg value implies the base debug flag */
	PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
	if (rte_debug != 0)
		rte_debug |= RTD_DEBUG;

	rnh_lock_grp_attr = lck_grp_attr_alloc_init();
	rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr);
	rnh_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr);

	rte_mtx_grp_attr = lck_grp_attr_alloc_init();
	rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr);
	rte_mtx_attr = lck_attr_alloc_init();

	lck_mtx_lock(rnh_lock);
	rn_init();	/* initialize all zeroes, all ones, mask table */
	lck_mtx_unlock(rnh_lock);
	rtable_init((void **)rt_tables);

	/* Debug builds of the facility allocate the larger debug variant */
	if (rte_debug & RTD_DEBUG)
		size = sizeof (struct rtentry_dbg);
	else
		size = sizeof (struct rtentry);

	rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME);
	if (rte_zone == NULL) {
		panic("%s: failed allocating rte_zone", __func__);
		/* NOTREACHED */
	}
	zone_change(rte_zone, Z_EXPAND, TRUE);
	zone_change(rte_zone, Z_CALLERACCT, FALSE);
	zone_change(rte_zone, Z_NOENCRYPT, TRUE);

	TAILQ_INIT(&rttrash_head);
}

/*
 * Given a route, determine whether or not it is the non-scoped default
 * route; dst typically comes from rt_key(rt) but may be coming from
 * a separate place when rt is in the process of being created.
459 */ 460boolean_t 461rt_primary_default(struct rtentry *rt, struct sockaddr *dst) 462{ 463 return (SA_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE)); 464} 465 466/* 467 * Set the ifscope of the primary interface; caller holds rnh_lock. 468 */ 469void 470set_primary_ifscope(int af, unsigned int ifscope) 471{ 472 if (af == AF_INET) 473 primary_ifscope = ifscope; 474 else 475 primary6_ifscope = ifscope; 476} 477 478/* 479 * Return the ifscope of the primary interface; caller holds rnh_lock. 480 */ 481unsigned int 482get_primary_ifscope(int af) 483{ 484 return (af == AF_INET ? primary_ifscope : primary6_ifscope); 485} 486 487/* 488 * Set the scope ID of a given a sockaddr_in. 489 */ 490void 491sin_set_ifscope(struct sockaddr *sa, unsigned int ifscope) 492{ 493 /* Caller must pass in sockaddr_in */ 494 ASSERT_SINIFSCOPE(sa); 495 496 SINIFSCOPE(sa)->sin_scope_id = ifscope; 497} 498 499/* 500 * Set the scope ID of given a sockaddr_in6. 501 */ 502static inline void 503sin6_set_ifscope(struct sockaddr *sa, unsigned int ifscope) 504{ 505 /* Caller must pass in sockaddr_in6 */ 506 ASSERT_SIN6IFSCOPE(sa); 507 508 SIN6IFSCOPE(sa)->sin6_scope_id = ifscope; 509} 510 511/* 512 * Given a sockaddr_in, return the scope ID to the caller. 513 */ 514unsigned int 515sin_get_ifscope(struct sockaddr *sa) 516{ 517 /* Caller must pass in sockaddr_in */ 518 ASSERT_SINIFSCOPE(sa); 519 520 return (SINIFSCOPE(sa)->sin_scope_id); 521} 522 523/* 524 * Given a sockaddr_in6, return the scope ID to the caller. 
 */
unsigned int
sin6_get_ifscope(struct sockaddr *sa)
{
	/* Caller must pass in sockaddr_in6 */
	ASSERT_SIN6IFSCOPE(sa);

	return (SIN6IFSCOPE(sa)->sin6_scope_id);
}

/*
 * Write ifscope into the KAME-style embedded scope word of a
 * scope-embeddable IPv6 address (second 16-bit word, network order).
 */
static inline void
sin6_set_embedded_ifscope(struct sockaddr *sa, unsigned int ifscope)
{
	/* Caller must pass in sockaddr_in6 */
	ASSERT_SIN6IFSCOPE(sa);
	VERIFY(IN6_IS_SCOPE_EMBED(&(SIN6(sa)->sin6_addr)));

	SIN6(sa)->sin6_addr.s6_addr16[1] = htons(ifscope);
}

/*
 * Read the KAME-style embedded scope word out of an IPv6 address
 * (second 16-bit word, converted to host order).
 */
static inline unsigned int
sin6_get_embedded_ifscope(struct sockaddr *sa)
{
	/* Caller must pass in sockaddr_in6 */
	ASSERT_SIN6IFSCOPE(sa);

	return (ntohs(SIN6(sa)->sin6_addr.s6_addr16[1]));
}

/*
 * Copy a sockaddr_{in,in6} src to a dst storage and set scope ID into dst.
 *
 * To clear the scope ID, pass in a NULL pifscope.  To set the scope ID, pass
 * in a non-NULL pifscope with non-zero ifscope.  Otherwise if pifscope is
 * non-NULL and ifscope is IFSCOPE_NONE, the existing scope ID is left intact.
 * In any case, the effective scope ID value is returned to the caller via
 * pifscope, if it is non-NULL.
 */
static struct sockaddr *
sa_copy(struct sockaddr *src, struct sockaddr_storage *dst,
    unsigned int *pifscope)
{
	int af = src->sa_family;
	unsigned int ifscope = (pifscope != NULL) ? *pifscope : IFSCOPE_NONE;

	VERIFY(af == AF_INET || af == AF_INET6);

	bzero(dst, sizeof (*dst));

	if (af == AF_INET) {
		bcopy(src, dst, sizeof (struct sockaddr_in));
		if (pifscope == NULL || ifscope != IFSCOPE_NONE)
			sin_set_ifscope(SA(dst), ifscope);
	} else {
		bcopy(src, dst, sizeof (struct sockaddr_in6));
		if (pifscope != NULL &&
		    IN6_IS_SCOPE_EMBED(&SIN6(dst)->sin6_addr)) {
			unsigned int eifscope;
			/*
			 * If the address contains the embedded scope ID,
			 * use that as the value for sin6_scope_id as long
			 * the caller doesn't insist on clearing it (by
			 * passing NULL) or setting it.
			 */
			eifscope = sin6_get_embedded_ifscope(SA(dst));
			if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE)
				ifscope = eifscope;
			sin6_set_ifscope(SA(dst), ifscope);
			/*
			 * If sin6_scope_id is set but the address doesn't
			 * contain the equivalent embedded value, set it.
			 */
			if (ifscope != IFSCOPE_NONE && eifscope != ifscope)
				sin6_set_embedded_ifscope(SA(dst), ifscope);
		} else if (pifscope == NULL || ifscope != IFSCOPE_NONE) {
			sin6_set_ifscope(SA(dst), ifscope);
		}
	}

	/* Report the effective scope ID back to the caller */
	if (pifscope != NULL) {
		*pifscope = (af == AF_INET) ? sin_get_ifscope(SA(dst)) :
		    sin6_get_ifscope(SA(dst));
	}

	return (SA(dst));
}

/*
 * Copy a mask from src to a dst storage and set scope ID into dst.
 */
static struct sockaddr *
ma_copy(int af, struct sockaddr *src, struct sockaddr_storage *dst,
    unsigned int ifscope)
{
	VERIFY(af == AF_INET || af == AF_INET6);

	bzero(dst, sizeof (*dst));
	rt_maskedcopy(src, SA(dst), src);

	/*
	 * The length of the mask sockaddr would need to be adjusted
	 * to cover the additional {sin,sin6}_ifscope field; when ifscope
	 * is IFSCOPE_NONE, we'd end up clearing the scope ID field on
	 * the destination mask in addition to extending the length
	 * of the sockaddr, as a side effect.  This is okay, as any
	 * trailing zeroes would be skipped by rn_addmask prior to
	 * inserting or looking up the mask in the mask tree.
	 */
	if (af == AF_INET) {
		SINIFSCOPE(dst)->sin_scope_id = ifscope;
		SINIFSCOPE(dst)->sin_len =
		    offsetof(struct sockaddr_inifscope, sin_scope_id) +
		    sizeof (SINIFSCOPE(dst)->sin_scope_id);
	} else {
		SIN6IFSCOPE(dst)->sin6_scope_id = ifscope;
		SIN6IFSCOPE(dst)->sin6_len =
		    offsetof(struct sockaddr_in6, sin6_scope_id) +
		    sizeof (SIN6IFSCOPE(dst)->sin6_scope_id);
	}

	return (SA(dst));
}

/*
 * Trim trailing zeroes on a sockaddr and update its length; skip is the
 * number of leading bytes (header up to the address field) left untouched.
 */
static struct sockaddr *
sa_trim(struct sockaddr *sa, int skip)
{
	caddr_t cp, base = (caddr_t)sa + skip;

	if (sa->sa_len <= skip)
		return (sa);

	/* Walk cp back over the run of trailing zero bytes */
	for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0; )
		cp--;

	sa->sa_len = (cp - base) + skip;
	if (sa->sa_len < skip) {
		/* Must not happen, and if so, panic */
		panic("%s: broken logic (sa_len %d < skip %d )", __func__,
		    sa->sa_len, skip);
		/* NOTREACHED */
	} else if (sa->sa_len == skip) {
		/* If we end up with all zeroes, then there's no mask */
		sa->sa_len = 0;
	}

	return (sa);
}

/*
 * Called by rtm_msg{1,2} routines to "scrub" socket address structures of
 * kernel private information, so that clients of the routing socket will
 * not be confused by the presence of the information, or the side effect of
 * the increased length due to that.  The source sockaddr is not modified;
 * instead, the scrubbing happens on the destination sockaddr storage that
 * is passed in by the caller.
 *
 * Scrubbing entails:
 * - removing embedded scope identifiers from network mask and destination
 *   IPv4 and IPv6 socket addresses
 * - optionally removing global scope interface hardware addresses from
 *   link-layer interface addresses when the MAC framework check fails.
 */
struct sockaddr *
rtm_scrub(int type, int idx, struct sockaddr *hint, struct sockaddr *sa,
    void *buf, uint32_t buflen, kauth_cred_t *credp)
{
	struct sockaddr_storage *ss = (struct sockaddr_storage *)buf;
	struct sockaddr *ret = sa;	/* unscrubbed original by default */

	VERIFY(buf != NULL && buflen >= sizeof (*ss));
	bzero(buf, buflen);

	switch (idx) {
	case RTAX_DST:
		/*
		 * If this is for an AF_INET/AF_INET6 destination address,
		 * call sa_copy() to clear the scope ID field.
		 */
		if (sa->sa_family == AF_INET &&
		    SINIFSCOPE(sa)->sin_scope_id != IFSCOPE_NONE) {
			ret = sa_copy(sa, ss, NULL);
		} else if (sa->sa_family == AF_INET6 &&
		    SIN6IFSCOPE(sa)->sin6_scope_id != IFSCOPE_NONE) {
			ret = sa_copy(sa, ss, NULL);
		}
		break;

	case RTAX_NETMASK: {
		int skip, af;
		/*
		 * If this is for a mask, we can't tell whether or not there
		 * is a valid scope ID value, as the span of bytes between
		 * sa_len and the beginning of the mask (offset of sin_addr in
		 * the case of AF_INET, or sin6_addr for AF_INET6) may be
		 * filled with all-ones by rn_addmask(), and hence we cannot
		 * rely on sa_family.  Because of this, we use the sa_family
		 * of the hint sockaddr (RTAX_{DST,IFA}) as indicator as to
		 * whether or not the mask is to be treated as one for AF_INET
		 * or AF_INET6.  Clearing the scope ID field involves setting
		 * it to IFSCOPE_NONE followed by calling sa_trim() to trim
		 * trailing zeroes from the storage sockaddr, which reverses
		 * what was done earlier by ma_copy() on the source sockaddr.
		 */
		if (hint == NULL ||
		    ((af = hint->sa_family) != AF_INET && af != AF_INET6))
			break;	/* nothing to do */

		skip = (af == AF_INET) ?
		    offsetof(struct sockaddr_in, sin_addr) :
		    offsetof(struct sockaddr_in6, sin6_addr);

		if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss)) {
			bcopy(sa, ss, sa->sa_len);
			/*
			 * Don't use {sin,sin6}_set_ifscope() as sa_family
			 * and sa_len for the netmask might not be set to
			 * the corresponding expected values of the hint.
			 */
			if (hint->sa_family == AF_INET)
				SINIFSCOPE(ss)->sin_scope_id = IFSCOPE_NONE;
			else
				SIN6IFSCOPE(ss)->sin6_scope_id = IFSCOPE_NONE;
			ret = sa_trim(SA(ss), skip);

			/*
			 * For AF_INET6 mask, set sa_len appropriately unless
			 * this is requested via sysctl_dumpentry(), in which
			 * case we return the raw value.
			 */
			if (hint->sa_family == AF_INET6 &&
			    type != RTM_GET && type != RTM_GET2)
				SA(ret)->sa_len = sizeof (struct sockaddr_in6);
		}
		break;
	}
	case RTAX_IFP: {
		if (sa->sa_family == AF_LINK && credp) {
			struct sockaddr_dl *sdl = SDL(buf);
			const void *bytes;
			size_t size;

			/* caller should handle worst case: SOCK_MAXADDRLEN */
			VERIFY(buflen >= sa->sa_len);

			bcopy(sa, sdl, sa->sa_len);
			/*
			 * dlil_ifaddr_bytes() may substitute a scrubbed
			 * hardware address based on the credential check;
			 * only copy if it returned different bytes.
			 */
			bytes = dlil_ifaddr_bytes(sdl, &size, credp);
			if (bytes != CONST_LLADDR(sdl)) {
				VERIFY(sdl->sdl_alen == size);
				bcopy(bytes, LLADDR(sdl), size);
			}
			ret = (struct sockaddr *)sdl;
		}
		break;
	}
	default:
		break;
	}

	return (ret);
}

/*
 * Callback leaf-matching routine for rn_matchaddr_args used
 * for looking up an exact match for a scoped route entry.
 */
static int
rn_match_ifscope(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct matchleaf_arg *ma = arg;
	int af = rt_key(rt)->sa_family;

	/* Only scoped IPv4/IPv6 entries can match a scoped lookup */
	if (!(rt->rt_flags & RTF_IFSCOPE) || (af != AF_INET && af != AF_INET6))
		return (0);

	/* Compare the scope ID embedded in the route key with the target */
	return (af == AF_INET ?
	    (SINIFSCOPE(rt_key(rt))->sin_scope_id == ma->ifscope) :
	    (SIN6IFSCOPE(rt_key(rt))->sin6_scope_id == ma->ifscope));
}

/*
 * Atomically increment route generation counter
 */
void
routegenid_update(void)
{
	routegenid_inet_update();
#if INET6
	routegenid_inet6_update();
#endif /* INET6 */
}

/* Bump the IPv4 route generation counter */
void
routegenid_inet_update(void)
{
	atomic_add_32(&route_genid_inet, 1);
}

#if INET6
/* Bump the IPv6 route generation counter */
void
routegenid_inet6_update(void)
{
	atomic_add_32(&route_genid_inet6, 1);
}
#endif /* INET6 */

/*
 * Packet routing routines.
 */
void
rtalloc(struct route *ro)
{
	rtalloc_ign(ro, 0);
}

void
rtalloc_scoped(struct route *ro, unsigned int ifscope)
{
	rtalloc_scoped_ign(ro, 0, ifscope);
}

/*
 * Common "struct route" lookup: revalidate any cached entry, releasing
 * it if it has become unusable, then perform a fresh lookup and cache
 * the result in ro->ro_rt.  Called with rnh_lock held.
 */
static void
rtalloc_ign_common_locked(struct route *ro, uint32_t ignore,
    unsigned int ifscope)
{
	struct rtentry *rt;

	if ((rt = ro->ro_rt) != NULL) {
		RT_LOCK_SPIN(rt);
		/* Cached entry still usable; nothing more to do */
		if (rt->rt_ifp != NULL && !ROUTE_UNUSABLE(ro)) {
			RT_UNLOCK(rt);
			return;
		}
		RT_UNLOCK(rt);
		ROUTE_RELEASE_LOCKED(ro);	/* rnh_lock already held */
	}
	ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope);
	if (ro->ro_rt != NULL) {
		RT_GENID_SYNC(ro->ro_rt);
		RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
	}
}

/* Unlocked wrapper: takes rnh_lock around the common lookup routine */
void
rtalloc_ign(struct route *ro, uint32_t ignore)
{
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rnh_lock);
	rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE);
	lck_mtx_unlock(rnh_lock);
}

/* As rtalloc_ign(), but constrained to the given interface scope */
void
rtalloc_scoped_ign(struct route *ro, uint32_t ignore,
    unsigned int ifscope)
{
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rnh_lock);
	rtalloc_ign_common_locked(ro, ignore, ifscope);
	lck_mtx_unlock(rnh_lock);
}

/* Unscoped rtentry lookup; rnh_lock must be held by the caller */
static struct rtentry *
rtalloc1_locked(struct sockaddr *dst, int report, uint32_t ignflags)
{
	return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
}

/* Scoped rtentry lookup; rnh_lock must be held by the caller */
struct rtentry *
rtalloc1_scoped_locked(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
	return (rtalloc1_common_locked(dst, report, ignflags, ifscope));
}

/*
 * Look up the route that matches the address given
 * Or, at least try.. Create a cloned route if needed.
 */
static struct rtentry *
rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
	struct radix_node_head *rnh = rt_tables[dst->sa_family];
	struct rtentry *rt, *newrt = NULL;
	struct rt_addrinfo info;
	uint32_t nflags;
	int err = 0, msgtype = RTM_MISS;

	/* No tree for this address family at all */
	if (rnh == NULL)
		goto unreachable;

	/*
	 * Find the longest prefix or exact (in the scoped case) address match;
	 * callee adds a reference to entry and checks for root node as well
	 */
	rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope);
	if (rt == NULL)
		goto unreachable;

	RT_LOCK_SPIN(rt);
	newrt = rt;
	nflags = rt->rt_flags & ~ignflags;
	RT_UNLOCK(rt);
	if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
		/*
		 * We are apparently adding (report = 0 in delete).
		 * If it requires that it be cloned, do so.
		 * (This implies it wasn't a HOST route.)
		 */
		err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt);
		if (err) {
			/*
			 * If the cloning didn't succeed, maybe what we
			 * have from lookup above will do.  Return that;
			 * no need to hold another reference since it's
			 * already done.
			 */
			newrt = rt;
			goto miss;
		}

		/*
		 * We cloned it; drop the original route found during lookup.
		 * The resulted cloned route (newrt) would now have an extra
		 * reference held during rtrequest.
		 */
		rtfree_locked(rt);
		if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
			/*
			 * If the new route specifies it be
			 * externally resolved, then go do that.
			 */
			msgtype = RTM_RESOLVE;
			goto miss;
		}
	}
	goto done;

unreachable:
	/*
	 * Either we hit the root or couldn't find any match,
	 * Which basically means "cant get there from here"
	 */
	rtstat.rts_unreach++;
miss:
	if (report) {
		/*
		 * If required, report the failure to the supervising
		 * Authorities.
		 * For a delete, this is not an error. (report == 0)
		 */
		bzero((caddr_t)&info, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(msgtype, &info, 0, err);
	}
done:
	return (newrt);
}

/* Unlocked wrapper around rtalloc1_locked(); takes and drops rnh_lock */
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, uint32_t ignflags)
{
	struct rtentry *entry;
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rnh_lock);
	entry = rtalloc1_locked(dst, report, ignflags);
	lck_mtx_unlock(rnh_lock);
	return (entry);
}

/* Unlocked wrapper around rtalloc1_scoped_locked() */
struct rtentry *
rtalloc1_scoped(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
	struct rtentry *entry;
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rnh_lock);
	entry = rtalloc1_scoped_locked(dst, report, ignflags, ifscope);
	lck_mtx_unlock(rnh_lock);
	return (entry);
}

/*
 * Remove a reference count from an rtentry.
 * If the count gets low enough, take it out of the routing table
 */
void
rtfree_locked(struct rtentry *rt)
{
	rtfree_common(rt, TRUE);
}

/*
 * Drop a reference on rt and, when it reaches zero, run the close
 * method and (if the entry is no longer RTF_UP) release everything
 * it holds: parent route, ifa, link-layer info, key storage and the
 * rtentry itself.  "locked" indicates whether the caller already
 * holds rnh_lock; if not, the lock is acquired here with the usual
 * drop-rt_lock/re-check dance to respect lock ordering.
 */
static void
rtfree_common(struct rtentry *rt, boolean_t locked)
{
	struct radix_node_head *rnh;

	lck_mtx_assert(rnh_lock, locked ?
	    LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * Atomically decrement the reference count and if it reaches 0,
	 * and there is a close function defined, call the close function.
	 */
	RT_LOCK_SPIN(rt);
	if (rtunref(rt) > 0) {
		RT_UNLOCK(rt);
		return;
	}

	/*
	 * To avoid violating lock ordering, we must drop rt_lock before
	 * trying to acquire the global rnh_lock.  If we are called with
	 * rnh_lock held, then we already have exclusive access; otherwise
	 * we do the lock dance.
	 */
	if (!locked) {
		/*
		 * Note that we check it again below after grabbing rnh_lock,
		 * since it is possible that another thread doing a lookup wins
		 * the race, grabs the rnh_lock first, and bumps up reference
		 * count in which case the route should be left alone as it is
		 * still in use.  It's also possible that another thread frees
		 * the route after we drop rt_lock; to prevent the route from
		 * being freed, we hold an extra reference.
		 */
		RT_ADDREF_LOCKED(rt);
		RT_UNLOCK(rt);
		lck_mtx_lock(rnh_lock);
		RT_LOCK_SPIN(rt);
		if (rtunref(rt) > 0) {
			/* We've lost the race, so abort */
			RT_UNLOCK(rt);
			goto done;
		}
	}

	/*
	 * We may be blocked on other lock(s) as part of freeing
	 * the entry below, so convert from spin to full mutex.
	 */
	RT_CONVERT_LOCK(rt);

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	/* Negative refcnt must never happen */
	if (rt->rt_refcnt != 0) {
		panic("rt %p invalid refcnt %d", rt, rt->rt_refcnt);
		/* NOTREACHED */
	}
	/* Idle refcnt must have been dropped during rtunref() */
	VERIFY(!(rt->rt_flags & RTF_IFREF));

	/*
	 * find the tree for that address family
	 * Note: in the case of igmp packets, there might not be an rnh
	 */
	rnh = rt_tables[rt_key(rt)->sa_family];

	/*
	 * On last reference give the "close method" a chance to cleanup
	 * private state.  This also permits (for IPv4 and IPv6) a chance
	 * to decide if the routing table entry should be purged immediately
	 * or at a later time.  When an immediate purge is to happen the
	 * close routine typically issues RTM_DELETE which clears the RTF_UP
	 * flag on the entry so that the code below reclaims the storage.
	 */
	if (rnh != NULL && rnh->rnh_close != NULL)
		rnh->rnh_close((struct radix_node *)rt, rnh);

	/*
	 * If we are no longer "up" (and ref == 0) then we can free the
	 * resources associated with the route.
	 */
	if (!(rt->rt_flags & RTF_UP)) {
		struct rtentry *rt_parent;
		struct ifaddr *rt_ifa;

		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) {
			panic("rt %p freed while in radix tree\n", rt);
			/* NOTREACHED */
		}
		/*
		 * the rtentry must have been removed from the routing table
		 * so it is represented in rttrash; remove that now.
		 */
		(void) OSDecrementAtomic(&rttrash);
		if (rte_debug & RTD_DEBUG) {
			TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt,
			    rtd_trash_link);
		}

		/*
		 * release references on items we hold them on..
		 * e.g other routes and ifaddrs.
		 */
		if ((rt_parent = rt->rt_parent) != NULL)
			rt->rt_parent = NULL;

		if ((rt_ifa = rt->rt_ifa) != NULL)
			rt->rt_ifa = NULL;

		/*
		 * Now free any attached link-layer info.
		 */
		if (rt->rt_llinfo != NULL) {
			/* Use the entry's own destructor when one is set */
			if (rt->rt_llinfo_free != NULL)
				(*rt->rt_llinfo_free)(rt->rt_llinfo);
			else
				R_Free(rt->rt_llinfo);
			rt->rt_llinfo = NULL;
		}

		/*
		 * Route is no longer in the tree and refcnt is 0;
		 * we have exclusive access, so destroy it.
		 */
		RT_UNLOCK(rt);

		/*
		 * Release deferred references after dropping rt_lock; the
		 * parent release may itself free another rtentry.
		 */
		if (rt_parent != NULL)
			rtfree_locked(rt_parent);

		if (rt_ifa != NULL)
			IFA_REMREF(rt_ifa);

		/*
		 * The key is separately alloc'd so free it (see rt_setgate()).
		 * This also frees the gateway, as they are always malloc'd
		 * together.
		 */
		R_Free(rt_key(rt));

		/*
		 * Free any statistics that may have been allocated
		 */
		nstat_route_detach(rt);

		/*
		 * and the rtentry itself of course
		 */
		rte_lock_destroy(rt);
		rte_free(rt);
	} else {
		/*
		 * The "close method" has been called, but the route is
		 * still in the radix tree with zero refcnt, i.e. "up"
		 * and in the cached state.
		 */
		RT_UNLOCK(rt);
	}
done:
	if (!locked)
		lck_mtx_unlock(rnh_lock);
}

void
rtfree(struct rtentry *rt)
{
	rtfree_common(rt, FALSE);
}

/*
 * Decrements the refcount but does not free the route when
 * the refcount reaches zero.  Unless you have really good reason,
 * use rtfree not rtunref.
 */
int
rtunref(struct rtentry *p)
{
	RT_LOCK_ASSERT_HELD(p);

	if (p->rt_refcnt == 0) {
		panic("%s(%p) bad refcnt\n", __func__, p);
		/* NOTREACHED */
	} else if (--p->rt_refcnt == 0) {
		/*
		 * Release any idle reference count held on the interface;
		 * if the route is eligible, still UP and the refcnt becomes
		 * non-zero at some point in future before it is purged from
		 * the routing table, rt_set_idleref() will undo this.
		 */
		rt_clear_idleref(p);
	}

	if (rte_debug & RTD_DEBUG)
		rtunref_audit((struct rtentry_dbg *)p);

	/* Return new value */
	return (p->rt_refcnt);
}

/* Debug-only: record a refcount-release event in the rtentry's history */
static inline void
rtunref_audit(struct rtentry_dbg *rte)
{
	uint16_t idx;

	if (rte->rtd_inuse != RTD_INUSE) {
		panic("rtunref: on freed rte=%p\n", rte);
		/* NOTREACHED */
	}
	idx = atomic_add_16_ov(&rte->rtd_refrele_cnt, 1) % CTRACE_HIST_SIZE;
	if (rte_debug & RTD_TRACE)
		ctrace_record(&rte->rtd_refrele[idx]);
}

/*
 * Add a reference count from an rtentry.
 */
void
rtref(struct rtentry *p)
{
	RT_LOCK_ASSERT_HELD(p);

	if (++p->rt_refcnt == 0) {
		/* Wrapped around zero: refcount overflow */
		panic("%s(%p) bad refcnt\n", __func__, p);
		/* NOTREACHED */
	} else if (p->rt_refcnt == 1) {
		/*
		 * Hold an idle reference count on the interface,
		 * if the route is eligible for it.
		 */
		rt_set_idleref(p);
	}

	if (rte_debug & RTD_DEBUG)
		rtref_audit((struct rtentry_dbg *)p);
}

/* Debug-only: record a refcount-hold event in the rtentry's history */
static inline void
rtref_audit(struct rtentry_dbg *rte)
{
	uint16_t idx;

	if (rte->rtd_inuse != RTD_INUSE) {
		panic("rtref_audit: on freed rte=%p\n", rte);
		/* NOTREACHED */
	}
	idx = atomic_add_16_ov(&rte->rtd_refhold_cnt, 1) % CTRACE_HIST_SIZE;
	if (rte_debug & RTD_TRACE)
		ctrace_record(&rte->rtd_refhold[idx]);
}

/*
 * Replace the route's interface address with ifa, swapping the
 * IFA references accordingly.  Requires rnh_lock and rt_lock held.
 */
void
rtsetifa(struct rtentry *rt, struct ifaddr *ifa)
{
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	if (rt->rt_ifa == ifa)
		return;

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/* Release the old ifa */
	if (rt->rt_ifa)
		IFA_REMREF(rt->rt_ifa);

	/* Set rt_ifa */
	rt->rt_ifa = ifa;

	/* Take a reference to the ifa */
	if (rt->rt_ifa)
		IFA_ADDREF(rt->rt_ifa);
}

/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 */
void
rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt = NULL;
	int error = 0;
	short *stat = 0;
	struct rt_addrinfo info;
	struct ifaddr *ifa = NULL;
	unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
	struct sockaddr_storage ss;
	int af = src->sa_family;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rnh_lock);

	/*
	 * Transform src into the internal routing table form for
	 * comparison against rt_gateway below.
	 */
#if INET6
	if ((af == AF_INET && ip_doscopedroute) ||
	    (af == AF_INET6 && ip6_doscopedroute))
#else
	if (af == AF_INET && ip_doscopedroute)
#endif /* !INET6 */
		src = sa_copy(src, &ss, &ifscope);

	/*
	 * Verify the gateway is directly reachable; if scoped routing
	 * is enabled, verify that it is reachable from the interface
	 * where the ICMP redirect arrived on.
	 */
	if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}

	/* Lookup route to the destination (from the original IP header) */
	rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope);
	if (rt != NULL)
		RT_LOCK(rt);

	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.  Holding rnh_lock here prevents the
	 * possibility of rt_ifa/ifa's ifa_addr from changing (e.g.
	 * in_ifinit), so okay to access ifa_addr without locking.
	 */
	if (!(flags & RTF_DONE) && rt != NULL &&
	    (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
	    ifa->ifa_addr))) {
		error = EINVAL;
	} else {
		IFA_REMREF(ifa);
		/* Redirecting to one of our own addresses is a loop */
		if ((ifa = ifa_ifwithaddr(gateway))) {
			IFA_REMREF(ifa);
			ifa = NULL;
			error = EHOSTUNREACH;
		}
	}

	if (ifa) {
		IFA_REMREF(ifa);
		ifa = NULL;
	}

	if (error) {
		if (rt != NULL)
			RT_UNLOCK(rt);
		goto done;
	}

	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if ((rt == NULL) || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	RT_LOCK_ASSERT_HELD(rt);
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route
			 * to net; similar to cloned routes, the newly
			 * created host route is scoped as well.
			 */
create:
			if (rt != NULL)
				RT_UNLOCK(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			error = rtrequest_scoped_locked(RTM_ADD, dst,
			    gateway, netmask, flags, NULL, ifscope);
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			/*
			 * add the key and gateway (in one malloc'd chunk).
			 */
			error = rt_setgate(rt, rt_key(rt), gateway);
			RT_UNLOCK(rt);
		}
	} else {
		RT_UNLOCK(rt);
		error = EHOSTUNREACH;
	}
done:
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		/* Hand the route to the caller on success, else release it */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree_locked(rt);
	}
out:
	if (error) {
		rtstat.rts_badredirect++;
	} else {
		if (stat != NULL)
			(*stat)++;

		/* Route table changed; invalidate cached routes */
		if (af == AF_INET)
			routegenid_inet_update();
#if INET6
		else if (af == AF_INET6)
			routegenid_inet6_update();
#endif /* INET6 */
	}
	lck_mtx_unlock(rnh_lock);
	/* Notify routing socket listeners of the redirect outcome */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
}

/*
 * Routing table ioctl interface.
 */
int
rtioctl(unsigned long req, caddr_t data, struct proc *p)
{
#pragma unused(p, req, data)
	/* No ioctl-based routing table manipulation is supported */
	return (ENXIO);
}

/*
 * Find the interface address to use for a route to dst via gateway.
 * Unlocked wrapper: takes rnh_lock around the locked variant.
 */
struct ifaddr *
ifa_ifwithroute(
	int flags,
	const struct sockaddr	*dst,
	const struct sockaddr *gateway)
{
	struct ifaddr *ifa;

	lck_mtx_lock(rnh_lock);
	ifa = ifa_ifwithroute_locked(flags, dst, gateway);
	lck_mtx_unlock(rnh_lock);

	return (ifa);
}

/* Unscoped variant; RTF_IFSCOPE is stripped from flags */
struct ifaddr *
ifa_ifwithroute_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway)
{
	return (ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst,
	    gateway, IFSCOPE_NONE));
}

/* Scoped variant; RTF_IFSCOPE is set iff a real scope is given */
struct ifaddr *
ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, unsigned int ifscope)
{
	if (ifscope != IFSCOPE_NONE)
		flags |= RTF_IFSCOPE;
	else
		flags &= ~RTF_IFSCOPE;

	return (ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope));
}

/*
 * Core ifaddr selection for a route: consult the interface lists
 * (destination/local/net matches) and, failing that, the routing
 * table itself; a routing-table entry for the gateway overrides the
 * interface-derived answer.  Returns a referenced ifaddr or NULL;
 * caller is responsible for IFA_REMREF.  Requires rnh_lock held.
 */
static struct ifaddr *
ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gw, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct rtentry *rt = NULL;
	struct sockaddr_storage dst_ss, gw_ss;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Just in case the sockaddr passed in by the caller
	 * contains a scope ID, make sure to clear it since
	 * interface addresses aren't scoped.
	 */
#if INET6
	if (dst != NULL &&
	    ((dst->sa_family == AF_INET && ip_doscopedroute) ||
	    (dst->sa_family == AF_INET6 && ip6_doscopedroute)))
#else
	if (dst != NULL && dst->sa_family == AF_INET && ip_doscopedroute)
#endif /* !INET6 */
		dst = sa_copy(SA((uintptr_t)dst), &dst_ss, NULL);

#if INET6
	if (gw != NULL &&
	    ((gw->sa_family == AF_INET && ip_doscopedroute) ||
	    (gw->sa_family == AF_INET6 && ip6_doscopedroute)))
#else
	if (gw != NULL && gw->sa_family == AF_INET && ip_doscopedroute)
#endif /* !INET6 */
		gw = sa_copy(SA((uintptr_t)gw), &gw_ss, NULL);

	if (!(flags & RTF_GATEWAY)) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if (flags & RTF_HOST) {
			ifa = ifa_ifwithdstaddr(dst);
		}
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_scoped(gw, ifscope);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gw);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_scoped(gw, ifscope);
	if (ifa == NULL) {
		/* Workaround to avoid gcc warning regarding const variable */
		rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
		    0, 0, ifscope);
		if (rt != NULL) {
			RT_LOCK_SPIN(rt);
			ifa = rt->rt_ifa;
			if (ifa != NULL) {
				/* Become a regular mutex */
				RT_CONVERT_LOCK(rt);
				IFA_ADDREF(ifa);
			}
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			rt = NULL;
		}
	}
	/*
	 * Holding rnh_lock here prevents the possibility of ifa from
	 * changing (e.g. in_ifinit), so it is safe to access its
	 * ifa_addr (here and down below) without locking.
	 */
	if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *newifa;
		/* Callee adds reference to newifa upon success */
		newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (newifa != NULL) {
			IFA_REMREF(ifa);
			ifa = newifa;
		}
	}
	/*
	 * If we are adding a gateway, it is quite possible that the
	 * routing table has a static entry in place for the gateway,
	 * that may not agree with info garnered from the interfaces.
	 * The routing table should carry more precedence than the
	 * interfaces in this matter.  Must be careful not to stomp
	 * on new entries from rtinit, hence (ifa->ifa_addr != gw).
	 */
	if ((ifa == NULL ||
	    !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gw)) &&
	    (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gw,
	    0, 0, ifscope)) != NULL) {
		if (ifa != NULL)
			IFA_REMREF(ifa);
		RT_LOCK_SPIN(rt);
		ifa = rt->rt_ifa;
		if (ifa != NULL) {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(rt);
			IFA_ADDREF(ifa);
		}
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
	}
	/*
	 * If an interface scope was specified, the interface index of
	 * the found ifaddr must be equivalent to that of the scope;
	 * otherwise there is no match.
	 */
	if ((flags & RTF_IFSCOPE) &&
	    ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
		IFA_REMREF(ifa);
		ifa = NULL;
	}

	return (ifa);
}

static int rt_fixdelete(struct radix_node *, void *);
static int rt_fixchange(struct radix_node *, void *);

/* Walker argument for rt_fixchange(): the changed route and its tree */
struct rtfc_arg {
	struct rtentry *rt0;
	struct radix_node_head *rnh;
};

/* Unscoped routing-table request; rnh_lock must be held by the caller */
int
rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	return (rtrequest_common_locked(req, dst, gateway, netmask,
	    (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE));
}

/* Scoped routing-table request; rnh_lock must be held by the caller */
int
rtrequest_scoped_locked(int req, struct sockaddr *dst,
    struct sockaddr *gateway, struct sockaddr *netmask, int flags,
    struct rtentry **ret_nrt, unsigned int ifscope)
{
	if (ifscope != IFSCOPE_NONE)
		flags |= RTF_IFSCOPE;
	else
		flags &= ~RTF_IFSCOPE;

	return (rtrequest_common_locked(req, dst, gateway, netmask,
	    flags, ret_nrt, ifscope));
}

/*
 * Do appropriate manipulations of a routing tree given all the bits of
 * info needed.
 *
 * Storing the scope ID in the radix key is an internal job that should be
 * left to routines in this module.  Callers should specify the scope value
 * to the "scoped" variants of route routines instead of manipulating the
 * key itself.  This is typically done when creating a scoped route, e.g.
 * rtrequest(RTM_ADD).  Once such a route is created and marked with the
 * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it
 * (RTM_RESOLVE) or to remove it (RTM_DELETE).  An exception to this is
 * during certain routing socket operations where the search key might be
 * derived from the routing message itself, in which case the caller must
 * specify the destination address and scope value for RTM_ADD/RTM_DELETE.
1668 */ 1669static int 1670rtrequest_common_locked(int req, struct sockaddr *dst0, 1671 struct sockaddr *gateway, struct sockaddr *netmask, int flags, 1672 struct rtentry **ret_nrt, unsigned int ifscope) 1673{ 1674 int error = 0; 1675 struct rtentry *rt; 1676 struct radix_node *rn; 1677 struct radix_node_head *rnh; 1678 struct ifaddr *ifa = NULL; 1679 struct sockaddr *ndst, *dst = dst0; 1680 struct sockaddr_storage ss, mask; 1681 struct timeval caltime; 1682 int af = dst->sa_family; 1683 void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *); 1684 1685#define senderr(x) { error = x; goto bad; } 1686 1687 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 1688 /* 1689 * Find the correct routing tree to use for this Address Family 1690 */ 1691 if ((rnh = rt_tables[af]) == NULL) 1692 senderr(ESRCH); 1693 /* 1694 * If we are adding a host route then we don't want to put 1695 * a netmask in the tree 1696 */ 1697 if (flags & RTF_HOST) 1698 netmask = NULL; 1699 1700 /* 1701 * If Scoped Routing is enabled, use a local copy of the destination 1702 * address to store the scope ID into. This logic is repeated below 1703 * in the RTM_RESOLVE handler since the caller does not normally 1704 * specify such a flag during a resolve, as well as for the handling 1705 * of IPv4 link-local address; instead, it passes in the route used for 1706 * cloning for which the scope info is derived from. Note also that 1707 * in the case of RTM_DELETE, the address passed in by the caller 1708 * might already contain the scope ID info when it is the key itself, 1709 * thus making RTF_IFSCOPE unnecessary; one instance where it is 1710 * explicitly set is inside route_output() as part of handling a 1711 * routing socket request. 
1712 */ 1713#if INET6 1714 if (req != RTM_RESOLVE && 1715 ((af == AF_INET && ip_doscopedroute) || 1716 (af == AF_INET6 && ip6_doscopedroute))) { 1717#else 1718 if (req != RTM_RESOLVE && af == AF_INET && ip_doscopedroute) { 1719#endif /* !INET6 */ 1720 /* Transform dst into the internal routing table form */ 1721 dst = sa_copy(dst, &ss, &ifscope); 1722 1723 /* Transform netmask into the internal routing table form */ 1724 if (netmask != NULL) 1725 netmask = ma_copy(af, netmask, &mask, ifscope); 1726 1727 if (ifscope != IFSCOPE_NONE) 1728 flags |= RTF_IFSCOPE; 1729 } else { 1730 if ((flags & RTF_IFSCOPE) && (af != AF_INET && af != AF_INET6)) 1731 senderr(EINVAL); 1732 1733#if INET6 1734 if ((af == AF_INET && !ip_doscopedroute) || 1735 (af == AF_INET6 && !ip6_doscopedroute)) 1736#else 1737 if (af == AF_INET && !ip_doscopedroute) 1738#endif /* !INET6 */ 1739 ifscope = IFSCOPE_NONE; 1740 } 1741 1742 if (ifscope == IFSCOPE_NONE) 1743 flags &= ~RTF_IFSCOPE; 1744 1745 switch (req) { 1746 case RTM_DELETE: { 1747 struct rtentry *gwrt = NULL; 1748 /* 1749 * Remove the item from the tree and return it. 1750 * Complain if it is not there and do no more processing. 1751 */ 1752 if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) 1753 senderr(ESRCH); 1754 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) { 1755 panic("rtrequest delete"); 1756 /* NOTREACHED */ 1757 } 1758 rt = (struct rtentry *)rn; 1759 1760 RT_LOCK(rt); 1761 rt->rt_flags &= ~RTF_UP; 1762 /* 1763 * Release any idle reference count held on the interface 1764 * as this route is no longer externally visible. 1765 */ 1766 rt_clear_idleref(rt); 1767 /* 1768 * Take an extra reference to handle the deletion of a route 1769 * entry whose reference count is already 0; e.g. an expiring 1770 * cloned route entry or an entry that was added to the table 1771 * with 0 reference. If the caller is interested in this route, 1772 * we will return it with the reference intact. 
Otherwise we 1773 * will decrement the reference via rtfree_locked() and then 1774 * possibly deallocate it. 1775 */ 1776 RT_ADDREF_LOCKED(rt); 1777 1778 /* 1779 * For consistency, in case the caller didn't set the flag. 1780 */ 1781 rt->rt_flags |= RTF_CONDEMNED; 1782 1783 /* 1784 * Clear RTF_ROUTER if it's set. 1785 */ 1786 if (rt->rt_flags & RTF_ROUTER) { 1787 VERIFY(rt->rt_flags & RTF_HOST); 1788 rt->rt_flags &= ~RTF_ROUTER; 1789 } 1790 1791 /* 1792 * Now search what's left of the subtree for any cloned 1793 * routes which might have been formed from this node. 1794 */ 1795 if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) && 1796 rt_mask(rt)) { 1797 RT_UNLOCK(rt); 1798 rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), 1799 rt_fixdelete, rt); 1800 RT_LOCK(rt); 1801 } 1802 1803 /* 1804 * Remove any external references we may have. 1805 */ 1806 if ((gwrt = rt->rt_gwroute) != NULL) 1807 rt->rt_gwroute = NULL; 1808 1809 /* 1810 * give the protocol a chance to keep things in sync. 1811 */ 1812 if ((ifa = rt->rt_ifa) != NULL) { 1813 IFA_LOCK_SPIN(ifa); 1814 ifa_rtrequest = ifa->ifa_rtrequest; 1815 IFA_UNLOCK(ifa); 1816 if (ifa_rtrequest != NULL) 1817 ifa_rtrequest(RTM_DELETE, rt, NULL); 1818 /* keep reference on rt_ifa */ 1819 ifa = NULL; 1820 } 1821 1822 /* 1823 * one more rtentry floating around that is not 1824 * linked to the routing table. 1825 */ 1826 (void) OSIncrementAtomic(&rttrash); 1827 if (rte_debug & RTD_DEBUG) { 1828 TAILQ_INSERT_TAIL(&rttrash_head, 1829 (struct rtentry_dbg *)rt, rtd_trash_link); 1830 } 1831 1832 /* 1833 * If this is the (non-scoped) default route, clear 1834 * the interface index used for the primary ifscope. 1835 */ 1836 if (rt_primary_default(rt, rt_key(rt))) { 1837 set_primary_ifscope(rt_key(rt)->sa_family, 1838 IFSCOPE_NONE); 1839 } 1840 1841 RT_UNLOCK(rt); 1842 1843 /* 1844 * This might result in another rtentry being freed if 1845 * we held its last reference. 
Do this after the rtentry 1846 * lock is dropped above, as it could lead to the same 1847 * lock being acquired if gwrt is a clone of rt. 1848 */ 1849 if (gwrt != NULL) 1850 rtfree_locked(gwrt); 1851 1852 /* 1853 * If the caller wants it, then it can have it, 1854 * but it's up to it to free the rtentry as we won't be 1855 * doing it. 1856 */ 1857 if (ret_nrt != NULL) { 1858 /* Return the route to caller with reference intact */ 1859 *ret_nrt = rt; 1860 } else { 1861 /* Dereference or deallocate the route */ 1862 rtfree_locked(rt); 1863 } 1864 if (af == AF_INET) 1865 routegenid_inet_update(); 1866#if INET6 1867 else if (af == AF_INET6) 1868 routegenid_inet6_update(); 1869#endif /* INET6 */ 1870 break; 1871 } 1872 case RTM_RESOLVE: 1873 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL) 1874 senderr(EINVAL); 1875 /* 1876 * According to the UNIX conformance tests, we need to return 1877 * ENETUNREACH when the parent route is RTF_REJECT. 1878 * However, there isn't any point in cloning RTF_REJECT 1879 * routes, so we immediately return an error. 1880 */ 1881 if (rt->rt_flags & RTF_REJECT) { 1882 if (rt->rt_flags & RTF_HOST) { 1883 senderr(EHOSTUNREACH); 1884 } else { 1885 senderr(ENETUNREACH); 1886 } 1887 } 1888 /* 1889 * If cloning, we have the parent route given by the caller 1890 * and will use its rt_gateway, rt_rmx as part of the cloning 1891 * process below. Since rnh_lock is held at this point, the 1892 * parent's rt_ifa and rt_gateway will not change, and its 1893 * relevant rt_flags will not change as well. The only thing 1894 * that could change are the metrics, and thus we hold the 1895 * parent route's rt_lock later on during the actual copying 1896 * of rt_rmx. 
1897 */ 1898 ifa = rt->rt_ifa; 1899 IFA_ADDREF(ifa); 1900 flags = rt->rt_flags & 1901 ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC); 1902 flags |= RTF_WASCLONED; 1903 gateway = rt->rt_gateway; 1904 if ((netmask = rt->rt_genmask) == NULL) 1905 flags |= RTF_HOST; 1906 1907#if INET6 1908 if ((af != AF_INET && af != AF_INET6) || 1909 (af == AF_INET && !ip_doscopedroute) || 1910 (af == AF_INET6 && !ip6_doscopedroute)) 1911#else 1912 if (af != AF_INET || !ip_doscopedroute) 1913#endif /* !INET6 */ 1914 goto makeroute; 1915 1916 /* 1917 * When scoped routing is enabled, cloned entries are 1918 * always scoped according to the interface portion of 1919 * the parent route. The exception to this are IPv4 1920 * link local addresses, or those routes that are cloned 1921 * from a RTF_PROXY route. For the latter, the clone 1922 * gets to keep the RTF_PROXY flag. 1923 */ 1924 if ((af == AF_INET && 1925 IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) || 1926 (rt->rt_flags & RTF_PROXY)) { 1927 ifscope = IFSCOPE_NONE; 1928 flags &= ~RTF_IFSCOPE; 1929 /* 1930 * These types of cloned routes aren't currently 1931 * eligible for idle interface reference counting. 1932 */ 1933 flags |= RTF_NOIFREF; 1934 } else { 1935 if (flags & RTF_IFSCOPE) { 1936 ifscope = (af == AF_INET) ? 1937 sin_get_ifscope(rt_key(rt)) : 1938 sin6_get_ifscope(rt_key(rt)); 1939 } else { 1940 ifscope = rt->rt_ifp->if_index; 1941 flags |= RTF_IFSCOPE; 1942 } 1943 VERIFY(ifscope != IFSCOPE_NONE); 1944 } 1945 1946 /* 1947 * Transform dst into the internal routing table form, 1948 * clearing out the scope ID field if ifscope isn't set. 1949 */ 1950 dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? 
1951 NULL : &ifscope); 1952 1953 /* Transform netmask into the internal routing table form */ 1954 if (netmask != NULL) 1955 netmask = ma_copy(af, netmask, &mask, ifscope); 1956 1957 goto makeroute; 1958 1959 case RTM_ADD: 1960 if ((flags & RTF_GATEWAY) && !gateway) { 1961 panic("rtrequest: RTF_GATEWAY but no gateway"); 1962 /* NOTREACHED */ 1963 } 1964 if (flags & RTF_IFSCOPE) { 1965 ifa = ifa_ifwithroute_scoped_locked(flags, dst0, 1966 gateway, ifscope); 1967 } else { 1968 ifa = ifa_ifwithroute_locked(flags, dst0, gateway); 1969 } 1970 if (ifa == NULL) 1971 senderr(ENETUNREACH); 1972makeroute: 1973 if ((rt = rte_alloc()) == NULL) 1974 senderr(ENOBUFS); 1975 Bzero(rt, sizeof(*rt)); 1976 rte_lock_init(rt); 1977 getmicrotime(&caltime); 1978 rt->base_calendartime = caltime.tv_sec; 1979 rt->base_uptime = net_uptime(); 1980 RT_LOCK(rt); 1981 rt->rt_flags = RTF_UP | flags; 1982 1983 /* 1984 * Point the generation ID to the tree's. 1985 */ 1986 switch (af) { 1987 case AF_INET: 1988 rt->rt_tree_genid = &route_genid_inet; 1989 break; 1990#if INET6 1991 case AF_INET6: 1992 rt->rt_tree_genid = &route_genid_inet6; 1993 break; 1994#endif /* INET6 */ 1995 default: 1996 break; 1997 } 1998 1999 /* 2000 * Add the gateway. Possibly re-malloc-ing the storage for it 2001 * also add the rt_gwroute if possible. 2002 */ 2003 if ((error = rt_setgate(rt, dst, gateway)) != 0) { 2004 int tmp = error; 2005 RT_UNLOCK(rt); 2006 nstat_route_detach(rt); 2007 rte_lock_destroy(rt); 2008 rte_free(rt); 2009 senderr(tmp); 2010 } 2011 2012 /* 2013 * point to the (possibly newly malloc'd) dest address. 2014 */ 2015 ndst = rt_key(rt); 2016 2017 /* 2018 * make sure it contains the value we want (masked if needed). 2019 */ 2020 if (netmask) 2021 rt_maskedcopy(dst, ndst, netmask); 2022 else 2023 Bcopy(dst, ndst, dst->sa_len); 2024 2025 /* 2026 * Note that we now have a reference to the ifa. 2027 * This moved from below so that rnh->rnh_addaddr() can 2028 * examine the ifa and ifa->ifa_ifp if it so desires. 
2029 */ 2030 rtsetifa(rt, ifa); 2031 rt->rt_ifp = rt->rt_ifa->ifa_ifp; 2032 2033 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 2034 2035 rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, 2036 rnh, rt->rt_nodes); 2037 if (rn == 0) { 2038 struct rtentry *rt2; 2039 /* 2040 * Uh-oh, we already have one of these in the tree. 2041 * We do a special hack: if the route that's already 2042 * there was generated by the protocol-cloning 2043 * mechanism, then we just blow it away and retry 2044 * the insertion of the new one. 2045 */ 2046 if (flags & RTF_IFSCOPE) { 2047 rt2 = rtalloc1_scoped_locked(dst0, 0, 2048 RTF_CLONING | RTF_PRCLONING, ifscope); 2049 } else { 2050 rt2 = rtalloc1_locked(dst, 0, 2051 RTF_CLONING | RTF_PRCLONING); 2052 } 2053 if (rt2 && rt2->rt_parent) { 2054 /* 2055 * rnh_lock is held here, so rt_key and 2056 * rt_gateway of rt2 will not change. 2057 */ 2058 (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), 2059 rt2->rt_gateway, rt_mask(rt2), 2060 rt2->rt_flags, 0); 2061 rtfree_locked(rt2); 2062 rn = rnh->rnh_addaddr((caddr_t)ndst, 2063 (caddr_t)netmask, rnh, rt->rt_nodes); 2064 } else if (rt2) { 2065 /* undo the extra ref we got */ 2066 rtfree_locked(rt2); 2067 } 2068 } 2069 2070 /* 2071 * If it still failed to go into the tree, 2072 * then un-make it (this should be a function) 2073 */ 2074 if (rn == NULL) { 2075 /* Clear gateway route */ 2076 rt_set_gwroute(rt, rt_key(rt), NULL); 2077 if (rt->rt_ifa) { 2078 IFA_REMREF(rt->rt_ifa); 2079 rt->rt_ifa = NULL; 2080 } 2081 R_Free(rt_key(rt)); 2082 RT_UNLOCK(rt); 2083 nstat_route_detach(rt); 2084 rte_lock_destroy(rt); 2085 rte_free(rt); 2086 senderr(EEXIST); 2087 } 2088 2089 rt->rt_parent = NULL; 2090 2091 /* 2092 * If we got here from RESOLVE, then we are cloning so clone 2093 * the rest, and note that we are a clone (and increment the 2094 * parent's references). rnh_lock is still held, which prevents 2095 * a lookup from returning the newly-created route. 
Hence 2096 * holding and releasing the parent's rt_lock while still 2097 * holding the route's rt_lock is safe since the new route 2098 * is not yet externally visible. 2099 */ 2100 if (req == RTM_RESOLVE) { 2101 RT_LOCK_SPIN(*ret_nrt); 2102 VERIFY((*ret_nrt)->rt_expire == 0 || 2103 (*ret_nrt)->rt_rmx.rmx_expire != 0); 2104 VERIFY((*ret_nrt)->rt_expire != 0 || 2105 (*ret_nrt)->rt_rmx.rmx_expire == 0); 2106 rt->rt_rmx = (*ret_nrt)->rt_rmx; 2107 rt_setexpire(rt, (*ret_nrt)->rt_expire); 2108 if ((*ret_nrt)->rt_flags & 2109 (RTF_CLONING | RTF_PRCLONING)) { 2110 rt->rt_parent = (*ret_nrt); 2111 RT_ADDREF_LOCKED(*ret_nrt); 2112 } 2113 RT_UNLOCK(*ret_nrt); 2114 } 2115 2116 /* 2117 * if this protocol has something to add to this then 2118 * allow it to do that as well. 2119 */ 2120 IFA_LOCK_SPIN(ifa); 2121 ifa_rtrequest = ifa->ifa_rtrequest; 2122 IFA_UNLOCK(ifa); 2123 if (ifa_rtrequest != NULL) 2124 ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : NULL)); 2125 IFA_REMREF(ifa); 2126 ifa = NULL; 2127 2128 /* 2129 * If this is the (non-scoped) default route, record 2130 * the interface index used for the primary ifscope. 2131 */ 2132 if (rt_primary_default(rt, rt_key(rt))) { 2133 set_primary_ifscope(rt_key(rt)->sa_family, 2134 rt->rt_ifp->if_index); 2135 } 2136 2137 /* 2138 * actually return a resultant rtentry and 2139 * give the caller a single reference. 2140 */ 2141 if (ret_nrt) { 2142 *ret_nrt = rt; 2143 RT_ADDREF_LOCKED(rt); 2144 } 2145 2146 if (af == AF_INET) 2147 routegenid_inet_update(); 2148#if INET6 2149 else if (af == AF_INET6) 2150 routegenid_inet6_update(); 2151#endif /* INET6 */ 2152 2153 RT_GENID_SYNC(rt); 2154 2155 /* 2156 * We repeat the same procedures from rt_setgate() here 2157 * because they weren't completed when we called it earlier, 2158 * since the node was embryonic. 
2159 */ 2160 if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL) 2161 rt_set_gwroute(rt, rt_key(rt), rt->rt_gwroute); 2162 2163 if (req == RTM_ADD && 2164 !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) { 2165 struct rtfc_arg arg; 2166 arg.rnh = rnh; 2167 arg.rt0 = rt; 2168 RT_UNLOCK(rt); 2169 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), 2170 rt_fixchange, &arg); 2171 } else { 2172 RT_UNLOCK(rt); 2173 } 2174 2175 nstat_route_new_entry(rt); 2176 break; 2177 } 2178bad: 2179 if (ifa) 2180 IFA_REMREF(ifa); 2181 return (error); 2182} 2183#undef senderr 2184 2185int 2186rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway, 2187 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 2188{ 2189 int error; 2190 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); 2191 lck_mtx_lock(rnh_lock); 2192 error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt); 2193 lck_mtx_unlock(rnh_lock); 2194 return (error); 2195} 2196 2197int 2198rtrequest_scoped(int req, struct sockaddr *dst, struct sockaddr *gateway, 2199 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, 2200 unsigned int ifscope) 2201{ 2202 int error; 2203 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); 2204 lck_mtx_lock(rnh_lock); 2205 error = rtrequest_scoped_locked(req, dst, gateway, netmask, flags, 2206 ret_nrt, ifscope); 2207 lck_mtx_unlock(rnh_lock); 2208 return (error); 2209} 2210 2211/* 2212 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family'' 2213 * (i.e., the routes related to it by the operation of cloning). This 2214 * routine is iterated over all potential former-child-routes by way of 2215 * rnh->rnh_walktree_from() above, and those that actually are children of 2216 * the late parent (passed in as VP here) are themselves deleted. 
2217 */ 2218static int 2219rt_fixdelete(struct radix_node *rn, void *vp) 2220{ 2221 struct rtentry *rt = (struct rtentry *)rn; 2222 struct rtentry *rt0 = vp; 2223 2224 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 2225 2226 RT_LOCK(rt); 2227 if (rt->rt_parent == rt0 && 2228 !(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) { 2229 /* 2230 * Safe to drop rt_lock and use rt_key, since holding 2231 * rnh_lock here prevents another thread from calling 2232 * rt_setgate() on this route. 2233 */ 2234 RT_UNLOCK(rt); 2235 return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, 2236 rt_mask(rt), rt->rt_flags, NULL)); 2237 } 2238 RT_UNLOCK(rt); 2239 return (0); 2240} 2241 2242/* 2243 * This routine is called from rt_setgate() to do the analogous thing for 2244 * adds and changes. There is the added complication in this case of a 2245 * middle insert; i.e., insertion of a new network route between an older 2246 * network route and (cloned) host routes. For this reason, a simple check 2247 * of rt->rt_parent is insufficient; each candidate route must be tested 2248 * against the (mask, value) of the new route (passed as before in vp) 2249 * to see if the new route matches it. 2250 * 2251 * XXX - it may be possible to do fixdelete() for changes and reserve this 2252 * routine just for adds. I'm not sure why I thought it was necessary to do 2253 * changes this way. 
 */
static int
rt_fixchange(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtfc_arg *ap = vp;
	struct rtentry *rt0 = ap->rt0;
	struct radix_node_head *rnh = ap->rnh;
	u_char *xk1, *xm1, *xk2, *xmp;
	int i, len;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);

	/* Only cloned (child) routes are candidates for removal */
	if (!rt->rt_parent ||
	    (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
		RT_UNLOCK(rt);
		return (0);
	}

	/* Direct child of the route being changed/added: always remove */
	if (rt->rt_parent == rt0)
		goto delete_rt;

	/*
	 * There probably is a function somewhere which does this...
	 * if not, there should be.
	 */
	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);

	xk1 = (u_char *)rt_key(rt0);
	xm1 = (u_char *)rt_mask(rt0);
	xk2 = (u_char *)rt_key(rt);

	/*
	 * Avoid applying a less specific route; do this only if the parent
	 * route (rt->rt_parent) is a network route, since otherwise its mask
	 * will be NULL if it is a cloning host route.
	 */
	if ((xmp = (u_char *)rt_mask(rt->rt_parent)) != NULL) {
		int mlen = rt_mask(rt->rt_parent)->sa_len;
		if (mlen > rt_mask(rt0)->sa_len) {
			RT_UNLOCK(rt);
			return (0);
		}

		for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
			/*
			 * Reject unless every bit set in the parent's mask
			 * is also set in the new route's mask, i.e. the new
			 * route must be at least as specific as the parent.
			 */
			if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
				RT_UNLOCK(rt);
				return (0);
			}
		}
	}

	/* Does this clone's key match the new route under the new mask? */
	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
		if ((xk2[i] & xm1[i]) != xk1[i]) {
			RT_UNLOCK(rt);
			return (0);
		}
	}

	/*
	 * OK, this node is a clone, and matches the node currently being
	 * changed/added under the node's mask.  So, get rid of it.
	 */
delete_rt:
	/*
	 * Safe to drop rt_lock and use rt_key, since holding rnh_lock here
	 * prevents another thread from calling rt_setgate() on this route.
	 */
	RT_UNLOCK(rt);
	return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
	    rt_mask(rt), rt->rt_flags, NULL));
}

/*
 * Round up sockaddr len to multiples of 32-bytes.  This will reduce
 * or even eliminate the need to re-allocate the chunk of memory used
 * for rt_key and rt_gateway in the event the gateway portion changes.
 * Certain code paths (e.g. IPSec) are notorious for caching the address
 * of rt_gateway; this rounding-up would help ensure that the gateway
 * portion never gets deallocated (though it may change contents) and
 * thus greatly simplifies things.
 */
/* Two's-complement round-up: -(-x & -32) == (x + 31) & ~31 */
#define	SA_SIZE(x) (-(-((uintptr_t)(x)) & -(32)))

/*
 * Sets the gateway and/or gateway route portion of a route; may be
 * called on an existing route to modify the gateway portion.  Both
 * rt_key and rt_gateway are allocated out of the same memory chunk.
 * Route entry lock must be held by caller; this routine will return
 * with the lock held.
 *
 * Returns 0 on success; EINVAL for a non-INET/INET6 destination, EBUSY
 * if the route is condemned, EADDRNOTAVAIL/EADDRINUSE/EHOSTUNREACH/
 * ENETUNREACH/ENOBUFS on the failure paths below.
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
	int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len);
	struct radix_node_head *rnh = NULL;
	boolean_t loop = FALSE;

	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
		return (EINVAL);
	}

	rnh = rt_tables[dst->sa_family];
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/*
	 * If this is for a route that is on its way of being removed,
	 * or is temporarily frozen, reject the modification request.
	 */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return (EBUSY);
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Detect dst == gate (compared in internal, scope-normalized form) */
	if (rt->rt_flags & RTF_GATEWAY) {
		if ((dst->sa_len == gate->sa_len) &&
		    (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)) {
			struct sockaddr_storage dst_ss, gate_ss;

			(void) sa_copy(dst, &dst_ss, NULL);
			(void) sa_copy(gate, &gate_ss, NULL);

			loop = equal(SA(&dst_ss), SA(&gate_ss));
		} else {
			loop = (dst->sa_len == gate->sa_len &&
			    equal(dst, gate));
		}
	}

	/*
	 * A (cloning) network route with the destination equal to the gateway
	 * will create an endless loop (see notes below), so disallow it.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    RTF_GATEWAY) && loop) {
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		return (EADDRNOTAVAIL);
	}

	/*
	 * A host route with the destination equal to the gateway
	 * will interfere with keeping LLINFO in the routing
	 * table, so disallow it.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    (RTF_HOST|RTF_GATEWAY)) && loop) {
		/*
		 * The route might already exist if this is an RTM_CHANGE
		 * or a routing redirect, so try to delete it.
		 */
		if (rt_key(rt) != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, rt_gateway,
			 * since holding rnh_lock here prevents another thread
			 * from calling rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			(void) rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
			RT_LOCK(rt);
		}
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		return (EADDRNOTAVAIL);
	}

	/*
	 * The destination is not directly reachable.  Get a route
	 * to the next-hop gateway and store it in rt_gwroute.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;
		unsigned int ifscope;

		if (dst->sa_family == AF_INET)
			ifscope = sin_get_ifscope(dst);
		else if (dst->sa_family == AF_INET6)
			ifscope = sin6_get_ifscope(dst);
		else
			ifscope = IFSCOPE_NONE;

		RT_UNLOCK(rt);
		/*
		 * Don't ignore RTF_CLONING, since we prefer that rt_gwroute
		 * points to a clone rather than a cloning route; see above
		 * check for cloning loop avoidance (dst == gate).
		 */
		gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
		if (gwrt != NULL)
			RT_LOCK_ASSERT_NOTHELD(gwrt);
		RT_LOCK(rt);

		/*
		 * Cloning loop avoidance:
		 *
		 * In the presence of protocol-cloning and bad configuration,
		 * it is possible to get stuck in bottomless mutual recursion
		 * (rtrequest rt_setgate rtalloc1).  We avoid this by not
		 * allowing protocol-cloning to operate for gateways (which
		 * is probably the correct choice anyway), and avoid the
		 * resulting reference loops by disallowing any route to run
		 * through itself as a gateway.  This is obviously mandatory
		 * when we get rt->rt_output().  It implies that a route to
		 * the gateway must already be present in the system in order
		 * for the gateway to be referred to by another route.
		 */
		if (gwrt == rt) {
			RT_REMREF_LOCKED(gwrt);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (EADDRINUSE);	/* failure */
		}

		/*
		 * If scoped, the gateway route must use the same interface;
		 * we're holding rnh_lock now, so rt_gateway and rt_ifp of gwrt
		 * should not change and are freely accessible.
		 */
		if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
		    gwrt != NULL && gwrt->rt_ifp != NULL &&
		    gwrt->rt_ifp->if_index != ifscope) {
			rtfree_locked(gwrt);	/* rt != gwrt, no deadlock */
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return ((rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH);
		}

		/* Check again since we dropped the lock above */
		if (rt->rt_flags & RTF_CONDEMNED) {
			if (gwrt != NULL)
				rtfree_locked(gwrt);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (EBUSY);
		}

		/* Set gateway route; callee adds ref to gwrt if non-NULL */
		rt_set_gwroute(rt, dst, gwrt);

		/*
		 * In case the (non-scoped) default route gets modified via
		 * an ICMP redirect, record the interface index used for the
		 * primary ifscope.  Also done in rt_setif() to take care
		 * of the non-redirect cases.
		 */
		if (rt_primary_default(rt, dst) && rt->rt_ifp != NULL) {
			set_primary_ifscope(dst->sa_family,
			    rt->rt_ifp->if_index);
		}

		/*
		 * Tell the kernel debugger about the new default gateway
		 * if the gateway route uses the primary interface, or
		 * if we are in a transient state before the non-scoped
		 * default gateway is installed (similar to how the system
		 * was behaving in the past).  In future, it would be good
		 * to do all this only when KDP is enabled.
		 */
		if ((dst->sa_family == AF_INET) &&
		    gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
		    (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) ||
		    get_primary_ifscope(AF_INET) == IFSCOPE_NONE)) {
			kdp_set_gateway_mac(SDL((void *)gwrt->rt_gateway)->
			    sdl_data);
		}

		/* Release extra ref from rtalloc1() */
		if (gwrt != NULL)
			RT_REMREF(gwrt);
	}

	/*
	 * Prepare to store the gateway in rt_gateway.  Both dst and gateway
	 * are stored one after the other in the same malloc'd chunk.  If we
	 * have room, reuse the old buffer since rt_gateway already points
	 * to the right place.  Otherwise, malloc a new block and update
	 * the 'dst' address and point rt_gateway to the right place.
	 */
	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway->sa_len)) {
		caddr_t new;

		/* The underlying allocation is done with M_WAITOK set */
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL) {
			/* Clear gateway route */
			rt_set_gwroute(rt, dst, NULL);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (ENOBUFS);
		}

		/*
		 * Copy from 'dst' and not rt_key(rt) because we can get
		 * here to initialize a newly allocated route entry, in
		 * which case rt_key(rt) is NULL (and so does rt_gateway).
		 */
		bzero(new, dlen + glen);
		Bcopy(dst, new, dst->sa_len);
		R_Free(rt_key(rt));	/* free old block; NULL is okay */
		rt->rt_nodes->rn_key = new;
		rt->rt_gateway = (struct sockaddr *)(new + dlen);
	}

	/*
	 * Copy the new gateway value into the memory chunk.
	 */
	Bcopy(gate, rt->rt_gateway, gate->sa_len);

	/*
	 * For consistency between rt_gateway and rt_key(gwrt).
	 */
	if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
	    (rt->rt_gwroute->rt_flags & RTF_IFSCOPE)) {
		if (rt->rt_gateway->sa_family == AF_INET &&
		    rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
			sin_set_ifscope(rt->rt_gateway,
			    sin_get_ifscope(rt_key(rt->rt_gwroute)));
		} else if (rt->rt_gateway->sa_family == AF_INET6 &&
		    rt_key(rt->rt_gwroute)->sa_family == AF_INET6) {
			sin6_set_ifscope(rt->rt_gateway,
			    sin6_get_ifscope(rt_key(rt->rt_gwroute)));
		}
	}

	/*
	 * This isn't going to do anything useful for host routes, so
	 * don't bother.  Also make sure we have a reasonable mask
	 * (we don't yet have one during adds).
	 */
	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
		struct rtfc_arg arg;
		arg.rnh = rnh;
		arg.rt0 = rt;
		RT_UNLOCK(rt);
		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
		    rt_fixchange, &arg);
		RT_LOCK(rt);
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
	return (0);
}

#undef SA_SIZE

/*
 * Install (or clear, when gwrt is NULL) the gateway route of 'rt'.
 * Caller holds rnh_lock and rt's lock; rt's lock may be dropped and
 * reacquired while releasing the previous gateway route.
 */
void
rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt)
{
	boolean_t gwrt_isrouter;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (gwrt != NULL)
		RT_ADDREF(gwrt);	/* for this routine */

	/*
	 * Get rid of existing gateway route; if rt_gwroute is already
	 * set to gwrt, this is slightly redundant (though safe since
	 * we held an extra ref above) but makes the code simpler.
	 */
	if (rt->rt_gwroute != NULL) {
		struct rtentry *ogwrt = rt->rt_gwroute;

		VERIFY(rt != ogwrt);	/* sanity check */
		rt->rt_gwroute = NULL;
		RT_UNLOCK(rt);
		rtfree_locked(ogwrt);
		RT_LOCK(rt);
		VERIFY(rt->rt_gwroute == NULL);
	}

	/*
	 * And associate the new gateway route.
	 */
	if ((rt->rt_gwroute = gwrt) != NULL) {
		RT_ADDREF(gwrt);	/* for rt */

		if (rt->rt_flags & RTF_WASCLONED) {
			/* rt_parent might be NULL if rt is embryonic */
			gwrt_isrouter = (rt->rt_parent != NULL &&
			    SA_DEFAULT(rt_key(rt->rt_parent)) &&
			    !RT_HOST(rt->rt_parent));
		} else {
			gwrt_isrouter = (SA_DEFAULT(dst) && !RT_HOST(rt));
		}

		/* If gwrt points to a default router, mark it accordingly */
		if (gwrt_isrouter && RT_HOST(gwrt) &&
		    !(gwrt->rt_flags & RTF_ROUTER)) {
			RT_LOCK(gwrt);
			gwrt->rt_flags |= RTF_ROUTER;
			RT_UNLOCK(gwrt);
		}

		RT_REMREF(gwrt);	/* for this routine */
	}
}

/*
 * Copy 'src' into 'dst' with every byte past sa_len/sa_family ANDed
 * against 'netmask'; any tail of 'dst' beyond the mask is zeroed.
 * Assumes netmask->sa_len bounds the mask bytes — standard BSD layout.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	u_char *cp1 = (u_char *)src;
	u_char *cp2 = (u_char *)dst;
	u_char *cp3 = (u_char *)netmask;
	u_char *cplim = cp2 + *cp3;	/* end of masked region (mask len) */
	u_char *cplim2 = cp2 + *cp1;	/* end of src (src sa_len) */

	*cp2++ = *cp1++; *cp2++ = *cp1++;	/* copies sa_len & sa_family */
	cp3 += 2;	/* skip the mask's own sa_len/sa_family bytes */
	if (cplim > cplim2)
		cplim = cplim2;
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
}

/*
 * Lookup an AF_INET/AF_INET6 scoped or non-scoped route depending on the
 * ifscope value passed in by the caller (IFSCOPE_NONE implies non-scoped).
2684 */ 2685static struct radix_node * 2686node_lookup(struct sockaddr *dst, struct sockaddr *netmask, 2687 unsigned int ifscope) 2688{ 2689 struct radix_node_head *rnh; 2690 struct radix_node *rn; 2691 struct sockaddr_storage ss, mask; 2692 int af = dst->sa_family; 2693 struct matchleaf_arg ma = { ifscope }; 2694 rn_matchf_t *f = rn_match_ifscope; 2695 void *w = &ma; 2696 2697 if (af != AF_INET && af != AF_INET6) 2698 return (NULL); 2699 2700 rnh = rt_tables[af]; 2701 2702 /* 2703 * Transform dst into the internal routing table form, 2704 * clearing out the scope ID field if ifscope isn't set. 2705 */ 2706 dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? NULL : &ifscope); 2707 2708 /* Transform netmask into the internal routing table form */ 2709 if (netmask != NULL) 2710 netmask = ma_copy(af, netmask, &mask, ifscope); 2711 2712 if (ifscope == IFSCOPE_NONE) 2713 f = w = NULL; 2714 2715 rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w); 2716 if (rn != NULL && (rn->rn_flags & RNF_ROOT)) 2717 rn = NULL; 2718 2719 return (rn); 2720} 2721 2722/* 2723 * Lookup the AF_INET/AF_INET6 non-scoped default route. 2724 */ 2725static struct radix_node * 2726node_lookup_default(int af) 2727{ 2728 struct radix_node_head *rnh; 2729 2730 VERIFY(af == AF_INET || af == AF_INET6); 2731 rnh = rt_tables[af]; 2732 2733 return (af == AF_INET ? rnh->rnh_lookup(&sin_def, NULL, rnh) : 2734 rnh->rnh_lookup(&sin6_def, NULL, rnh)); 2735} 2736 2737/* 2738 * Common routine to lookup/match a route. It invokes the lookup/matchaddr 2739 * callback which could be address family-specific. The main difference 2740 * between the two (at least for AF_INET/AF_INET6) is that a lookup does 2741 * not alter the expiring state of a route, whereas a match would unexpire 2742 * or revalidate the route. 2743 * 2744 * The optional scope or interface index property of a route allows for a 2745 * per-interface route instance. 
This permits multiple route entries having 2746 * the same destination (but not necessarily the same gateway) to exist in 2747 * the routing table; each of these entries is specific to the corresponding 2748 * interface. This is made possible by storing the scope ID value into the 2749 * radix key, thus making each route entry unique. These scoped entries 2750 * exist along with the regular, non-scoped entries in the same radix tree 2751 * for a given address family (AF_INET/AF_INET6); the scope logically 2752 * partitions it into multiple per-interface sub-trees. 2753 * 2754 * When a scoped route lookup is performed, the routing table is searched for 2755 * the best match that would result in a route using the same interface as the 2756 * one associated with the scope (the exception to this are routes that point 2757 * to the loopback interface). The search rule follows the longest matching 2758 * prefix with the additional interface constraint. 2759 */ 2760static struct rtentry * 2761rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, 2762 struct sockaddr *netmask, struct radix_node_head *rnh, unsigned int ifscope) 2763{ 2764 struct radix_node *rn0, *rn; 2765 boolean_t dontcare; 2766 int af = dst->sa_family; 2767 struct sockaddr_storage dst_ss, mask_ss; 2768 2769 VERIFY(!coarse || ifscope == IFSCOPE_NONE); 2770 2771 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 2772#if INET6 2773 /* 2774 * While we have rnh_lock held, see if we need to schedule the timer. 2775 */ 2776 if (nd6_sched_timeout_want) 2777 nd6_sched_timeout(NULL, NULL); 2778#endif /* INET6 */ 2779 2780 if (!lookup_only) 2781 netmask = NULL; 2782 2783 /* 2784 * Non-scoped route lookup. 
2785 */ 2786#if INET6 2787 if ((af != AF_INET && af != AF_INET6) || 2788 (af == AF_INET && !ip_doscopedroute) || 2789 (af == AF_INET6 && !ip6_doscopedroute)) { 2790#else 2791 if (af != AF_INET || !ip_doscopedroute) { 2792#endif /* !INET6 */ 2793 rn = rnh->rnh_matchaddr(dst, rnh); 2794 2795 /* 2796 * Don't return a root node; also, rnh_matchaddr callback 2797 * would have done the necessary work to clear RTPRF_OURS 2798 * for certain protocol families. 2799 */ 2800 if (rn != NULL && (rn->rn_flags & RNF_ROOT)) 2801 rn = NULL; 2802 if (rn != NULL) { 2803 RT_LOCK_SPIN(RT(rn)); 2804 if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) { 2805 RT_ADDREF_LOCKED(RT(rn)); 2806 RT_UNLOCK(RT(rn)); 2807 } else { 2808 RT_UNLOCK(RT(rn)); 2809 rn = NULL; 2810 } 2811 } 2812 return (RT(rn)); 2813 } 2814 2815 /* Transform dst/netmask into the internal routing table form */ 2816 dst = sa_copy(dst, &dst_ss, &ifscope); 2817 if (netmask != NULL) 2818 netmask = ma_copy(af, netmask, &mask_ss, ifscope); 2819 dontcare = (ifscope == IFSCOPE_NONE); 2820 2821 /* 2822 * Scoped route lookup: 2823 * 2824 * We first perform a non-scoped lookup for the original result. 2825 * Afterwards, depending on whether or not the caller has specified 2826 * a scope, we perform a more specific scoped search and fallback 2827 * to this original result upon failure. 2828 */ 2829 rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE); 2830 2831 /* 2832 * If the caller did not specify a scope, use the primary scope 2833 * derived from the system's non-scoped default route. If, for 2834 * any reason, there is no primary interface, ifscope will be 2835 * set to IFSCOPE_NONE; if the above lookup resulted in a route, 2836 * we'll do a more-specific search below, scoped to the interface 2837 * of that route. 
2838 */ 2839 if (dontcare) 2840 ifscope = get_primary_ifscope(af); 2841 2842 /* 2843 * Keep the original result if either of the following is true: 2844 * 2845 * 1) The interface portion of the route has the same interface 2846 * index as the scope value and it is marked with RTF_IFSCOPE. 2847 * 2) The route uses the loopback interface, in which case the 2848 * destination (host/net) is local/loopback. 2849 * 2850 * Otherwise, do a more specified search using the scope; 2851 * we're holding rnh_lock now, so rt_ifp should not change. 2852 */ 2853 if (rn != NULL) { 2854 struct rtentry *rt = RT(rn); 2855 if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 2856 if (rt->rt_ifp->if_index != ifscope) { 2857 /* 2858 * Wrong interface; keep the original result 2859 * only if the caller did not specify a scope, 2860 * and do a more specific scoped search using 2861 * the scope of the found route. Otherwise, 2862 * start again from scratch. 2863 */ 2864 rn = NULL; 2865 if (dontcare) 2866 ifscope = rt->rt_ifp->if_index; 2867 else 2868 rn0 = NULL; 2869 } else if (!(rt->rt_flags & RTF_IFSCOPE)) { 2870 /* 2871 * Right interface, except that this route 2872 * isn't marked with RTF_IFSCOPE. Do a more 2873 * specific scoped search. Keep the original 2874 * result and return it it in case the scoped 2875 * search fails. 2876 */ 2877 rn = NULL; 2878 } 2879 } 2880 } 2881 2882 /* 2883 * Scoped search. Find the most specific entry having the same 2884 * interface scope as the one requested. The following will result 2885 * in searching for the longest prefix scoped match. 2886 */ 2887 if (rn == NULL) 2888 rn = node_lookup(dst, netmask, ifscope); 2889 2890 /* 2891 * Use the original result if either of the following is true: 2892 * 2893 * 1) The scoped search did not yield any result. 2894 * 2) The caller insists on performing a coarse-grained lookup. 
2895 * 3) The result from the scoped search is a scoped default route, 2896 * and the original (non-scoped) result is not a default route, 2897 * i.e. the original result is a more specific host/net route. 2898 * 4) The scoped search yielded a net route but the original 2899 * result is a host route, i.e. the original result is treated 2900 * as a more specific route. 2901 */ 2902 if (rn == NULL || coarse || (rn0 != NULL && 2903 ((SA_DEFAULT(rt_key(RT(rn))) && !SA_DEFAULT(rt_key(RT(rn0)))) || 2904 (!RT_HOST(rn) && RT_HOST(rn0))))) 2905 rn = rn0; 2906 2907 /* 2908 * If we still don't have a route, use the non-scoped default 2909 * route as long as the interface portion satistifes the scope. 2910 */ 2911 if (rn == NULL && (rn = node_lookup_default(af)) != NULL && 2912 RT(rn)->rt_ifp->if_index != ifscope) 2913 rn = NULL; 2914 2915 if (rn != NULL) { 2916 /* 2917 * Manually clear RTPRF_OURS using rt_validate() and 2918 * bump up the reference count after, and not before; 2919 * we only get here for AF_INET/AF_INET6. node_lookup() 2920 * has done the check against RNF_ROOT, so we can be sure 2921 * that we're not returning a root node here. 
2922 */ 2923 RT_LOCK_SPIN(RT(rn)); 2924 if (rt_validate(RT(rn))) { 2925 RT_ADDREF_LOCKED(RT(rn)); 2926 RT_UNLOCK(RT(rn)); 2927 } else { 2928 RT_UNLOCK(RT(rn)); 2929 rn = NULL; 2930 } 2931 } 2932 2933 return (RT(rn)); 2934} 2935 2936struct rtentry * 2937rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, 2938 struct radix_node_head *rnh, unsigned int ifscope) 2939{ 2940 return (rt_lookup_common(lookup_only, FALSE, dst, netmask, 2941 rnh, ifscope)); 2942} 2943 2944struct rtentry * 2945rt_lookup_coarse(boolean_t lookup_only, struct sockaddr *dst, 2946 struct sockaddr *netmask, struct radix_node_head *rnh) 2947{ 2948 return (rt_lookup_common(lookup_only, TRUE, dst, netmask, 2949 rnh, IFSCOPE_NONE)); 2950} 2951 2952boolean_t 2953rt_validate(struct rtentry *rt) 2954{ 2955 RT_LOCK_ASSERT_HELD(rt); 2956 2957 if ((rt->rt_flags & (RTF_UP | RTF_CONDEMNED)) == RTF_UP) { 2958 int af = rt_key(rt)->sa_family; 2959 2960 if (af == AF_INET) 2961 (void) in_validate(RN(rt)); 2962 else if (af == AF_INET6) 2963 (void) in6_validate(RN(rt)); 2964 } else { 2965 rt = NULL; 2966 } 2967 2968 return (rt != NULL); 2969} 2970 2971/* 2972 * Set up a routing table entry, normally 2973 * for an interface. 2974 */ 2975int 2976rtinit(struct ifaddr *ifa, int cmd, int flags) 2977{ 2978 int error; 2979 2980 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); 2981 2982 lck_mtx_lock(rnh_lock); 2983 error = rtinit_locked(ifa, cmd, flags); 2984 lck_mtx_unlock(rnh_lock); 2985 2986 return (error); 2987} 2988 2989int 2990rtinit_locked(struct ifaddr *ifa, int cmd, int flags) 2991{ 2992 struct radix_node_head *rnh; 2993 uint8_t nbuf[128]; /* long enough for IPv6 */ 2994 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; 2995 char abuf[MAX_IPv6_STR_LEN]; 2996 struct rtentry *rt = NULL; 2997 struct sockaddr *dst; 2998 struct sockaddr *netmask; 2999 int error = 0; 3000 3001 /* 3002 * Holding rnh_lock here prevents the possibility of ifa from 3003 * changing (e.g. 
in_ifinit), so it is safe to access its 3004 * ifa_{dst}addr (here and down below) without locking. 3005 */ 3006 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 3007 3008 if (flags & RTF_HOST) { 3009 dst = ifa->ifa_dstaddr; 3010 netmask = NULL; 3011 } else { 3012 dst = ifa->ifa_addr; 3013 netmask = ifa->ifa_netmask; 3014 } 3015 3016 if (dst->sa_len == 0) { 3017 log(LOG_ERR, "%s: %s failed, invalid dst sa_len %d\n", 3018 __func__, rtm2str(cmd), dst->sa_len); 3019 error = EINVAL; 3020 goto done; 3021 } 3022 if (netmask != NULL && netmask->sa_len > sizeof (nbuf)) { 3023 log(LOG_ERR, "%s: %s failed, mask sa_len %d too large\n", 3024 __func__, rtm2str(cmd), dst->sa_len); 3025 error = EINVAL; 3026 goto done; 3027 } 3028 3029 if (dst->sa_family == AF_INET) { 3030 (void) inet_ntop(AF_INET, &SIN(dst)->sin_addr.s_addr, 3031 abuf, sizeof (abuf)); 3032 } 3033#if INET6 3034 else if (dst->sa_family == AF_INET6) { 3035 (void) inet_ntop(AF_INET6, &SIN6(dst)->sin6_addr, 3036 abuf, sizeof (abuf)); 3037 } 3038#endif /* INET6 */ 3039 3040 if ((rnh = rt_tables[dst->sa_family]) == NULL) { 3041 error = EINVAL; 3042 goto done; 3043 } 3044 3045 /* 3046 * If it's a delete, check that if it exists, it's on the correct 3047 * interface or we might scrub a route to another ifa which would 3048 * be confusing at best and possibly worse. 3049 */ 3050 if (cmd == RTM_DELETE) { 3051 /* 3052 * It's a delete, so it should already exist.. 3053 * If it's a net, mask off the host bits 3054 * (Assuming we have a mask) 3055 */ 3056 if (netmask != NULL) { 3057 rt_maskedcopy(dst, SA(nbuf), netmask); 3058 dst = SA(nbuf); 3059 } 3060 /* 3061 * Get an rtentry that is in the routing tree and contains 3062 * the correct info. Note that we perform a coarse-grained 3063 * lookup here, in case there is a scoped variant of the 3064 * subnet/prefix route which we should ignore, as we never 3065 * add a scoped subnet/prefix route as part of adding an 3066 * interface address. 
3067 */ 3068 rt = rt_lookup_coarse(TRUE, dst, NULL, rnh); 3069 if (rt != NULL) { 3070 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 3071 /* 3072 * Ok so we found the rtentry. it has an extra reference 3073 * for us at this stage. we won't need that so 3074 * lop that off now. 3075 */ 3076 RT_LOCK(rt); 3077 if (rt->rt_ifa != ifa) { 3078 /* 3079 * If the interface address in the rtentry 3080 * doesn't match the interface we are using, 3081 * then we don't want to delete it, so return 3082 * an error. This seems to be the only point 3083 * of this whole RTM_DELETE clause. 3084 */ 3085 if (rt_verbose) { 3086 log(LOG_DEBUG, "%s: not removing " 3087 "route to %s->%s->%s, flags %b, " 3088 "ifaddr %s, rt_ifa 0x%llx != " 3089 "ifa 0x%llx\n", __func__, dbuf, 3090 gbuf, ((rt->rt_ifp != NULL) ? 3091 rt->rt_ifp->if_xname : ""), 3092 rt->rt_flags, RTF_BITS, abuf, 3093 (uint64_t)VM_KERNEL_ADDRPERM( 3094 rt->rt_ifa), 3095 (uint64_t)VM_KERNEL_ADDRPERM(ifa)); 3096 } 3097 RT_REMREF_LOCKED(rt); 3098 RT_UNLOCK(rt); 3099 rt = NULL; 3100 error = ((flags & RTF_HOST) ? 3101 EHOSTUNREACH : ENETUNREACH); 3102 goto done; 3103 } else if (rt->rt_flags & RTF_STATIC) { 3104 /* 3105 * Don't remove the subnet/prefix route if 3106 * this was manually added from above. 3107 */ 3108 if (rt_verbose) { 3109 log(LOG_DEBUG, "%s: not removing " 3110 "static route to %s->%s->%s, " 3111 "flags %b, ifaddr %s\n", __func__, 3112 dbuf, gbuf, ((rt->rt_ifp != NULL) ? 3113 rt->rt_ifp->if_xname : ""), 3114 rt->rt_flags, RTF_BITS, abuf); 3115 } 3116 RT_REMREF_LOCKED(rt); 3117 RT_UNLOCK(rt); 3118 rt = NULL; 3119 error = EBUSY; 3120 goto done; 3121 } 3122 if (rt_verbose) { 3123 log(LOG_DEBUG, "%s: removing route to " 3124 "%s->%s->%s, flags %b, ifaddr %s\n", 3125 __func__, dbuf, gbuf, 3126 ((rt->rt_ifp != NULL) ? 
3127 rt->rt_ifp->if_xname : ""), 3128 rt->rt_flags, RTF_BITS, abuf); 3129 } 3130 RT_REMREF_LOCKED(rt); 3131 RT_UNLOCK(rt); 3132 rt = NULL; 3133 } 3134 } 3135 /* 3136 * Do the actual request 3137 */ 3138 if ((error = rtrequest_locked(cmd, dst, ifa->ifa_addr, netmask, 3139 flags | ifa->ifa_flags, &rt)) != 0) 3140 goto done; 3141 3142 VERIFY(rt != NULL); 3143 3144 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 3145 3146 switch (cmd) { 3147 case RTM_DELETE: 3148 /* 3149 * If we are deleting, and we found an entry, then it's 3150 * been removed from the tree. Notify any listening 3151 * routing agents of the change and throw it away. 3152 */ 3153 RT_LOCK(rt); 3154 rt_newaddrmsg(cmd, ifa, error, rt); 3155 RT_UNLOCK(rt); 3156 if (rt_verbose) { 3157 log(LOG_DEBUG, "%s: removed route to %s->%s->%s, " 3158 "flags %b, ifaddr %s\n", __func__, dbuf, gbuf, 3159 ((rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : ""), 3160 rt->rt_flags, RTF_BITS, abuf); 3161 } 3162 rtfree_locked(rt); 3163 break; 3164 3165 case RTM_ADD: 3166 /* 3167 * We are adding, and we have a returned routing entry. 3168 * We need to sanity check the result. If it came back 3169 * with an unexpected interface, then it must have already 3170 * existed or something. 3171 */ 3172 RT_LOCK(rt); 3173 if (rt->rt_ifa != ifa) { 3174 void (*ifa_rtrequest) 3175 (int, struct rtentry *, struct sockaddr *); 3176 3177 if (!(rt->rt_ifa->ifa_ifp->if_flags & 3178 (IFF_POINTOPOINT|IFF_LOOPBACK))) { 3179 log(LOG_ERR, "%s: %s route to %s->%s->%s, " 3180 "flags %b, ifaddr %s, rt_ifa 0x%llx != " 3181 "ifa 0x%llx\n", __func__, rtm2str(cmd), 3182 dbuf, gbuf, ((rt->rt_ifp != NULL) ? 
3183 rt->rt_ifp->if_xname : ""), rt->rt_flags, 3184 RTF_BITS, abuf, 3185 (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa), 3186 (uint64_t)VM_KERNEL_ADDRPERM(ifa)); 3187 } 3188 3189 if (rt_verbose) { 3190 log(LOG_DEBUG, "%s: %s route to %s->%s->%s, " 3191 "flags %b, ifaddr %s, rt_ifa was 0x%llx " 3192 "now 0x%llx\n", __func__, rtm2str(cmd), 3193 dbuf, gbuf, ((rt->rt_ifp != NULL) ? 3194 rt->rt_ifp->if_xname : ""), rt->rt_flags, 3195 RTF_BITS, abuf, 3196 (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa), 3197 (uint64_t)VM_KERNEL_ADDRPERM(ifa)); 3198 } 3199 3200 /* 3201 * Ask that the protocol in question 3202 * remove anything it has associated with 3203 * this route and ifaddr. 3204 */ 3205 ifa_rtrequest = rt->rt_ifa->ifa_rtrequest; 3206 if (ifa_rtrequest != NULL) 3207 ifa_rtrequest(RTM_DELETE, rt, NULL); 3208 /* 3209 * Set the route's ifa. 3210 */ 3211 rtsetifa(rt, ifa); 3212 3213 if (rt->rt_ifp != ifa->ifa_ifp) { 3214 /* 3215 * Purge any link-layer info caching. 3216 */ 3217 if (rt->rt_llinfo_purge != NULL) 3218 rt->rt_llinfo_purge(rt); 3219 /* 3220 * Adjust route ref count for the interfaces. 3221 */ 3222 if (rt->rt_if_ref_fn != NULL) { 3223 rt->rt_if_ref_fn(ifa->ifa_ifp, 1); 3224 rt->rt_if_ref_fn(rt->rt_ifp, -1); 3225 } 3226 } 3227 3228 /* 3229 * And substitute in references to the ifaddr 3230 * we are adding. 3231 */ 3232 rt->rt_ifp = ifa->ifa_ifp; 3233 /* 3234 * If rmx_mtu is not locked, update it 3235 * to the MTU used by the new interface. 3236 */ 3237 if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) 3238 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; 3239 3240 /* 3241 * Now ask the protocol to check if it needs 3242 * any special processing in its new form. 3243 */ 3244 ifa_rtrequest = ifa->ifa_rtrequest; 3245 if (ifa_rtrequest != NULL) 3246 ifa_rtrequest(RTM_ADD, rt, NULL); 3247 } else { 3248 if (rt_verbose) { 3249 log(LOG_DEBUG, "%s: added route to %s->%s->%s, " 3250 "flags %b, ifaddr %s\n", __func__, dbuf, 3251 gbuf, ((rt->rt_ifp != NULL) ? 
3252 rt->rt_ifp->if_xname : ""), rt->rt_flags, 3253 RTF_BITS, abuf); 3254 } 3255 } 3256 /* 3257 * notify any listenning routing agents of the change 3258 */ 3259 rt_newaddrmsg(cmd, ifa, error, rt); 3260 /* 3261 * We just wanted to add it; we don't actually need a 3262 * reference. This will result in a route that's added 3263 * to the routing table without a reference count. The 3264 * RTM_DELETE code will do the necessary step to adjust 3265 * the reference count at deletion time. 3266 */ 3267 RT_REMREF_LOCKED(rt); 3268 RT_UNLOCK(rt); 3269 break; 3270 3271 default: 3272 VERIFY(0); 3273 /* NOTREACHED */ 3274 } 3275done: 3276 return (error); 3277} 3278 3279static void 3280rt_set_idleref(struct rtentry *rt) 3281{ 3282 RT_LOCK_ASSERT_HELD(rt); 3283 3284 /* 3285 * We currently keep idle refcnt only on unicast cloned routes 3286 * that aren't marked with RTF_NOIFREF. 3287 */ 3288 if (rt->rt_parent != NULL && !(rt->rt_flags & 3289 (RTF_NOIFREF|RTF_BROADCAST | RTF_MULTICAST)) && 3290 (rt->rt_flags & (RTF_UP|RTF_WASCLONED|RTF_IFREF)) == 3291 (RTF_UP|RTF_WASCLONED)) { 3292 rt_clear_idleref(rt); /* drop existing refcnt if any */ 3293 rt->rt_if_ref_fn = rte_if_ref; 3294 /* Become a regular mutex, just in case */ 3295 RT_CONVERT_LOCK(rt); 3296 rt->rt_if_ref_fn(rt->rt_ifp, 1); 3297 rt->rt_flags |= RTF_IFREF; 3298 } 3299} 3300 3301void 3302rt_clear_idleref(struct rtentry *rt) 3303{ 3304 RT_LOCK_ASSERT_HELD(rt); 3305 3306 if (rt->rt_if_ref_fn != NULL) { 3307 VERIFY((rt->rt_flags & (RTF_NOIFREF | RTF_IFREF)) == RTF_IFREF); 3308 /* Become a regular mutex, just in case */ 3309 RT_CONVERT_LOCK(rt); 3310 rt->rt_if_ref_fn(rt->rt_ifp, -1); 3311 rt->rt_flags &= ~RTF_IFREF; 3312 rt->rt_if_ref_fn = NULL; 3313 } 3314} 3315 3316void 3317rt_set_proxy(struct rtentry *rt, boolean_t set) 3318{ 3319 lck_mtx_lock(rnh_lock); 3320 RT_LOCK(rt); 3321 /* 3322 * Search for any cloned routes which might have 3323 * been formed from this node, and delete them. 
3324 */ 3325 if (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { 3326 struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family]; 3327 3328 if (set) 3329 rt->rt_flags |= RTF_PROXY; 3330 else 3331 rt->rt_flags &= ~RTF_PROXY; 3332 3333 RT_UNLOCK(rt); 3334 if (rnh != NULL && rt_mask(rt)) { 3335 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), 3336 rt_fixdelete, rt); 3337 } 3338 } else { 3339 RT_UNLOCK(rt); 3340 } 3341 lck_mtx_unlock(rnh_lock); 3342} 3343 3344static void 3345rte_lock_init(struct rtentry *rt) 3346{ 3347 lck_mtx_init(&rt->rt_lock, rte_mtx_grp, rte_mtx_attr); 3348} 3349 3350static void 3351rte_lock_destroy(struct rtentry *rt) 3352{ 3353 RT_LOCK_ASSERT_NOTHELD(rt); 3354 lck_mtx_destroy(&rt->rt_lock, rte_mtx_grp); 3355} 3356 3357void 3358rt_lock(struct rtentry *rt, boolean_t spin) 3359{ 3360 RT_LOCK_ASSERT_NOTHELD(rt); 3361 if (spin) 3362 lck_mtx_lock_spin(&rt->rt_lock); 3363 else 3364 lck_mtx_lock(&rt->rt_lock); 3365 if (rte_debug & RTD_DEBUG) 3366 rte_lock_debug((struct rtentry_dbg *)rt); 3367} 3368 3369void 3370rt_unlock(struct rtentry *rt) 3371{ 3372 if (rte_debug & RTD_DEBUG) 3373 rte_unlock_debug((struct rtentry_dbg *)rt); 3374 lck_mtx_unlock(&rt->rt_lock); 3375 3376} 3377 3378static inline void 3379rte_lock_debug(struct rtentry_dbg *rte) 3380{ 3381 uint32_t idx; 3382 3383 RT_LOCK_ASSERT_HELD((struct rtentry *)rte); 3384 idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE; 3385 if (rte_debug & RTD_TRACE) 3386 ctrace_record(&rte->rtd_lock[idx]); 3387} 3388 3389static inline void 3390rte_unlock_debug(struct rtentry_dbg *rte) 3391{ 3392 uint32_t idx; 3393 3394 RT_LOCK_ASSERT_HELD((struct rtentry *)rte); 3395 idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE; 3396 if (rte_debug & RTD_TRACE) 3397 ctrace_record(&rte->rtd_unlock[idx]); 3398} 3399 3400static struct rtentry * 3401rte_alloc(void) 3402{ 3403 if (rte_debug & RTD_DEBUG) 3404 return (rte_alloc_debug()); 3405 3406 return ((struct rtentry *)zalloc(rte_zone)); 
3407} 3408 3409static void 3410rte_free(struct rtentry *p) 3411{ 3412 if (rte_debug & RTD_DEBUG) { 3413 rte_free_debug(p); 3414 return; 3415 } 3416 3417 if (p->rt_refcnt != 0) { 3418 panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt); 3419 /* NOTREACHED */ 3420 } 3421 zfree(rte_zone, p); 3422} 3423 3424static void 3425rte_if_ref(struct ifnet *ifp, int cnt) 3426{ 3427 struct kev_msg ev_msg; 3428 struct net_event_data ev_data; 3429 uint32_t old; 3430 3431 /* Force cnt to 1 increment/decrement */ 3432 if (cnt < -1 || cnt > 1) { 3433 panic("%s: invalid count argument (%d)", __func__, cnt); 3434 /* NOTREACHED */ 3435 } 3436 old = atomic_add_32_ov(&ifp->if_route_refcnt, cnt); 3437 if (cnt < 0 && old == 0) { 3438 panic("%s: ifp=%p negative route refcnt!", __func__, ifp); 3439 /* NOTREACHED */ 3440 } 3441 /* 3442 * The following is done without first holding the ifnet lock, 3443 * for performance reasons. The relevant ifnet fields, with 3444 * the exception of the if_idle_flags, are never changed 3445 * during the lifetime of the ifnet. The if_idle_flags 3446 * may possibly be modified, so in the event that the value 3447 * is stale because IFRF_IDLE_NOTIFY was cleared, we'd end up 3448 * sending the event anyway. This is harmless as it is just 3449 * a notification to the monitoring agent in user space, and 3450 * it is expected to check via SIOCGIFGETRTREFCNT again anyway. 
3451 */ 3452 if ((ifp->if_idle_flags & IFRF_IDLE_NOTIFY) && cnt < 0 && old == 1) { 3453 bzero(&ev_msg, sizeof (ev_msg)); 3454 bzero(&ev_data, sizeof (ev_data)); 3455 3456 ev_msg.vendor_code = KEV_VENDOR_APPLE; 3457 ev_msg.kev_class = KEV_NETWORK_CLASS; 3458 ev_msg.kev_subclass = KEV_DL_SUBCLASS; 3459 ev_msg.event_code = KEV_DL_IF_IDLE_ROUTE_REFCNT; 3460 3461 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); 3462 3463 ev_data.if_family = ifp->if_family; 3464 ev_data.if_unit = ifp->if_unit; 3465 ev_msg.dv[0].data_length = sizeof (struct net_event_data); 3466 ev_msg.dv[0].data_ptr = &ev_data; 3467 3468 kev_post_msg(&ev_msg); 3469 } 3470} 3471 3472static inline struct rtentry * 3473rte_alloc_debug(void) 3474{ 3475 struct rtentry_dbg *rte; 3476 3477 rte = ((struct rtentry_dbg *)zalloc(rte_zone)); 3478 if (rte != NULL) { 3479 bzero(rte, sizeof (*rte)); 3480 if (rte_debug & RTD_TRACE) 3481 ctrace_record(&rte->rtd_alloc); 3482 rte->rtd_inuse = RTD_INUSE; 3483 } 3484 return ((struct rtentry *)rte); 3485} 3486 3487static inline void 3488rte_free_debug(struct rtentry *p) 3489{ 3490 struct rtentry_dbg *rte = (struct rtentry_dbg *)p; 3491 3492 if (p->rt_refcnt != 0) { 3493 panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt); 3494 /* NOTREACHED */ 3495 } 3496 if (rte->rtd_inuse == RTD_FREED) { 3497 panic("rte_free: double free rte=%p\n", rte); 3498 /* NOTREACHED */ 3499 } else if (rte->rtd_inuse != RTD_INUSE) { 3500 panic("rte_free: corrupted rte=%p\n", rte); 3501 /* NOTREACHED */ 3502 } 3503 bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p)); 3504 /* Preserve rt_lock to help catch use-after-free cases */ 3505 bzero((caddr_t)p, offsetof(struct rtentry, rt_lock)); 3506 3507 rte->rtd_inuse = RTD_FREED; 3508 3509 if (rte_debug & RTD_TRACE) 3510 ctrace_record(&rte->rtd_free); 3511 3512 if (!(rte_debug & RTD_NO_FREE)) 3513 zfree(rte_zone, p); 3514} 3515 3516void 3517ctrace_record(ctrace_t *tr) 3518{ 3519 tr->th = current_thread(); 3520 bzero(tr->pc, sizeof 
(tr->pc)); 3521 (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE); 3522} 3523 3524void 3525route_copyout(struct route *dst, const struct route *src, size_t length) 3526{ 3527 /* Copy everything (rt, srcif, flags, dst) from src */ 3528 bcopy(src, dst, length); 3529 3530 /* Hold one reference for the local copy of struct route */ 3531 if (dst->ro_rt != NULL) 3532 RT_ADDREF(dst->ro_rt); 3533 3534 /* Hold one reference for the local copy of struct ifaddr */ 3535 if (dst->ro_srcia != NULL) 3536 IFA_ADDREF(dst->ro_srcia); 3537} 3538 3539void 3540route_copyin(struct route *src, struct route *dst, size_t length) 3541{ 3542 /* No cached route at the destination? */ 3543 if (dst->ro_rt == NULL) { 3544 /* 3545 * Ditch the address in the cached copy (dst) since 3546 * we're about to take everything there is in src. 3547 */ 3548 if (dst->ro_srcia != NULL) 3549 IFA_REMREF(dst->ro_srcia); 3550 /* 3551 * Copy everything (rt, srcia, flags, dst) from src; the 3552 * references to rt and/or srcia were held at the time 3553 * of storage and are kept intact. 3554 */ 3555 bcopy(src, dst, length); 3556 } else if (src->ro_rt != NULL) { 3557 /* 3558 * If the same, update srcia and flags, and ditch the route 3559 * in the local copy. Else ditch the one that is currently 3560 * cached, and cache the new route. 3561 */ 3562 if (dst->ro_rt == src->ro_rt) { 3563 dst->ro_flags = src->ro_flags; 3564 if (dst->ro_srcia != src->ro_srcia) { 3565 if (dst->ro_srcia != NULL) 3566 IFA_REMREF(dst->ro_srcia); 3567 dst->ro_srcia = src->ro_srcia; 3568 } else if (src->ro_srcia != NULL) { 3569 IFA_REMREF(src->ro_srcia); 3570 } 3571 rtfree(src->ro_rt); 3572 } else { 3573 rtfree(dst->ro_rt); 3574 if (dst->ro_srcia != NULL) 3575 IFA_REMREF(dst->ro_srcia); 3576 bcopy(src, dst, length); 3577 } 3578 } else if (src->ro_srcia != NULL) { 3579 /* 3580 * Ditch src address in the local copy (src) since we're 3581 * not caching the route entry anyway (ro_rt is NULL). 
3582 */ 3583 IFA_REMREF(src->ro_srcia); 3584 } 3585 3586 /* This function consumes the references on src */ 3587 src->ro_rt = NULL; 3588 src->ro_srcia = NULL; 3589} 3590 3591/* 3592 * route_to_gwroute will find the gateway route for a given route. 3593 * 3594 * If the route is down, look the route up again. 3595 * If the route goes through a gateway, get the route to the gateway. 3596 * If the gateway route is down, look it up again. 3597 * If the route is set to reject, verify it hasn't expired. 3598 * 3599 * If the returned route is non-NULL, the caller is responsible for 3600 * releasing the reference and unlocking the route. 3601 */ 3602#define senderr(e) { error = (e); goto bad; } 3603errno_t 3604route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0, 3605 struct rtentry **out_route) 3606{ 3607 uint64_t timenow; 3608 struct rtentry *rt = hint0, *hint = hint0; 3609 errno_t error = 0; 3610 unsigned int ifindex; 3611 boolean_t gwroute; 3612 3613 *out_route = NULL; 3614 3615 if (rt == NULL) 3616 return (0); 3617 3618 /* 3619 * Next hop determination. Because we may involve the gateway route 3620 * in addition to the original route, locking is rather complicated. 3621 * The general concept is that regardless of whether the route points 3622 * to the original route or to the gateway route, this routine takes 3623 * an extra reference on such a route. This extra reference will be 3624 * released at the end. 3625 * 3626 * Care must be taken to ensure that the "hint0" route never gets freed 3627 * via rtfree(), since the caller may have stored it inside a struct 3628 * route with a reference held for that placeholder. 
3629 */ 3630 RT_LOCK_SPIN(rt); 3631 ifindex = rt->rt_ifp->if_index; 3632 RT_ADDREF_LOCKED(rt); 3633 if (!(rt->rt_flags & RTF_UP)) { 3634 RT_REMREF_LOCKED(rt); 3635 RT_UNLOCK(rt); 3636 /* route is down, find a new one */ 3637 hint = rt = rtalloc1_scoped((struct sockaddr *) 3638 (size_t)net_dest, 1, 0, ifindex); 3639 if (hint != NULL) { 3640 RT_LOCK_SPIN(rt); 3641 ifindex = rt->rt_ifp->if_index; 3642 } else { 3643 senderr(EHOSTUNREACH); 3644 } 3645 } 3646 3647 /* 3648 * We have a reference to "rt" by now; it will either 3649 * be released or freed at the end of this routine. 3650 */ 3651 RT_LOCK_ASSERT_HELD(rt); 3652 if ((gwroute = (rt->rt_flags & RTF_GATEWAY))) { 3653 struct rtentry *gwrt = rt->rt_gwroute; 3654 struct sockaddr_storage ss; 3655 struct sockaddr *gw = (struct sockaddr *)&ss; 3656 3657 VERIFY(rt == hint); 3658 RT_ADDREF_LOCKED(hint); 3659 3660 /* If there's no gateway rt, look it up */ 3661 if (gwrt == NULL) { 3662 bcopy(rt->rt_gateway, gw, MIN(sizeof (ss), 3663 rt->rt_gateway->sa_len)); 3664 RT_UNLOCK(rt); 3665 goto lookup; 3666 } 3667 /* Become a regular mutex */ 3668 RT_CONVERT_LOCK(rt); 3669 3670 /* 3671 * Take gwrt's lock while holding route's lock; 3672 * this is okay since gwrt never points back 3673 * to "rt", so no lock ordering issues. 3674 */ 3675 RT_LOCK_SPIN(gwrt); 3676 if (!(gwrt->rt_flags & RTF_UP)) { 3677 rt->rt_gwroute = NULL; 3678 RT_UNLOCK(gwrt); 3679 bcopy(rt->rt_gateway, gw, MIN(sizeof (ss), 3680 rt->rt_gateway->sa_len)); 3681 RT_UNLOCK(rt); 3682 rtfree(gwrt); 3683lookup: 3684 lck_mtx_lock(rnh_lock); 3685 gwrt = rtalloc1_scoped_locked(gw, 1, 0, ifindex); 3686 3687 RT_LOCK(rt); 3688 /* 3689 * Bail out if the route is down, no route 3690 * to gateway, circular route, or if the 3691 * gateway portion of "rt" has changed. 
3692 */ 3693 if (!(rt->rt_flags & RTF_UP) || gwrt == NULL || 3694 gwrt == rt || !equal(gw, rt->rt_gateway)) { 3695 if (gwrt == rt) { 3696 RT_REMREF_LOCKED(gwrt); 3697 gwrt = NULL; 3698 } 3699 VERIFY(rt == hint); 3700 RT_REMREF_LOCKED(hint); 3701 hint = NULL; 3702 RT_UNLOCK(rt); 3703 if (gwrt != NULL) 3704 rtfree_locked(gwrt); 3705 lck_mtx_unlock(rnh_lock); 3706 senderr(EHOSTUNREACH); 3707 } 3708 VERIFY(gwrt != NULL); 3709 /* 3710 * Set gateway route; callee adds ref to gwrt; 3711 * gwrt has an extra ref from rtalloc1() for 3712 * this routine. 3713 */ 3714 rt_set_gwroute(rt, rt_key(rt), gwrt); 3715 VERIFY(rt == hint); 3716 RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */ 3717 RT_UNLOCK(rt); 3718 lck_mtx_unlock(rnh_lock); 3719 rt = gwrt; 3720 } else { 3721 RT_ADDREF_LOCKED(gwrt); 3722 RT_UNLOCK(gwrt); 3723 VERIFY(rt == hint); 3724 RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */ 3725 RT_UNLOCK(rt); 3726 rt = gwrt; 3727 } 3728 VERIFY(rt == gwrt && rt != hint); 3729 3730 /* 3731 * This is an opportunity to revalidate the parent route's 3732 * rt_gwroute, in case it now points to a dead route entry. 3733 * Parent route won't go away since the clone (hint) holds 3734 * a reference to it. rt == gwrt. 
3735 */ 3736 RT_LOCK_SPIN(hint); 3737 if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) == 3738 (RTF_WASCLONED | RTF_UP)) { 3739 struct rtentry *prt = hint->rt_parent; 3740 VERIFY(prt != NULL); 3741 3742 RT_CONVERT_LOCK(hint); 3743 RT_ADDREF(prt); 3744 RT_UNLOCK(hint); 3745 rt_revalidate_gwroute(prt, rt); 3746 RT_REMREF(prt); 3747 } else { 3748 RT_UNLOCK(hint); 3749 } 3750 3751 /* Clean up "hint" now; see notes above regarding hint0 */ 3752 if (hint == hint0) 3753 RT_REMREF(hint); 3754 else 3755 rtfree(hint); 3756 hint = NULL; 3757 3758 /* rt == gwrt; if it is now down, give up */ 3759 RT_LOCK_SPIN(rt); 3760 if (!(rt->rt_flags & RTF_UP)) { 3761 RT_UNLOCK(rt); 3762 senderr(EHOSTUNREACH); 3763 } 3764 } 3765 3766 if (rt->rt_flags & RTF_REJECT) { 3767 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); 3768 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); 3769 timenow = net_uptime(); 3770 if (rt->rt_expire == 0 || timenow < rt->rt_expire) { 3771 RT_UNLOCK(rt); 3772 senderr(!gwroute ? 
EHOSTDOWN : EHOSTUNREACH); 3773 } 3774 } 3775 3776 /* Become a regular mutex */ 3777 RT_CONVERT_LOCK(rt); 3778 3779 /* Caller is responsible for cleaning up "rt" */ 3780 *out_route = rt; 3781 return (0); 3782 3783bad: 3784 /* Clean up route (either it is "rt" or "gwrt") */ 3785 if (rt != NULL) { 3786 RT_LOCK_SPIN(rt); 3787 if (rt == hint0) { 3788 RT_REMREF_LOCKED(rt); 3789 RT_UNLOCK(rt); 3790 } else { 3791 RT_UNLOCK(rt); 3792 rtfree(rt); 3793 } 3794 } 3795 return (error); 3796} 3797#undef senderr 3798 3799void 3800rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt) 3801{ 3802 VERIFY(gwrt != NULL); 3803 3804 RT_LOCK_SPIN(rt); 3805 if ((rt->rt_flags & (RTF_GATEWAY | RTF_UP)) == (RTF_GATEWAY | RTF_UP) && 3806 rt->rt_ifp == gwrt->rt_ifp && rt->rt_gateway->sa_family == 3807 rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL || 3808 !(rt->rt_gwroute->rt_flags & RTF_UP))) { 3809 boolean_t isequal; 3810 VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)); 3811 3812 if (rt->rt_gateway->sa_family == AF_INET || 3813 rt->rt_gateway->sa_family == AF_INET6) { 3814 struct sockaddr_storage key_ss, gw_ss; 3815 /* 3816 * We need to compare rt_key and rt_gateway; create 3817 * local copies to get rid of any ifscope association. 
3818 */ 3819 (void) sa_copy(rt_key(gwrt), &key_ss, NULL); 3820 (void) sa_copy(rt->rt_gateway, &gw_ss, NULL); 3821 3822 isequal = equal(SA(&key_ss), SA(&gw_ss)); 3823 } else { 3824 isequal = equal(rt_key(gwrt), rt->rt_gateway); 3825 } 3826 3827 /* If they are the same, update gwrt */ 3828 if (isequal) { 3829 RT_UNLOCK(rt); 3830 lck_mtx_lock(rnh_lock); 3831 RT_LOCK(rt); 3832 rt_set_gwroute(rt, rt_key(rt), gwrt); 3833 RT_UNLOCK(rt); 3834 lck_mtx_unlock(rnh_lock); 3835 } else { 3836 RT_UNLOCK(rt); 3837 } 3838 } else { 3839 RT_UNLOCK(rt); 3840 } 3841} 3842 3843static void 3844rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) 3845{ 3846 VERIFY(rt_key(rt)->sa_family == AF_INET); 3847 3848 if (ds != NULL) 3849 (void) inet_ntop(AF_INET, 3850 &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen); 3851 if (gs != NULL) { 3852 if (rt->rt_flags & RTF_GATEWAY) { 3853 (void) inet_ntop(AF_INET, 3854 &SIN(rt->rt_gateway)->sin_addr.s_addr, gs, gslen); 3855 } else if (rt->rt_ifp != NULL) { 3856 snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit); 3857 } else { 3858 snprintf(gs, gslen, "%s", "link"); 3859 } 3860 } 3861} 3862 3863#if INET6 3864static void 3865rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) 3866{ 3867 VERIFY(rt_key(rt)->sa_family == AF_INET6); 3868 3869 if (ds != NULL) 3870 (void) inet_ntop(AF_INET6, 3871 &SIN6(rt_key(rt))->sin6_addr, ds, dslen); 3872 if (gs != NULL) { 3873 if (rt->rt_flags & RTF_GATEWAY) { 3874 (void) inet_ntop(AF_INET6, 3875 &SIN6(rt->rt_gateway)->sin6_addr, gs, gslen); 3876 } else if (rt->rt_ifp != NULL) { 3877 snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit); 3878 } else { 3879 snprintf(gs, gslen, "%s", "link"); 3880 } 3881 } 3882} 3883#endif /* INET6 */ 3884 3885 3886void 3887rt_str(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) 3888{ 3889 switch (rt_key(rt)->sa_family) { 3890 case AF_INET: 3891 rt_str4(rt, ds, dslen, gs, gslen); 3892 break; 3893#if INET6 3894 case 
AF_INET6: 3895 rt_str6(rt, ds, dslen, gs, gslen); 3896 break; 3897#endif /* INET6 */ 3898 default: 3899 if (ds != NULL) 3900 bzero(ds, dslen); 3901 if (gs != NULL) 3902 bzero(gs, gslen); 3903 break; 3904 } 3905} 3906