/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright 1994, 1995 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * This code does two things necessary for the enhanced TCP metrics to
 * function in a useful manner:
 *    1) It marks all non-host routes as `cloning', thus ensuring that
 *       every actual reference to such a route actually gets turned
 *       into a reference to a host route to the specific destination
 *       requested.
 *    2) When such routes lose all their references, it arranges for them
 *       to be deleted in some random collection of circumstances, so that
 *       a large quantity of stale routing data is not kept in kernel
 *       memory indefinitely.  See in_rtqtimo() below for the exact
 *       mechanism.
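 *
 * For example, a connected TCP socket whose packets travel via a cloning
 * network route ends up referencing a cloned host route for its peer,
 * and that host route is where the per-destination metrics are kept.
 * Once the last reference is dropped, in_clsroute() below marks the host
 * route with RTPRF_OURS and arms an expiration timer, after which
 * in_rtqtimo()/in_rtqkill() eventually delete it.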
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <sys/mcache.h>
#include <kern/locks.h>

#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_arp.h>

extern int tvtohz(struct timeval *);

static int in_rtqtimo_run;		/* in_rtqtimo is scheduled to run */
static void in_rtqtimo(void *);
static void in_sched_rtqtimo(struct timeval *);

static struct radix_node *in_addroute(void *, void *, struct radix_node_head *,
    struct radix_node *);
static struct radix_node *in_deleteroute(void *, void *,
    struct radix_node_head *);
static struct radix_node *in_matroute(void *, struct radix_node_head *);
static struct radix_node *in_matroute_args(void *, struct radix_node_head *,
    rn_matchf_t *f, void *);
static void in_clsroute(struct radix_node *, struct radix_node_head *);
static int in_rtqkill(struct radix_node *, void *);

static int in_ifadownkill(struct radix_node *, void *);

#define	RTPRF_OURS	RTF_PROTO3	/* set on routes we manage */

/*
 * Do what we need to do when inserting a route.
 */
static struct radix_node *
in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node *treenodes)
{
	struct rtentry *rt = (struct rtentry *)treenodes;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)rt_key(rt);
	struct radix_node *ret;
	char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
	uint32_t flags = rt->rt_flags;
	boolean_t verbose = (rt_verbose > 1);

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (verbose)
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

	/*
	 * For IP, all unicast non-host routes are automatically cloning.
	 */
	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
		rt->rt_flags |= RTF_MULTICAST;

	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
		rt->rt_flags |= RTF_PRCLONING;

	/*
	 * A little bit of help for both IP output and input:
	 * For host routes, we make sure that RTF_BROADCAST
	 * is set for anything that looks like a broadcast address.
	 * This way, we can avoid an expensive call to in_broadcast()
	 * in ip_output() most of the time (because the route passed
	 * to ip_output() is almost always a host route).
	 *
	 * We also do the same for local addresses, with the thought
	 * that this might one day be used to speed up ip_input().
	 *
	 * We also mark routes to multicast addresses as such, because
	 * it's easy to do and might be useful (but this is much more
	 * dubious since it's so easy to inspect the address).  (This
	 * is done above.)
	 */
	if (rt->rt_flags & RTF_HOST) {
		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
			rt->rt_flags |= RTF_BROADCAST;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(rt);
			IFA_LOCK_SPIN(rt->rt_ifa);
			if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
			    sin->sin_addr.s_addr)
				rt->rt_flags |= RTF_LOCAL;
			IFA_UNLOCK(rt->rt_ifa);
		}
	}

	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
	    rt->rt_ifp)
		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;

	ret = rn_addroute(v_arg, n_arg, head, treenodes);
	if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a host route, but can't.
		 * Find out if it is because of an
		 * ARP entry and delete it if so.
		 */
		rt2 = rtalloc1_scoped_locked(rt_key(rt), 0,
		    RTF_CLONING | RTF_PRCLONING, sin_get_ifscope(rt_key(rt)));
		if (rt2 != NULL) {
			char dbufc[MAX_IPv4_STR_LEN];

			RT_LOCK(rt2);
			if (verbose)
				rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0);

			if ((rt2->rt_flags & RTF_LLINFO) &&
			    (rt2->rt_flags & RTF_HOST) &&
			    rt2->rt_gateway != NULL &&
			    rt2->rt_gateway->sa_family == AF_LINK) {
				if (verbose) {
					log(LOG_DEBUG, "%s: unable to insert "
					    "route to %s;%s, flags=%b, due to "
					    "existing ARP route %s->%s "
					    "flags=%b, attempting to delete\n",
					    __func__, dbuf,
					    (rt->rt_ifp != NULL) ?
					    rt->rt_ifp->if_xname : "",
					    rt->rt_flags, RTF_BITS, dbufc,
					    (rt2->rt_ifp != NULL) ?
					    rt2->rt_ifp->if_xname : "",
					    rt2->rt_flags, RTF_BITS);
				}
				/*
				 * Safe to drop rt_lock and use rt_key,
				 * rt_gateway, since holding rnh_lock here
				 * prevents another thread from calling
				 * rt_setgate() on this route.
				 */
				RT_UNLOCK(rt2);
				(void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
				    rt2->rt_gateway, rt_mask(rt2),
				    rt2->rt_flags, NULL);
				ret = rn_addroute(v_arg, n_arg, head,
				    treenodes);
			} else {
				RT_UNLOCK(rt2);
			}
			rtfree_locked(rt2);
		}
	}

	if (!verbose)
		goto done;

	if (ret != NULL) {
		if (flags != rt->rt_flags) {
			log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
			    "oflags=%b, flags=%b\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", flags, RTF_BITS,
			    rt->rt_flags, RTF_BITS);
		} else {
			log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
	} else {
		log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, "
		    "flags=%b, already exists\n", __func__, dbuf, gbuf,
		    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
		    rt->rt_flags, RTF_BITS);
	}
done:
	return (ret);
}

static struct radix_node *
in_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
	struct radix_node *rn;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	rn = rn_delete(v_arg, netmask_arg, head);
	if (rt_verbose > 1 && rn != NULL) {
		char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
		struct rtentry *rt = (struct rtentry *)rn;

		RT_LOCK(rt);
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
		log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, "
		    "flags=%b\n", __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
		    rt->rt_ifp->if_xname : "", rt->rt_flags, RTF_BITS);
		RT_UNLOCK(rt);
	}
	return (rn);
}

/*
 * Validate (unexpire) an expiring AF_INET route.
 */
struct radix_node *
in_validate(struct radix_node *rn)
{
	struct rtentry *rt = (struct rtentry *)rn;

	RT_LOCK_ASSERT_HELD(rt);

	/* This is first reference? */
	if (rt->rt_refcnt == 0) {
		if (rt_verbose > 2) {
			char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];

			rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
			log(LOG_DEBUG, "%s: route to %s->%s->%s validated, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}

		/*
		 * It's one of ours; unexpire it.  If the timer is already
		 * scheduled, let it run later as it won't re-arm itself
		 * if there's nothing to do.
		 */
		if (rt->rt_flags & RTPRF_OURS) {
			rt->rt_flags &= ~RTPRF_OURS;
			rt_setexpire(rt, 0);
		}
	}
	return (rn);
}

/*
 * Similar to in_matroute_args except without the leaf-matching parameters.
 */
static struct radix_node *
in_matroute(void *v_arg, struct radix_node_head *head)
{
	return (in_matroute_args(v_arg, head, NULL, NULL));
}

/*
 * This code is the inverse of in_clsroute: on first reference, if we
 * were managing the route, stop doing so and set the expiration timer
 * back off again.
 */
static struct radix_node *
in_matroute_args(void *v_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	struct radix_node *rn = rn_match_args(v_arg, head, f, w);

	if (rn != NULL) {
		RT_LOCK_SPIN((struct rtentry *)rn);
		in_validate(rn);
		RT_UNLOCK((struct rtentry *)rn);
	}
	return (rn);
}

/* one hour is ``really old'' */
static uint32_t rtq_reallyold = 60*60;
SYSCTL_UINT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire,
    CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0,
    "Default expiration time on dynamically learned routes");

/* never automatically crank down to less */
static uint32_t rtq_minreallyold = 10;
SYSCTL_UINT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire,
    CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0,
    "Minimum time to attempt to hold onto dynamically learned routes");

/* 128 cached routes is ``too many'' */
static uint32_t rtq_toomany = 128;
SYSCTL_UINT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache,
    CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0,
    "Upper limit on dynamically learned routes");

/*
 * On last reference drop, mark the route as belonging to us so that it can
 * be timed out.
 */
static void
in_clsroute(struct radix_node *rn, struct radix_node_head *head)
{
#pragma unused(head)
	char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
	struct rtentry *rt = (struct rtentry *)rn;
	boolean_t verbose = (rt_verbose > 1);

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (!(rt->rt_flags & RTF_UP))
		return;		/* prophylactic measures */

	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
		return;

	if (rt->rt_flags & RTPRF_OURS)
		return;

	if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
		return;

	if (verbose)
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

	/*
	 * Delete the route immediately if RTF_DELCLONE is set or
	 * if route caching is disabled (rtq_reallyold set to 0).
	 * Otherwise, let it expire and be deleted by in_rtqkill().
	 */
	if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
		int err;

		if (verbose) {
			log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
		/*
		 * Delete the route from the radix tree but since we are
		 * called when the route's reference count is 0, don't
		 * deallocate it until we return from this routine by
		 * telling rtrequest that we're interested in it.
		 * Safe to drop rt_lock and use rt_key, rt_gateway since
		 * holding rnh_lock here prevents another thread from
		 * calling rt_setgate() on this route.
		 */
		RT_UNLOCK(rt);
		err = rtrequest_locked(RTM_DELETE, rt_key(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt);
		if (err == 0) {
			/* Now let the caller free it */
			RT_LOCK(rt);
			RT_REMREF_LOCKED(rt);
		} else {
			RT_LOCK(rt);
			if (!verbose)
				rt_str(rt, dbuf, sizeof (dbuf),
				    gbuf, sizeof (gbuf));
			log(LOG_ERR, "%s: error deleting route to "
			    "%s->%s->%s, flags=%b, err=%d\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", rt->rt_flags,
			    RTF_BITS, err);
		}
	} else {
		uint64_t timenow;

		timenow = net_uptime();
		rt->rt_flags |= RTPRF_OURS;
		rt_setexpire(rt, timenow + rtq_reallyold);

		if (verbose) {
			log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, "
			    "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS, rt->rt_expire - timenow);
		}

		/* We have at least one entry; arm the timer if not already */
		in_sched_rtqtimo(NULL);
	}
}

struct rtqk_arg {
	struct radix_node_head *rnh;
	int updating;
	int draining;
	uint32_t killed;
	uint32_t found;
	uint64_t nextstop;
};

/*
 * Get rid of old routes.  When draining, this deletes everything, even when
 * the timeout is not expired yet.  When updating, this makes sure that
 * nothing has a timeout longer than the current value of rtq_reallyold.
 */
static int
in_rtqkill(struct radix_node *rn, void *rock)
{
	struct rtqk_arg *ap = rock;
	struct rtentry *rt = (struct rtentry *)rn;
	boolean_t verbose = (rt_verbose > 1);
	uint64_t timenow;
	int err;

	timenow = net_uptime();
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (rt->rt_flags & RTPRF_OURS) {
		char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];

		if (verbose)
			rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

		ap->found++;
		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
		if (ap->draining || rt->rt_expire <= timenow) {
			if (rt->rt_refcnt > 0) {
				panic("%s: route %p marked with RTPRF_OURS "
				    "with non-zero refcnt (%u)", __func__,
				    rt, rt->rt_refcnt);
				/* NOTREACHED */
			}
			if (verbose) {
				log(LOG_DEBUG, "%s: deleting route to "
				    "%s->%s->%s, flags=%b, draining=%d\n",
				    __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
				    rt->rt_ifp->if_xname : "", rt->rt_flags,
				    RTF_BITS, ap->draining);
			}
			RT_ADDREF_LOCKED(rt);	/* for us to free below */
			/*
			 * Delete this route since we're done with it;
			 * the route may be freed afterwards, so we
			 * can no longer refer to 'rt' upon returning
			 * from rtrequest().
			 * Safe to drop rt_lock and
			 * use rt_key, rt_gateway since holding rnh_lock
			 * here prevents another thread from calling
			 * rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			err = rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
			if (err != 0) {
				RT_LOCK(rt);
				if (!verbose)
					rt_str(rt, dbuf, sizeof (dbuf),
					    gbuf, sizeof (gbuf));
				log(LOG_ERR, "%s: error deleting route to "
				    "%s->%s->%s, flags=%b, err=%d\n", __func__,
				    dbuf, gbuf, (rt->rt_ifp != NULL) ?
				    rt->rt_ifp->if_xname : "", rt->rt_flags,
				    RTF_BITS, err);
				RT_UNLOCK(rt);
			} else {
				ap->killed++;
			}
			rtfree_locked(rt);
		} else {
			uint64_t expire = (rt->rt_expire - timenow);

			if (ap->updating && expire > rtq_reallyold) {
				rt_setexpire(rt, timenow + rtq_reallyold);
				if (verbose) {
					log(LOG_DEBUG, "%s: route to "
					    "%s->%s->%s, flags=%b, adjusted "
					    "expire=T+%u (was T+%u)\n",
					    __func__, dbuf, gbuf,
					    (rt->rt_ifp != NULL) ?
					    rt->rt_ifp->if_xname : "",
					    rt->rt_flags, RTF_BITS,
					    (rt->rt_expire - timenow), expire);
				}
			}
			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
			RT_UNLOCK(rt);
		}
	} else {
		RT_UNLOCK(rt);
	}

	return (0);
}

#define	RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;

static void
in_rtqtimo(void *targ)
{
#pragma unused(targ)
	struct radix_node_head *rnh;
	struct rtqk_arg arg;
	struct timeval atv;
	static uint64_t last_adjusted_timeout = 0;
	boolean_t verbose = (rt_verbose > 1);
	uint64_t timenow;
	uint32_t ours;

	lck_mtx_lock(rnh_lock);
	rnh = rt_tables[AF_INET];
	VERIFY(rnh != NULL);

	/* Get the timestamp after we acquire the lock for better accuracy */
	timenow = net_uptime();
	if (verbose) {
		log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n",
		    __func__, rtq_timeout);
	}
	bzero(&arg, sizeof (arg));
	arg.rnh = rnh;
	arg.nextstop = timenow + rtq_timeout;
	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
	if (verbose) {
		log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__,
		    arg.found, arg.killed);
	}
	/*
	 * Attempt to be somewhat dynamic about this:
	 * If there are ``too many'' routes sitting around taking up space,
	 * then crank down the timeout, and see if we can't make some more
	 * go away.  However, we make sure that we will never adjust more
	 * than once in rtq_timeout seconds, to keep from cranking down too
	 * hard.
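	 *
	 * For example, starting from the default rtq_reallyold of 3600
	 * seconds, successive adjustments of 2 * rtq_reallyold / 3 give
	 * 2400, 1600, 1066, ... seconds, bounded below by rtq_minreallyold.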
	 */
	ours = (arg.found - arg.killed);
	if (ours > rtq_toomany &&
	    ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) &&
	    rtq_reallyold > rtq_minreallyold) {
		rtq_reallyold = 2 * rtq_reallyold / 3;
		if (rtq_reallyold < rtq_minreallyold)
			rtq_reallyold = rtq_minreallyold;

		last_adjusted_timeout = timenow;
		if (verbose) {
			log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d "
			    "seconds\n", __func__, rtq_reallyold);
		}
		arg.found = arg.killed = 0;
		arg.updating = 1;
		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
	}

	atv.tv_usec = 0;
	atv.tv_sec = arg.nextstop - timenow;
	/* re-arm the timer only if there's work to do */
	in_rtqtimo_run = 0;
	if (ours > 0)
		in_sched_rtqtimo(&atv);
	else if (verbose)
		log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
	lck_mtx_unlock(rnh_lock);
}

static void
in_sched_rtqtimo(struct timeval *atv)
{
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	if (!in_rtqtimo_run) {
		struct timeval tv;

		if (atv == NULL) {
			tv.tv_usec = 0;
			tv.tv_sec = MAX(rtq_timeout / 10, 1);
			atv = &tv;
		}
		if (rt_verbose > 1) {
			log(LOG_DEBUG, "%s: timer scheduled in "
			    "T+%llus.%lluu\n", __func__,
			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
		}
		in_rtqtimo_run = 1;
		timeout(in_rtqtimo, NULL, tvtohz(atv));
	}
}

void
in_rtqdrain(void)
{
	struct radix_node_head *rnh;
	struct rtqk_arg arg;

	if (rt_verbose > 1)
		log(LOG_DEBUG, "%s: draining routes\n", __func__);

	lck_mtx_lock(rnh_lock);
	rnh = rt_tables[AF_INET];
	VERIFY(rnh != NULL);
	bzero(&arg, sizeof (arg));
	arg.rnh = rnh;
	arg.draining = 1;
	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
	lck_mtx_unlock(rnh_lock);
}

/*
 * Initialize our routing tree.
 */
int
in_inithead(void **head, int off)
{
	struct radix_node_head *rnh;

	/* If called from route_init(), make sure it is exactly once */
	VERIFY(head != (void **)&rt_tables[AF_INET] || *head == NULL);

	if (!rn_inithead(head, off))
		return (0);

	/*
	 * We can get here from nfs_subs.c as well, in which case this
	 * won't be for the real routing table and thus we're done;
	 * this also takes care of the case when we're called more than
	 * once from anywhere but route_init().
	 */
	if (head != (void **)&rt_tables[AF_INET])
		return (1);	/* only do this for the real routing table */

	rnh = *head;
	rnh->rnh_addaddr = in_addroute;
	rnh->rnh_deladdr = in_deleteroute;
	rnh->rnh_matchaddr = in_matroute;
	rnh->rnh_matchaddr_args = in_matroute_args;
	rnh->rnh_close = in_clsroute;
	return (1);
}

/*
 * This zaps old routes when the interface goes down or interface
 * address is deleted.  In the latter case, it deletes static routes
 * that point to this address.  If we don't do this, we may end up
 * using the old address in the future.  The ones we always want to
 * get rid of are things like ARP entries, since the user might down
 * the interface, walk over to a completely different network, and
 * plug back in.
 */
struct in_ifadown_arg {
	struct radix_node_head *rnh;
	struct ifaddr *ifa;
	int del;
};

static int
in_ifadownkill(struct radix_node *rn, void *xap)
{
	char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
	struct in_ifadown_arg *ap = xap;
	struct rtentry *rt = (struct rtentry *)rn;
	boolean_t verbose = (rt_verbose != 0);
	int err;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (rt->rt_ifa == ap->ifa &&
	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
		if (verbose) {
			log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
		RT_ADDREF_LOCKED(rt);	/* for us to free below */
		/*
		 * We need to disable the automatic prune that happens
		 * in this case in rtrequest() because it will blow
		 * away the pointers that rn_walktree() needs in order
		 * to continue our descent.  We will end up deleting all
		 * the routes that rtrequest() would have in any case,
		 * so that behavior is not needed there.  Safe to drop
		 * rt_lock and use rt_key, rt_gateway, since holding
		 * rnh_lock here prevents another thread from calling
		 * rt_setgate() on this route.
		 */
		rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
		RT_UNLOCK(rt);
		err = rtrequest_locked(RTM_DELETE, rt_key(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		if (err != 0) {
			RT_LOCK(rt);
			if (!verbose)
				rt_str(rt, dbuf, sizeof (dbuf),
				    gbuf, sizeof (gbuf));
			log(LOG_ERR, "%s: error deleting route to "
			    "%s->%s->%s, flags=%b, err=%d\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", rt->rt_flags,
			    RTF_BITS, err);
			RT_UNLOCK(rt);
		}
		rtfree_locked(rt);
	} else {
		RT_UNLOCK(rt);
	}
	return (0);
}

int
in_ifadown(struct ifaddr *ifa, int delete)
{
	struct in_ifadown_arg arg;
	struct radix_node_head *rnh;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Holding rnh_lock here prevents the possibility of
	 * ifa from changing (e.g. in_ifinit), so it is safe
	 * to access its ifa_addr without locking.
	 */
	if (ifa->ifa_addr->sa_family != AF_INET)
		return (1);

	/* trigger route cache reevaluation */
	routegenid_inet_update();

	arg.rnh = rnh = rt_tables[AF_INET];
	arg.ifa = ifa;
	arg.del = delete;
	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
	IFA_LOCK_SPIN(ifa);
	ifa->ifa_flags &= ~IFA_ROUTE;
	IFA_UNLOCK(ifa);
	return (0);
}
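
/*
 * Example: the expiration knobs declared above are exported via sysctl
 * under net.inet.ip (rtexpire for rtq_reallyold, rtminexpire for
 * rtq_minreallyold, rtmaxcache for rtq_toomany), so they can be inspected
 * and tuned from userland, e.g.:
 *
 *	sysctl net.inet.ip.rtexpire		# current expiration time
 *	sysctl -w net.inet.ip.rtexpire=120	# expire cloned routes sooner
 *	sysctl -w net.inet.ip.rtexpire=0	# disable route caching; see
 *						# in_clsroute() above
 */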