1/* 2 * Copyright (c) 2003-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* 30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the project nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 */ 57 58/* 59 * Copyright 1994, 1995 Massachusetts Institute of Technology 60 * 61 * Permission to use, copy, modify, and distribute this software and 62 * its documentation for any purpose and without fee is hereby 63 * granted, provided that both the above copyright notice and this 64 * permission notice appear in all copies, that both the above 65 * copyright notice and this permission notice appear in all 66 * supporting documentation, and that the name of M.I.T. not be used 67 * in advertising or publicity pertaining to distribution of the 68 * software without specific, written prior permission. M.I.T. makes 69 * no representations about the suitability of this software for any 70 * purpose. It is provided "as is" without express or implied 71 * warranty. 72 * 73 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 74 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 75 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 76 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 77 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 78 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 79 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 80 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 81 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 82 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 83 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 84 * SUCH DAMAGE. 85 * 86 */ 87 88/* 89 * This code does two things necessary for the enhanced TCP metrics to 90 * function in a useful manner: 91 * 1) It marks all non-host routes as `cloning', thus ensuring that 92 * every actual reference to such a route actually gets turned 93 * into a reference to a host route to the specific destination 94 * requested. 95 * 2) When such routes lose all their references, it arranges for them 96 * to be deleted in some random collection of circumstances, so that 97 * a large quantity of stale routing data is not kept in kernel memory 98 * indefinitely. See in6_rtqtimo() below for the exact mechanism. 99 */ 100 101#include <sys/param.h> 102#include <sys/systm.h> 103#include <sys/kernel.h> 104#include <sys/sysctl.h> 105#include <kern/queue.h> 106#include <sys/socket.h> 107#include <sys/socketvar.h> 108#include <sys/protosw.h> 109#include <sys/mbuf.h> 110#include <sys/syslog.h> 111#include <sys/mcache.h> 112#include <kern/lock.h> 113 114#include <net/if.h> 115#include <net/route.h> 116#include <netinet/in.h> 117#include <netinet/ip_var.h> 118#include <netinet/in_var.h> 119 120#include <netinet/ip6.h> 121#include <netinet6/ip6_var.h> 122 123#include <netinet/icmp6.h> 124 125#include <netinet/tcp.h> 126#include <netinet/tcp_seq.h> 127#include <netinet/tcp_timer.h> 128#include <netinet/tcp_var.h> 129 130extern int tvtohz(struct timeval *); 131 132static int in6_rtqtimo_run; /* in6_rtqtimo is scheduled to run */ 133static void in6_rtqtimo(void *); 134static void in6_sched_rtqtimo(struct timeval *); 135 136static struct radix_node *in6_addroute(void *, void *, struct radix_node_head *, 137 struct radix_node *); 138static struct radix_node *in6_deleteroute(void *, void *, 139 struct radix_node_head *); 140static struct radix_node *in6_matroute(void *, struct radix_node_head *); 141static struct radix_node *in6_matroute_args(void *, struct radix_node_head *, 142 rn_matchf_t *, void *); 143static void in6_clsroute(struct radix_node *, struct radix_node_head *); 144static int in6_rtqkill(struct radix_node *, void *); 145 146#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ 147 148/* 149 * Accessed by in6_addroute(), in6_deleteroute() and in6_rtqkill(), during 150 * which the routing lock (rnh_lock) is held and thus protects the variable. 151 */ 152static int in6dynroutes; 153 154/* 155 * Do what we need to do when inserting a route. 156 */ 157static struct radix_node * 158in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 159 struct radix_node *treenodes) 160{ 161 struct rtentry *rt = (struct rtentry *)treenodes; 162 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)rt_key(rt); 163 struct radix_node *ret; 164 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; 165 uint32_t flags = rt->rt_flags; 166 boolean_t verbose = (rt_verbose > 1); 167 168 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 169 RT_LOCK_ASSERT_HELD(rt); 170 171 if (verbose) 172 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 173 174 /* 175 * If this is a dynamic route (which is created via Redirect) and 176 * we already have the maximum acceptable number of such route entries, 177 * reject creating a new one. We could initiate garbage collection to 178 * make available space right now, but the benefit would probably not 179 * be worth the cleaning overhead; we only have to endure a slightly 180 * suboptimal path even without the redirected route. 181 */ 182 if ((rt->rt_flags & RTF_DYNAMIC) && 183 ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes) 184 return (NULL); 185 186 /* 187 * For IPv6, all unicast non-host routes are automatically cloning. 188 */ 189 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 190 rt->rt_flags |= RTF_MULTICAST; 191 192 if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) 193 rt->rt_flags |= RTF_PRCLONING; 194 195 /* 196 * A little bit of help for both IPv6 output and input: 197 * For local addresses, we make sure that RTF_LOCAL is set, 198 * with the thought that this might one day be used to speed up 199 * ip_input(). 200 * 201 * We also mark routes to multicast addresses as such, because 202 * it's easy to do and might be useful (but this is much more 203 * dubious since it's so easy to inspect the address). (This 204 * is done above.) 205 * 206 * XXX 207 * should elaborate the code. 208 */ 209 if (rt->rt_flags & RTF_HOST) { 210 IFA_LOCK_SPIN(rt->rt_ifa); 211 if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)-> 212 sin6_addr, &sin6->sin6_addr)) { 213 rt->rt_flags |= RTF_LOCAL; 214 } 215 IFA_UNLOCK(rt->rt_ifa); 216 } 217 218 if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && 219 rt->rt_ifp) 220 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; 221 222 ret = rn_addroute(v_arg, n_arg, head, treenodes); 223 if (ret == NULL && (rt->rt_flags & RTF_HOST)) { 224 struct rtentry *rt2; 225 /* 226 * We are trying to add a host route, but can't. 227 * Find out if it is because of an 228 * ND6 entry and delete it if so. 229 */ 230 rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0, 231 RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt))); 232 if (rt2 != NULL) { 233 char dbufc[MAX_IPv6_STR_LEN]; 234 235 RT_LOCK(rt2); 236 if (verbose) 237 rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0); 238 239 if ((rt2->rt_flags & RTF_LLINFO) && 240 (rt2->rt_flags & RTF_HOST) && 241 rt2->rt_gateway != NULL && 242 rt2->rt_gateway->sa_family == AF_LINK) { 243 if (verbose) { 244 log(LOG_DEBUG, "%s: unable to insert " 245 "route to %s:%s, flags=%b, due to " 246 "existing ND6 route %s->%s " 247 "flags=%b, attempting to delete\n", 248 __func__, dbuf, 249 (rt->rt_ifp != NULL) ? 250 rt->rt_ifp->if_xname : "", 251 rt->rt_flags, RTF_BITS, 252 dbufc, (rt2->rt_ifp != NULL) ? 253 rt2->rt_ifp->if_xname : "", 254 rt2->rt_flags, RTF_BITS); 255 } 256 /* 257 * Safe to drop rt_lock and use rt_key, 258 * rt_gateway, since holding rnh_lock here 259 * prevents another thread from calling 260 * rt_setgate() on this route. 261 */ 262 RT_UNLOCK(rt2); 263 (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), 264 rt2->rt_gateway, rt_mask(rt2), 265 rt2->rt_flags, NULL); 266 ret = rn_addroute(v_arg, n_arg, head, 267 treenodes); 268 } else { 269 RT_UNLOCK(rt2); 270 } 271 rtfree_locked(rt2); 272 } 273 } else if (ret == NULL && (rt->rt_flags & RTF_CLONING)) { 274 struct rtentry *rt2; 275 /* 276 * We are trying to add a net route, but can't. 277 * The following case should be allowed, so we'll make a 278 * special check for this: 279 * Two IPv6 addresses with the same prefix is assigned 280 * to a single interrface. 281 * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) 282 * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) 283 * In this case, (*1) and (*2) want to add the same 284 * net route entry, 3ffe:0501:: -> if0. 285 * This case should not raise an error. 286 */ 287 rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0, 288 RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt))); 289 if (rt2 != NULL) { 290 RT_LOCK(rt2); 291 if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST| 292 RTF_GATEWAY)) == RTF_CLONING && 293 rt2->rt_gateway && 294 rt2->rt_gateway->sa_family == AF_LINK && 295 rt2->rt_ifp == rt->rt_ifp) { 296 ret = rt2->rt_nodes; 297 } 298 RT_UNLOCK(rt2); 299 rtfree_locked(rt2); 300 } 301 } 302 303 if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC)) 304 in6dynroutes++; 305 306 if (!verbose) 307 goto done; 308 309 if (ret != NULL) { 310 if (flags != rt->rt_flags) { 311 log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, " 312 "oflags=%b, flags=%b\n", __func__, 313 dbuf, gbuf, (rt->rt_ifp != NULL) ? 314 rt->rt_ifp->if_xname : "", flags, RTF_BITS, 315 rt->rt_flags, RTF_BITS); 316 } else { 317 log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, " 318 "flags=%b\n", __func__, dbuf, gbuf, 319 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", 320 rt->rt_flags, RTF_BITS); 321 } 322 } else { 323 log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, " 324 "flags=%b, already exists\n", __func__, dbuf, gbuf, 325 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", 326 rt->rt_flags, RTF_BITS); 327 } 328done: 329 return (ret); 330} 331 332static struct radix_node * 333in6_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head) 334{ 335 struct radix_node *rn; 336 337 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 338 339 rn = rn_delete(v_arg, netmask_arg, head); 340 if (rn != NULL) { 341 struct rtentry *rt = (struct rtentry *)rn; 342 343 RT_LOCK(rt); 344 if (rt->rt_flags & RTF_DYNAMIC) 345 in6dynroutes--; 346 if (rt_verbose > 1) { 347 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; 348 349 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 350 log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, " 351 "flags=%b\n", __func__, dbuf, gbuf, 352 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", 353 rt->rt_flags, RTF_BITS); 354 } 355 RT_UNLOCK(rt); 356 } 357 return (rn); 358} 359 360/* 361 * Validate (unexpire) an expiring AF_INET6 route. 362 */ 363struct radix_node * 364in6_validate(struct radix_node *rn) 365{ 366 struct rtentry *rt = (struct rtentry *)rn; 367 368 RT_LOCK_ASSERT_HELD(rt); 369 370 /* This is first reference? */ 371 if (rt->rt_refcnt == 0) { 372 if (rt_verbose > 2) { 373 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; 374 375 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 376 log(LOG_DEBUG, "%s: route to %s->%s->%s validated, " 377 "flags=%b\n", __func__, dbuf, gbuf, 378 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", 379 rt->rt_flags, RTF_BITS); 380 } 381 382 /* 383 * It's one of ours; unexpire it. If the timer is already 384 * scheduled, let it run later as it won't re-arm itself 385 * if there's nothing to do. 386 */ 387 if (rt->rt_flags & RTPRF_OURS) { 388 rt->rt_flags &= ~RTPRF_OURS; 389 rt_setexpire(rt, 0); 390 } 391 } 392 return (rn); 393} 394 395/* 396 * Similar to in6_matroute_args except without the leaf-matching parameters. 397 */ 398static struct radix_node * 399in6_matroute(void *v_arg, struct radix_node_head *head) 400{ 401 return (in6_matroute_args(v_arg, head, NULL, NULL)); 402} 403 404/* 405 * This code is the inverse of in6_clsroute: on first reference, if we 406 * were managing the route, stop doing so and set the expiration timer 407 * back off again. 408 */ 409static struct radix_node * 410in6_matroute_args(void *v_arg, struct radix_node_head *head, 411 rn_matchf_t *f, void *w) 412{ 413 struct radix_node *rn = rn_match_args(v_arg, head, f, w); 414 415 if (rn != NULL) { 416 RT_LOCK_SPIN((struct rtentry *)rn); 417 in6_validate(rn); 418 RT_UNLOCK((struct rtentry *)rn); 419 } 420 return (rn); 421} 422 423SYSCTL_DECL(_net_inet6_ip6); 424 425/* one hour is ``really old'' */ 426static uint32_t rtq_reallyold = 60*60; 427SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, 428 CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0, ""); 429 430/* never automatically crank down to less */ 431static uint32_t rtq_minreallyold = 10; 432SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, 433 CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0, ""); 434 435/* 128 cached routes is ``too many'' */ 436static uint32_t rtq_toomany = 128; 437SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, 438 CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0, ""); 439 440/* 441 * On last reference drop, mark the route as belong to us so that it can be 442 * timed out. 443 */ 444static void 445in6_clsroute(struct radix_node *rn, struct radix_node_head *head) 446{ 447#pragma unused(head) 448 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; 449 struct rtentry *rt = (struct rtentry *)rn; 450 boolean_t verbose = (rt_verbose > 1); 451 452 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 453 RT_LOCK_ASSERT_HELD(rt); 454 455 if (!(rt->rt_flags & RTF_UP)) 456 return; /* prophylactic measures */ 457 458 if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) 459 return; 460 461 if (rt->rt_flags & RTPRF_OURS) 462 return; 463 464 if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC))) 465 return; 466 467 if (verbose) 468 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 469 470 /* 471 * Delete the route immediately if RTF_DELCLONE is set or 472 * if route caching is disabled (rtq_reallyold set to 0). 473 * Otherwise, let it expire and be deleted by in6_rtqkill(). 474 */ 475 if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) { 476 int err; 477 478 if (verbose) { 479 log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, " 480 "flags=%b\n", __func__, dbuf, gbuf, 481 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", 482 rt->rt_flags, RTF_BITS); 483 } 484 /* 485 * Delete the route from the radix tree but since we are 486 * called when the route's reference count is 0, don't 487 * deallocate it until we return from this routine by 488 * telling rtrequest that we're interested in it. 489 * Safe to drop rt_lock and use rt_key, rt_gateway, 490 * since holding rnh_lock here prevents another thread 491 * from calling rt_setgate() on this route. 492 */ 493 RT_UNLOCK(rt); 494 err = rtrequest_locked(RTM_DELETE, rt_key(rt), 495 rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt); 496 if (err == 0) { 497 /* Now let the caller free it */ 498 RT_LOCK(rt); 499 RT_REMREF_LOCKED(rt); 500 } else { 501 RT_LOCK(rt); 502 if (!verbose) 503 rt_str(rt, dbuf, sizeof (dbuf), 504 gbuf, sizeof (gbuf)); 505 log(LOG_ERR, "%s: error deleting route to " 506 "%s->%s->%s, flags=%b, err=%d\n", __func__, 507 dbuf, gbuf, (rt->rt_ifp != NULL) ? 508 rt->rt_ifp->if_xname : "", rt->rt_flags, 509 RTF_BITS, err); 510 } 511 } else { 512 uint64_t timenow; 513 514 timenow = net_uptime(); 515 rt->rt_flags |= RTPRF_OURS; 516 rt_setexpire(rt, timenow + rtq_reallyold); 517 518 if (verbose) { 519 log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, " 520 "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf, 521 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", 522 rt->rt_flags, RTF_BITS, rt->rt_expire - timenow); 523 } 524 525 /* We have at least one entry; arm the timer if not already */ 526 in6_sched_rtqtimo(NULL); 527 } 528} 529 530struct rtqk_arg { 531 struct radix_node_head *rnh; 532 int updating; 533 int draining; 534 uint32_t killed; 535 uint32_t found; 536 uint64_t nextstop; 537}; 538 539/* 540 * Get rid of old routes. When draining, this deletes everything, even when 541 * the timeout is not expired yet. This also applies if the route is dynamic 542 * and there are sufficiently large number of such routes (more than a half of 543 * maximum). When updating, this makes sure that nothing has a timeout longer 544 * than the current value of rtq_reallyold. 545 */ 546static int 547in6_rtqkill(struct radix_node *rn, void *rock) 548{ 549 struct rtqk_arg *ap = rock; 550 struct rtentry *rt = (struct rtentry *)rn; 551 boolean_t verbose = (rt_verbose > 1); 552 uint64_t timenow; 553 int err; 554 555 timenow = net_uptime(); 556 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 557 558 RT_LOCK(rt); 559 if (rt->rt_flags & RTPRF_OURS) { 560 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; 561 562 if (verbose) 563 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); 564 565 ap->found++; 566 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); 567 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); 568 if (ap->draining || rt->rt_expire <= timenow || 569 ((rt->rt_flags & RTF_DYNAMIC) && ip6_maxdynroutes >= 0 && 570 in6dynroutes > ip6_maxdynroutes / 2)) { 571 if (rt->rt_refcnt > 0) { 572 panic("%s: route %p marked with RTPRF_OURS " 573 "with non-zero refcnt (%u)", __func__, 574 rt, rt->rt_refcnt); 575 /* NOTREACHED */ 576 } 577 if (verbose) { 578 log(LOG_DEBUG, "%s: deleting route to " 579 "%s->%s->%s, flags=%b, draining=%d\n", 580 __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ? 581 rt->rt_ifp->if_xname : "", rt->rt_flags, 582 RTF_BITS, ap->draining); 583 } 584 RT_ADDREF_LOCKED(rt); /* for us to free below */ 585 /* 586 * Delete this route since we're done with it; 587 * the route may be freed afterwards, so we 588 * can no longer refer to 'rt' upon returning 589 * from rtrequest(). Safe to drop rt_lock and 590 * use rt_key, rt_gateway, since holding rnh_lock 591 * here prevents another thread from calling 592 * rt_setgate() on this route. 593 */ 594 RT_UNLOCK(rt); 595 err = rtrequest_locked(RTM_DELETE, rt_key(rt), 596 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); 597 if (err != 0) { 598 RT_LOCK(rt); 599 if (!verbose) 600 rt_str(rt, dbuf, sizeof (dbuf), 601 gbuf, sizeof (gbuf)); 602 log(LOG_ERR, "%s: error deleting route to " 603 "%s->%s->%s, flags=%b, err=%d\n", __func__, 604 dbuf, gbuf, (rt->rt_ifp != NULL) ? 605 rt->rt_ifp->if_xname : "", rt->rt_flags, 606 RTF_BITS, err); 607 RT_UNLOCK(rt); 608 } else { 609 ap->killed++; 610 } 611 rtfree_locked(rt); 612 } else { 613 uint64_t expire = (rt->rt_expire - timenow); 614 615 if (ap->updating && expire > rtq_reallyold) { 616 rt_setexpire(rt, timenow + rtq_reallyold); 617 if (verbose) { 618 log(LOG_DEBUG, "%s: route to " 619 "%s->%s->%s, flags=%b, adjusted " 620 "expire=T+%u (was T+%u)\n", 621 __func__, dbuf, gbuf, 622 (rt->rt_ifp != NULL) ? 623 rt->rt_ifp->if_xname : "", 624 rt->rt_flags, RTF_BITS, 625 (rt->rt_expire - timenow), expire); 626 } 627 } 628 ap->nextstop = lmin(ap->nextstop, rt->rt_expire); 629 RT_UNLOCK(rt); 630 } 631 } else { 632 RT_UNLOCK(rt); 633 } 634 635 return (0); 636} 637 638#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ 639static int rtq_timeout = RTQ_TIMEOUT; 640 641static void 642in6_rtqtimo(void *targ) 643{ 644#pragma unused(targ) 645 struct radix_node_head *rnh; 646 struct rtqk_arg arg; 647 struct timeval atv; 648 static uint64_t last_adjusted_timeout = 0; 649 boolean_t verbose = (rt_verbose > 1); 650 uint64_t timenow; 651 uint32_t ours; 652 653 lck_mtx_lock(rnh_lock); 654 rnh = rt_tables[AF_INET6]; 655 VERIFY(rnh != NULL); 656 657 /* Get the timestamp after we acquire the lock for better accuracy */ 658 timenow = net_uptime(); 659 if (verbose) { 660 log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n", 661 __func__, rtq_timeout); 662 } 663 bzero(&arg, sizeof (arg)); 664 arg.rnh = rnh; 665 arg.nextstop = timenow + rtq_timeout; 666 rnh->rnh_walktree(rnh, in6_rtqkill, &arg); 667 if (verbose) { 668 log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__, 669 arg.found, arg.killed); 670 } 671 /* 672 * Attempt to be somewhat dynamic about this: 673 * If there are ``too many'' routes sitting around taking up space, 674 * then crank down the timeout, and see if we can't make some more 675 * go away. However, we make sure that we will never adjust more 676 * than once in rtq_timeout seconds, to keep from cranking down too 677 * hard. 678 */ 679 ours = (arg.found - arg.killed); 680 if (ours > rtq_toomany && 681 ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) && 682 rtq_reallyold > rtq_minreallyold) { 683 rtq_reallyold = 2 * rtq_reallyold / 3; 684 if (rtq_reallyold < rtq_minreallyold) 685 rtq_reallyold = rtq_minreallyold; 686 687 last_adjusted_timeout = timenow; 688 if (verbose) { 689 log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d " 690 "seconds\n", __func__, rtq_reallyold); 691 } 692 arg.found = arg.killed = 0; 693 arg.updating = 1; 694 rnh->rnh_walktree(rnh, in6_rtqkill, &arg); 695 } 696 697 atv.tv_usec = 0; 698 atv.tv_sec = arg.nextstop - timenow; 699 /* re-arm the timer only if there's work to do */ 700 in6_rtqtimo_run = 0; 701 if (ours > 0) 702 in6_sched_rtqtimo(&atv); 703 else if (verbose) 704 log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__); 705 lck_mtx_unlock(rnh_lock); 706} 707 708static void 709in6_sched_rtqtimo(struct timeval *atv) 710{ 711 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 712 713 if (!in6_rtqtimo_run) { 714 struct timeval tv; 715 716 if (atv == NULL) { 717 tv.tv_usec = 0; 718 tv.tv_sec = MAX(rtq_timeout / 10, 1); 719 atv = &tv; 720 } 721 if (rt_verbose > 1) { 722 log(LOG_DEBUG, "%s: timer scheduled in " 723 "T+%llus.%lluu\n", __func__, 724 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec); 725 } 726 in6_rtqtimo_run = 1; 727 timeout(in6_rtqtimo, NULL, tvtohz(atv)); 728 } 729} 730 731void 732in6_rtqdrain(void) 733{ 734 struct radix_node_head *rnh; 735 struct rtqk_arg arg; 736 737 if (rt_verbose > 1) 738 log(LOG_DEBUG, "%s: draining routes\n", __func__); 739 740 lck_mtx_lock(rnh_lock); 741 rnh = rt_tables[AF_INET6]; 742 VERIFY(rnh != NULL); 743 bzero(&arg, sizeof (arg)); 744 arg.rnh = rnh; 745 arg.draining = 1; 746 rnh->rnh_walktree(rnh, in6_rtqkill, &arg); 747 lck_mtx_unlock(rnh_lock); 748} 749 750/* 751 * Initialize our routing tree. 752 */ 753int 754in6_inithead(void **head, int off) 755{ 756 struct radix_node_head *rnh; 757 758 /* If called from route_init(), make sure it is exactly once */ 759 VERIFY(head != (void **)&rt_tables[AF_INET6] || *head == NULL); 760 761 if (!rn_inithead(head, off)) 762 return (0); 763 764 /* 765 * We can get here from nfs_subs.c as well, in which case this 766 * won't be for the real routing table and thus we're done; 767 * this also takes care of the case when we're called more than 768 * once from anywhere but route_init(). 769 */ 770 if (head != (void **)&rt_tables[AF_INET6]) 771 return (1); /* only do this for the real routing table */ 772 773 rnh = *head; 774 rnh->rnh_addaddr = in6_addroute; 775 rnh->rnh_deladdr = in6_deleteroute; 776 rnh->rnh_matchaddr = in6_matroute; 777 rnh->rnh_matchaddr_args = in6_matroute_args; 778 rnh->rnh_close = in6_clsroute; 779 return (1); 780} 781