1/* 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright 1994, 1995 Massachusetts Institute of Technology 30 * 31 * Permission to use, copy, modify, and distribute this software and 32 * its documentation for any purpose and without fee is hereby 33 * granted, provided that both the above copyright notice and this 34 * permission notice appear in all copies, that both the above 35 * copyright notice and this permission notice appear in all 36 * supporting documentation, and that the name of M.I.T. not be used 37 * in advertising or publicity pertaining to distribution of the 38 * software without specific, written prior permission. M.I.T. makes 39 * no representations about the suitability of this software for any 40 * purpose. It is provided "as is" without express or implied 41 * warranty. 42 * 43 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 44 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 45 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 46 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 47 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 54 * SUCH DAMAGE. 55 * 56 * $FreeBSD: src/sys/netinet/in_rmx.c,v 1.37.2.1 2001/05/14 08:23:49 ru Exp $ 57 */ 58 59/* 60 * This code does two things necessary for the enhanced TCP metrics to 61 * function in a useful manner: 62 * 1) It marks all non-host routes as `cloning', thus ensuring that 63 * every actual reference to such a route actually gets turned 64 * into a reference to a host route to the specific destination 65 * requested. 66 * 2) When such routes lose all their references, it arranges for them 67 * to be deleted in some random collection of circumstances, so that 68 * a large quantity of stale routing data is not kept in kernel memory 69 * indefinitely. See in_rtqtimo() below for the exact mechanism. 70 */ 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/kernel.h> 75#include <sys/sysctl.h> 76#include <sys/socket.h> 77#include <sys/mbuf.h> 78#include <sys/syslog.h> 79#include <kern/lock.h> 80 81#include <net/if.h> 82#include <net/route.h> 83#include <netinet/in.h> 84#include <netinet/in_var.h> 85 86extern int tvtohz(struct timeval *); 87extern int in_inithead(void **head, int off); 88extern u_long route_generation; 89 90#ifdef __APPLE__ 91static void in_rtqtimo(void *rock); 92#endif 93 94static struct radix_node *in_matroute_args(void *, struct radix_node_head *, 95 rn_matchf_t *f, void *); 96 97#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ 98 99/* 100 * Do what we need to do when inserting a route. 101 */ 102static struct radix_node * 103in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 104 struct radix_node *treenodes) 105{ 106 struct rtentry *rt = (struct rtentry *)treenodes; 107 struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); 108 struct radix_node *ret; 109 110 /* 111 * For IP, all unicast non-host routes are automatically cloning. 112 */ 113 if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 114 rt->rt_flags |= RTF_MULTICAST; 115 116 if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { 117 rt->rt_flags |= RTF_PRCLONING; 118 } 119 120 /* 121 * A little bit of help for both IP output and input: 122 * For host routes, we make sure that RTF_BROADCAST 123 * is set for anything that looks like a broadcast address. 124 * This way, we can avoid an expensive call to in_broadcast() 125 * in ip_output() most of the time (because the route passed 126 * to ip_output() is almost always a host route). 127 * 128 * We also do the same for local addresses, with the thought 129 * that this might one day be used to speed up ip_input(). 130 * 131 * We also mark routes to multicast addresses as such, because 132 * it's easy to do and might be useful (but this is much more 133 * dubious since it's so easy to inspect the address). (This 134 * is done above.) 135 */ 136 if (rt->rt_flags & RTF_HOST) { 137 if (in_broadcast(sin->sin_addr, rt->rt_ifp)) { 138 rt->rt_flags |= RTF_BROADCAST; 139 } else { 140#define satosin(sa) ((struct sockaddr_in *)sa) 141 if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr 142 == sin->sin_addr.s_addr) 143 rt->rt_flags |= RTF_LOCAL; 144#undef satosin 145 } 146 } 147 148 if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) 149 && rt->rt_ifp) 150 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; 151 152 ret = rn_addroute(v_arg, n_arg, head, treenodes); 153 if (ret == NULL && rt->rt_flags & RTF_HOST) { 154 struct rtentry *rt2; 155 /* 156 * We are trying to add a host route, but can't. 157 * Find out if it is because of an 158 * ARP entry and delete it if so. 159 */ 160 rt2 = rtalloc1_scoped_locked(rt_key(rt), 0, 161 RTF_CLONING | RTF_PRCLONING, sa_get_ifscope(rt_key(rt))); 162 if (rt2) { 163 if (rt2->rt_flags & RTF_LLINFO && 164 rt2->rt_flags & RTF_HOST && 165 rt2->rt_gateway && 166 rt2->rt_gateway->sa_family == AF_LINK) { 167 rtrequest_locked(RTM_DELETE, 168 (struct sockaddr *)rt_key(rt2), 169 rt2->rt_gateway, 170 rt_mask(rt2), rt2->rt_flags, 0); 171 ret = rn_addroute(v_arg, n_arg, head, 172 treenodes); 173 } 174 rtfree_locked(rt2); 175 } 176 } 177 return ret; 178} 179 180/* 181 * Validate (unexpire) an expiring AF_INET route. 182 */ 183struct radix_node * 184in_validate(struct radix_node *rn) 185{ 186 struct rtentry *rt = (struct rtentry *)rn; 187 188 /* This is first reference? */ 189 if (rt != NULL && rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) { 190 rt->rt_flags &= ~RTPRF_OURS; 191 rt->rt_rmx.rmx_expire = 0; 192 } 193 return (rn); 194} 195 196/* 197 * Similar to in_matroute_args except without the leaf-matching parameters. 198 */ 199static struct radix_node * 200in_matroute(void *v_arg, struct radix_node_head *head) 201{ 202 return (in_matroute_args(v_arg, head, NULL, NULL)); 203} 204 205/* 206 * This code is the inverse of in_clsroute: on first reference, if we 207 * were managing the route, stop doing so and set the expiration timer 208 * back off again. 209 */ 210static struct radix_node * 211in_matroute_args(void *v_arg, struct radix_node_head *head, 212 rn_matchf_t *f, void *w) 213{ 214 struct radix_node *rn = rn_match_args(v_arg, head, f, w); 215 216 return (in_validate(rn)); 217} 218 219static int rtq_reallyold = 60*60; 220 /* one hour is ``really old'' */ 221SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, 222 &rtq_reallyold , 0, 223 "Default expiration time on dynamically learned routes"); 224 225static int rtq_minreallyold = 10; 226 /* never automatically crank down to less */ 227SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, 228 &rtq_minreallyold , 0, 229 "Minimum time to attempt to hold onto dynamically learned routes"); 230 231static int rtq_toomany = 128; 232 /* 128 cached routes is ``too many'' */ 233SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, 234 &rtq_toomany , 0, "Upper limit on dynamically learned routes"); 235 236#ifdef __APPLE__ 237/* XXX LD11JUL02 Special case for AOL 5.1.2 connectivity issue to AirPort BS (Radar 2969954) 238 * AOL is adding a circular route ("10.0.1.1/32 10.0.1.1") when establishing its ppp tunnel 239 * to the AP BaseStation by removing the default gateway and replacing it with their tunnel entry point. 240 * There is no apparent reason to add this route as there is a valid 10.0.1.1/24 route to the BS. 241 * That circular route was ignored on previous version of MacOS X because of a routing bug 242 * corrected with the merge to FreeBSD4.4 (a route generated from an RTF_CLONING route had the RTF_WASCLONED 243 * flag set but did not have a reference to the parent route) and that entry was left in the RT. This workaround is 244 * made in order to provide binary compatibility with AOL. 245 * If we catch a process adding a circular route with a /32 from the routing socket, we error it out instead of 246 * confusing the routing table with a wrong route to the previous default gateway 247 * If for some reason a circular route is needed, turn this sysctl (net.inet.ip.check_route_selfref) to zero. 248 */ 249int check_routeselfref = 1; 250SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW, 251 &check_routeselfref , 0, ""); 252#endif 253 254__private_extern__ int use_routegenid = 1; 255SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW, 256 &use_routegenid , 0, ""); 257 258/* 259 * On last reference drop, mark the route as belong to us so that it can be 260 * timed out. 261 */ 262static void 263in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) 264{ 265 struct rtentry *rt = (struct rtentry *)rn; 266 267 if (!(rt->rt_flags & RTF_UP)) 268 return; /* prophylactic measures */ 269 270 if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) 271 return; 272 273 if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) 274 return; 275 276 /* 277 * Delete the route immediately if RTF_DELCLONE is set or 278 * if route caching is disabled (rtq_reallyold set to 0). 279 * Otherwise, let it expire and be deleted by in_rtqkill(). 280 */ 281 if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) { 282 /* 283 * Delete the route from the radix tree but since we are 284 * called when the route's reference count is 0, don't 285 * deallocate it until we return from this routine by 286 * telling rtrequest that we're interested in it. 287 */ 288 if (rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), 289 rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) { 290 /* Now let the caller free it */ 291 rtunref(rt); 292 } 293 } else { 294 struct timeval timenow; 295 296 getmicrotime(&timenow); 297 rt->rt_flags |= RTPRF_OURS; 298 rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold; 299 } 300} 301 302struct rtqk_arg { 303 struct radix_node_head *rnh; 304 int draining; 305 int killed; 306 int found; 307 int updating; 308 time_t nextstop; 309}; 310 311/* 312 * Get rid of old routes. When draining, this deletes everything, even when 313 * the timeout is not expired yet. When updating, this makes sure that 314 * nothing has a timeout longer than the current value of rtq_reallyold. 315 */ 316static int 317in_rtqkill(struct radix_node *rn, void *rock) 318{ 319 struct rtqk_arg *ap = rock; 320 struct rtentry *rt = (struct rtentry *)rn; 321 int err; 322 struct timeval timenow; 323 324 getmicrotime(&timenow); 325 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); 326 327 if (rt->rt_flags & RTPRF_OURS) { 328 ap->found++; 329 330 if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) { 331 if (rt->rt_refcnt > 0) 332 panic("rtqkill route really not free"); 333 334 err = rtrequest_locked(RTM_DELETE, 335 (struct sockaddr *)rt_key(rt), 336 rt->rt_gateway, rt_mask(rt), 337 rt->rt_flags, 0); 338 if (err) { 339 log(LOG_WARNING, "in_rtqkill: error %d\n", err); 340 } else { 341 ap->killed++; 342 } 343 } else { 344 if (ap->updating 345 && (rt->rt_rmx.rmx_expire - timenow.tv_sec 346 > rtq_reallyold)) { 347 rt->rt_rmx.rmx_expire = timenow.tv_sec 348 + rtq_reallyold; 349 } 350 ap->nextstop = lmin(ap->nextstop, 351 rt->rt_rmx.rmx_expire); 352 } 353 } 354 355 return 0; 356} 357 358static void 359in_rtqtimo_funnel(void *rock) 360{ 361 in_rtqtimo(rock); 362 363} 364#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ 365static int rtq_timeout = RTQ_TIMEOUT; 366 367static void 368in_rtqtimo(void *rock) 369{ 370 struct radix_node_head *rnh = rock; 371 struct rtqk_arg arg; 372 struct timeval atv; 373 static time_t last_adjusted_timeout = 0; 374 struct timeval timenow; 375 376 lck_mtx_lock(rt_mtx); 377 /* Get the timestamp after we acquire the lock for better accuracy */ 378 getmicrotime(&timenow); 379 380 arg.found = arg.killed = 0; 381 arg.rnh = rnh; 382 arg.nextstop = timenow.tv_sec + rtq_timeout; 383 arg.draining = arg.updating = 0; 384 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 385 386 /* 387 * Attempt to be somewhat dynamic about this: 388 * If there are ``too many'' routes sitting around taking up space, 389 * then crank down the timeout, and see if we can't make some more 390 * go away. However, we make sure that we will never adjust more 391 * than once in rtq_timeout seconds, to keep from cranking down too 392 * hard. 393 */ 394 if((arg.found - arg.killed > rtq_toomany) 395 && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout) 396 && rtq_reallyold > rtq_minreallyold) { 397 rtq_reallyold = 2*rtq_reallyold / 3; 398 if(rtq_reallyold < rtq_minreallyold) { 399 rtq_reallyold = rtq_minreallyold; 400 } 401 402 last_adjusted_timeout = timenow.tv_sec; 403#if DIAGNOSTIC 404 log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", 405 rtq_reallyold); 406#endif 407 arg.found = arg.killed = 0; 408 arg.updating = 1; 409 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 410 } 411 412 atv.tv_usec = 0; 413 atv.tv_sec = arg.nextstop - timenow.tv_sec; 414 lck_mtx_unlock(rt_mtx); 415 timeout(in_rtqtimo_funnel, rock, tvtohz(&atv)); 416} 417 418void 419in_rtqdrain(void) 420{ 421 struct radix_node_head *rnh = rt_tables[AF_INET]; 422 struct rtqk_arg arg; 423 arg.found = arg.killed = 0; 424 arg.rnh = rnh; 425 arg.nextstop = 0; 426 arg.draining = 1; 427 arg.updating = 0; 428 lck_mtx_lock(rt_mtx); 429 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 430 lck_mtx_unlock(rt_mtx); 431} 432 433/* 434 * Initialize our routing tree. 435 */ 436int 437in_inithead(void **head, int off) 438{ 439 struct radix_node_head *rnh; 440 441#ifdef __APPLE__ 442 if (*head) 443 return 1; 444#endif 445 446 if(!rn_inithead(head, off)) 447 return 0; 448 449 if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */ 450 return 1; /* only do this for the real routing table */ 451 452 rnh = *head; 453 rnh->rnh_addaddr = in_addroute; 454 rnh->rnh_matchaddr = in_matroute; 455 rnh->rnh_matchaddr_args = in_matroute_args; 456 rnh->rnh_close = in_clsroute; 457 in_rtqtimo(rnh); /* kick off timeout first time */ 458 return 1; 459} 460 461 462/* 463 * This zaps old routes when the interface goes down or interface 464 * address is deleted. In the latter case, it deletes static routes 465 * that point to this address. If we don't do this, we may end up 466 * using the old address in the future. The ones we always want to 467 * get rid of are things like ARP entries, since the user might down 468 * the interface, walk over to a completely different network, and 469 * plug back in. 470 */ 471struct in_ifadown_arg { 472 struct radix_node_head *rnh; 473 struct ifaddr *ifa; 474 int del; 475}; 476 477static int 478in_ifadownkill(struct radix_node *rn, void *xap) 479{ 480 struct in_ifadown_arg *ap = xap; 481 struct rtentry *rt = (struct rtentry *)rn; 482 int err; 483 484 if (rt->rt_ifa == ap->ifa && 485 (ap->del || !(rt->rt_flags & RTF_STATIC))) { 486 /* 487 * We need to disable the automatic prune that happens 488 * in this case in rtrequest() because it will blow 489 * away the pointers that rn_walktree() needs in order 490 * continue our descent. We will end up deleting all 491 * the routes that rtrequest() would have in any case, 492 * so that behavior is not needed there. 493 */ 494 rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); 495 err = rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), 496 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); 497 if (err) { 498 log(LOG_WARNING, "in_ifadownkill: error %d\n", err); 499 } 500 } 501 return 0; 502} 503 504int 505in_ifadown(struct ifaddr *ifa, int delete) 506{ 507 struct in_ifadown_arg arg; 508 struct radix_node_head *rnh; 509 510 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); 511 512 if (ifa->ifa_addr->sa_family != AF_INET) 513 return 1; 514 515 /* trigger route cache reevaluation */ 516 if (use_routegenid) 517 route_generation++; 518 519 arg.rnh = rnh = rt_tables[AF_INET]; 520 arg.ifa = ifa; 521 arg.del = delete; 522 rnh->rnh_walktree(rnh, in_ifadownkill, &arg); 523 ifa->ifa_flags &= ~IFA_ROUTE; 524 return 0; 525} 526