1/*
2 * Copyright (c) 2003-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 *    may be used to endorse or promote products derived from this software
43 *    without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * Copyright 1994, 1995 Massachusetts Institute of Technology
60 *
61 * Permission to use, copy, modify, and distribute this software and
62 * its documentation for any purpose and without fee is hereby
63 * granted, provided that both the above copyright notice and this
64 * permission notice appear in all copies, that both the above
65 * copyright notice and this permission notice appear in all
66 * supporting documentation, and that the name of M.I.T. not be used
67 * in advertising or publicity pertaining to distribution of the
68 * software without specific, written prior permission.  M.I.T. makes
69 * no representations about the suitability of this software for any
70 * purpose.  It is provided "as is" without express or implied
71 * warranty.
72 *
73 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
74 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
75 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
76 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
77 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
78 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
79 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
80 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
81 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
82 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
83 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
84 * SUCH DAMAGE.
85 *
86 */
87
88/*
89 * This code does two things necessary for the enhanced TCP metrics to
90 * function in a useful manner:
91 *  1) It marks all non-host routes as `cloning', thus ensuring that
92 *     every actual reference to such a route actually gets turned
93 *     into a reference to a host route to the specific destination
94 *     requested.
95 *  2) When such routes lose all their references, it arranges for them
96 *     to be deleted in some random collection of circumstances, so that
97 *     a large quantity of stale routing data is not kept in kernel memory
98 *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
99 */
100
101#include <sys/param.h>
102#include <sys/systm.h>
103#include <sys/kernel.h>
104#include <sys/sysctl.h>
105#include <kern/queue.h>
106#include <sys/socket.h>
107#include <sys/socketvar.h>
108#include <sys/protosw.h>
109#include <sys/mbuf.h>
110#include <sys/syslog.h>
111#include <sys/mcache.h>
112#include <kern/locks.h>
113
114#include <net/if.h>
115#include <net/route.h>
116#include <netinet/in.h>
117#include <netinet/ip_var.h>
118#include <netinet/in_var.h>
119
120#include <netinet/ip6.h>
121#include <netinet6/ip6_var.h>
122
123#include <netinet/icmp6.h>
124
125#include <netinet/tcp.h>
126#include <netinet/tcp_seq.h>
127#include <netinet/tcp_timer.h>
128#include <netinet/tcp_var.h>
129
/*
 * Defined outside this file; its result is used as the delay argument to
 * timeout() when in6_sched_rtqtimo() arms the expiration timer.
 */
extern int	tvtohz(struct timeval *);

/*
 * Timer state and helpers.  in6_rtqtimo_run is set by in6_sched_rtqtimo()
 * when the timer is armed and cleared by in6_rtqtimo() when it fires.
 */
static int in6_rtqtimo_run;		/* in6_rtqtimo is scheduled to run */
static void in6_rtqtimo(void *);
static void in6_sched_rtqtimo(struct timeval *);

/*
 * Radix-tree hooks.  All but in6_rtqkill() are installed on the AF_INET6
 * routing table by in6_inithead(); in6_rtqkill() is the per-node callback
 * handed to rnh_walktree() by in6_rtqtimo() and in6_rtqdrain().
 */
static struct radix_node *in6_addroute(void *, void *, struct radix_node_head *,
    struct radix_node *);
static struct radix_node *in6_deleteroute(void *, void *,
    struct radix_node_head *);
static struct radix_node *in6_matroute(void *, struct radix_node_head *);
static struct radix_node *in6_matroute_args(void *, struct radix_node_head *,
    rn_matchf_t *, void *);
static void in6_clsroute(struct radix_node *, struct radix_node_head *);
static int in6_rtqkill(struct radix_node *, void *);

/*
 * Flag set by in6_clsroute() on an unreferenced route that is waiting to
 * expire, cleared by in6_validate() when the route is referenced again, and
 * tested by in6_rtqkill() to find the routes it may delete.
 */
#define	RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */

/*
 * Number of RTF_DYNAMIC routes currently in the tree.
 *
 * Accessed by in6_addroute(), in6_deleteroute() and in6_rtqkill(), during
 * which the routing lock (rnh_lock) is held and thus protects the variable.
 */
static int in6dynroutes;
153
/*
 * Do what we need to do when inserting a route.
 *
 * Installed as the rnh_addaddr hook of the AF_INET6 table.  Before handing
 * the entry to rn_addroute() this adjusts its flags (RTF_MULTICAST,
 * RTF_PRCLONING, RTF_LOCAL) and fills in a default MTU from the interface.
 * If the insertion fails it tries two recoveries: for a host route it
 * deletes a conflicting ND6 (RTF_LLINFO) host entry and retries; for a
 * cloning route it accepts an already-present equivalent interface route.
 *
 * The caller must hold rnh_lock and the lock of the entry being inserted
 * (both are asserted).  Returns the inserted (or pre-existing equivalent)
 * radix node, or NULL if the route was rejected or could not be inserted.
 */
static struct radix_node *
in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node *treenodes)
{
	struct rtentry *rt = (struct rtentry *)treenodes;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)rt_key(rt);
	struct radix_node *ret;
	char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
	/* flags as passed in, so the log can show what this routine changed */
	uint32_t flags = rt->rt_flags;
	boolean_t verbose = (rt_verbose > 1);

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/* dbuf/gbuf are only filled in (and only read) when verbose */
	if (verbose)
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

	/*
	 * If this is a dynamic route (which is created via Redirect) and
	 * we already have the maximum acceptable number of such route entries,
	 * reject creating a new one.  We could initiate garbage collection to
	 * make available space right now, but the benefit would probably not
	 * be worth the cleaning overhead; we only have to endure a slightly
	 * suboptimal path even without the redirected route.
	 *
	 * A negative ip6_maxdynroutes disables the limit.
	 */
	if ((rt->rt_flags & RTF_DYNAMIC) &&
	    ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes)
		return (NULL);

	/*
	 * For IPv6, all unicast non-host routes are automatically cloning.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
		rt->rt_flags |= RTF_MULTICAST;

	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
		rt->rt_flags |= RTF_PRCLONING;

	/*
	 * A little bit of help for both IPv6 output and input:
	 *   For local addresses, we make sure that RTF_LOCAL is set,
	 *   with the thought that this might one day be used to speed up
	 *   ip_input().
	 *
	 * We also mark routes to multicast addresses as such, because
	 * it's easy to do and might be useful (but this is much more
	 * dubious since it's so easy to inspect the address).  (This
	 * is done above.)
	 *
	 * XXX
	 * should elaborate the code.
	 */
	if (rt->rt_flags & RTF_HOST) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)->
		    sin6_addr, &sin6->sin6_addr)) {
			rt->rt_flags |= RTF_LOCAL;
		}
		IFA_UNLOCK(rt->rt_ifa);
	}

	/*
	 * Default the route MTU to the interface MTU unless one was already
	 * supplied or the MTU metric is locked.
	 */
	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
	    rt->rt_ifp)
		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;

	ret = rn_addroute(v_arg, n_arg, head, treenodes);
	if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a host route, but can't.
		 * Find out if it is because of an
		 * ND6 entry and delete it if so.
		 */
		rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0,
		    RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt)));
		if (rt2 != NULL) {
			char dbufc[MAX_IPv6_STR_LEN];

			RT_LOCK(rt2);
			if (verbose)
				rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0);

			/* link-layer (AF_LINK gateway) host entry => ND6 */
			if ((rt2->rt_flags & RTF_LLINFO) &&
			    (rt2->rt_flags & RTF_HOST) &&
			    rt2->rt_gateway != NULL &&
			    rt2->rt_gateway->sa_family == AF_LINK) {
				if (verbose) {
					log(LOG_DEBUG, "%s: unable to insert "
					    "route to %s:%s, flags=%b, due to "
					    "existing ND6 route %s->%s "
					    "flags=%b, attempting to delete\n",
					    __func__, dbuf,
					    (rt->rt_ifp != NULL) ?
					    rt->rt_ifp->if_xname : "",
					    rt->rt_flags, RTF_BITS,
					    dbufc, (rt2->rt_ifp != NULL) ?
					    rt2->rt_ifp->if_xname : "",
					    rt2->rt_flags, RTF_BITS);
				}
				/*
				 * Safe to drop rt_lock and use rt_key,
				 * rt_gateway, since holding rnh_lock here
				 * prevents another thread from calling
				 * rt_setgate() on this route.
				 */
				RT_UNLOCK(rt2);
				(void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
				    rt2->rt_gateway, rt_mask(rt2),
				    rt2->rt_flags, NULL);
				/* retry now that the conflicting entry is gone */
				ret = rn_addroute(v_arg, n_arg, head,
				    treenodes);
			} else {
				RT_UNLOCK(rt2);
			}
			/* drop the reference taken by the lookup above */
			rtfree_locked(rt2);
		}
	} else if (ret == NULL && (rt->rt_flags & RTF_CLONING)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a net route, but can't.
		 * The following case should be allowed, so we'll make a
		 * special check for this:
		 *	Two IPv6 addresses with the same prefix are assigned
		 *	to a single interface.
		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
		 *	In this case, (*1) and (*2) want to add the same
		 *	net route entry, 3ffe:0501:: -> if0.
		 *	This case should not raise an error.
		 */
		rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0,
		    RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt)));
		if (rt2 != NULL) {
			RT_LOCK(rt2);
			/*
			 * Accept the existing entry only if it is a plain
			 * cloning (non-host, non-gateway) link-layer route
			 * on the same interface.
			 */
			if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|
			    RTF_GATEWAY)) == RTF_CLONING &&
			    rt2->rt_gateway &&
			    rt2->rt_gateway->sa_family == AF_LINK &&
			    rt2->rt_ifp == rt->rt_ifp) {
				ret = rt2->rt_nodes;
			}
			RT_UNLOCK(rt2);
			rtfree_locked(rt2);
		}
	}

	/* keep the dynamic-route count in step with successful insertions */
	if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC))
		in6dynroutes++;

	if (!verbose)
		goto done;

	if (ret != NULL) {
		if (flags != rt->rt_flags) {
			log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
			    "oflags=%b, flags=%b\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", flags, RTF_BITS,
			    rt->rt_flags, RTF_BITS);
		} else {
			log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
	} else {
		log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, "
		    "flags=%b, already exists\n", __func__, dbuf, gbuf,
		    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
		    rt->rt_flags, RTF_BITS);
	}
done:
	return (ret);
}
331
332static struct radix_node *
333in6_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
334{
335	struct radix_node *rn;
336
337	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
338
339	rn = rn_delete(v_arg, netmask_arg, head);
340	if (rn != NULL) {
341		struct rtentry *rt = (struct rtentry *)rn;
342
343		RT_LOCK(rt);
344		if (rt->rt_flags & RTF_DYNAMIC)
345			in6dynroutes--;
346		if (rt_verbose > 1) {
347			char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
348
349			rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
350			log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, "
351			    "flags=%b\n", __func__, dbuf, gbuf,
352			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
353			    rt->rt_flags, RTF_BITS);
354		}
355		RT_UNLOCK(rt);
356	}
357	return (rn);
358}
359
360/*
361 * Validate (unexpire) an expiring AF_INET6 route.
362 */
363struct radix_node *
364in6_validate(struct radix_node *rn)
365{
366	struct rtentry *rt = (struct rtentry *)rn;
367
368	RT_LOCK_ASSERT_HELD(rt);
369
370	/* This is first reference? */
371	if (rt->rt_refcnt == 0) {
372		if (rt_verbose > 2) {
373			char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
374
375			rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
376			log(LOG_DEBUG, "%s: route to %s->%s->%s validated, "
377			    "flags=%b\n", __func__, dbuf, gbuf,
378			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
379			    rt->rt_flags, RTF_BITS);
380		}
381
382		/*
383		 * It's one of ours; unexpire it.  If the timer is already
384		 * scheduled, let it run later as it won't re-arm itself
385		 * if there's nothing to do.
386		 */
387		if (rt->rt_flags & RTPRF_OURS) {
388			rt->rt_flags &= ~RTPRF_OURS;
389			rt_setexpire(rt, 0);
390		}
391	}
392	return (rn);
393}
394
395/*
396 * Similar to in6_matroute_args except without the leaf-matching parameters.
397 */
398static struct radix_node *
399in6_matroute(void *v_arg, struct radix_node_head *head)
400{
401	return (in6_matroute_args(v_arg, head, NULL, NULL));
402}
403
404/*
405 * This code is the inverse of in6_clsroute: on first reference, if we
406 * were managing the route, stop doing so and set the expiration timer
407 * back off again.
408 */
409static struct radix_node *
410in6_matroute_args(void *v_arg, struct radix_node_head *head,
411    rn_matchf_t *f, void *w)
412{
413	struct radix_node *rn = rn_match_args(v_arg, head, f, w);
414
415	if (rn != NULL) {
416		RT_LOCK_SPIN((struct rtentry *)rn);
417		in6_validate(rn);
418		RT_UNLOCK((struct rtentry *)rn);
419	}
420	return (rn);
421}
422
SYSCTL_DECL(_net_inet6_ip6);

/*
 * Lifetime, in seconds, given to a route when in6_clsroute() hands it to
 * the expiry machinery.  A value of 0 disables route caching (such routes
 * are deleted immediately).  in6_rtqtimo() may lower this at run time.
 */
/* one hour is ``really old'' */
static uint32_t rtq_reallyold = 60*60;
SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0, "");

/*
 * Floor, in seconds, below which in6_rtqtimo() will not reduce
 * rtq_reallyold on its own.
 */
/* never automatically crank down to less */
static uint32_t rtq_minreallyold = 10;
SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire,
	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0, "");

/*
 * Number of surviving managed routes above which in6_rtqtimo() starts
 * shortening rtq_reallyold.
 */
/* 128 cached routes is ``too many'' */
static uint32_t rtq_toomany = 128;
SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0, "");
439
440/*
441 * On last reference drop, mark the route as belong to us so that it can be
442 * timed out.
443 */
444static void
445in6_clsroute(struct radix_node *rn, struct radix_node_head *head)
446{
447#pragma unused(head)
448	char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
449	struct rtentry *rt = (struct rtentry *)rn;
450	boolean_t verbose = (rt_verbose > 1);
451
452	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
453	RT_LOCK_ASSERT_HELD(rt);
454
455	if (!(rt->rt_flags & RTF_UP))
456		return;		/* prophylactic measures */
457
458	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
459		return;
460
461	if (rt->rt_flags & RTPRF_OURS)
462		return;
463
464	if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
465		return;
466
467	if (verbose)
468		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
469
470	/*
471	 * Delete the route immediately if RTF_DELCLONE is set or
472	 * if route caching is disabled (rtq_reallyold set to 0).
473	 * Otherwise, let it expire and be deleted by in6_rtqkill().
474	 */
475	if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
476		int err;
477
478		if (verbose) {
479			log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
480			    "flags=%b\n", __func__, dbuf, gbuf,
481			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
482			    rt->rt_flags, RTF_BITS);
483		}
484		/*
485		 * Delete the route from the radix tree but since we are
486		 * called when the route's reference count is 0, don't
487		 * deallocate it until we return from this routine by
488		 * telling rtrequest that we're interested in it.
489		 * Safe to drop rt_lock and use rt_key, rt_gateway,
490		 * since holding rnh_lock here prevents another thread
491		 * from calling rt_setgate() on this route.
492		 */
493		RT_UNLOCK(rt);
494		err = rtrequest_locked(RTM_DELETE, rt_key(rt),
495		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt);
496		if (err == 0) {
497			/* Now let the caller free it */
498			RT_LOCK(rt);
499			RT_REMREF_LOCKED(rt);
500		} else {
501			RT_LOCK(rt);
502			if (!verbose)
503				rt_str(rt, dbuf, sizeof (dbuf),
504				    gbuf, sizeof (gbuf));
505			log(LOG_ERR, "%s: error deleting route to "
506			    "%s->%s->%s, flags=%b, err=%d\n", __func__,
507			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
508			    rt->rt_ifp->if_xname : "", rt->rt_flags,
509			    RTF_BITS, err);
510		}
511	} else {
512		uint64_t timenow;
513
514		timenow = net_uptime();
515		rt->rt_flags |= RTPRF_OURS;
516		rt_setexpire(rt, timenow + rtq_reallyold);
517
518		if (verbose) {
519			log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, "
520			    "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf,
521			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
522			    rt->rt_flags, RTF_BITS, rt->rt_expire - timenow);
523		}
524
525		/* We have at least one entry; arm the timer if not already */
526		in6_sched_rtqtimo(NULL);
527	}
528}
529
/*
 * Argument block passed through rnh_walktree() to in6_rtqkill().
 */
struct rtqk_arg {
	struct radix_node_head *rnh;	/* table being walked (set by callers) */
	int updating;		/* clamp expirations to rtq_reallyold */
	int draining;		/* delete managed routes regardless of expiry */
	uint32_t killed;	/* managed routes successfully deleted */
	uint32_t found;		/* managed (RTPRF_OURS) routes encountered */
	uint64_t nextstop;	/* lowered to earliest expiry among survivors */
};
538
539/*
540 * Get rid of old routes.  When draining, this deletes everything, even when
541 * the timeout is not expired yet.  This also applies if the route is dynamic
542 * and there are sufficiently large number of such routes (more than a half of
543 * maximum).  When updating, this makes sure that nothing has a timeout longer
544 * than the current value of rtq_reallyold.
545 */
546static int
547in6_rtqkill(struct radix_node *rn, void *rock)
548{
549	struct rtqk_arg *ap = rock;
550	struct rtentry *rt = (struct rtentry *)rn;
551	boolean_t verbose = (rt_verbose > 1);
552	uint64_t timenow;
553	int err;
554
555	timenow = net_uptime();
556	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
557
558	RT_LOCK(rt);
559	if (rt->rt_flags & RTPRF_OURS) {
560		char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
561
562		if (verbose)
563			rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
564
565		ap->found++;
566		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
567		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
568		if (ap->draining || rt->rt_expire <= timenow ||
569		    ((rt->rt_flags & RTF_DYNAMIC) && ip6_maxdynroutes >= 0 &&
570		    in6dynroutes > ip6_maxdynroutes / 2)) {
571			if (rt->rt_refcnt > 0) {
572				panic("%s: route %p marked with RTPRF_OURS "
573				    "with non-zero refcnt (%u)", __func__,
574				    rt, rt->rt_refcnt);
575				/* NOTREACHED */
576			}
577			if (verbose) {
578				log(LOG_DEBUG, "%s: deleting route to "
579				    "%s->%s->%s, flags=%b, draining=%d\n",
580				    __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
581				    rt->rt_ifp->if_xname : "", rt->rt_flags,
582				    RTF_BITS, ap->draining);
583			}
584			RT_ADDREF_LOCKED(rt);	/* for us to free below */
585			/*
586			 * Delete this route since we're done with it;
587			 * the route may be freed afterwards, so we
588			 * can no longer refer to 'rt' upon returning
589			 * from rtrequest().  Safe to drop rt_lock and
590			 * use rt_key, rt_gateway, since holding rnh_lock
591			 * here prevents another thread from calling
592			 * rt_setgate() on this route.
593			 */
594			RT_UNLOCK(rt);
595			err = rtrequest_locked(RTM_DELETE, rt_key(rt),
596			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
597			if (err != 0) {
598				RT_LOCK(rt);
599				if (!verbose)
600					rt_str(rt, dbuf, sizeof (dbuf),
601					    gbuf, sizeof (gbuf));
602				log(LOG_ERR, "%s: error deleting route to "
603				    "%s->%s->%s, flags=%b, err=%d\n", __func__,
604				    dbuf, gbuf, (rt->rt_ifp != NULL) ?
605				    rt->rt_ifp->if_xname : "", rt->rt_flags,
606				    RTF_BITS, err);
607				RT_UNLOCK(rt);
608			} else {
609				ap->killed++;
610			}
611			rtfree_locked(rt);
612		} else {
613			uint64_t expire = (rt->rt_expire - timenow);
614
615			if (ap->updating && expire > rtq_reallyold) {
616				rt_setexpire(rt, timenow + rtq_reallyold);
617				if (verbose) {
618					log(LOG_DEBUG, "%s: route to "
619					    "%s->%s->%s, flags=%b, adjusted "
620					    "expire=T+%u (was T+%u)\n",
621					    __func__, dbuf, gbuf,
622					    (rt->rt_ifp != NULL) ?
623					    rt->rt_ifp->if_xname : "",
624					    rt->rt_flags, RTF_BITS,
625					    (rt->rt_expire - timenow), expire);
626				}
627			}
628			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
629			RT_UNLOCK(rt);
630		}
631	} else {
632		RT_UNLOCK(rt);
633	}
634
635	return (0);
636}
637
/*
 * Longest interval, in seconds, between runs of the expiration timer.
 * The macro body is parenthesized so that it expands safely inside larger
 * expressions (e.g. as a divisor).
 */
#define	RTQ_TIMEOUT	(60*10)	/* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;
640
641static void
642in6_rtqtimo(void *targ)
643{
644#pragma unused(targ)
645	struct radix_node_head *rnh;
646	struct rtqk_arg arg;
647	struct timeval atv;
648	static uint64_t last_adjusted_timeout = 0;
649	boolean_t verbose = (rt_verbose > 1);
650	uint64_t timenow;
651	uint32_t ours;
652
653	lck_mtx_lock(rnh_lock);
654	rnh = rt_tables[AF_INET6];
655	VERIFY(rnh != NULL);
656
657	/* Get the timestamp after we acquire the lock for better accuracy */
658	timenow = net_uptime();
659	if (verbose) {
660		log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n",
661		    __func__, rtq_timeout);
662	}
663	bzero(&arg, sizeof (arg));
664	arg.rnh = rnh;
665	arg.nextstop = timenow + rtq_timeout;
666	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
667	if (verbose) {
668		log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__,
669		    arg.found, arg.killed);
670	}
671	/*
672	 * Attempt to be somewhat dynamic about this:
673	 * If there are ``too many'' routes sitting around taking up space,
674	 * then crank down the timeout, and see if we can't make some more
675	 * go away.  However, we make sure that we will never adjust more
676	 * than once in rtq_timeout seconds, to keep from cranking down too
677	 * hard.
678	 */
679	ours = (arg.found - arg.killed);
680	if (ours > rtq_toomany &&
681	    ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) &&
682	    rtq_reallyold > rtq_minreallyold) {
683		rtq_reallyold = 2 * rtq_reallyold / 3;
684		if (rtq_reallyold < rtq_minreallyold)
685			rtq_reallyold = rtq_minreallyold;
686
687		last_adjusted_timeout = timenow;
688		if (verbose) {
689			log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d "
690			    "seconds\n", __func__, rtq_reallyold);
691		}
692		arg.found = arg.killed = 0;
693		arg.updating = 1;
694		rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
695	}
696
697	atv.tv_usec = 0;
698	atv.tv_sec = arg.nextstop - timenow;
699	/* re-arm the timer only if there's work to do */
700	in6_rtqtimo_run = 0;
701	if (ours > 0)
702		in6_sched_rtqtimo(&atv);
703	else if (verbose)
704		log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
705	lck_mtx_unlock(rnh_lock);
706}
707
708static void
709in6_sched_rtqtimo(struct timeval *atv)
710{
711	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
712
713	if (!in6_rtqtimo_run) {
714		struct timeval tv;
715
716		if (atv == NULL) {
717			tv.tv_usec = 0;
718			tv.tv_sec = MAX(rtq_timeout / 10, 1);
719			atv = &tv;
720		}
721		if (rt_verbose > 1) {
722			log(LOG_DEBUG, "%s: timer scheduled in "
723			    "T+%llus.%lluu\n", __func__,
724			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
725		}
726		in6_rtqtimo_run = 1;
727		timeout(in6_rtqtimo, NULL, tvtohz(atv));
728	}
729}
730
731void
732in6_rtqdrain(void)
733{
734	struct radix_node_head *rnh;
735	struct rtqk_arg arg;
736
737	if (rt_verbose > 1)
738		log(LOG_DEBUG, "%s: draining routes\n", __func__);
739
740	lck_mtx_lock(rnh_lock);
741	rnh = rt_tables[AF_INET6];
742	VERIFY(rnh != NULL);
743	bzero(&arg, sizeof (arg));
744	arg.rnh = rnh;
745	arg.draining = 1;
746	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
747	lck_mtx_unlock(rnh_lock);
748}
749
750/*
751 * Initialize our routing tree.
752 */
753int
754in6_inithead(void **head, int off)
755{
756	struct radix_node_head *rnh;
757
758	/* If called from route_init(), make sure it is exactly once */
759	VERIFY(head != (void **)&rt_tables[AF_INET6] || *head == NULL);
760
761	if (!rn_inithead(head, off))
762		return (0);
763
764	/*
765	 * We can get here from nfs_subs.c as well, in which case this
766	 * won't be for the real routing table and thus we're done;
767	 * this also takes care of the case when we're called more than
768	 * once from anywhere but route_init().
769	 */
770	if (head != (void **)&rt_tables[AF_INET6])
771		return (1);	/* only do this for the real routing table */
772
773	rnh = *head;
774	rnh->rnh_addaddr = in6_addroute;
775	rnh->rnh_deladdr = in6_deleteroute;
776	rnh->rnh_matchaddr = in6_matroute;
777	rnh->rnh_matchaddr_args = in6_matroute_args;
778	rnh->rnh_close = in6_clsroute;
779	return (1);
780}
781