/*
 * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*	$FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $	*/
/*	$KAME: in6_rmx.c,v 1.10 2001/05/24 05:44:58 itojun Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright 1994, 1995 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * This code does two things necessary for the enhanced TCP metrics to
 * function in a useful manner:
 *  1) It marks all non-host routes as `cloning', thus ensuring that
 *     every actual reference to such a route actually gets turned
 *     into a reference to a host route to the specific destination
 *     requested.
 *  2) When such routes lose all their references, it arranges for them
 *     to be deleted in some random collection of circumstances, so that
 *     a large quantity of stale routing data is not kept in kernel memory
 *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <kern/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>
#include <sys/mcache.h>
#include <kern/lock.h>

#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/in_var.h>

#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <netinet/icmp6.h>

#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>

extern int	in6_inithead(void **head, int off);
static void	in6_rtqtimo(void *rock);
static void	in6_mtutimo(void *rock);
extern int	tvtohz(struct timeval *);

static struct radix_node *in6_matroute_args(void *, struct radix_node_head *,
    rn_matchf_t *, void *);

#define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */

/*
 * Accessed by in6_addroute(), in6_deleteroute() and in6_rtqkill(), during
 * which the routing lock (rnh_lock) is held and thus protects the variable.
 */
static int	in6dynroutes;

/*
 * Do what we need to do when inserting a route.
 */
static struct radix_node *
in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
	    struct radix_node *treenodes)
{
	struct rtentry *rt = (struct rtentry *)treenodes;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)rt_key(rt);
	struct radix_node *ret;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/*
	 * If this is a dynamic route (created via an ICMPv6 Redirect) and
	 * we already have the maximum acceptable number of such route entries,
	 * reject creating a new one.  We could initiate garbage collection to
	 * make space available right now, but the benefit would probably not
	 * be worth the cleaning overhead; we only have to endure a slightly
	 * suboptimal path even without the redirected route.
	 */
	if ((rt->rt_flags & RTF_DYNAMIC) != 0 &&
	    ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes)
		return (NULL);

	/*
	 * For IPv6, all unicast non-host routes are automatically cloning.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
		rt->rt_flags |= RTF_MULTICAST;

	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
		rt->rt_flags |= RTF_PRCLONING;
	}

	/*
	 * A little bit of help for both IPv6 output and input:
	 *   For local addresses, we make sure that RTF_LOCAL is set,
	 *   with the thought that this might one day be used to speed up
	 *   ip6_input().
	 *
	 * We also mark routes to multicast addresses as such, because
	 * it's easy to do and might be useful (but this is much more
	 * dubious since it's so easy to inspect the address).  (This
	 * is done above.)
	 *
	 * XXX
	 * should elaborate the code.
	 */
	if (rt->rt_flags & RTF_HOST) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
					->sin6_addr,
				       &sin6->sin6_addr)) {
			rt->rt_flags |= RTF_LOCAL;
		}
		IFA_UNLOCK(rt->rt_ifa);
	}

	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
	    && rt->rt_ifp)
		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;

	ret = rn_addroute(v_arg, n_arg, head, treenodes);
	if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a host route, but can't.
		 * Find out if it is because of an existing
		 * ND (neighbor cache) entry and delete it if so.
		 */
		rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0,
		    RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt)));
		if (rt2) {
			RT_LOCK(rt2);
			if ((rt2->rt_flags & RTF_LLINFO) &&
			    (rt2->rt_flags & RTF_HOST) &&
			    rt2->rt_gateway != NULL &&
			    rt2->rt_gateway->sa_family == AF_LINK) {
				/*
				 * Safe to drop rt_lock and use rt_key,
				 * rt_gateway, since holding rnh_lock here
				 * prevents another thread from calling
				 * rt_setgate() on this route.
				 */
				RT_UNLOCK(rt2);
				(void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
				    rt2->rt_gateway, rt_mask(rt2),
				    rt2->rt_flags, 0);
				ret = rn_addroute(v_arg, n_arg, head,
					treenodes);
			} else {
				RT_UNLOCK(rt2);
			}
			rtfree_locked(rt2);
		}
	} else if (ret == NULL && (rt->rt_flags & RTF_CLONING)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a net route, but can't.
		 * The following case should be allowed, so we'll make a
		 * special check for this:
		 *	Two IPv6 addresses with the same prefix are assigned
		 *	to a single interface.
		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
		 *	In this case, (*1) and (*2) want to add the same
		 *	net route entry, 3ffe:0501:: -> if0.
		 *	This case should not raise an error.
		 */
		rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0,
		    RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt)));
		if (rt2) {
			RT_LOCK(rt2);
			if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
					== RTF_CLONING
			 && rt2->rt_gateway
			 && rt2->rt_gateway->sa_family == AF_LINK
			 && rt2->rt_ifp == rt->rt_ifp) {
				ret = rt2->rt_nodes;
			}
			RT_UNLOCK(rt2);
			rtfree_locked(rt2);
		}
	}

	if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC) != 0)
		in6dynroutes++;

	return ret;
}

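/*
 * Remove a route from the radix tree; if it was a dynamic (redirect-generated)
 * entry, also decrement the count of such routes.
 */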
static struct radix_node *
in6_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
	struct radix_node *rn;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	rn = rn_delete(v_arg, netmask_arg, head);
	if (rn != NULL) {
		struct rtentry *rt = (struct rtentry *)rn;
		RT_LOCK_SPIN(rt);
		if ((rt->rt_flags & RTF_DYNAMIC) != 0)
			in6dynroutes--;
		RT_UNLOCK(rt);
	}

	return (rn);
}

/*
 * Validate (unexpire) an expiring AF_INET6 route.
 */
struct radix_node *
in6_validate(struct radix_node *rn)
{
	struct rtentry *rt = (struct rtentry *)rn;

	RT_LOCK_ASSERT_HELD(rt);

	/* Is this the first reference? */
	if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) {
		rt->rt_flags &= ~RTPRF_OURS;
		rt_setexpire(rt, 0);
	}
	return (rn);
}

/*
 * Similar to in6_matroute_args except without the leaf-matching parameters.
 */
static struct radix_node *
in6_matroute(void *v_arg, struct radix_node_head *head)
{
	return (in6_matroute_args(v_arg, head, NULL, NULL));
}

/*
 * This code is the inverse of in6_clsroute(): on first reference, if we
 * were managing the route, stop doing so and turn the expiration timer
 * back off.
 */
static struct radix_node *
in6_matroute_args(void *v_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	struct radix_node *rn = rn_match_args(v_arg, head, f, w);

	if (rn != NULL) {
		RT_LOCK_SPIN((struct rtentry *)rn);
		in6_validate(rn);
		RT_UNLOCK((struct rtentry *)rn);
	}
	return (rn);
}

SYSCTL_DECL(_net_inet6_ip6);

static int rtq_reallyold = 60*60;
	/* one hour is ``really old'' */
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0, "");

static int rtq_minreallyold = 10;
	/* never automatically crank down to less */
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire,
	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0, "");

static int rtq_toomany = 128;
	/* 128 cached routes is ``too many'' */
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0, "");


/*
 * On last reference drop, mark the route as belonging to us so that it can
 * be timed out.
 */
static void
in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
{
	struct rtentry *rt = (struct rtentry *)rn;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (!(rt->rt_flags & RTF_UP))
		return;		/* prophylactic measures */

	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
		return;

	if (rt->rt_flags & RTPRF_OURS)
		return;

	if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
		return;

	/*
	 * Delete the route immediately if RTF_DELCLONE is set or
	 * if route caching is disabled (rtq_reallyold set to 0).
	 * Otherwise, let it expire and be deleted by in6_rtqkill().
	 */
	if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
		/*
		 * Delete the route from the radix tree but since we are
		 * called when the route's reference count is 0, don't
		 * deallocate it until we return from this routine by
		 * telling rtrequest that we're interested in it.
		 * Safe to drop rt_lock and use rt_key, rt_gateway,
		 * since holding rnh_lock here prevents another thread
		 * from calling rt_setgate() on this route.
		 */
		RT_UNLOCK(rt);
		if (rtrequest_locked(RTM_DELETE, rt_key(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) {
			/* Now let the caller free it */
			RT_LOCK(rt);
			RT_REMREF_LOCKED(rt);
		} else {
			RT_LOCK(rt);
		}
	} else {
		uint64_t timenow;

		timenow = net_uptime();
		rt->rt_flags |= RTPRF_OURS;
		rt_setexpire(rt,
		    rt_expiry(rt, timenow, rtq_reallyold));
	}
}

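/*
 * Argument block passed to in6_rtqkill() via rnh_walktree(); carries the
 * walk-mode flags and accumulates counters plus the next expiration time.
 */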
struct rtqk_arg {
	struct radix_node_head *rnh;
	int mode;
	int updating;
	int draining;
	int killed;
	int found;
	uint64_t nextstop;
};

/*
 * Get rid of old routes.  When draining, this deletes everything, even when
 * the timeout has not expired yet.  A route is also deleted if it is dynamic
 * and there is a sufficiently large number of such routes (more than half of
 * the maximum).  When updating, this makes sure that nothing has a timeout
 * longer than the current value of rtq_reallyold.
 */
static int
in6_rtqkill(struct radix_node *rn, void *rock)
{
	struct rtqk_arg *ap = rock;
	struct rtentry *rt = (struct rtentry *)rn;
	int err;
	uint64_t timenow;

	timenow = net_uptime();
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (rt->rt_flags & RTPRF_OURS) {
		ap->found++;
		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
		if (ap->draining || rt->rt_expire <= timenow ||
		    ((rt->rt_flags & RTF_DYNAMIC) != 0 &&
		    ip6_maxdynroutes >= 0 &&
		    in6dynroutes > ip6_maxdynroutes / 2)) {
			if (rt->rt_refcnt > 0)
				panic("rtqkill route really not free");

			/*
			 * Delete this route since we're done with it;
			 * the route may be freed afterwards, so we
			 * can no longer refer to 'rt' upon returning
			 * from rtrequest().  Safe to drop rt_lock and
			 * use rt_key, rt_gateway, since holding rnh_lock
			 * here prevents another thread from calling
			 * rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			err = rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
			if (err) {
				log(LOG_WARNING, "in6_rtqkill: error %d", err);
			} else {
				ap->killed++;
			}
		} else {
			if (ap->updating &&
			    (rt->rt_expire - timenow) >
			    rt_expiry(rt, 0, rtq_reallyold)) {
				rt_setexpire(rt, rt_expiry(rt,
				    timenow, rtq_reallyold));
			}
			ap->nextstop = lmin(ap->nextstop,
					    rt->rt_expire);
			RT_UNLOCK(rt);
		}
	} else {
		RT_UNLOCK(rt);
	}

	return 0;
}

#define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;

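/*
 * Periodic timer: walk the IPv6 routing tree, expire the routes we manage,
 * and adaptively lower rtq_reallyold when too many cached routes accumulate.
 * Reschedules itself to fire again at the next expected expiration.
 */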
static void
in6_rtqtimo(void *rock)
{
	struct radix_node_head *rnh = rock;
	struct rtqk_arg arg;
	struct timeval atv;
	static uint64_t last_adjusted_timeout = 0;
	uint64_t timenow;

	lck_mtx_lock(rnh_lock);
	/* Get the timestamp after we acquire the lock for better accuracy */
	timenow = net_uptime();

	arg.found = arg.killed = 0;
	arg.rnh = rnh;
	arg.nextstop = timenow + rtq_timeout;
	arg.draining = arg.updating = 0;
	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);

	/*
	 * Attempt to be somewhat dynamic about this:
	 * If there are ``too many'' routes sitting around taking up space,
	 * then crank down the timeout, and see if we can't make some more
	 * go away.  However, we make sure that we will never adjust more
	 * than once in rtq_timeout seconds, to keep from cranking down too
	 * hard.
	 */
	if ((arg.found - arg.killed > rtq_toomany)
	   && ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout)
	   && rtq_reallyold > rtq_minreallyold) {
		rtq_reallyold = 2*rtq_reallyold / 3;
		if (rtq_reallyold < rtq_minreallyold) {
			rtq_reallyold = rtq_minreallyold;
		}

		last_adjusted_timeout = timenow;
#if DIAGNOSTIC
		log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
		    rtq_reallyold);
#endif
		arg.found = arg.killed = 0;
		arg.updating = 1;
		rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
	}

	atv.tv_usec = 0;
	atv.tv_sec = arg.nextstop - timenow;
	lck_mtx_unlock(rnh_lock);
	timeout(in6_rtqtimo, rock, tvtohz(&atv));
}

/*
 * Age old PMTUs.
 */
struct mtuex_arg {
	struct radix_node_head *rnh;
	uint64_t nextstop;
};

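/*
 * Per-route walker for in6_mtutimo(): when a route's PMTU expiry time has
 * passed, flag it with RTF_PROBEMTU so path MTU discovery can be retried;
 * otherwise remember the earliest upcoming expiration in ap->nextstop.
 */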
static int
in6_mtuexpire(struct radix_node *rn, void *rock)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct mtuex_arg *ap = rock;
	uint64_t timenow;

	timenow = net_uptime();

	/* sanity */
	if (!rt)
		panic("rt == NULL in in6_mtuexpire");

	RT_LOCK(rt);
	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
	if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
		if (rt->rt_expire <= timenow) {
			rt->rt_flags |= RTF_PROBEMTU;
		} else {
			ap->nextstop = lmin(ap->nextstop,
					rt->rt_expire);
		}
	}
	RT_UNLOCK(rt);

	return 0;
}

#define	MTUTIMO_DEFAULT	(60*1)

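/*
 * Periodic timer: walk the IPv6 routing tree and age path MTU information
 * via in6_mtuexpire(), then reschedule itself for the next expiration.
 */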
static void
in6_mtutimo(void *rock)
{
	struct radix_node_head *rnh = rock;
	struct mtuex_arg arg;
	struct timeval atv;
	uint64_t timenow, timo;

	timenow = net_uptime();

	arg.rnh = rnh;
	arg.nextstop = timenow + MTUTIMO_DEFAULT;
	lck_mtx_lock(rnh_lock);
	rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);

	atv.tv_usec = 0;
	timo = arg.nextstop;
	if (timo < timenow) {
#if DIAGNOSTIC
		log(LOG_DEBUG, "IPv6: invalid mtu expiration time on routing table\n");
#endif
		timo = timenow + 30;	/* last resort */
	}
	atv.tv_sec = timo - timenow;
	lck_mtx_unlock(rnh_lock);
	timeout(in6_mtutimo, rock, tvtohz(&atv));
}

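/*
 * Drain the IPv6 route cache: walk the tree with the "draining" flag set so
 * that in6_rtqkill() deletes every route we manage, regardless of expiration.
 */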
void
in6_rtqdrain(void)
{
	struct radix_node_head *rnh = rt_tables[AF_INET6];
	struct rtqk_arg arg;
	arg.found = arg.killed = 0;
	arg.rnh = rnh;
	arg.nextstop = 0;
	arg.draining = 1;
	arg.updating = 0;
	lck_mtx_lock(rnh_lock);
	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
	lck_mtx_unlock(rnh_lock);
}

/*
 * Initialize our routing tree.
 */
int
in6_inithead(void **head, int off)
{
	struct radix_node_head *rnh;

	if (!rn_inithead(head, off))
		return 0;

	if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */
		return 1;	/* only do this for the real routing table */

	rnh = *head;
	rnh->rnh_addaddr = in6_addroute;
	rnh->rnh_deladdr = in6_deleteroute;
	rnh->rnh_matchaddr = in6_matroute;
	rnh->rnh_matchaddr_args = in6_matroute_args;
	rnh->rnh_close = in6_clsroute;
	in6_rtqtimo(rnh);	/* kick off timeout first time */
	in6_mtutimo(rnh);	/* kick off timeout first time */
	return 1;
}
