/*
 * Copyright 1994, 1995 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: in_rmx.c,v 1.35 1998/08/05 16:59:20 bde Exp $
 */
314074Swollman
/*
 * This code does two things necessary for the enhanced TCP metrics to
 * function in a useful manner:
 *  1) It marks all non-host routes as `cloning', thus ensuring that
 *     every actual reference to such a route actually gets turned
 *     into a reference to a host route to the specific destination
 *     requested.
 *  2) When such routes lose all their references, it arranges for them
 *     to be deleted in some random collection of circumstances, so that
 *     a large quantity of stale routing data is not kept in kernel memory
 *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
 */
444074Swollman
454074Swollman#include <sys/param.h>
464074Swollman#include <sys/systm.h>
474074Swollman#include <sys/kernel.h>
4812172Sphk#include <sys/sysctl.h>
494074Swollman#include <sys/socket.h>
504074Swollman#include <sys/mbuf.h>
514893Swollman#include <sys/syslog.h>
524074Swollman
534074Swollman#include <net/if.h>
544074Swollman#include <net/route.h>
554074Swollman#include <netinet/in.h>
564074Swollman#include <netinet/in_var.h>
574074Swollman
5812579Sbdeextern int	in_inithead __P((void **head, int off));
5912579Sbde
605101Swollman#define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
614074Swollman
624074Swollman/*
634074Swollman * Do what we need to do when inserting a route.
644074Swollman */
654074Swollmanstatic struct radix_node *
664074Swollmanin_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
674074Swollman	    struct radix_node *treenodes)
684074Swollman{
694074Swollman	struct rtentry *rt = (struct rtentry *)treenodes;
709470Swollman	struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
7113581Sfenner	struct radix_node *ret;
724074Swollman
734074Swollman	/*
745792Swollman	 * For IP, all unicast non-host routes are automatically cloning.
754074Swollman	 */
7615652Swollman	if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
7715652Swollman		rt->rt_flags |= RTF_MULTICAST;
7815652Swollman
7915652Swollman	if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
8015652Swollman		rt->rt_flags |= RTF_PRCLONING;
8115652Swollman	}
8215652Swollman
8315652Swollman	/*
8415652Swollman	 * A little bit of help for both IP output and input:
8515652Swollman	 *   For host routes, we make sure that RTF_BROADCAST
8615652Swollman	 *   is set for anything that looks like a broadcast address.
8715652Swollman	 *   This way, we can avoid an expensive call to in_broadcast()
8815652Swollman	 *   in ip_output() most of the time (because the route passed
8915652Swollman	 *   to ip_output() is almost always a host route).
9015652Swollman	 *
9115652Swollman	 *   We also do the same for local addresses, with the thought
9215652Swollman	 *   that this might one day be used to speed up ip_input().
9315652Swollman	 *
9415652Swollman	 * We also mark routes to multicast addresses as such, because
9515652Swollman	 * it's easy to do and might be useful (but this is much more
9615652Swollman	 * dubious since it's so easy to inspect the address).  (This
9715652Swollman	 * is done above.)
9815652Swollman	 */
9915652Swollman	if (rt->rt_flags & RTF_HOST) {
10015652Swollman		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
10115652Swollman			rt->rt_flags |= RTF_BROADCAST;
10215652Swollman		} else {
10315652Swollman#define satosin(sa) ((struct sockaddr_in *)sa)
10415652Swollman			if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
10515652Swollman			    == sin->sin_addr.s_addr)
10615652Swollman				rt->rt_flags |= RTF_LOCAL;
10715652Swollman#undef satosin
1085792Swollman		}
1095792Swollman	}
1104074Swollman
1119470Swollman	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
1129470Swollman	    && rt->rt_ifp)
11310881Swollman		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
1149470Swollman
11513581Sfenner	ret = rn_addroute(v_arg, n_arg, head, treenodes);
11613581Sfenner	if (ret == NULL && rt->rt_flags & RTF_HOST) {
11713581Sfenner		struct rtentry *rt2;
11813581Sfenner		/*
11913581Sfenner		 * We are trying to add a host route, but can't.
12013581Sfenner		 * Find out if it is because of an
12113581Sfenner		 * ARP entry and delete it if so.
12213581Sfenner		 */
12313581Sfenner		rt2 = rtalloc1((struct sockaddr *)sin, 0,
12413581Sfenner				RTF_CLONING | RTF_PRCLONING);
12513581Sfenner		if (rt2) {
12613581Sfenner			if (rt2->rt_flags & RTF_LLINFO &&
12713581Sfenner				rt2->rt_flags & RTF_HOST &&
12813581Sfenner				rt2->rt_gateway &&
12913581Sfenner				rt2->rt_gateway->sa_family == AF_LINK) {
13013581Sfenner				rtrequest(RTM_DELETE,
13113581Sfenner					  (struct sockaddr *)rt_key(rt2),
13213581Sfenner					  rt2->rt_gateway,
13313581Sfenner					  rt_mask(rt2), rt2->rt_flags, 0);
13413581Sfenner				ret = rn_addroute(v_arg, n_arg, head,
13513581Sfenner					treenodes);
13613581Sfenner			}
13713581Sfenner			RTFREE(rt2);
13813581Sfenner		}
13913581Sfenner	}
14013581Sfenner	return ret;
1414074Swollman}
1424074Swollman
1434074Swollman/*
1444105Swollman * This code is the inverse of in_clsroute: on first reference, if we
1454105Swollman * were managing the route, stop doing so and set the expiration timer
1464105Swollman * back off again.
1474074Swollman */
1484074Swollmanstatic struct radix_node *
1494074Swollmanin_matroute(void *v_arg, struct radix_node_head *head)
1504074Swollman{
1514074Swollman	struct radix_node *rn = rn_match(v_arg, head);
1524074Swollman	struct rtentry *rt = (struct rtentry *)rn;
1534074Swollman
1544074Swollman	if(rt && rt->rt_refcnt == 0) { /* this is first reference */
1555101Swollman		if(rt->rt_flags & RTPRF_OURS) {
1565101Swollman			rt->rt_flags &= ~RTPRF_OURS;
1574105Swollman			rt->rt_rmx.rmx_expire = 0;
1584074Swollman		}
1594074Swollman	}
1604074Swollman	return rn;
1614074Swollman}
1624074Swollman
16312296Sphkstatic int rtq_reallyold = 60*60;
16412172Sphk	/* one hour is ``really old'' */
16546381SbillfSYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
16646381Sbillf    &rtq_reallyold , 0,
16746381Sbillf    "Default expiration time on dynamically learned routes");
16812172Sphk
16912296Sphkstatic int rtq_minreallyold = 10;
17012172Sphk	/* never automatically crank down to less */
17146381SbillfSYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
17246381Sbillf    &rtq_minreallyold , 0,
17346381Sbillf    "Minimum time to attempt to hold onto dynamically learned routes");
17412172Sphk
17512296Sphkstatic int rtq_toomany = 128;
17612172Sphk	/* 128 cached routes is ``too many'' */
17746381SbillfSYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
17846381Sbillf    &rtq_toomany , 0, "Upper limit on dynamically learned routes");
1794105Swollman
1804074Swollman/*
1815792Swollman * On last reference drop, mark the route as belong to us so that it can be
1824074Swollman * timed out.
1834074Swollman */
1844074Swollmanstatic void
1854074Swollmanin_clsroute(struct radix_node *rn, struct radix_node_head *head)
1864074Swollman{
1874074Swollman	struct rtentry *rt = (struct rtentry *)rn;
1888876Srgrimes
1895792Swollman	if(!(rt->rt_flags & RTF_UP))
1905792Swollman		return;		/* prophylactic measures */
1915792Swollman
1924105Swollman	if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
1934074Swollman		return;
1944074Swollman
1958876Srgrimes	if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS))
1965101Swollman	   != RTF_WASCLONED)
1974105Swollman		return;
1984074Swollman
1996399Swollman	/*
2006399Swollman	 * As requested by David Greenman:
2016399Swollman	 * If rtq_reallyold is 0, just delete the route without
2026399Swollman	 * waiting for a timeout cycle to kill it.
2036399Swollman	 */
2046399Swollman	if(rtq_reallyold != 0) {
2056399Swollman		rt->rt_flags |= RTPRF_OURS;
20634961Sphk		rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
2076399Swollman	} else {
2086399Swollman		rtrequest(RTM_DELETE,
2096399Swollman			  (struct sockaddr *)rt_key(rt),
2106399Swollman			  rt->rt_gateway, rt_mask(rt),
2116399Swollman			  rt->rt_flags, 0);
2126399Swollman	}
2134074Swollman}
2144074Swollman
2154893Swollmanstruct rtqk_arg {
2164893Swollman	struct radix_node_head *rnh;
2175101Swollman	int draining;
2184893Swollman	int killed;
2194893Swollman	int found;
2206400Swollman	int updating;
2214893Swollman	time_t nextstop;
2224893Swollman};
2234893Swollman
2244074Swollman/*
2255101Swollman * Get rid of old routes.  When draining, this deletes everything, even when
2266400Swollman * the timeout is not expired yet.  When updating, this makes sure that
2276400Swollman * nothing has a timeout longer than the current value of rtq_reallyold.
2284074Swollman */
2294893Swollmanstatic int
2304893Swollmanin_rtqkill(struct radix_node *rn, void *rock)
2314074Swollman{
2324893Swollman	struct rtqk_arg *ap = rock;
2334893Swollman	struct rtentry *rt = (struct rtentry *)rn;
2344893Swollman	int err;
2354893Swollman
2365101Swollman	if(rt->rt_flags & RTPRF_OURS) {
2374893Swollman		ap->found++;
2384893Swollman
23934961Sphk		if(ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
2404893Swollman			if(rt->rt_refcnt > 0)
2417170Sdg				panic("rtqkill route really not free");
2424893Swollman
2434893Swollman			err = rtrequest(RTM_DELETE,
2444893Swollman					(struct sockaddr *)rt_key(rt),
2454893Swollman					rt->rt_gateway, rt_mask(rt),
2464893Swollman					rt->rt_flags, 0);
2474893Swollman			if(err) {
2486568Sdg				log(LOG_WARNING, "in_rtqkill: error %d\n", err);
2494893Swollman			} else {
2504893Swollman				ap->killed++;
2514893Swollman			}
2524893Swollman		} else {
2538876Srgrimes			if(ap->updating
25434961Sphk			   && (rt->rt_rmx.rmx_expire - time_second
2556400Swollman			       > rtq_reallyold)) {
25634961Sphk				rt->rt_rmx.rmx_expire = time_second
2576400Swollman					+ rtq_reallyold;
2586400Swollman			}
2594893Swollman			ap->nextstop = lmin(ap->nextstop,
2604893Swollman					    rt->rt_rmx.rmx_expire);
2614893Swollman		}
2624893Swollman	}
2634893Swollman
2644893Swollman	return 0;
2654074Swollman}
2664074Swollman
#define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;
2695792Swollman
2704074Swollmanstatic void
2714074Swollmanin_rtqtimo(void *rock)
2724074Swollman{
2734893Swollman	struct radix_node_head *rnh = rock;
2744893Swollman	struct rtqk_arg arg;
2754893Swollman	struct timeval atv;
2766399Swollman	static time_t last_adjusted_timeout = 0;
2775180Swollman	int s;
2784074Swollman
2794893Swollman	arg.found = arg.killed = 0;
2804893Swollman	arg.rnh = rnh;
28134961Sphk	arg.nextstop = time_second + rtq_timeout;
2826400Swollman	arg.draining = arg.updating = 0;
2835180Swollman	s = splnet();
2844893Swollman	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
2855180Swollman	splx(s);
2866399Swollman
2876399Swollman	/*
2886399Swollman	 * Attempt to be somewhat dynamic about this:
2896399Swollman	 * If there are ``too many'' routes sitting around taking up space,
2906399Swollman	 * then crank down the timeout, and see if we can't make some more
2916399Swollman	 * go away.  However, we make sure that we will never adjust more
2926399Swollman	 * than once in rtq_timeout seconds, to keep from cranking down too
2936399Swollman	 * hard.
2946399Swollman	 */
2956399Swollman	if((arg.found - arg.killed > rtq_toomany)
29634961Sphk	   && (time_second - last_adjusted_timeout >= rtq_timeout)
2976399Swollman	   && rtq_reallyold > rtq_minreallyold) {
2986399Swollman		rtq_reallyold = 2*rtq_reallyold / 3;
2996399Swollman		if(rtq_reallyold < rtq_minreallyold) {
3006399Swollman			rtq_reallyold = rtq_minreallyold;
3016399Swollman		}
3026399Swollman
30334961Sphk		last_adjusted_timeout = time_second;
30416542Snate#ifdef DIAGNOSTIC
3056568Sdg		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
3066399Swollman		    rtq_reallyold);
30716542Snate#endif
3086399Swollman		arg.found = arg.killed = 0;
3096400Swollman		arg.updating = 1;
3106399Swollman		s = splnet();
3116399Swollman		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
3126399Swollman		splx(s);
3136399Swollman	}
3146399Swollman
3154893Swollman	atv.tv_usec = 0;
31638128Sbde	atv.tv_sec = arg.nextstop - time_second;
31738128Sbde	timeout(in_rtqtimo, rock, tvtohz(&atv));
3184074Swollman}
3194074Swollman
32012933Swollmanvoid
3214893Swollmanin_rtqdrain(void)
3224893Swollman{
3235101Swollman	struct radix_node_head *rnh = rt_tables[AF_INET];
3245101Swollman	struct rtqk_arg arg;
3255180Swollman	int s;
3265101Swollman	arg.found = arg.killed = 0;
3275101Swollman	arg.rnh = rnh;
3285101Swollman	arg.nextstop = 0;
3295101Swollman	arg.draining = 1;
3306400Swollman	arg.updating = 0;
3315180Swollman	s = splnet();
3325101Swollman	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
3335101Swollman	splx(s);
3344893Swollman}
3354893Swollman
3364074Swollman/*
3374074Swollman * Initialize our routing tree.
3384074Swollman */
3394074Swollmanint
3404074Swollmanin_inithead(void **head, int off)
3414074Swollman{
3424074Swollman	struct radix_node_head *rnh;
3434074Swollman
3444074Swollman	if(!rn_inithead(head, off))
3454074Swollman		return 0;
3464074Swollman
3474896Swollman	if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */
3484896Swollman		return 1;	/* only do this for the real routing table */
3494896Swollman
3504074Swollman	rnh = *head;
3514074Swollman	rnh->rnh_addaddr = in_addroute;
3524074Swollman	rnh->rnh_matchaddr = in_matroute;
3534074Swollman	rnh->rnh_close = in_clsroute;
3544105Swollman	in_rtqtimo(rnh);	/* kick off timeout first time */
3554074Swollman	return 1;
3564074Swollman}
3574074Swollman
35822672Swollman
/*
 * This zaps old routes when the interface goes down.
 * Currently it doesn't delete static routes; there are
 * arguments one could make for both behaviors.  For the moment,
 * we will adopt the Principle of Least Surprise and leave them
 * alone (with the knowledge that this will not be enough for some
 * people).  The ones we really want to get rid of are things like ARP
 * entries, since the user might down the interface, walk over to a completely
 * different network, and plug back in.
 */
struct in_ifadown_arg {
	struct radix_node_head *rnh;	/* tree being walked */
	struct ifaddr *ifa;		/* address whose routes to zap */
};
37322672Swollman
37422672Swollmanstatic int
37522672Swollmanin_ifadownkill(struct radix_node *rn, void *xap)
37622672Swollman{
37722672Swollman	struct in_ifadown_arg *ap = xap;
37822672Swollman	struct rtentry *rt = (struct rtentry *)rn;
37922672Swollman	int err;
38022672Swollman
38122672Swollman	if (rt->rt_ifa == ap->ifa && !(rt->rt_flags & RTF_STATIC)) {
38234914Speter		/*
38334914Speter		 * We need to disable the automatic prune that happens
38434914Speter		 * in this case in rtrequest() because it will blow
38534914Speter		 * away the pointers that rn_walktree() needs in order
38634914Speter		 * continue our descent.  We will end up deleting all
38734914Speter		 * the routes that rtrequest() would have in any case,
38834914Speter		 * so that behavior is not needed there.
38934914Speter		 */
39034914Speter		rt->rt_flags &= ~RTF_PRCLONING;
39122672Swollman		err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
39222672Swollman				rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
39322672Swollman		if (err) {
39422672Swollman			log(LOG_WARNING, "in_ifadownkill: error %d\n", err);
39522672Swollman		}
39622672Swollman	}
39722672Swollman	return 0;
39822672Swollman}
39922672Swollman
40022672Swollmanint
40122672Swollmanin_ifadown(struct ifaddr *ifa)
40222672Swollman{
40322672Swollman	struct in_ifadown_arg arg;
40422672Swollman	struct radix_node_head *rnh;
40522672Swollman
40622672Swollman	if (ifa->ifa_addr->sa_family != AF_INET)
40722672Swollman		return 1;
40822672Swollman
40922672Swollman	arg.rnh = rnh = rt_tables[AF_INET];
41022672Swollman	arg.ifa = ifa;
41122672Swollman	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
41222672Swollman	ifa->ifa_flags &= ~IFA_ROUTE;
41322672Swollman	return 0;
41422672Swollman}
415