/*
 * Copyright (c) 1983, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

32198090Srdivacky#include "defs.h"
33198090Srdivacky
34226633Sdim#ifdef __NetBSD__
35226633Sdim__RCSID("$NetBSD$");
36226633Sdim#elif defined(__FreeBSD__)
37226633Sdim__RCSID("$FreeBSD$");
38226633Sdim#else
39226633Sdim__RCSID("$Revision: 2.27 $");
40198090Srdivacky#ident "$Revision: 2.27 $"
41218893Sdim#endif
42198090Srdivacky
43198090Srdivackystatic struct rt_spare *rts_better(struct rt_entry *);
44198090Srdivackystatic struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
45218893Sdimstatic void  set_need_flash(void);
46198090Srdivacky#ifdef _HAVE_SIN_LEN
47198090Srdivackystatic void masktrim(struct sockaddr_in *ap);
48198090Srdivacky#else
49198090Srdivackystatic void masktrim(struct sockaddr_in_new *ap);
50198090Srdivacky#endif
51198090Srdivackystatic void rtbad(struct rt_entry *);
52239462Sdim
53218893Sdim
54198090Srdivackystruct radix_node_head *rhead;		/* root of the radix tree */
55198090Srdivacky
56239462Sdimint	need_flash = 1;			/* flash update needed
57239462Sdim					 * start =1 to suppress the 1st
58239462Sdim					 */
59243830Sdim
60243830Sdimstruct timeval age_timer;		/* next check of old routes */
61198090Srdivackystruct timeval need_kern = {		/* need to update kernel table */
62218893Sdim	EPOCH+MIN_WAITTIME-1, 0
63198090Srdivacky};
64198090Srdivacky
65198090Srdivackyint	stopint;
66239462Sdim
67239462Sdimint	total_routes;
68243830Sdim
69243830Sdim/* zap any old routes through this gateway */
70239462Sdimstatic naddr age_bad_gate;
71239462Sdim
72239462Sdim
/* It is desirable to "aggregate" routes, to combine differing routes of
 * the same metric and next hop into a common route with a smaller netmask
 * or to suppress redundant routes, routes that add no information to
 * routes with smaller netmasks.
 *
 * A route is redundant if and only if any and all routes with smaller
 * but matching netmasks and nets are the same.  Since routes are
 * kept sorted in the radix tree, redundant routes always come second.
 *
 * There are two kinds of aggregations.  First, two routes of the same bit
 * mask and differing only in the least significant bit of the network
 * number can be combined into a single route with a coarser mask.
 *
 * Second, a route can be suppressed in favor of another route with a more
 * coarse mask provided no incompatible routes with intermediate masks
 * are present.  The second kind of aggregation involves suppressing routes.
 * A route must not be suppressed if an incompatible route exists with
 * an intermediate mask, since the suppressed route would be covered
 * by the intermediate.
 *
 * This code relies on the radix tree walk encountering routes
 * sorted first by address, with the smallest address first.
 */

97198090Srdivackystatic struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
98198090Srdivacky
99198090Srdivacky/* #define DEBUG_AG */
100198090Srdivacky#ifdef DEBUG_AG
101198090Srdivacky#define CHECK_AG() {int acnt = 0; struct ag_info *cag;		\
102198090Srdivacky	for (cag = ag_avail; cag != NULL; cag = cag->ag_fine)	\
103249423Sdim		acnt++;						\
104224145Sdim	for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine)	\
105224145Sdim		acnt++;						\
106224145Sdim	if (acnt != NUM_AG_SLOTS) {				\
107224145Sdim		(void)fflush(stderr);				\
108224145Sdim		abort();					\
109198090Srdivacky	}							\
110198090Srdivacky}
111198090Srdivacky#else
112198090Srdivacky#define CHECK_AG()
113198090Srdivacky#endif
114226633Sdim
115198090Srdivacky
116198090Srdivacky/* Output the contents of an aggregation table slot.
117198090Srdivacky *	This function must always be immediately followed with the deletion
118198090Srdivacky *	of the target slot.
119198090Srdivacky */
120198090Srdivackystatic void
121198090Srdivackyag_out(struct ag_info *ag,
122198090Srdivacky	 void (*out)(struct ag_info *))
123198090Srdivacky{
124198090Srdivacky	struct ag_info *ag_cors;
125198090Srdivacky	naddr bit;
126198090Srdivacky
127198090Srdivacky
128249423Sdim	/* Forget it if this route should not be output for split-horizon. */
129218893Sdim	if (ag->ag_state & AGS_SPLIT_HZ)
130198090Srdivacky		return;
131198090Srdivacky
132198090Srdivacky	/* If we output both the even and odd twins, then the immediate parent,
133224145Sdim	 * if it is present, is redundant, unless the parent manages to
134198090Srdivacky	 * aggregate into something coarser.
135239462Sdim	 * On successive calls, this code detects the even and odd twins,
136239462Sdim	 * and marks the parent.
137239462Sdim	 *
138239462Sdim	 * Note that the order in which the radix tree code emits routes
139198090Srdivacky	 * ensures that the twins are seen before the parent is emitted.
140198090Srdivacky	 */
141198090Srdivacky	ag_cors = ag->ag_cors;
142226633Sdim	if (ag_cors != NULL
143226633Sdim	    && ag_cors->ag_mask == ag->ag_mask<<1
144198090Srdivacky	    && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
145198090Srdivacky		ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
146198090Srdivacky				      ? AGS_REDUN0
147198090Srdivacky				      : AGS_REDUN1);
148198090Srdivacky	}
149198090Srdivacky
150198090Srdivacky	/* Skip it if this route is itself redundant.
151198090Srdivacky	 *
152198090Srdivacky	 * It is ok to change the contents of the slot here, since it is
153198090Srdivacky	 * always deleted next.
154198090Srdivacky	 */
155198090Srdivacky	if (ag->ag_state & AGS_REDUN0) {
156198090Srdivacky		if (ag->ag_state & AGS_REDUN1)
157198090Srdivacky			return;		/* quit if fully redundant */
158210299Sed		/* make it finer if it is half-redundant */
159224145Sdim		bit = (-ag->ag_mask) >> 1;
160198090Srdivacky		ag->ag_dst_h |= bit;
161198090Srdivacky		ag->ag_mask |= bit;
162198090Srdivacky
163198090Srdivacky	} else if (ag->ag_state & AGS_REDUN1) {
164198090Srdivacky		/* make it finer if it is half-redundant */
165198090Srdivacky		bit = (-ag->ag_mask) >> 1;
166198090Srdivacky		ag->ag_mask |= bit;
167198090Srdivacky	}
168198090Srdivacky	out(ag);
169226633Sdim}
170198090Srdivacky
171198090Srdivacky
172198090Srdivackystatic void
173198090Srdivackyag_del(struct ag_info *ag)
174198090Srdivacky{
175198090Srdivacky	CHECK_AG();
176198090Srdivacky
177210299Sed	if (ag->ag_cors == NULL)
178210299Sed		ag_corsest = ag->ag_fine;
179198090Srdivacky	else
180198090Srdivacky		ag->ag_cors->ag_fine = ag->ag_fine;
181198090Srdivacky
182198090Srdivacky	if (ag->ag_fine == NULL)
183198090Srdivacky		ag_finest = ag->ag_cors;
184198090Srdivacky	else
185218893Sdim		ag->ag_fine->ag_cors = ag->ag_cors;
186198090Srdivacky
187198090Srdivacky	ag->ag_fine = ag_avail;
188198090Srdivacky	ag_avail = ag;
189198090Srdivacky
190198090Srdivacky	CHECK_AG();
191198090Srdivacky}
192198090Srdivacky
193198090Srdivacky
194198090Srdivacky/* Flush routes waiting for aggregation.
195198090Srdivacky *	This must not suppress a route unless it is known that among all
196198090Srdivacky *	routes with coarser masks that match it, the one with the longest
197226633Sdim *	mask is appropriate.  This is ensured by scanning the routes
198198090Srdivacky *	in lexical order, and with the most restrictive mask first
199198090Srdivacky *	among routes to the same destination.
200198090Srdivacky */
201198090Srdivackyvoid
202198090Srdivackyag_flush(naddr lim_dst_h,		/* flush routes to here */
203198090Srdivacky	 naddr lim_mask,		/* matching this mask */
204198090Srdivacky	 void (*out)(struct ag_info *))
205198090Srdivacky{
206198090Srdivacky	struct ag_info *ag, *ag_cors;
207249423Sdim	naddr dst_h;
208249423Sdim
209224145Sdim
210198090Srdivacky	for (ag = ag_finest;
211198090Srdivacky	     ag != NULL && ag->ag_mask >= lim_mask;
212203954Srdivacky	     ag = ag_cors) {
213198090Srdivacky		ag_cors = ag->ag_cors;
214198090Srdivacky
215263508Sdim		/* work on only the specified routes */
216198090Srdivacky		dst_h = ag->ag_dst_h;
217198090Srdivacky		if ((dst_h & lim_mask) != lim_dst_h)
218198090Srdivacky			continue;
219198090Srdivacky
220198090Srdivacky		if (!(ag->ag_state & AGS_SUPPRESS))
221239462Sdim			ag_out(ag, out);
222239462Sdim
223263763Sdim		else for ( ; ; ag_cors = ag_cors->ag_cors) {
224263763Sdim			/* Look for a route that can suppress the
225263763Sdim			 * current route */
226263763Sdim			if (ag_cors == NULL) {
227263763Sdim				/* failed, so output it and look for
228263763Sdim				 * another route to work on
229263763Sdim				 */
230263763Sdim				ag_out(ag, out);
231263763Sdim				break;
232263763Sdim			}
233263763Sdim
234263763Sdim			if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
235263763Sdim				/* We found a route with a coarser mask that
236198090Srdivacky				 * aggregates the current target.
237198090Srdivacky				 *
238198090Srdivacky				 * If it has a different next hop, it
239198090Srdivacky				 * cannot replace the target, so output
240198090Srdivacky				 * the target.
241249423Sdim				 */
242198090Srdivacky				if (ag->ag_gate != ag_cors->ag_gate
243198090Srdivacky				    && !(ag->ag_state & AGS_FINE_GATE)
244198090Srdivacky				    && !(ag_cors->ag_state & AGS_CORS_GATE)) {
245198090Srdivacky					ag_out(ag, out);
246198090Srdivacky					break;
247198090Srdivacky				}
248226633Sdim
249198090Srdivacky				/* If the coarse route has a good enough
250198090Srdivacky				 * metric, it suppresses the target.
251198090Srdivacky				 * If the suppressed target was redundant,
252198090Srdivacky				 * then mark the suppressor redundant.
253198090Srdivacky				 */
254198090Srdivacky				if (ag_cors->ag_pref <= ag->ag_pref) {
255198090Srdivacky				    if (AG_IS_REDUN(ag->ag_state)
256249423Sdim					&& ag_cors->ag_mask==ag->ag_mask<<1) {
257198090Srdivacky					if (ag_cors->ag_dst_h == dst_h)
258198090Srdivacky					    ag_cors->ag_state |= AGS_REDUN0;
259198090Srdivacky					else
260198090Srdivacky					    ag_cors->ag_state |= AGS_REDUN1;
261198090Srdivacky				    }
262198090Srdivacky				    if (ag->ag_tag != ag_cors->ag_tag)
263198090Srdivacky					    ag_cors->ag_tag = 0;
264198090Srdivacky				    if (ag->ag_nhop != ag_cors->ag_nhop)
265198090Srdivacky					    ag_cors->ag_nhop = 0;
266198090Srdivacky				    break;
267198090Srdivacky				}
268249423Sdim			}
269198090Srdivacky		}
270198090Srdivacky
271198090Srdivacky		/* That route has either been output or suppressed */
272198090Srdivacky		ag_cors = ag->ag_cors;
273198090Srdivacky		ag_del(ag);
274198090Srdivacky	}
275218893Sdim
276198090Srdivacky	CHECK_AG();
277198090Srdivacky}
278198090Srdivacky
279198090Srdivacky
280198090Srdivacky/* Try to aggregate a route with previous routes.
281198090Srdivacky */
282198090Srdivackyvoid
283198090Srdivackyag_check(naddr	dst,
284198090Srdivacky	 naddr	mask,
285203954Srdivacky	 naddr	gate,
286198090Srdivacky	 naddr	nhop,
287198090Srdivacky	 char	metric,
288224145Sdim	 char	pref,
289198090Srdivacky	 u_int	new_seqno,
290198090Srdivacky	 u_short tag,
291249423Sdim	 u_short state,
292249423Sdim	 void (*out)(struct ag_info *))	/* output using this */
293198090Srdivacky{
294198090Srdivacky	struct ag_info *ag, *nag, *ag_cors;
295210299Sed	naddr xaddr;
296203954Srdivacky	int x;
297198090Srdivacky
298198090Srdivacky	dst = ntohl(dst);
299198090Srdivacky
300198090Srdivacky	/* Punt non-contiguous subnet masks.
301198090Srdivacky	 *
302198090Srdivacky	 * (X & -X) contains a single bit if and only if X is a power of 2.
303198090Srdivacky	 * (X + (X & -X)) == 0 if and only if X is a power of 2.
304198090Srdivacky	 */
305198090Srdivacky	if ((mask & -mask) + mask != 0) {
306198090Srdivacky		struct ag_info nc_ag;
307198090Srdivacky
308198090Srdivacky		nc_ag.ag_dst_h = dst;
309198090Srdivacky		nc_ag.ag_mask = mask;
310249423Sdim		nc_ag.ag_gate = gate;
311249423Sdim		nc_ag.ag_nhop = nhop;
312198090Srdivacky		nc_ag.ag_metric = metric;
313224145Sdim		nc_ag.ag_pref = pref;
314206083Srdivacky		nc_ag.ag_tag = tag;
315208599Srdivacky		nc_ag.ag_state = state;
316198090Srdivacky		nc_ag.ag_seqno = new_seqno;
317218893Sdim		out(&nc_ag);
318218893Sdim		return;
319198090Srdivacky	}
320198090Srdivacky
321198090Srdivacky	/* Search for the right slot in the aggregation table.
322198090Srdivacky	 */
323249423Sdim	ag_cors = NULL;
324224145Sdim	ag = ag_corsest;
325224145Sdim	while (ag != NULL) {
326198090Srdivacky		if (ag->ag_mask >= mask)
327198090Srdivacky			break;
328226633Sdim
329226633Sdim		/* Suppress old routes (i.e. combine with compatible routes
330226633Sdim		 * with coarser masks) as we look for the right slot in the
331198090Srdivacky		 * aggregation table for the new route.
332198090Srdivacky		 * A route to an address less than the current destination
333198090Srdivacky		 * will not be affected by the current route or any route
334239462Sdim		 * seen hereafter.  That means it is safe to suppress it.
335226633Sdim		 * This check keeps poor routes (e.g. with large hop counts)
336198090Srdivacky		 * from preventing suppression of finer routes.
337210299Sed		 */
338210299Sed		if (ag_cors != NULL
339198090Srdivacky		    && ag->ag_dst_h < dst
340198090Srdivacky		    && (ag->ag_state & AGS_SUPPRESS)
341198090Srdivacky		    && ag_cors->ag_pref <= ag->ag_pref
342198090Srdivacky		    && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
343207618Srdivacky		    && (ag_cors->ag_gate == ag->ag_gate
344208599Srdivacky			|| (ag->ag_state & AGS_FINE_GATE)
345208599Srdivacky			|| (ag_cors->ag_state & AGS_CORS_GATE))) {
346208599Srdivacky			/*  If the suppressed target was redundant,
347208599Srdivacky			 * then mark the suppressor redundant.
348208599Srdivacky			 */
349208599Srdivacky			if (AG_IS_REDUN(ag->ag_state)
350208599Srdivacky			    && ag_cors->ag_mask == ag->ag_mask<<1) {
351208599Srdivacky				if (ag_cors->ag_dst_h == dst)
352208599Srdivacky					ag_cors->ag_state |= AGS_REDUN0;
353208599Srdivacky				else
354208599Srdivacky					ag_cors->ag_state |= AGS_REDUN1;
355249423Sdim			}
356208599Srdivacky			if (ag->ag_tag != ag_cors->ag_tag)
357249423Sdim				ag_cors->ag_tag = 0;
358249423Sdim			if (ag->ag_nhop != ag_cors->ag_nhop)
359208599Srdivacky				ag_cors->ag_nhop = 0;
360249423Sdim			ag_del(ag);
361208599Srdivacky			CHECK_AG();
362208599Srdivacky		} else {
363208599Srdivacky			ag_cors = ag;
364207618Srdivacky		}
365249423Sdim		ag = ag_cors->ag_fine;
366249423Sdim	}
367198090Srdivacky
368198090Srdivacky	/* If we find the even/odd twin of the new route, and if the
369198090Srdivacky	 * masks and so forth are equal, we can aggregate them.
370198090Srdivacky	 * We can probably promote one of the pair.
371239462Sdim	 *
372249423Sdim	 * Since the routes are encountered in lexical order,
373249423Sdim	 * the new route must be odd.  However, the second or later
374198090Srdivacky	 * times around this loop, it could be the even twin promoted
375224145Sdim	 * from the even/odd pair of twins of the finer route.
376206083Srdivacky	 */
377208599Srdivacky	while (ag != NULL
378198090Srdivacky	       && ag->ag_mask == mask
379249423Sdim	       && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
380208599Srdivacky
381198090Srdivacky		/* Here we know the target route and the route in the current
382249423Sdim		 * slot have the same netmasks and differ by at most the
383198090Srdivacky		 * last bit.  They are either for the same destination, or
384249423Sdim		 * for an even/odd pair of destinations.
385198090Srdivacky		 */
386239462Sdim		if (ag->ag_dst_h == dst) {
387239462Sdim			/* We have two routes to the same destination.
388239462Sdim			 * Routes are encountered in lexical order, so a
389239462Sdim			 * route is never promoted until the parent route is
390249423Sdim			 * already present.  So we know that the new route is
391234353Sdim			 * a promoted (or aggregated) pair and the route
392249423Sdim			 * already in the slot is the explicit route.
393198090Srdivacky			 *
394249423Sdim			 * Prefer the best route if their metrics differ,
395249423Sdim			 * or the aggregated one if not, following a sort
396198090Srdivacky			 * of longest-match rule.
397249423Sdim			 */
398198090Srdivacky			if (pref <= ag->ag_pref) {
399249423Sdim				ag->ag_gate = gate;
400198090Srdivacky				ag->ag_nhop = nhop;
401249423Sdim				ag->ag_tag = tag;
402198090Srdivacky				ag->ag_metric = metric;
403198090Srdivacky				ag->ag_pref = pref;
404198090Srdivacky				if (ag->ag_seqno < new_seqno)
405226633Sdim					ag->ag_seqno = new_seqno;
406198090Srdivacky				x = ag->ag_state;
407198090Srdivacky				ag->ag_state = state;
408243830Sdim				state = x;
409198090Srdivacky			}
410198090Srdivacky
411243830Sdim			/* Some bits are set if they are set on either route,
412198090Srdivacky			 * except when the route is for an interface.
413198090Srdivacky			 */
414226633Sdim			if (!(ag->ag_state & AGS_IF))
415198090Srdivacky				ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
416198090Srdivacky							| AGS_REDUN0
417198090Srdivacky							| AGS_REDUN1));
418198090Srdivacky			return;
419198090Srdivacky		}
420198090Srdivacky
421249423Sdim		/* If one of the routes can be promoted and the other can
422198090Srdivacky		 * be suppressed, it may be possible to combine them or
423249423Sdim		 * worthwhile to promote one.
424198892Srdivacky		 *
425249423Sdim		 * Any route that can be promoted is always
426249423Sdim		 * marked to be eligible to be suppressed.
427249423Sdim		 */
428239462Sdim		if (!((state & AGS_AGGREGATE)
429249423Sdim		      && (ag->ag_state & AGS_SUPPRESS))
430198090Srdivacky		    && !((ag->ag_state & AGS_AGGREGATE)
431198090Srdivacky			 && (state & AGS_SUPPRESS)))
432218893Sdim			break;
433218893Sdim
434249423Sdim		/* A pair of even/odd twin routes can be combined
435208599Srdivacky		 * if either is redundant, or if they are via the
436198090Srdivacky		 * same gateway and have the same metric.
437198090Srdivacky		 */
438198090Srdivacky		if (AG_IS_REDUN(ag->ag_state)
439226633Sdim		    || AG_IS_REDUN(state)
440249423Sdim		    || (ag->ag_gate == gate
441226633Sdim			&& ag->ag_pref == pref
442226633Sdim			&& (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
443226633Sdim
444226633Sdim			/* We have both the even and odd pairs.
445226633Sdim			 * Since the routes are encountered in order,
446226633Sdim			 * the route in the slot must be the even twin.
447226633Sdim			 *
448226633Sdim			 * Combine and promote (aggregate) the pair of routes.
449226633Sdim			 */
450226633Sdim			if (new_seqno < ag->ag_seqno)
451226633Sdim				new_seqno = ag->ag_seqno;
452226633Sdim			if (!AG_IS_REDUN(state))
453226633Sdim				state &= ~AGS_REDUN1;
454226633Sdim			if (AG_IS_REDUN(ag->ag_state))
455226633Sdim				state |= AGS_REDUN0;
456226633Sdim			else
457226633Sdim				state &= ~AGS_REDUN0;
458226633Sdim			state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
459226633Sdim			if (ag->ag_tag != tag)
460226633Sdim				tag = 0;
461198090Srdivacky			if (ag->ag_nhop != nhop)
462198090Srdivacky				nhop = 0;
463198090Srdivacky
464198090Srdivacky			/* Get rid of the even twin that was already
465226633Sdim			 * in the slot.
466208599Srdivacky			 */
467208599Srdivacky			ag_del(ag);
468198090Srdivacky
469198090Srdivacky		} else if (ag->ag_pref >= pref
470226633Sdim			   && (ag->ag_state & AGS_AGGREGATE)) {
471198090Srdivacky			/* If we cannot combine the pair, maybe the route
472198090Srdivacky			 * with the worse metric can be promoted.
473198090Srdivacky			 *
474198090Srdivacky			 * Promote the old, even twin, by giving its slot
475198090Srdivacky			 * in the table to the new, odd twin.
476226633Sdim			 */
477198090Srdivacky			ag->ag_dst_h = dst;
478198090Srdivacky
479198090Srdivacky			xaddr = ag->ag_gate;
480198090Srdivacky			ag->ag_gate = gate;
481198090Srdivacky			gate = xaddr;
482198090Srdivacky
483198090Srdivacky			xaddr = ag->ag_nhop;
484198090Srdivacky			ag->ag_nhop = nhop;
485226633Sdim			nhop = xaddr;
486203954Srdivacky
487226633Sdim			x = ag->ag_tag;
488226633Sdim			ag->ag_tag = tag;
489226633Sdim			tag = x;
490198090Srdivacky
491249423Sdim			/* The promoted route is even-redundant only if the
492249423Sdim			 * even twin was fully redundant.  It is not
493198090Srdivacky			 * odd-redundant because the odd-twin will still be
494198090Srdivacky			 * in the table.
495218893Sdim			 */
496218893Sdim			x = ag->ag_state;
497218893Sdim			if (!AG_IS_REDUN(x))
498218893Sdim				x &= ~AGS_REDUN0;
499239462Sdim			x &= ~AGS_REDUN1;
500239462Sdim			ag->ag_state = state;
501218893Sdim			state = x;
502218893Sdim
503218893Sdim			x = ag->ag_metric;
504218893Sdim			ag->ag_metric = metric;
505218893Sdim			metric = x;
506218893Sdim
507218893Sdim			x = ag->ag_pref;
508218893Sdim			ag->ag_pref = pref;
509239462Sdim			pref = x;
510218893Sdim
511226633Sdim			/* take the newest sequence number */
512226633Sdim			if (new_seqno <= ag->ag_seqno)
513226633Sdim				new_seqno = ag->ag_seqno;
514226633Sdim			else
515249423Sdim				ag->ag_seqno = new_seqno;
516226633Sdim
517198090Srdivacky		} else {
518226633Sdim			if (!(state & AGS_AGGREGATE))
519226633Sdim				break;	/* cannot promote either twin */
520226633Sdim
521198090Srdivacky			/* Promote the new, odd twin by shaving its
522218893Sdim			 * mask and address.
523226633Sdim			 * The promoted route is odd-redundant only if the
524226633Sdim			 * odd twin was fully redundant.  It is not
525218893Sdim			 * even-redundant because the even twin is still in
526203954Srdivacky			 * the table.
527203954Srdivacky			 */
528198090Srdivacky			if (!AG_IS_REDUN(state))
529198090Srdivacky				state &= ~AGS_REDUN1;
530198090Srdivacky			state &= ~AGS_REDUN0;
531198090Srdivacky			if (new_seqno < ag->ag_seqno)
532198090Srdivacky				new_seqno = ag->ag_seqno;
533226633Sdim			else
534226633Sdim				ag->ag_seqno = new_seqno;
535226633Sdim		}
536226633Sdim
537226633Sdim		mask <<= 1;
538226633Sdim		dst &= mask;
539226633Sdim
540226633Sdim		if (ag_cors == NULL) {
541226633Sdim			ag = ag_corsest;
542226633Sdim			break;
543226633Sdim		}
544226633Sdim		ag = ag_cors;
545226633Sdim		ag_cors = ag->ag_cors;
546226633Sdim	}
547249423Sdim
548226633Sdim	/* When we can no longer promote and combine routes,
549226633Sdim	 * flush the old route in the target slot.  Also flush
550226633Sdim	 * any finer routes that we know will never be aggregated by
551226633Sdim	 * the new route.
552198090Srdivacky	 *
553198090Srdivacky	 * In case we moved toward coarser masks,
554198090Srdivacky	 * get back where we belong
555249423Sdim	 */
556249423Sdim	if (ag != NULL
557226633Sdim	    && ag->ag_mask < mask) {
558198090Srdivacky		ag_cors = ag;
559198090Srdivacky		ag = ag->ag_fine;
560203954Srdivacky	}
561198090Srdivacky
562249423Sdim	/* Empty the target slot
563198090Srdivacky	 */
564249423Sdim	if (ag != NULL && ag->ag_mask == mask) {
565208599Srdivacky		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
566198090Srdivacky		ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
567249423Sdim	}
568208599Srdivacky
569249423Sdim#ifdef DEBUG_AG
570249423Sdim	(void)fflush(stderr);
571198090Srdivacky	if (ag == NULL && ag_cors != ag_finest)
572198090Srdivacky		abort();
573226633Sdim	if (ag_cors == NULL && ag != ag_corsest)
574198090Srdivacky		abort();
575198090Srdivacky	if (ag != NULL && ag->ag_cors != ag_cors)
576218893Sdim		abort();
577198090Srdivacky	if (ag_cors != NULL && ag_cors->ag_fine != ag)
578198090Srdivacky		abort();
579198090Srdivacky	CHECK_AG();
580198090Srdivacky#endif
581198090Srdivacky
582198090Srdivacky	/* Save the new route on the end of the table.
583198090Srdivacky	 */
584198090Srdivacky	nag = ag_avail;
585198090Srdivacky	ag_avail = nag->ag_fine;
586198090Srdivacky
587198090Srdivacky	nag->ag_dst_h = dst;
588198090Srdivacky	nag->ag_mask = mask;
589210299Sed	nag->ag_gate = gate;
590198090Srdivacky	nag->ag_nhop = nhop;
591239462Sdim	nag->ag_metric = metric;
592239462Sdim	nag->ag_pref = pref;
593198090Srdivacky	nag->ag_tag = tag;
594210299Sed	nag->ag_state = state;
595210299Sed	nag->ag_seqno = new_seqno;
596198090Srdivacky
597198090Srdivacky	nag->ag_fine = ag;
598198090Srdivacky	if (ag != NULL)
599218893Sdim		ag->ag_cors = nag;
600198090Srdivacky	else
601198090Srdivacky		ag_finest = nag;
602198090Srdivacky	nag->ag_cors = ag_cors;
603207618Srdivacky	if (ag_cors == NULL)
604207618Srdivacky		ag_corsest = nag;
605207618Srdivacky	else
606208599Srdivacky		ag_cors->ag_fine = nag;
607208599Srdivacky	CHECK_AG();
608224145Sdim}
609224145Sdim
/* Map a routing-socket message type to a printable name.
 *	Types 1..N index the table below.  Type 0 and anything past the
 *	end of the table are formatted as "RTM type 0x.." into a static
 *	buffer, so that result is only valid until the next such call.
 */
static const char *
rtm_type_name(u_char type)
{
	static const char * const rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
#ifdef RTM_OIFINFO
		"RTM_OIFINFO",
#endif
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define NEW_RTM_PAT "RTM type %#x"
	/* the pattern's own length plus 2 leaves room for "0xff" */
	static char name0[sizeof(NEW_RTM_PAT)+2];


	if (type > sizeof(rtm_types)/sizeof(rtm_types[0])
	    || type == 0) {
		/* %#x expects an unsigned int */
		snprintf(name0, sizeof(name0), NEW_RTM_PAT, (unsigned)type);
		return name0;
	} else {
		/* message types are 1-based */
		return rtm_types[type-1];
	}
#undef NEW_RTM_PAT
}


649207618Srdivacky/* Trim a mask in a sockaddr
650206083Srdivacky *	Produce a length of 0 for an address of 0.
651206083Srdivacky *	Otherwise produce the index of the first zero byte.
652206083Srdivacky */
653204792Srdivackyvoid
654207618Srdivacky#ifdef _HAVE_SIN_LEN
655207618Srdivackymasktrim(struct sockaddr_in *ap)
656207618Srdivacky#else
657263508Sdimmasktrim(struct sockaddr_in_new *ap)
658263508Sdim#endif
659207618Srdivacky{
660207618Srdivacky	char *cp;
661224145Sdim
662206083Srdivacky	if (ap->sin_addr.s_addr == 0) {
663206083Srdivacky		ap->sin_len = 0;
664207618Srdivacky		return;
665207618Srdivacky	}
666207618Srdivacky	cp = (char *)(&ap->sin_addr.s_addr+1);
667207618Srdivacky	while (*--cp == 0)
668207618Srdivacky		continue;
669207618Srdivacky	ap->sin_len = cp - (char*)ap + 1;
670207618Srdivacky}
671207618Srdivacky
672207618Srdivacky
673207618Srdivacky/* Tell the kernel to add, delete or change a route
674207618Srdivacky */
675249423Sdimstatic void
676208599Srdivackyrtioctl(int action,			/* RTM_DELETE, etc */
677206083Srdivacky	naddr dst,
678207618Srdivacky	naddr gate,
679207618Srdivacky	naddr mask,
680224145Sdim	int metric,
681224145Sdim	int flags)
682208599Srdivacky{
683208599Srdivacky	struct {
684207618Srdivacky		struct rt_msghdr w_rtm;
685206083Srdivacky		struct sockaddr_in w_dst;
686205218Srdivacky		struct sockaddr_in w_gate;
687205218Srdivacky#ifdef _HAVE_SA_LEN
688205218Srdivacky		struct sockaddr_in w_mask;
689206083Srdivacky#else
690205218Srdivacky		struct sockaddr_in_new w_mask;
691204792Srdivacky#endif
692204792Srdivacky	} w;
693206083Srdivacky	long cc;
694204792Srdivacky#   define PAT " %-10s %s metric=%d flags=%#x"
695206083Srdivacky#   define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
696263508Sdim
697263508Sdimagain:
698263508Sdim	memset(&w, 0, sizeof(w));
699263508Sdim	w.w_rtm.rtm_msglen = sizeof(w);
700263508Sdim	w.w_rtm.rtm_version = RTM_VERSION;
701263508Sdim	w.w_rtm.rtm_type = action;
702263508Sdim	w.w_rtm.rtm_flags = flags;
703206083Srdivacky	w.w_rtm.rtm_seq = ++rt_sock_seqno;
704204792Srdivacky	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
705204792Srdivacky	if (metric != 0 || action == RTM_CHANGE) {
706206083Srdivacky		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
707206083Srdivacky		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
708198090Srdivacky	}
709206083Srdivacky	w.w_dst.sin_family = AF_INET;
710206083Srdivacky	w.w_dst.sin_addr.s_addr = dst;
711207618Srdivacky	w.w_gate.sin_family = AF_INET;
712206083Srdivacky	w.w_gate.sin_addr.s_addr = gate;
713226633Sdim#ifdef _HAVE_SA_LEN
714206083Srdivacky	w.w_dst.sin_len = sizeof(w.w_dst);
715226633Sdim	w.w_gate.sin_len = sizeof(w.w_gate);
716206083Srdivacky#endif
717206083Srdivacky	if (mask == HOST_MASK) {
718208599Srdivacky		w.w_rtm.rtm_flags |= RTF_HOST;
719206083Srdivacky		w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
720206083Srdivacky	} else {
721198090Srdivacky		w.w_rtm.rtm_addrs |= RTA_NETMASK;
722206083Srdivacky		w.w_mask.sin_addr.s_addr = htonl(mask);
723206083Srdivacky#ifdef _HAVE_SA_LEN
724206083Srdivacky		masktrim(&w.w_mask);
725206083Srdivacky		if (w.w_mask.sin_len == 0)
726206083Srdivacky			w.w_mask.sin_len = sizeof(long);
727198090Srdivacky		w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
728207618Srdivacky#endif
729207618Srdivacky	}
730208599Srdivacky
731207618Srdivacky#ifndef NO_INSTALL
732207618Srdivacky	cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
733207618Srdivacky	if (cc < 0) {
734206083Srdivacky		if (errno == ESRCH
735206083Srdivacky		    && (action == RTM_CHANGE || action == RTM_DELETE)) {
736206083Srdivacky			trace_act("route disappeared before" PAT, ARGS);
737226633Sdim			if (action == RTM_CHANGE) {
738224145Sdim				action = RTM_ADD;
739206083Srdivacky				goto again;
740263508Sdim			}
741263508Sdim			return;
742263508Sdim		}
743263508Sdim		msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
744263508Sdim		return;
745263508Sdim	} else if (cc != w.w_rtm.rtm_msglen) {
746263508Sdim		msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
747263508Sdim		       cc, w.w_rtm.rtm_msglen, ARGS);
748263508Sdim		return;
749263508Sdim	}
750239462Sdim#endif
751243830Sdim	if (TRACEKERNEL)
752263508Sdim		trace_misc("write kernel" PAT, ARGS);
753263508Sdim#undef PAT
754198090Srdivacky#undef ARGS
755206083Srdivacky}
756206083Srdivacky
757206083Srdivacky
758206083Srdivacky#define KHASH_SIZE 71			/* should be prime */
759206083Srdivacky#define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
760206083Srdivackystatic struct khash {
761239462Sdim	struct khash *k_next;
762239462Sdim	naddr	k_dst;
763206083Srdivacky	naddr	k_mask;
764198090Srdivacky	naddr	k_gate;
765198090Srdivacky	short	k_metric;
766206083Srdivacky	u_short	k_state;
767249423Sdim#define	    KS_NEW	0x001
768210299Sed#define	    KS_DELETE	0x002		/* need to delete the route */
769206083Srdivacky#define	    KS_ADD	0x004		/* add to the kernel */
770206083Srdivacky#define	    KS_CHANGE	0x008		/* tell kernel to change the route */
771206083Srdivacky#define	    KS_DEL_ADD	0x010		/* delete & add to change the kernel */
772249423Sdim#define	    KS_STATIC	0x020		/* Static flag in kernel */
773226633Sdim#define	    KS_GATEWAY	0x040		/* G flag in kernel */
774206083Srdivacky#define	    KS_DYNAMIC	0x080		/* result of redirect */
775206083Srdivacky#define	    KS_DELETED	0x100		/* already deleted from kernel */
776263508Sdim#define	    KS_CHECK	0x200
777206083Srdivacky	time_t	k_keep;
778206083Srdivacky#define	    K_KEEP_LIM	30
779263508Sdim	time_t	k_redirect_time;	/* when redirected route 1st seen */
780206083Srdivacky} *khash_bins[KHASH_SIZE];
781263508Sdim
782208599Srdivacky
783198090Srdivackystatic struct khash*
784263508Sdimkern_find(naddr dst, naddr mask, struct khash ***ppk)
785263508Sdim{
786263508Sdim	struct khash *k, **pk;
787263508Sdim
788263508Sdim	for (pk = &KHASH(dst,mask); (k = *pk) != NULL; pk = &k->k_next) {
789263508Sdim		if (k->k_dst == dst && k->k_mask == mask)
790206083Srdivacky			break;
791249423Sdim	}
792206083Srdivacky	if (ppk != NULL)
793198090Srdivacky		*ppk = pk;
794210299Sed	return k;
795210299Sed}
796210299Sed
797249423Sdim
798210299Sedstatic struct khash*
799234353Sdimkern_add(naddr dst, naddr mask)
800234353Sdim{
801234353Sdim	struct khash *k, **pk;
802234353Sdim
803234353Sdim	k = kern_find(dst, mask, &pk);
804234353Sdim	if (k != NULL)
805234353Sdim		return k;
806234353Sdim
807234353Sdim	k = (struct khash *)rtmalloc(sizeof(*k), "kern_add");
808234353Sdim
809234353Sdim	memset(k, 0, sizeof(*k));
810234353Sdim	k->k_dst = dst;
811234353Sdim	k->k_mask = mask;
812234353Sdim	k->k_state = KS_NEW;
813234353Sdim	k->k_keep = now.tv_sec;
814234353Sdim	*pk = k;
815234353Sdim
816218893Sdim	return k;
817206083Srdivacky}
818263508Sdim
819263508Sdim
820234353Sdim/* If a kernel route has a non-zero metric, check that it is still in the
821234353Sdim *	daemon table, and not deleted by interfaces coming and going.
822234353Sdim */
823234353Sdimstatic void
824234353Sdimkern_check_static(struct khash *k,
825198090Srdivacky		  struct interface *ifp)
826198090Srdivacky{
827226633Sdim	struct rt_entry *rt;
828234353Sdim	struct rt_spare new;
829234353Sdim
830234353Sdim	if (k->k_metric == 0)
831234353Sdim		return;
832234353Sdim
833234353Sdim	memset(&new, 0, sizeof(new));
834234353Sdim	new.rts_ifp = ifp;
835234353Sdim	new.rts_gate = k->k_gate;
836234353Sdim	new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr;
837234353Sdim	new.rts_metric = k->k_metric;
838234353Sdim	new.rts_time = now.tv_sec;
839234353Sdim
840234353Sdim	rt = rtget(k->k_dst, k->k_mask);
841234353Sdim	if (rt != NULL) {
842234353Sdim		if (!(rt->rt_state & RS_STATIC))
843234353Sdim			rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
844234353Sdim	} else {
845234353Sdim		rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
846234353Sdim	}
847234353Sdim}
848234353Sdim
849234353Sdim
850206083Srdivacky/* operate on a kernel entry
851234353Sdim */
852226633Sdimstatic void
853234353Sdimkern_ioctl(struct khash *k,
854234353Sdim	   int action,			/* RTM_DELETE, etc */
855249423Sdim	   int flags)
856234353Sdim
857226633Sdim{
858226633Sdim	switch (action) {
859226633Sdim	case RTM_DELETE:
860226633Sdim		k->k_state &= ~KS_DYNAMIC;
861249423Sdim		if (k->k_state & KS_DELETED)
862206083Srdivacky			return;
863198090Srdivacky		k->k_state |= KS_DELETED;
864206083Srdivacky		break;
865206083Srdivacky	case RTM_ADD:
866206083Srdivacky		k->k_state &= ~KS_DELETED;
867206083Srdivacky		break;
868206083Srdivacky	case RTM_CHANGE:
869198090Srdivacky		if (k->k_state & KS_DELETED) {
870198090Srdivacky			action = RTM_ADD;
871198090Srdivacky			k->k_state &= ~KS_DELETED;
872198090Srdivacky		}
873198090Srdivacky		break;
874198090Srdivacky	}
875198090Srdivacky
876198090Srdivacky	rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
877198090Srdivacky}
878198090Srdivacky
879198090Srdivacky
880198090Srdivacky/* add a route the kernel told us
881198090Srdivacky */
882198090Srdivackystatic void
883198090Srdivackyrtm_add(struct rt_msghdr *rtm,
884198090Srdivacky	struct rt_addrinfo *info,
885198090Srdivacky	time_t keep)
886198090Srdivacky{
887226633Sdim	struct khash *k;
888198090Srdivacky	struct interface *ifp;
889198090Srdivacky	naddr mask;
890198090Srdivacky
891198090Srdivacky
892210299Sed	if (rtm->rtm_flags & RTF_HOST) {
893210299Sed		mask = HOST_MASK;
894198090Srdivacky	} else if (INFO_MASK(info) != 0) {
895198090Srdivacky		mask = ntohl(S_ADDR(INFO_MASK(info)));
896198090Srdivacky	} else {
897198090Srdivacky		msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
898198090Srdivacky		return;
899198090Srdivacky	}
900198090Srdivacky
901205218Srdivacky	k = kern_add(S_ADDR(INFO_DST(info)), mask);
902205218Srdivacky	if (k->k_state & KS_NEW)
903205218Srdivacky		k->k_keep = now.tv_sec+keep;
904205218Srdivacky	if (INFO_GATE(info) == 0) {
905205218Srdivacky		trace_act("note %s without gateway",
906205218Srdivacky			  rtm_type_name(rtm->rtm_type));
907226633Sdim		k->k_metric = HOPCNT_INFINITY;
908243830Sdim	} else if (INFO_GATE(info)->sa_family != AF_INET) {
909243830Sdim		trace_act("note %s with gateway AF=%d",
910243830Sdim			  rtm_type_name(rtm->rtm_type),
911243830Sdim			  INFO_GATE(info)->sa_family);
912243830Sdim		k->k_metric = HOPCNT_INFINITY;
913243830Sdim	} else {
914243830Sdim		k->k_gate = S_ADDR(INFO_GATE(info));
915243830Sdim		k->k_metric = rtm->rtm_rmx.rmx_hopcount;
916243830Sdim		if (k->k_metric < 0)
917243830Sdim			k->k_metric = 0;
918243830Sdim		else if (k->k_metric > HOPCNT_INFINITY-1)
919198090Srdivacky			k->k_metric = HOPCNT_INFINITY-1;
920198090Srdivacky	}
921218893Sdim	k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
922218893Sdim			| KS_DELETED | KS_GATEWAY | KS_STATIC
923226633Sdim			| KS_NEW | KS_CHECK);
924198090Srdivacky	if (rtm->rtm_flags & RTF_GATEWAY)
925249423Sdim		k->k_state |= KS_GATEWAY;
926249423Sdim	if (rtm->rtm_flags & RTF_STATIC)
927198090Srdivacky		k->k_state |= KS_STATIC;
928198090Srdivacky
929207618Srdivacky	if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
930207618Srdivacky		if (INFO_AUTHOR(info) != 0
931249423Sdim		    && INFO_AUTHOR(info)->sa_family == AF_INET)
932226633Sdim			ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
933243830Sdim		else
934243830Sdim			ifp = NULL;
935218893Sdim		if (supplier
936218893Sdim		    && (ifp == NULL || !(ifp->int_state & IS_REDIRECT_OK))) {
937210299Sed			/* Routers are not supposed to listen to redirects,
938249423Sdim			 * so delete it if it came via an unknown interface
939210299Sed			 * or the interface does not have special permission.
940243830Sdim			 */
941243830Sdim			k->k_state &= ~KS_DYNAMIC;
942243830Sdim			k->k_state |= KS_DELETE;
943198090Srdivacky			LIM_SEC(need_kern, 0);
944207618Srdivacky			trace_act("mark for deletion redirected %s --> %s"
945198090Srdivacky				  " via %s",
946198090Srdivacky				  addrname(k->k_dst, k->k_mask, 0),
947243830Sdim				  naddr_ntoa(k->k_gate),
948226633Sdim				  ifp ? ifp->int_name : "unknown interface");
949249423Sdim		} else {
950249423Sdim			k->k_state |= KS_DYNAMIC;
951198090Srdivacky			k->k_redirect_time = now.tv_sec;
952226633Sdim			trace_act("accept redirected %s --> %s via %s",
953207618Srdivacky				  addrname(k->k_dst, k->k_mask, 0),
954198090Srdivacky				  naddr_ntoa(k->k_gate),
955207618Srdivacky				  ifp ? ifp->int_name : "unknown interface");
956243830Sdim		}
957198090Srdivacky		return;
958210299Sed	}
959210299Sed
960210299Sed	/* If it is not a static route, quit until the next comparison
961249423Sdim	 * between the kernel and daemon tables, when it will be deleted.
962249423Sdim	 */
963198090Srdivacky	if (!(k->k_state & KS_STATIC)) {
964198090Srdivacky		k->k_state |= KS_DELETE;
965207618Srdivacky		LIM_SEC(need_kern, k->k_keep);
966224145Sdim		return;
967243830Sdim	}
968198090Srdivacky
969249423Sdim	/* Put static routes with real metrics into the daemon table so
970249423Sdim	 * they can be advertised.
971198090Srdivacky	 *
972198090Srdivacky	 * Find the interface toward the gateway.
973207618Srdivacky	 */
974207618Srdivacky	ifp = iflookup(k->k_gate);
975207618Srdivacky	if (ifp == NULL)
976198090Srdivacky		msglog("static route %s --> %s impossibly lacks ifp",
977198090Srdivacky		       addrname(S_ADDR(INFO_DST(info)), mask, 0),
978243830Sdim		       naddr_ntoa(k->k_gate));
979249423Sdim
980208599Srdivacky	kern_check_static(k, ifp);
981243830Sdim}
982243830Sdim
983243830Sdim
984243830Sdim/* deal with packet loss
985243830Sdim */
986243830Sdimstatic void
987243830Sdimrtm_lose(struct rt_msghdr *rtm,
988243830Sdim	 struct rt_addrinfo *info)
989249423Sdim{
990243830Sdim	if (INFO_GATE(info) == 0
991243830Sdim	    || INFO_GATE(info)->sa_family != AF_INET) {
992198090Srdivacky		trace_act("ignore %s without gateway",
993198090Srdivacky			  rtm_type_name(rtm->rtm_type));
994198090Srdivacky		return;
995226633Sdim	}
996207618Srdivacky
997207618Srdivacky	if (rdisc_ok)
998207618Srdivacky		rdisc_age(S_ADDR(INFO_GATE(info)));
999207618Srdivacky	age(S_ADDR(INFO_GATE(info)));
1000249423Sdim}
1001226633Sdim
1002249423Sdim
1003198090Srdivacky/* Make the gateway slot of an info structure point to something
1004198090Srdivacky * useful.  If it is not already useful, but it specifies an interface,
1005198090Srdivacky * then fill in the sockaddr_in provided and point it there.
1006198090Srdivacky */
1007198090Srdivackystatic int
1008198090Srdivackyget_info_gate(struct sockaddr **sap,
1009198090Srdivacky	      struct sockaddr_in *rsin)
1010198090Srdivacky{
1011198090Srdivacky	struct sockaddr_dl *sdl = (struct sockaddr_dl *)*sap;
1012198090Srdivacky	struct interface *ifp;
1013198090Srdivacky
1014198090Srdivacky	if (sdl == NULL)
1015198090Srdivacky		return 0;
1016198090Srdivacky	if ((sdl)->sdl_family == AF_INET)
1017198090Srdivacky		return 1;
1018198090Srdivacky	if ((sdl)->sdl_family != AF_LINK)
1019198090Srdivacky		return 0;
1020
1021	ifp = ifwithindex(sdl->sdl_index, 1);
1022	if (ifp == NULL)
1023		return 0;
1024
1025	rsin->sin_addr.s_addr = ifp->int_addr;
1026#ifdef _HAVE_SA_LEN
1027	rsin->sin_len = sizeof(*rsin);
1028#endif
1029	rsin->sin_family = AF_INET;
1030	*sap = (struct sockaddr*)rsin;
1031
1032	return 1;
1033}
1034
1035
1036/* Clean the kernel table by copying it to the daemon image.
1037 * Eventually the daemon will delete any extra routes.
1038 */
1039void
1040flush_kern(void)
1041{
1042	static char *sysctl_buf;
1043	static size_t sysctl_buf_size = 0;
1044	size_t needed;
1045	int mib[6];
1046	char *next, *lim;
1047	struct rt_msghdr *rtm;
1048	struct sockaddr_in gate_sin;
1049	struct rt_addrinfo info;
1050	int i;
1051	struct khash *k;
1052
1053
1054	for (i = 0; i < KHASH_SIZE; i++) {
1055		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1056			k->k_state |= KS_CHECK;
1057		}
1058	}
1059
1060	mib[0] = CTL_NET;
1061	mib[1] = PF_ROUTE;
1062	mib[2] = 0;		/* protocol */
1063	mib[3] = 0;		/* wildcard address family */
1064	mib[4] = NET_RT_DUMP;
1065	mib[5] = 0;		/* no flags */
1066	for (;;) {
1067		if ((needed = sysctl_buf_size) != 0) {
1068			if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
1069				break;
1070			if (errno != ENOMEM && errno != EFAULT)
1071				BADERR(1,"flush_kern: sysctl(RT_DUMP)");
1072			free(sysctl_buf);
1073			needed = 0;
1074		}
1075		if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
1076			BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
1077		/* Kludge around the habit of some systems, such as
1078		 * BSD/OS 3.1, to not admit how many routes are in the
1079		 * kernel, or at least to be quite wrong.
1080		 */
1081		needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
1082		sysctl_buf = rtmalloc(sysctl_buf_size = needed,
1083				      "flush_kern sysctl(RT_DUMP)");
1084	}
1085
1086	lim = sysctl_buf + needed;
1087	for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
1088		rtm = (struct rt_msghdr *)next;
1089		if (rtm->rtm_msglen == 0) {
1090			msglog("zero length kernel route at "
1091			       " %#lx in buffer %#lx before %#lx",
1092			       (u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
1093			break;
1094		}
1095
1096		rt_xaddrs(&info,
1097			  (struct sockaddr *)(rtm+1),
1098			  (struct sockaddr *)(next + rtm->rtm_msglen),
1099			  rtm->rtm_addrs);
1100
1101		if (INFO_DST(&info) == 0
1102		    || INFO_DST(&info)->sa_family != AF_INET)
1103			continue;
1104
1105#if defined (RTF_LLINFO)
1106		/* ignore ARP table entries on systems with a merged route
1107		 * and ARP table.
1108		 */
1109		if (rtm->rtm_flags & RTF_LLINFO)
1110			continue;
1111#endif
1112#if defined(RTF_WASCLONED) && defined(__FreeBSD__)
1113		/* ignore cloned routes
1114		 */
1115		if (rtm->rtm_flags & RTF_WASCLONED)
1116			continue;
1117#endif
1118
1119		/* ignore multicast addresses
1120		 */
1121		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1122			continue;
1123
1124		if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
1125			continue;
1126
1127		/* Note static routes and interface routes, and also
1128		 * preload the image of the kernel table so that
1129		 * we can later clean it, as well as avoid making
1130		 * unneeded changes.  Keep the old kernel routes for a
1131		 * few seconds to allow a RIP or router-discovery
1132		 * response to be heard.
1133		 */
1134		rtm_add(rtm,&info,MIN_WAITTIME);
1135	}
1136
1137	for (i = 0; i < KHASH_SIZE; i++) {
1138		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1139			if (k->k_state & KS_CHECK) {
1140				msglog("%s --> %s disappeared from kernel",
1141				       addrname(k->k_dst, k->k_mask, 0),
1142				       naddr_ntoa(k->k_gate));
1143				del_static(k->k_dst, k->k_mask, k->k_gate, 1);
1144			}
1145		}
1146	}
1147}
1148
1149
1150/* Listen to announcements from the kernel
1151 */
1152void
1153read_rt(void)
1154{
1155	long cc;
1156	struct interface *ifp;
1157	struct sockaddr_in gate_sin;
1158	naddr mask, gate;
1159	union {
1160		struct {
1161			struct rt_msghdr rtm;
1162			struct sockaddr addrs[RTAX_MAX];
1163		} r;
1164		struct if_msghdr ifm;
1165	} m;
1166	char str[100], *strp;
1167	struct rt_addrinfo info;
1168
1169
1170	for (;;) {
1171		cc = read(rt_sock, &m, sizeof(m));
1172		if (cc <= 0) {
1173			if (cc < 0 && errno != EWOULDBLOCK)
1174				LOGERR("read(rt_sock)");
1175			return;
1176		}
1177
1178		if (m.r.rtm.rtm_version != RTM_VERSION) {
1179			msglog("bogus routing message version %d",
1180			       m.r.rtm.rtm_version);
1181			continue;
1182		}
1183
1184		/* Ignore our own results.
1185		 */
1186		if (m.r.rtm.rtm_type <= RTM_CHANGE
1187		    && m.r.rtm.rtm_pid == mypid) {
1188			static int complained = 0;
1189			if (!complained) {
1190				msglog("receiving our own change messages");
1191				complained = 1;
1192			}
1193			continue;
1194		}
1195
1196		if (m.r.rtm.rtm_type == RTM_IFINFO
1197		    || m.r.rtm.rtm_type == RTM_NEWADDR
1198		    || m.r.rtm.rtm_type == RTM_DELADDR) {
1199			ifp = ifwithindex(m.ifm.ifm_index,
1200					  m.r.rtm.rtm_type != RTM_DELADDR);
1201			if (ifp == NULL)
1202				trace_act("note %s with flags %#x"
1203					  " for unknown interface index #%d",
1204					  rtm_type_name(m.r.rtm.rtm_type),
1205					  m.ifm.ifm_flags,
1206					  m.ifm.ifm_index);
1207			else
1208				trace_act("note %s with flags %#x for %s",
1209					  rtm_type_name(m.r.rtm.rtm_type),
1210					  m.ifm.ifm_flags,
1211					  ifp->int_name);
1212
1213			/* After being informed of a change to an interface,
1214			 * check them all now if the check would otherwise
1215			 * be a long time from now, if the interface is
1216			 * not known, or if the interface has been turned
1217			 * off or on.
1218			 */
1219			if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1220			    || ifp == NULL
1221			    || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1222				& IFF_UP) != 0)
1223				ifinit_timer.tv_sec = now.tv_sec;
1224			continue;
1225		}
1226#ifdef RTM_OIFINFO
1227		if (m.r.rtm.rtm_type == RTM_OIFINFO)
1228			continue;	/* ignore compat message */
1229#endif
1230
1231		strlcpy(str, rtm_type_name(m.r.rtm.rtm_type), sizeof(str));
1232		strp = &str[strlen(str)];
1233		if (m.r.rtm.rtm_type <= RTM_CHANGE)
1234			strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1235
1236		rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1237			  m.r.rtm.rtm_addrs);
1238
1239		if (INFO_DST(&info) == 0) {
1240			trace_act("ignore %s without dst", str);
1241			continue;
1242		}
1243
1244		if (INFO_DST(&info)->sa_family != AF_INET) {
1245			trace_act("ignore %s for AF %d", str,
1246				  INFO_DST(&info)->sa_family);
1247			continue;
1248		}
1249
1250		mask = ((INFO_MASK(&info) != 0)
1251			? ntohl(S_ADDR(INFO_MASK(&info)))
1252			: (m.r.rtm.rtm_flags & RTF_HOST)
1253			? HOST_MASK
1254			: std_mask(S_ADDR(INFO_DST(&info))));
1255
1256		strp += sprintf(strp, ": %s",
1257				addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1258
1259		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1260			trace_act("ignore multicast %s", str);
1261			continue;
1262		}
1263
1264#if defined(RTF_LLINFO)
1265		if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1266			trace_act("ignore ARP %s", str);
1267			continue;
1268		}
1269#endif
1270
1271#if defined(RTF_WASCLONED) && defined(__FreeBSD__)
1272		if (m.r.rtm.rtm_flags & RTF_WASCLONED) {
1273			trace_act("ignore cloned %s", str);
1274			continue;
1275		}
1276#endif
1277
1278		if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1279			gate = S_ADDR(INFO_GATE(&info));
1280			strp += sprintf(strp, " --> %s", naddr_ntoa(gate));
1281		} else {
1282			gate = 0;
1283		}
1284
1285		if (INFO_AUTHOR(&info) != 0)
1286			strp += sprintf(strp, " by authority of %s",
1287					saddr_ntoa(INFO_AUTHOR(&info)));
1288
1289		switch (m.r.rtm.rtm_type) {
1290		case RTM_ADD:
1291		case RTM_CHANGE:
1292		case RTM_REDIRECT:
1293			if (m.r.rtm.rtm_errno != 0) {
1294				trace_act("ignore %s with \"%s\" error",
1295					  str, strerror(m.r.rtm.rtm_errno));
1296			} else {
1297				trace_act("%s", str);
1298				rtm_add(&m.r.rtm,&info,0);
1299			}
1300			break;
1301
1302		case RTM_DELETE:
1303			if (m.r.rtm.rtm_errno != 0
1304			    && m.r.rtm.rtm_errno != ESRCH) {
1305				trace_act("ignore %s with \"%s\" error",
1306					  str, strerror(m.r.rtm.rtm_errno));
1307			} else {
1308				trace_act("%s", str);
1309				del_static(S_ADDR(INFO_DST(&info)), mask,
1310					   gate, 1);
1311			}
1312			break;
1313
1314		case RTM_LOSING:
1315			trace_act("%s", str);
1316			rtm_lose(&m.r.rtm,&info);
1317			break;
1318
1319		default:
1320			trace_act("ignore %s", str);
1321			break;
1322		}
1323	}
1324}
1325
1326
1327/* after aggregating, note routes that belong in the kernel
1328 */
1329static void
1330kern_out(struct ag_info *ag)
1331{
1332	struct khash *k;
1333
1334
1335	/* Do not install bad routes if they are not already present.
1336	 * This includes routes that had RS_NET_SYN for interfaces that
1337	 * recently died.
1338	 */
1339	if (ag->ag_metric == HOPCNT_INFINITY) {
1340		k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1341		if (k == NULL)
1342			return;
1343	} else {
1344		k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1345	}
1346
1347	if (k->k_state & KS_NEW) {
1348		/* will need to add new entry to the kernel table */
1349		k->k_state = KS_ADD;
1350		if (ag->ag_state & AGS_GATEWAY)
1351			k->k_state |= KS_GATEWAY;
1352		k->k_gate = ag->ag_gate;
1353		k->k_metric = ag->ag_metric;
1354		return;
1355	}
1356
1357	if (k->k_state & KS_STATIC)
1358		return;
1359
1360	/* modify existing kernel entry if necessary */
1361	if (k->k_gate != ag->ag_gate
1362	    || k->k_metric != ag->ag_metric) {
1363		/* Must delete bad interface routes etc. to change them. */
1364		if (k->k_metric == HOPCNT_INFINITY)
1365			k->k_state |= KS_DEL_ADD;
1366		k->k_gate = ag->ag_gate;
1367		k->k_metric = ag->ag_metric;
1368		k->k_state |= KS_CHANGE;
1369	}
1370
1371	/* If the daemon thinks the route should exist, forget
1372	 * about any redirections.
1373	 * If the daemon thinks the route should exist, eventually
1374	 * override manual intervention by the operator.
1375	 */
1376	if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1377		k->k_state &= ~KS_DYNAMIC;
1378		k->k_state |= (KS_ADD | KS_DEL_ADD);
1379	}
1380
1381	if ((k->k_state & KS_GATEWAY)
1382	    && !(ag->ag_state & AGS_GATEWAY)) {
1383		k->k_state &= ~KS_GATEWAY;
1384		k->k_state |= (KS_ADD | KS_DEL_ADD);
1385	} else if (!(k->k_state & KS_GATEWAY)
1386		   && (ag->ag_state & AGS_GATEWAY)) {
1387		k->k_state |= KS_GATEWAY;
1388		k->k_state |= (KS_ADD | KS_DEL_ADD);
1389	}
1390
1391	/* Deleting-and-adding is necessary to change aspects of a route.
1392	 * Just delete instead of deleting and then adding a bad route.
1393	 * Otherwise, we want to keep the route in the kernel.
1394	 */
1395	if (k->k_metric == HOPCNT_INFINITY
1396	    && (k->k_state & KS_DEL_ADD))
1397		k->k_state |= KS_DELETE;
1398	else
1399		k->k_state &= ~KS_DELETE;
1400#undef RT
1401}
1402
1403
1404/* ARGSUSED */
1405static int
1406walk_kern(struct radix_node *rn,
1407	  struct walkarg *argp UNUSED)
1408{
1409#define RT ((struct rt_entry *)rn)
1410	char metric, pref;
1411	u_int ags = 0;
1412
1413
1414	/* Do not install synthetic routes */
1415	if (RT->rt_state & RS_NET_SYN)
1416		return 0;
1417
1418	if (!(RT->rt_state & RS_IF)) {
1419		/* This is an ordinary route, not for an interface.
1420		 */
1421
1422		/* aggregate, ordinary good routes without regard to
1423		 * their metric
1424		 */
1425		pref = 1;
1426		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1427
1428		/* Do not install host routes directly to hosts, to avoid
1429		 * interfering with ARP entries in the kernel table.
1430		 */
1431		if (RT_ISHOST(RT)
1432		    && ntohl(RT->rt_dst) == RT->rt_gate)
1433			return 0;
1434
1435	} else {
1436		/* This is an interface route.
1437		 * Do not install routes for "external" remote interfaces.
1438		 */
1439		if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1440			return 0;
1441
1442		/* Interfaces should override received routes.
1443		 */
1444		pref = 0;
1445		ags |= (AGS_IF | AGS_CORS_GATE);
1446
1447		/* If it is not an interface, or an alias for an interface,
1448		 * it must be a "gateway."
1449		 *
1450		 * If it is a "remote" interface, it is also a "gateway" to
1451		 * the kernel if is not an alias.
1452		 */
1453		if (RT->rt_ifp == 0
1454		    || (RT->rt_ifp->int_state & IS_REMOTE))
1455			ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1456	}
1457
1458	/* If RIP is off and IRDP is on, let the route to the discovered
1459	 * route suppress any RIP routes.  Eventually the RIP routes
1460	 * will time-out and be deleted.  This reaches the steady-state
1461	 * quicker.
1462	 */
1463	if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
1464		ags |= AGS_CORS_GATE;
1465
1466	metric = RT->rt_metric;
1467	if (metric == HOPCNT_INFINITY) {
1468		/* if the route is dead, so try hard to aggregate. */
1469		pref = HOPCNT_INFINITY;
1470		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1471		ags &= ~(AGS_IF | AGS_CORS_GATE);
1472	}
1473
1474	ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1475		 metric,pref, 0, 0, ags, kern_out);
1476	return 0;
1477#undef RT
1478}
1479
1480
1481/* Update the kernel table to match the daemon table.
1482 */
1483static void
1484fix_kern(void)
1485{
1486	int i;
1487	struct khash *k, **pk;
1488
1489
1490	need_kern = age_timer;
1491
1492	/* Walk daemon table, updating the copy of the kernel table.
1493	 */
1494	(void)rn_walktree(rhead, walk_kern, 0);
1495	ag_flush(0,0,kern_out);
1496
1497	for (i = 0; i < KHASH_SIZE; i++) {
1498		for (pk = &khash_bins[i]; (k = *pk) != NULL; ) {
1499			/* Do not touch static routes */
1500			if (k->k_state & KS_STATIC) {
1501				kern_check_static(k,0);
1502				pk = &k->k_next;
1503				continue;
1504			}
1505
1506			/* check hold on routes deleted by the operator */
1507			if (k->k_keep > now.tv_sec) {
1508				/* ensure we check when the hold is over */
1509				LIM_SEC(need_kern, k->k_keep);
1510				/* mark for the next cycle */
1511				k->k_state |= KS_DELETE;
1512				pk = &k->k_next;
1513				continue;
1514			}
1515
1516			if ((k->k_state & KS_DELETE)
1517			    && !(k->k_state & KS_DYNAMIC)) {
1518				kern_ioctl(k, RTM_DELETE, 0);
1519				*pk = k->k_next;
1520				free(k);
1521				continue;
1522			}
1523
1524			if (k->k_state & KS_DEL_ADD)
1525				kern_ioctl(k, RTM_DELETE, 0);
1526
1527			if (k->k_state & KS_ADD) {
1528				kern_ioctl(k, RTM_ADD,
1529					   ((0 != (k->k_state & (KS_GATEWAY
1530							| KS_DYNAMIC)))
1531					    ? RTF_GATEWAY : 0));
1532			} else if (k->k_state & KS_CHANGE) {
1533				kern_ioctl(k,  RTM_CHANGE,
1534					   ((0 != (k->k_state & (KS_GATEWAY
1535							| KS_DYNAMIC)))
1536					    ? RTF_GATEWAY : 0));
1537			}
1538			k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1539
1540			/* Mark this route to be deleted in the next cycle.
1541			 * This deletes routes that disappear from the
1542			 * daemon table, since the normal aging code
1543			 * will clear the bit for routes that have not
1544			 * disappeared from the daemon table.
1545			 */
1546			k->k_state |= KS_DELETE;
1547			pk = &k->k_next;
1548		}
1549	}
1550}
1551
1552
1553/* Delete a static route in the image of the kernel table.
1554 */
1555void
1556del_static(naddr dst,
1557	   naddr mask,
1558	   naddr gate,
1559	   int gone)
1560{
1561	struct khash *k;
1562	struct rt_entry *rt;
1563
1564	/* Just mark it in the table to be deleted next time the kernel
1565	 * table is updated.
1566	 * If it has already been deleted, mark it as such, and set its
1567	 * keep-timer so that it will not be deleted again for a while.
1568	 * This lets the operator delete a route added by the daemon
1569	 * and add a replacement.
1570	 */
1571	k = kern_find(dst, mask, 0);
1572	if (k != NULL && (gate == 0 || k->k_gate == gate)) {
1573		k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
1574		k->k_state |= KS_DELETE;
1575		if (gone) {
1576			k->k_state |= KS_DELETED;
1577			k->k_keep = now.tv_sec + K_KEEP_LIM;
1578		}
1579	}
1580
1581	rt = rtget(dst, mask);
1582	if (rt != NULL && (rt->rt_state & RS_STATIC))
1583		rtbad(rt);
1584}
1585
1586
1587/* Delete all routes generated from ICMP Redirects that use a given gateway,
1588 * as well as old redirected routes.
1589 */
1590void
1591del_redirects(naddr bad_gate,
1592	      time_t old)
1593{
1594	int i;
1595	struct khash *k;
1596
1597
1598	for (i = 0; i < KHASH_SIZE; i++) {
1599		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1600			if (!(k->k_state & KS_DYNAMIC)
1601			    || (k->k_state & KS_STATIC))
1602				continue;
1603
1604			if (k->k_gate != bad_gate
1605			    && k->k_redirect_time > old
1606			    && !supplier)
1607				continue;
1608
1609			k->k_state |= KS_DELETE;
1610			k->k_state &= ~KS_DYNAMIC;
1611			need_kern.tv_sec = now.tv_sec;
1612			trace_act("mark redirected %s --> %s for deletion",
1613				  addrname(k->k_dst, k->k_mask, 0),
1614				  naddr_ntoa(k->k_gate));
1615		}
1616	}
1617}
1618
1619
1620/* Start the daemon tables.
1621 */
1622extern int max_keylen;
1623
1624void
1625rtinit(void)
1626{
1627	int i;
1628	struct ag_info *ag;
1629
1630	/* Initialize the radix trees */
1631	max_keylen = sizeof(struct sockaddr_in);
1632	rn_init();
1633	rn_inithead(&rhead, 32);
1634
1635	/* mark all of the slots in the table free */
1636	ag_avail = ag_slots;
1637	for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1638		ag->ag_fine = ag+1;
1639		ag++;
1640	}
1641}
1642
1643
1644#ifdef _HAVE_SIN_LEN
1645static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
1646static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
1647#else
1648static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1649static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1650#endif
1651
1652
1653static void
1654set_need_flash(void)
1655{
1656	if (!need_flash) {
1657		need_flash = 1;
1658		/* Do not send the flash update immediately.  Wait a little
1659		 * while to hear from other routers.
1660		 */
1661		no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1662	}
1663}
1664
1665
1666/* Get a particular routing table entry
1667 */
1668struct rt_entry *
1669rtget(naddr dst, naddr mask)
1670{
1671	struct rt_entry *rt;
1672
1673	dst_sock.sin_addr.s_addr = dst;
1674	mask_sock.sin_addr.s_addr = htonl(mask);
1675	masktrim(&mask_sock);
1676	rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1677	if (!rt
1678	    || rt->rt_dst != dst
1679	    || rt->rt_mask != mask)
1680		return 0;
1681
1682	return rt;
1683}
1684
1685
1686/* Find a route to dst as the kernel would.
1687 */
1688struct rt_entry *
1689rtfind(naddr dst)
1690{
1691	dst_sock.sin_addr.s_addr = dst;
1692	return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1693}
1694
1695
1696/* add a route to the table
1697 */
1698void
1699rtadd(naddr	dst,
1700      naddr	mask,
1701      u_int	state,			/* rt_state for the entry */
1702      struct	rt_spare *new)
1703{
1704	struct rt_entry *rt;
1705	naddr smask;
1706	int i;
1707	struct rt_spare *rts;
1708
1709	rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1710	memset(rt, 0, sizeof(*rt));
1711	for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1712		rts->rts_metric = HOPCNT_INFINITY;
1713
1714	rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1715	rt->rt_dst = dst;
1716	rt->rt_dst_sock.sin_family = AF_INET;
1717#ifdef _HAVE_SIN_LEN
1718	rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1719#endif
1720	if (mask != HOST_MASK) {
1721		smask = std_mask(dst);
1722		if ((smask & ~mask) == 0 && mask > smask)
1723			state |= RS_SUBNET;
1724	}
1725	mask_sock.sin_addr.s_addr = htonl(mask);
1726	masktrim(&mask_sock);
1727	rt->rt_mask = mask;
1728	rt->rt_state = state;
1729	rt->rt_spares[0] = *new;
1730	rt->rt_time = now.tv_sec;
1731	rt->rt_poison_metric = HOPCNT_INFINITY;
1732	rt->rt_seqno = update_seqno;
1733
1734	if (++total_routes == MAX_ROUTES)
1735		msglog("have maximum (%d) routes", total_routes);
1736	if (TRACEACTIONS)
1737		trace_add_del("Add", rt);
1738
1739	need_kern.tv_sec = now.tv_sec;
1740	set_need_flash();
1741
1742	if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1743				    rhead, rt->rt_nodes)) {
1744		msglog("rnh_addaddr() failed for %s mask=%#lx",
1745		       naddr_ntoa(dst), (u_long)mask);
1746		free(rt);
1747	}
1748}
1749
1750
1751/* notice a changed route
1752 */
1753void
1754rtchange(struct rt_entry *rt,
1755	 u_int	state,			/* new state bits */
1756	 struct rt_spare *new,
1757	 char	*label)
1758{
1759	if (rt->rt_metric != new->rts_metric) {
1760		/* Fix the kernel immediately if it seems the route
1761		 * has gone bad, since there may be a working route that
1762		 * aggregates this route.
1763		 */
1764		if (new->rts_metric == HOPCNT_INFINITY) {
1765			need_kern.tv_sec = now.tv_sec;
1766			if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
1767				new->rts_time = now.tv_sec - EXPIRE_TIME;
1768		}
1769		rt->rt_seqno = update_seqno;
1770		set_need_flash();
1771	}
1772
1773	if (rt->rt_gate != new->rts_gate) {
1774		need_kern.tv_sec = now.tv_sec;
1775		rt->rt_seqno = update_seqno;
1776		set_need_flash();
1777	}
1778
1779	state |= (rt->rt_state & RS_SUBNET);
1780
1781	/* Keep various things from deciding ageless routes are stale.
1782	 */
1783	if (!AGE_RT(state, new->rts_ifp))
1784		new->rts_time = now.tv_sec;
1785
1786	if (TRACEACTIONS)
1787		trace_change(rt, state, new,
1788			     label ? label : "Chg   ");
1789
1790	rt->rt_state = state;
1791	rt->rt_spares[0] = *new;
1792}
1793
1794
1795/* check for a better route among the spares
1796 */
1797static struct rt_spare *
1798rts_better(struct rt_entry *rt)
1799{
1800	struct rt_spare *rts, *rts1;
1801	int i;
1802
1803	/* find the best alternative among the spares */
1804	rts = rt->rt_spares+1;
1805	for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1806		if (BETTER_LINK(rt,rts1,rts))
1807			rts = rts1;
1808	}
1809
1810	return rts;
1811}
1812
1813
1814/* switch to a backup route
1815 */
1816void
1817rtswitch(struct rt_entry *rt,
1818	 struct rt_spare *rts)
1819{
1820	struct rt_spare swap;
1821	char label[10];
1822
1823
1824	/* Do not change permanent routes */
1825	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1826				  | RS_NET_SYN | RS_IF)))
1827		return;
1828
1829	/* find the best alternative among the spares */
1830	if (rts == NULL)
1831		rts = rts_better(rt);
1832
1833	/* Do not bother if it is not worthwhile.
1834	 */
1835	if (!BETTER_LINK(rt, rts, rt->rt_spares))
1836		return;
1837
1838	swap = rt->rt_spares[0];
1839	(void)sprintf(label, "Use #%d", (int)(rts - rt->rt_spares));
1840	rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);
1841	if (swap.rts_metric == HOPCNT_INFINITY) {
1842		*rts = rts_empty;
1843	} else {
1844		*rts = swap;
1845	}
1846}
1847
1848
1849void
1850rtdelete(struct rt_entry *rt)
1851{
1852	struct khash *k;
1853
1854
1855	if (TRACEACTIONS)
1856		trace_add_del("Del", rt);
1857
1858	k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1859	if (k != NULL) {
1860		k->k_state |= KS_DELETE;
1861		need_kern.tv_sec = now.tv_sec;
1862	}
1863
1864	dst_sock.sin_addr.s_addr = rt->rt_dst;
1865	mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
1866	masktrim(&mask_sock);
1867	if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1868							rhead)) {
1869		msglog("rnh_deladdr() failed");
1870	} else {
1871		free(rt);
1872		total_routes--;
1873	}
1874}
1875
1876
1877void
1878rts_delete(struct rt_entry *rt,
1879	   struct rt_spare *rts)
1880{
1881	trace_upslot(rt, rts, &rts_empty);
1882	*rts = rts_empty;
1883}
1884
1885
1886/* Get rid of a bad route, and try to switch to a replacement.
1887 */
1888static void
1889rtbad(struct rt_entry *rt)
1890{
1891	struct rt_spare new;
1892
1893	/* Poison the route */
1894	new = rt->rt_spares[0];
1895	new.rts_metric = HOPCNT_INFINITY;
1896	rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
1897	rtswitch(rt, 0);
1898}
1899
1900
1901/* Junk a RS_NET_SYN or RS_LOCAL route,
1902 *	unless it is needed by another interface.
1903 */
1904void
1905rtbad_sub(struct rt_entry *rt)
1906{
1907	struct interface *ifp, *ifp1;
1908	struct intnet *intnetp;
1909	u_int state;
1910
1911
1912	ifp1 = NULL;
1913	state = 0;
1914
1915	if (rt->rt_state & RS_LOCAL) {
1916		/* Is this the route through loopback for the interface?
1917		 * If so, see if it is used by any other interfaces, such
1918		 * as a point-to-point interface with the same local address.
1919		 */
1920		LIST_FOREACH(ifp, &ifnet, int_list) {
1921			/* Retain it if another interface needs it.
1922			 */
1923			if (ifp->int_addr == rt->rt_ifp->int_addr) {
1924				state |= RS_LOCAL;
1925				ifp1 = ifp;
1926				break;
1927			}
1928		}
1929
1930	}
1931
1932	if (!(state & RS_LOCAL)) {
1933		/* Retain RIPv1 logical network route if there is another
1934		 * interface that justifies it.
1935		 */
1936		if (rt->rt_state & RS_NET_SYN) {
1937			LIST_FOREACH(ifp, &ifnet, int_list) {
1938				if ((ifp->int_state & IS_NEED_NET_SYN)
1939				    && rt->rt_mask == ifp->int_std_mask
1940				    && rt->rt_dst == ifp->int_std_addr) {
1941					state |= RS_NET_SYN;
1942					ifp1 = ifp;
1943					break;
1944				}
1945			}
1946		}
1947
1948		/* or if there is an authority route that needs it. */
1949		for (intnetp = intnets;
1950		     intnetp != NULL;
1951		     intnetp = intnetp->intnet_next) {
1952			if (intnetp->intnet_addr == rt->rt_dst
1953			    && intnetp->intnet_mask == rt->rt_mask) {
1954				state |= (RS_NET_SYN | RS_NET_INT);
1955				break;
1956			}
1957		}
1958	}
1959
1960	if (ifp1 != NULL || (state & RS_NET_SYN)) {
1961		struct rt_spare new = rt->rt_spares[0];
1962		new.rts_ifp = ifp1;
1963		rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
1964			 &new, 0);
1965	} else {
1966		rtbad(rt);
1967	}
1968}
1969
1970
1971/* Called while walking the table looking for sick interfaces
1972 * or after a time change.
1973 */
1974/* ARGSUSED */
1975int
1976walk_bad(struct radix_node *rn,
1977	 struct walkarg *argp UNUSED)
1978{
1979#define RT ((struct rt_entry *)rn)
1980	struct rt_spare *rts;
1981	int i;
1982
1983
1984	/* fix any spare routes through the interface
1985	 */
1986	rts = RT->rt_spares;
1987	for (i = NUM_SPARES; i != 1; i--) {
1988		rts++;
1989		if (rts->rts_metric < HOPCNT_INFINITY
1990		    && (rts->rts_ifp == NULL
1991			|| (rts->rts_ifp->int_state & IS_BROKE)))
1992			rts_delete(RT, rts);
1993	}
1994
1995	/* Deal with the main route
1996	 */
1997	/* finished if it has been handled before or if its interface is ok
1998	 */
1999	if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
2000		return 0;
2001
2002	/* Bad routes for other than interfaces are easy.
2003	 */
2004	if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
2005		rtbad(RT);
2006		return 0;
2007	}
2008
2009	rtbad_sub(RT);
2010	return 0;
2011#undef RT
2012}
2013
2014
2015/* Check the age of an individual route.
2016 */
2017/* ARGSUSED */
2018static int
2019walk_age(struct radix_node *rn,
2020	   struct walkarg *argp UNUSED)
2021{
2022#define RT ((struct rt_entry *)rn)
2023	struct interface *ifp;
2024	struct rt_spare *rts;
2025	int i;
2026
2027
2028	/* age all of the spare routes, including the primary route
2029	 * currently in use
2030	 */
2031	rts = RT->rt_spares;
2032	for (i = NUM_SPARES; i != 0; i--, rts++) {
2033
2034		ifp = rts->rts_ifp;
2035		if (i == NUM_SPARES) {
2036			if (!AGE_RT(RT->rt_state, ifp)) {
2037				/* Keep various things from deciding ageless
2038				 * routes are stale
2039				 */
2040				rts->rts_time = now.tv_sec;
2041				continue;
2042			}
2043
2044			/* forget RIP routes after RIP has been turned off.
2045			 */
2046			if (rip_sock < 0) {
2047				rtdelete(RT);
2048				return 0;
2049			}
2050		}
2051
2052		/* age failing routes
2053		 */
2054		if (age_bad_gate == rts->rts_gate
2055		    && rts->rts_time >= now_stale) {
2056			rts->rts_time -= SUPPLY_INTERVAL;
2057		}
2058
2059		/* trash the spare routes when they go bad */
2060		if (rts->rts_metric < HOPCNT_INFINITY
2061		    && now_garbage > rts->rts_time
2062		    && i != NUM_SPARES)
2063			rts_delete(RT, rts);
2064	}
2065
2066
2067	/* finished if the active route is still fresh */
2068	if (now_stale <= RT->rt_time)
2069		return 0;
2070
2071	/* try to switch to an alternative */
2072	rtswitch(RT, 0);
2073
2074	/* Delete a dead route after it has been publicly mourned. */
2075	if (now_garbage > RT->rt_time) {
2076		rtdelete(RT);
2077		return 0;
2078	}
2079
2080	/* Start poisoning a bad route before deleting it. */
2081	if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2082		struct rt_spare new = RT->rt_spares[0];
2083		new.rts_metric = HOPCNT_INFINITY;
2084		rtchange(RT, RT->rt_state, &new, 0);
2085	}
2086	return 0;
2087}
2088
2089
2090/* Watch for dead routes and interfaces.
2091 */
2092void
2093age(naddr bad_gate)
2094{
2095	struct interface *ifp;
2096	int need_query = 0;
2097
2098	/* If not listening to RIP, there is no need to age the routes in
2099	 * the table.
2100	 */
2101	age_timer.tv_sec = (now.tv_sec
2102			    + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2103
2104	/* Check for dead IS_REMOTE interfaces by timing their
2105	 * transmissions.
2106	 */
2107	LIST_FOREACH(ifp, &ifnet, int_list) {
2108		if (!(ifp->int_state & IS_REMOTE))
2109			continue;
2110
2111		/* ignore unreachable remote interfaces */
2112		if (!check_remote(ifp))
2113			continue;
2114
2115		/* Restore remote interface that has become reachable
2116		 */
2117		if (ifp->int_state & IS_BROKE)
2118			if_ok(ifp, "remote ");
2119
2120		if (ifp->int_act_time != NEVER
2121		    && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2122			msglog("remote interface %s to %s timed out after"
2123			       " %ld:%ld",
2124			       ifp->int_name,
2125			       naddr_ntoa(ifp->int_dstaddr),
2126			       (long)(now.tv_sec - ifp->int_act_time)/60,
2127			       (long)(now.tv_sec - ifp->int_act_time)%60);
2128			if_sick(ifp);
2129		}
2130
2131		/* If we have not heard from the other router
2132		 * recently, ask it.
2133		 */
2134		if (now.tv_sec >= ifp->int_query_time) {
2135			ifp->int_query_time = NEVER;
2136			need_query = 1;
2137		}
2138	}
2139
2140	/* Age routes. */
2141	age_bad_gate = bad_gate;
2142	(void)rn_walktree(rhead, walk_age, 0);
2143
2144	/* delete old redirected routes to keep the kernel table small
2145	 * and prevent blackholes
2146	 */
2147	del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2148
2149	/* Update the kernel routing table. */
2150	fix_kern();
2151
2152	/* poke reticent remote gateways */
2153	if (need_query)
2154		rip_query();
2155}
2156