table.c revision 20342
1/*
2 * Copyright (c) 1983, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if !defined(lint) && !defined(sgi) && !defined(__NetBSD__)
35static char sccsid[] = "@(#)tables.c	8.1 (Berkeley) 6/5/93";
36#elif defined(__NetBSD__)
37static char rcsid[] = "$NetBSD$";
38#endif
39
40#include "defs.h"
41
42static struct rt_spare *rts_better(struct rt_entry *);
43
44struct radix_node_head *rhead;		/* root of the radix tree */
45
46int	need_flash = 1;			/* flash update needed
47					 * start =1 to suppress the 1st
48					 */
49
50struct timeval age_timer;		/* next check of old routes */
51struct timeval need_kern = {		/* need to update kernel table */
52	EPOCH+MIN_WAITTIME-1
53};
54
55int	stopint;
56
57int	total_routes;
58
59/* zap any old routes through this gateway */
60naddr	age_bad_gate;
61
62
63/* It is desirable to "aggregate" routes, to combine differing routes of
64 * the same metric and next hop into a common route with a smaller netmask
65 * or to suppress redundant routes, routes that add no information to
66 * routes with smaller netmasks.
67 *
68 * A route is redundant if and only if any and all routes with smaller
69 * but matching netmasks and nets are the same.  Since routes are
70 * kept sorted in the radix tree, redundant routes always come second.
71 *
72 * There are two kinds of aggregations.  First, two routes of the same bit
73 * mask and differing only in the least significant bit of the network
74 * number can be combined into a single route with a coarser mask.
75 *
76 * Second, a route can be suppressed in favor of another route with a more
77 * coarse mask provided no incompatible routes with intermediate masks
78 * are present.  The second kind of aggregation involves suppressing routes.
79 * A route must not be suppressed if an incompatible route exists with
80 * an intermediate mask, since the suppressed route would be covered
81 * by the intermediate.
82 *
83 * This code relies on the radix tree walk encountering routes
84 * sorted first by address, with the smallest address first.
85 */
86
87struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
88
89/* #define DEBUG_AG */
90#ifdef DEBUG_AG
91#define CHECK_AG() {int acnt = 0; struct ag_info *cag;		\
92	for (cag = ag_avail; cag != 0; cag = cag->ag_fine)	\
93		acnt++;						\
94	for (cag = ag_corsest; cag != 0; cag = cag->ag_fine)	\
95		acnt++;						\
96	if (acnt != NUM_AG_SLOTS) {				\
97		(void)fflush(stderr);				\
98		abort();					\
99	}							\
100}
101#else
102#define CHECK_AG()
103#endif
104
105
106/* Output the contents of an aggregation table slot.
107 *	This function must always be immediately followed with the deletion
108 *	of the target slot.
109 */
110static void
111ag_out(struct ag_info *ag,
112	 void (*out)(struct ag_info *))
113{
114	struct ag_info *ag_cors;
115	naddr bit;
116
117
118	/* If we output both the even and odd twins, then the immediate parent,
119	 * if it is present, is redundant, unless the parent manages to
120	 * aggregate into something coarser.
121	 * On successive calls, this code detects the even and odd twins,
122	 * and marks the parent.
123	 *
124	 * Note that the order in which the radix tree code emits routes
125	 * ensures that the twins are seen before the parent is emitted.
126	 */
127	ag_cors = ag->ag_cors;
128	if (ag_cors != 0
129	    && ag_cors->ag_mask == ag->ag_mask<<1
130	    && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
131		ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
132				      ? AGS_REDUN0
133				      : AGS_REDUN1);
134	}
135
136	/* Skip it if this route is itself redundant.
137	 *
138	 * It is ok to change the contents of the slot here, since it is
139	 * always deleted next.
140	 */
141	if (ag->ag_state & AGS_REDUN0) {
142		if (ag->ag_state & AGS_REDUN1)
143			return;
144		bit = (-ag->ag_mask) >> 1;
145		ag->ag_dst_h |= bit;
146		ag->ag_mask |= bit;
147
148	} else if (ag->ag_state & AGS_REDUN1) {
149		bit = (-ag->ag_mask) >> 1;
150		ag->ag_mask |= bit;
151	}
152	out(ag);
153}
154
155
156static void
157ag_del(struct ag_info *ag)
158{
159	CHECK_AG();
160
161	if (ag->ag_cors == 0)
162		ag_corsest = ag->ag_fine;
163	else
164		ag->ag_cors->ag_fine = ag->ag_fine;
165
166	if (ag->ag_fine == 0)
167		ag_finest = ag->ag_cors;
168	else
169		ag->ag_fine->ag_cors = ag->ag_cors;
170
171	ag->ag_fine = ag_avail;
172	ag_avail = ag;
173
174	CHECK_AG();
175}
176
177
178/* Flush routes waiting for aggretation.
179 *	This must not suppress a route unless it is known that among all
180 *	routes with coarser masks that match it, the one with the longest
181 *	mask is appropriate.  This is ensured by scanning the routes
182 *	in lexical order, and with the most restritive mask first
183 *	among routes to the same destination.
184 */
185void
186ag_flush(naddr lim_dst_h,		/* flush routes to here */
187	 naddr lim_mask,		/* matching this mask */
188	 void (*out)(struct ag_info *))
189{
190	struct ag_info *ag, *ag_cors;
191	naddr dst_h;
192
193
194	for (ag = ag_finest;
195	     ag != 0 && ag->ag_mask >= lim_mask;
196	     ag = ag_cors) {
197		ag_cors = ag->ag_cors;
198
199		/* work on only the specified routes */
200		dst_h = ag->ag_dst_h;
201		if ((dst_h & lim_mask) != lim_dst_h)
202			continue;
203
204		if (!(ag->ag_state & AGS_SUPPRESS))
205			ag_out(ag, out);
206
207		else for ( ; ; ag_cors = ag_cors->ag_cors) {
208			/* Look for a route that can suppress the
209			 * current route */
210			if (ag_cors == 0) {
211				/* failed, so output it and look for
212				 * another route to work on
213				 */
214				ag_out(ag, out);
215				break;
216			}
217
218			if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
219				/* We found a route with a coarser mask that
220				 * aggregates the current target.
221				 *
222				 * If it has a different next hop, it
223				 * cannot replace the target, so output
224				 * the target.
225				 */
226				if (ag->ag_gate != ag_cors->ag_gate
227				    && !(ag->ag_state & AGS_FINE_GATE)
228				    && !(ag_cors->ag_state & AGS_CORS_GATE)) {
229					ag_out(ag, out);
230					break;
231				}
232
233				/* If the coarse route has a good enough
234				 * metric, it suppresses the target.
235				 */
236				if (ag_cors->ag_pref <= ag->ag_pref) {
237				    if (ag_cors->ag_seqno > ag->ag_seqno)
238					ag_cors->ag_seqno = ag->ag_seqno;
239				    if (AG_IS_REDUN(ag->ag_state)
240					&& ag_cors->ag_mask==ag->ag_mask<<1) {
241					if (ag_cors->ag_dst_h == dst_h)
242					    ag_cors->ag_state |= AGS_REDUN0;
243					else
244					    ag_cors->ag_state |= AGS_REDUN1;
245				    }
246				    if (ag->ag_tag != ag_cors->ag_tag)
247					    ag_cors->ag_tag = 0;
248				    if (ag->ag_nhop != ag_cors->ag_nhop)
249					    ag_cors->ag_nhop = 0;
250				    break;
251				}
252			}
253		}
254
255		/* That route has either been output or suppressed */
256		ag_cors = ag->ag_cors;
257		ag_del(ag);
258	}
259
260	CHECK_AG();
261}
262
263
264/* Try to aggregate a route with previous routes.
265 */
266void
267ag_check(naddr	dst,
268	 naddr	mask,
269	 naddr	gate,
270	 naddr	nhop,
271	 char	metric,
272	 char	pref,
273	 u_int	seqno,
274	 u_short tag,
275	 u_short state,
276	 void (*out)(struct ag_info *))	/* output using this */
277{
278	struct ag_info *ag, *nag, *ag_cors;
279	naddr xaddr;
280	int x;
281
282	NTOHL(dst);
283
284	/* Punt non-contiguous subnet masks.
285	 *
286	 * (X & -X) contains a single bit if and only if X is a power of 2.
287	 * (X + (X & -X)) == 0 if and only if X is a power of 2.
288	 */
289	if ((mask & -mask) + mask != 0) {
290		struct ag_info nc_ag;
291
292		nc_ag.ag_dst_h = dst;
293		nc_ag.ag_mask = mask;
294		nc_ag.ag_gate = gate;
295		nc_ag.ag_nhop = nhop;
296		nc_ag.ag_metric = metric;
297		nc_ag.ag_pref = pref;
298		nc_ag.ag_tag = tag;
299		nc_ag.ag_state = state;
300		nc_ag.ag_seqno = seqno;
301		out(&nc_ag);
302		return;
303	}
304
305	/* Search for the right slot in the aggregation table.
306	 */
307	ag_cors = 0;
308	ag = ag_corsest;
309	while (ag != 0) {
310		if (ag->ag_mask >= mask)
311			break;
312
313		/* Suppress old routes (i.e. combine with compatible routes
314		 * with coarser masks) as we look for the right slot in the
315		 * aggregation table for the new route.
316		 * A route to an address less than the current destination
317		 * will not be affected by the current route or any route
318		 * seen hereafter.  That means it is safe to suppress it.
319		 * This check keeps poor routes (eg. with large hop counts)
320		 * from preventing suppresion of finer routes.
321		 */
322		if (ag_cors != 0
323		    && ag->ag_dst_h < dst
324		    && (ag->ag_state & AGS_SUPPRESS)
325		    && ag_cors->ag_pref <= ag->ag_pref
326		    && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
327		    && (ag_cors->ag_gate == ag->ag_gate
328			|| (ag->ag_state & AGS_FINE_GATE)
329			|| (ag_cors->ag_state & AGS_CORS_GATE))) {
330			if (ag_cors->ag_seqno > ag->ag_seqno)
331				ag_cors->ag_seqno = ag->ag_seqno;
332			if (AG_IS_REDUN(ag->ag_state)
333			    && ag_cors->ag_mask==ag->ag_mask<<1) {
334				if (ag_cors->ag_dst_h == dst)
335					ag_cors->ag_state |= AGS_REDUN0;
336				else
337					ag_cors->ag_state |= AGS_REDUN1;
338			}
339			if (ag->ag_tag != ag_cors->ag_tag)
340				ag_cors->ag_tag = 0;
341			if (ag->ag_nhop != ag_cors->ag_nhop)
342				ag_cors->ag_nhop = 0;
343			ag_del(ag);
344			CHECK_AG();
345		} else {
346			ag_cors = ag;
347		}
348		ag = ag_cors->ag_fine;
349	}
350
351	/* If we find the even/odd twin of the new route, and if the
352	 * masks and so forth are equal, we can aggregate them.
353	 * We can probably promote one of the pair.
354	 *
355	 * Since the routes are encountered in lexical order,
356	 * the new route must be odd.  However, the second or later
357	 * times around this loop, it could be the even twin promoted
358	 * from the even/odd pair of twins of the finer route.
359	 */
360	while (ag != 0
361	       && ag->ag_mask == mask
362	       && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
363
364		/* Here we know the target route and the route in the current
365		 * slot have the same netmasks and differ by at most the
366		 * last bit.  They are either for the same destination, or
367		 * for an even/odd pair of destinations.
368		 */
369		if (ag->ag_dst_h == dst) {
370			/* We have two routes to the same destination.
371			 * Routes are encountered in lexical order, so a
372			 * route is never promoted until the parent route is
373			 * already present.  So we know that the new route is
374			 * a promoted pair and the route already in the slot
375			 * is the explicit route.
376			 *
377			 * Prefer the best route if their metrics differ,
378			 * or the promoted one if not, following a sort
379			 * of longest-match rule.
380			 */
381			if (pref <= ag->ag_pref) {
382				ag->ag_gate = gate;
383				ag->ag_nhop = nhop;
384				ag->ag_tag = tag;
385				ag->ag_metric = metric;
386				ag->ag_pref = pref;
387				x = ag->ag_state;
388				ag->ag_state = state;
389				state = x;
390			}
391
392			/* The sequence number controls flash updating,
393			 * and should be the smaller of the two.
394			 */
395			if (ag->ag_seqno > seqno)
396				ag->ag_seqno = seqno;
397
398			/* some bits are set if they are set on either route */
399			ag->ag_state |= (state & (AGS_PROMOTE_EITHER
400						  | AGS_REDUN0 | AGS_REDUN1));
401			return;
402		}
403
404		/* If one of the routes can be promoted and the other can
405		 * be suppressed, it may be possible to combine them or
406		 * worthwhile to promote one.
407		 *
408		 * Note that any route that can be promoted is always
409		 * marked to be eligible to be suppressed.
410		 */
411		if (!((state & AGS_PROMOTE)
412		      && (ag->ag_state & AGS_SUPPRESS))
413		    && !((ag->ag_state & AGS_PROMOTE)
414			 && (state & AGS_SUPPRESS)))
415			break;
416
417		/* A pair of even/odd twin routes can be combined
418		 * if either is redundant, or if they are via the
419		 * same gateway and have the same metric.
420		 */
421		if (AG_IS_REDUN(ag->ag_state)
422		    || AG_IS_REDUN(state)
423		    || (ag->ag_gate == gate
424			&& ag->ag_pref == pref
425			&& (state & ag->ag_state & AGS_PROMOTE) != 0)) {
426
427			/* We have both the even and odd pairs.
428			 * Since the routes are encountered in order,
429			 * the route in the slot must be the even twin.
430			 *
431			 * Combine and promote the pair of routes.
432			 */
433			if (seqno > ag->ag_seqno)
434				seqno = ag->ag_seqno;
435			if (!AG_IS_REDUN(state))
436				state &= ~AGS_REDUN1;
437			if (AG_IS_REDUN(ag->ag_state))
438				state |= AGS_REDUN0;
439			else
440				state &= ~AGS_REDUN0;
441			state |= (ag->ag_state & AGS_PROMOTE_EITHER);
442			if (ag->ag_tag != tag)
443				tag = 0;
444			if (ag->ag_nhop != nhop)
445				nhop = 0;
446
447			/* Get rid of the even twin that was already
448			 * in the slot.
449			 */
450			ag_del(ag);
451
452		} else if (ag->ag_pref >= pref
453			   && (ag->ag_state & AGS_PROMOTE)) {
454			/* If we cannot combine the pair, maybe the route
455			 * with the worse metric can be promoted.
456			 *
457			 * Promote the old, even twin, by giving its slot
458			 * in the table to the new, odd twin.
459			 */
460			ag->ag_dst_h = dst;
461
462			xaddr = ag->ag_gate;
463			ag->ag_gate = gate;
464			gate = xaddr;
465
466			xaddr = ag->ag_nhop;
467			ag->ag_nhop = nhop;
468			nhop = xaddr;
469
470			x = ag->ag_tag;
471			ag->ag_tag = tag;
472			tag = x;
473
474			x = ag->ag_state;
475			ag->ag_state = state;
476			state = x;
477			if (!AG_IS_REDUN(state))
478				state &= ~AGS_REDUN0;
479
480			x = ag->ag_metric;
481			ag->ag_metric = metric;
482			metric = x;
483
484			x = ag->ag_pref;
485			ag->ag_pref = pref;
486			pref = x;
487
488			if (seqno >= ag->ag_seqno)
489				seqno = ag->ag_seqno;
490			else
491				ag->ag_seqno = seqno;
492
493		} else {
494			if (!(state & AGS_PROMOTE))
495				break;	/* cannot promote either twin */
496
497			/* promote the new, odd twin by shaving its
498			 * mask and address.
499			 */
500			if (seqno > ag->ag_seqno)
501				seqno = ag->ag_seqno;
502			else
503				ag->ag_seqno = seqno;
504			if (!AG_IS_REDUN(state))
505				state &= ~AGS_REDUN1;
506		}
507
508		mask <<= 1;
509		dst &= mask;
510
511		if (ag_cors == 0) {
512			ag = ag_corsest;
513			break;
514		}
515		ag = ag_cors;
516		ag_cors = ag->ag_cors;
517	}
518
519	/* When we can no longer promote and combine routes,
520	 * flush the old route in the target slot.  Also flush
521	 * any finer routes that we know will never be aggregated by
522	 * the new route.
523	 *
524	 * In case we moved toward coarser masks,
525	 * get back where we belong
526	 */
527	if (ag != 0
528	    && ag->ag_mask < mask) {
529		ag_cors = ag;
530		ag = ag->ag_fine;
531	}
532
533	/* Empty the target slot
534	 */
535	if (ag != 0 && ag->ag_mask == mask) {
536		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
537		ag = (ag_cors == 0) ? ag_corsest : ag_cors->ag_fine;
538	}
539
540#ifdef DEBUG_AG
541	(void)fflush(stderr);
542	if (ag == 0 && ag_cors != ag_finest)
543		abort();
544	if (ag_cors == 0 && ag != ag_corsest)
545		abort();
546	if (ag != 0 && ag->ag_cors != ag_cors)
547		abort();
548	if (ag_cors != 0 && ag_cors->ag_fine != ag)
549		abort();
550	CHECK_AG();
551#endif
552
553	/* Save the new route on the end of the table.
554	 */
555	nag = ag_avail;
556	ag_avail = nag->ag_fine;
557
558	nag->ag_dst_h = dst;
559	nag->ag_mask = mask;
560	nag->ag_gate = gate;
561	nag->ag_nhop = nhop;
562	nag->ag_metric = metric;
563	nag->ag_pref = pref;
564	nag->ag_tag = tag;
565	nag->ag_state = state;
566	nag->ag_seqno = seqno;
567
568	nag->ag_fine = ag;
569	if (ag != 0)
570		ag->ag_cors = nag;
571	else
572		ag_finest = nag;
573	nag->ag_cors = ag_cors;
574	if (ag_cors == 0)
575		ag_corsest = nag;
576	else
577		ag_cors->ag_fine = nag;
578	CHECK_AG();
579}
580
581
582static char *
583rtm_type_name(u_char type)
584{
585	static char *rtm_types[] = {
586		"RTM_ADD",
587		"RTM_DELETE",
588		"RTM_CHANGE",
589		"RTM_GET",
590		"RTM_LOSING",
591		"RTM_REDIRECT",
592		"RTM_MISS",
593		"RTM_LOCK",
594		"RTM_OLDADD",
595		"RTM_OLDDEL",
596		"RTM_RESOLVE",
597		"RTM_NEWADDR",
598		"RTM_DELADDR",
599		"RTM_IFINFO"
600	};
601	static char name0[10];
602
603
604	if (type > sizeof(rtm_types)/sizeof(rtm_types[0])
605	    || type == 0) {
606		sprintf(name0, "RTM type %#x", type);
607		return name0;
608	} else {
609		return rtm_types[type-1];
610	}
611}
612
613
614/* Trim a mask in a sockaddr
615 *	Produce a length of 0 for an address of 0.
616 *	Otherwise produce the index of the first zero byte.
617 */
618void
619#ifdef _HAVE_SIN_LEN
620masktrim(struct sockaddr_in *ap)
621#else
622masktrim(struct sockaddr_in_new *ap)
623#endif
624{
625	register char *cp;
626
627	if (ap->sin_addr.s_addr == 0) {
628		ap->sin_len = 0;
629		return;
630	}
631	cp = (char *)(&ap->sin_addr.s_addr+1);
632	while (*--cp == 0)
633		continue;
634	ap->sin_len = cp - (char*)ap + 1;
635}
636
637
638/* Tell the kernel to add, delete or change a route
639 */
640static void
641rtioctl(int action,			/* RTM_DELETE, etc */
642	naddr dst,
643	naddr gate,
644	naddr mask,
645	int metric,
646	int flags)
647{
648	struct {
649		struct rt_msghdr w_rtm;
650		struct sockaddr_in w_dst;
651		struct sockaddr_in w_gate;
652#ifdef _HAVE_SA_LEN
653		struct sockaddr_in w_mask;
654#else
655		struct sockaddr_in_new w_mask;
656#endif
657	} w;
658	long cc;
659
660again:
661	bzero(&w, sizeof(w));
662	w.w_rtm.rtm_msglen = sizeof(w);
663	w.w_rtm.rtm_version = RTM_VERSION;
664	w.w_rtm.rtm_type = action;
665	w.w_rtm.rtm_flags = flags;
666	w.w_rtm.rtm_seq = ++rt_sock_seqno;
667	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
668	if (metric != 0) {
669		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
670		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
671	}
672	w.w_dst.sin_family = AF_INET;
673	w.w_dst.sin_addr.s_addr = dst;
674	w.w_gate.sin_family = AF_INET;
675	w.w_gate.sin_addr.s_addr = gate;
676#ifdef _HAVE_SA_LEN
677	w.w_dst.sin_len = sizeof(w.w_dst);
678	w.w_gate.sin_len = sizeof(w.w_gate);
679#endif
680	if (mask == HOST_MASK) {
681		w.w_rtm.rtm_flags |= RTF_HOST;
682		w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
683	} else {
684		w.w_rtm.rtm_addrs |= RTA_NETMASK;
685		w.w_mask.sin_addr.s_addr = htonl(mask);
686#ifdef _HAVE_SA_LEN
687		masktrim(&w.w_mask);
688		if (w.w_mask.sin_len == 0)
689			w.w_mask.sin_len = sizeof(long);
690		w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
691#endif
692	}
693
694	if (TRACEKERNEL)
695		trace_kernel("write kernel %s %s->%s metric=%d flags=%#x\n",
696			     rtm_type_name(action),
697			     addrname(dst, mask, 0), naddr_ntoa(gate),
698			     metric, flags);
699
700#ifndef NO_INSTALL
701	cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
702	if (cc == w.w_rtm.rtm_msglen)
703		return;
704	if (cc < 0) {
705		if (errno == ESRCH
706		    && (action == RTM_CHANGE || action == RTM_DELETE)) {
707			trace_act("route to %s disappeared before %s",
708				  addrname(dst, mask, 0),
709				  rtm_type_name(action));
710			if (action == RTM_CHANGE) {
711				action = RTM_ADD;
712				goto again;
713			}
714			return;
715		}
716		msglog("write(rt_sock) %s %s --> %s: %s",
717		       rtm_type_name(action),
718		       addrname(dst, mask, 0), naddr_ntoa(gate),
719		       strerror(errno));
720	} else {
721		msglog("write(rt_sock) wrote %d instead of %d",
722		       cc, w.w_rtm.rtm_msglen);
723	}
724#endif
725}
726
727
728#define KHASH_SIZE 71			/* should be prime */
729#define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
730static struct khash {
731	struct khash *k_next;
732	naddr	k_dst;
733	naddr	k_mask;
734	naddr	k_gate;
735	short	k_metric;
736	u_short	k_state;
737#define	    KS_NEW	0x001
738#define	    KS_DELETE	0x002
739#define	    KS_ADD	0x004		/* add to the kernel */
740#define	    KS_CHANGE	0x008		/* tell kernel to change the route */
741#define	    KS_DEL_ADD	0x010		/* delete & add to change the kernel */
742#define	    KS_STATIC	0x020		/* Static flag in kernel */
743#define	    KS_GATEWAY	0x040		/* G flag in kernel */
744#define	    KS_DYNAMIC	0x080		/* result of redirect */
745#define	    KS_DELETED	0x100		/* already deleted */
746	time_t	k_keep;
747#define	    K_KEEP_LIM	30
748	time_t	k_redirect_time;	/* when redirected route 1st seen */
749} *khash_bins[KHASH_SIZE];
750
751
752static struct khash*
753kern_find(naddr dst, naddr mask, struct khash ***ppk)
754{
755	struct khash *k, **pk;
756
757	for (pk = &KHASH(dst,mask); (k = *pk) != 0; pk = &k->k_next) {
758		if (k->k_dst == dst && k->k_mask == mask)
759			break;
760	}
761	if (ppk != 0)
762		*ppk = pk;
763	return k;
764}
765
766
767static struct khash*
768kern_add(naddr dst, naddr mask)
769{
770	struct khash *k, **pk;
771
772	k = kern_find(dst, mask, &pk);
773	if (k != 0)
774		return k;
775
776	k = (struct khash *)malloc(sizeof(*k));
777
778	bzero(k, sizeof(*k));
779	k->k_dst = dst;
780	k->k_mask = mask;
781	k->k_state = KS_NEW;
782	k->k_keep = now.tv_sec;
783	*pk = k;
784
785	return k;
786}
787
788
789/* If a kernel route has a non-zero metric, check that it is still in the
790 *	daemon table, and not deleted by interfaces coming and going.
791 */
792static void
793kern_check_static(struct khash *k,
794		  struct interface *ifp)
795{
796	struct rt_entry *rt;
797	naddr int_addr;
798
799	if (k->k_metric == 0)
800		return;
801
802	int_addr = (ifp != 0) ? ifp->int_addr : loopaddr;
803
804	rt = rtget(k->k_dst, k->k_mask);
805	if (rt != 0) {
806		if (!(rt->rt_state & RS_STATIC))
807			rtchange(rt, rt->rt_state | RS_STATIC,
808				 k->k_gate, int_addr,
809				 k->k_metric, 0, ifp, now.tv_sec, 0);
810	} else {
811		rtadd(k->k_dst, k->k_mask, k->k_gate, int_addr,
812		      k->k_metric, 0, RS_STATIC, ifp);
813	}
814}
815
816
817/* add a route the kernel told us
818 */
819static void
820rtm_add(struct rt_msghdr *rtm,
821	struct rt_addrinfo *info,
822	time_t keep)
823{
824	struct khash *k;
825	struct interface *ifp;
826	naddr mask;
827
828
829	if (rtm->rtm_flags & RTF_HOST) {
830		mask = HOST_MASK;
831	} else if (INFO_MASK(info) != 0) {
832		mask = ntohl(S_ADDR(INFO_MASK(info)));
833	} else {
834		msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
835		return;
836	}
837
838	if (INFO_GATE(info) == 0
839	    || INFO_GATE(info)->sa_family != AF_INET) {
840		msglog("ignore %s without gateway",
841		       rtm_type_name(rtm->rtm_type));
842		return;
843	}
844
845	k = kern_add(S_ADDR(INFO_DST(info)), mask);
846	if (k->k_state & KS_NEW)
847		k->k_keep = now.tv_sec+keep;
848	k->k_gate = S_ADDR(INFO_GATE(info));
849	k->k_metric = rtm->rtm_rmx.rmx_hopcount;
850	if (k->k_metric < 0)
851		k->k_metric = 0;
852	else if (k->k_metric > HOPCNT_INFINITY)
853		 k->k_metric = HOPCNT_INFINITY;
854	k->k_state &= ~(KS_DELETED | KS_GATEWAY | KS_STATIC | KS_NEW);
855	if (rtm->rtm_flags & RTF_GATEWAY)
856		k->k_state |= KS_GATEWAY;
857	if (rtm->rtm_flags & RTF_STATIC)
858		k->k_state |= KS_STATIC;
859
860	if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
861		if (INFO_AUTHOR(info) != 0
862		    && INFO_AUTHOR(info)->sa_family == AF_INET)
863			ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
864		else
865			ifp = 0;
866		if (supplier
867		    && (ifp == 0 || !(ifp->int_state & IS_REDIRECT_OK))) {
868			/* Routers are not supposed to listen to redirects,
869			 * so delete it if it came via an unknown interface
870			 * or the interface does not have special permission.
871			 */
872			k->k_state &= ~KS_DYNAMIC;
873			k->k_state |= KS_DELETE;
874			LIM_SEC(need_kern, 0);
875			trace_act("mark for deletion redirected %s --> %s"
876				  " via %s",
877				  addrname(k->k_dst, k->k_mask, 0),
878				  naddr_ntoa(k->k_gate),
879				  ifp ? ifp->int_name : "unknown interface");
880		} else {
881			k->k_state |= KS_DYNAMIC;
882			k->k_redirect_time = now.tv_sec;
883			trace_act("accept redirected %s --> %s via %s",
884				  addrname(k->k_dst, k->k_mask, 0),
885				  naddr_ntoa(k->k_gate),
886				  ifp ? ifp->int_name : "unknown interface");
887		}
888		return;
889	}
890
891	/* If it is not a static route, quit until the next comparison
892	 * between the kernel and daemon tables, when it will be deleted.
893	 */
894	if (!(k->k_state & KS_STATIC)) {
895		k->k_state |= KS_DELETE;
896		LIM_SEC(need_kern, k->k_keep);
897		return;
898	}
899
900	/* Put static routes with real metrics into the daemon table so
901	 * they can be advertised.
902	 *
903	 * Find the interface toward the gateway.
904	 */
905	ifp = iflookup(k->k_gate);
906	if (ifp == 0)
907		msglog("static route %s --> %s impossibly lacks ifp",
908		       addrname(S_ADDR(INFO_DST(info)), mask, 0),
909		       naddr_ntoa(k->k_gate));
910
911	kern_check_static(k, ifp);
912}
913
914
915/* deal with packet loss
916 */
917static void
918rtm_lose(struct rt_msghdr *rtm,
919	 struct rt_addrinfo *info)
920{
921	if (INFO_GATE(info) == 0
922	    || INFO_GATE(info)->sa_family != AF_INET) {
923		trace_act("ignore %s without gateway",
924			  rtm_type_name(rtm->rtm_type));
925		return;
926	}
927
928	if (!supplier)
929		rdisc_age(S_ADDR(INFO_GATE(info)));
930
931	age(S_ADDR(INFO_GATE(info)));
932}
933
934
935/* Clean the kernel table by copying it to the daemon image.
936 * Eventually the daemon will delete any extra routes.
937 */
938void
939flush_kern(void)
940{
941	size_t needed;
942	int mib[6];
943	char *buf, *next, *lim;
944	struct rt_msghdr *rtm;
945	struct interface *ifp;
946	static struct sockaddr_in gate_sa;
947	struct rt_addrinfo info;
948
949
950	mib[0] = CTL_NET;
951	mib[1] = PF_ROUTE;
952	mib[2] = 0;		/* protocol */
953	mib[3] = 0;		/* wildcard address family */
954	mib[4] = NET_RT_DUMP;
955	mib[5] = 0;		/* no flags */
956	if (sysctl(mib, 6, 0, &needed, 0, 0) < 0) {
957		DBGERR(1,"RT_DUMP-sysctl-estimate");
958		return;
959	}
960	buf = malloc(needed);
961	if (sysctl(mib, 6, buf, &needed, 0, 0) < 0)
962		BADERR(1,"RT_DUMP");
963	lim = buf + needed;
964	for (next = buf; next < lim; next += rtm->rtm_msglen) {
965		rtm = (struct rt_msghdr *)next;
966
967		rt_xaddrs(&info,
968			  (struct sockaddr *)(rtm+1),
969			  (struct sockaddr *)(next + rtm->rtm_msglen),
970			  rtm->rtm_addrs);
971
972		if (INFO_DST(&info) == 0
973		    || INFO_DST(&info)->sa_family != AF_INET)
974			continue;
975
976		/* ignore ARP table entries on systems with a merged route
977		 * and ARP table.
978		 */
979		if (rtm->rtm_flags & RTF_LLINFO)
980			continue;
981
982		if (INFO_GATE(&info) == 0)
983			continue;
984		if (INFO_GATE(&info)->sa_family != AF_INET) {
985			if (INFO_GATE(&info)->sa_family != AF_LINK)
986				continue;
987			ifp = ifwithindex(((struct sockaddr_dl *)
988					   INFO_GATE(&info))->sdl_index);
989			if (ifp == 0)
990				continue;
991			if ((ifp->int_if_flags & IFF_POINTOPOINT)
992			    || S_ADDR(INFO_DST(&info)) == ifp->int_addr)
993				gate_sa.sin_addr.s_addr = ifp->int_addr;
994			else
995				gate_sa.sin_addr.s_addr = htonl(ifp->int_net);
996#ifdef _HAVE_SA_LEN
997			gate_sa.sin_len = sizeof(gate_sa);
998#endif
999			gate_sa.sin_family = AF_INET;
1000			INFO_GATE(&info) = (struct sockaddr *)&gate_sa;
1001		}
1002
1003		/* ignore multicast addresses
1004		 */
1005		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1006			continue;
1007
1008		/* Note static routes and interface routes, and also
1009		 * preload the image of the kernel table so that
1010		 * we can later clean it, as well as avoid making
1011		 * unneeded changes.  Keep the old kernel routes for a
1012		 * few seconds to allow a RIP or router-discovery
1013		 * response to be heard.
1014		 */
1015		rtm_add(rtm,&info,MIN_WAITTIME);
1016	}
1017	free(buf);
1018}
1019
1020
1021/* Listen to announcements from the kernel
1022 */
1023void
1024read_rt(void)
1025{
1026	long cc;
1027	struct interface *ifp;
1028	naddr mask;
1029	union {
1030		struct {
1031			struct rt_msghdr rtm;
1032			struct sockaddr addrs[RTAX_MAX];
1033		} r;
1034		struct if_msghdr ifm;
1035	} m;
1036	char str[100], *strp;
1037	struct rt_addrinfo info;
1038
1039
1040	for (;;) {
1041		cc = read(rt_sock, &m, sizeof(m));
1042		if (cc <= 0) {
1043			if (cc < 0 && errno != EWOULDBLOCK)
1044				LOGERR("read(rt_sock)");
1045			return;
1046		}
1047
1048		if (m.r.rtm.rtm_version != RTM_VERSION) {
1049			msglog("bogus routing message version %d",
1050			       m.r.rtm.rtm_version);
1051			continue;
1052		}
1053
1054		/* Ignore our own results.
1055		 */
1056		if (m.r.rtm.rtm_type <= RTM_CHANGE
1057		    && m.r.rtm.rtm_pid == mypid) {
1058			static int complained = 0;
1059			if (!complained) {
1060				msglog("receiving our own change messages");
1061				complained = 1;
1062			}
1063			continue;
1064		}
1065
1066		if (m.r.rtm.rtm_type == RTM_IFINFO
1067		    || m.r.rtm.rtm_type == RTM_NEWADDR
1068		    || m.r.rtm.rtm_type == RTM_DELADDR) {
1069			ifp = ifwithindex(m.ifm.ifm_index);
1070			if (ifp == 0)
1071				trace_act("note %s with flags %#x"
1072					  " for index #%d",
1073					  rtm_type_name(m.r.rtm.rtm_type),
1074					  m.ifm.ifm_flags,
1075					  m.ifm.ifm_index);
1076			else
1077				trace_act("note %s with flags %#x for %s",
1078					  rtm_type_name(m.r.rtm.rtm_type),
1079					  m.ifm.ifm_flags,
1080					  ifp->int_name);
1081
1082			/* After being informed of a change to an interface,
1083			 * check them all now if the check would otherwise
1084			 * be a long time from now, if the interface is
1085			 * not known, or if the interface has been turned
1086			 * off or on.
1087			 */
1088			if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1089			    || ifp == 0
1090			    || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1091				& IFF_UP_RUNNING) != 0)
1092				ifinit_timer.tv_sec = now.tv_sec;
1093			continue;
1094		}
1095
1096		strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
1097		strp = &str[strlen(str)];
1098		if (m.r.rtm.rtm_type <= RTM_CHANGE)
1099			strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1100
1101		rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1102			  m.r.rtm.rtm_addrs);
1103
1104		if (INFO_DST(&info) == 0) {
1105			trace_act("ignore %s without dst", str);
1106			continue;
1107		}
1108
1109		if (INFO_DST(&info)->sa_family != AF_INET) {
1110			trace_act("ignore %s for AF %d", str,
1111				  INFO_DST(&info)->sa_family);
1112			continue;
1113		}
1114
1115		mask = ((INFO_MASK(&info) != 0)
1116			? ntohl(S_ADDR(INFO_MASK(&info)))
1117			: (m.r.rtm.rtm_flags & RTF_HOST)
1118			? HOST_MASK
1119			: std_mask(S_ADDR(INFO_DST(&info))));
1120
1121		strp += sprintf(strp, ": %s",
1122				addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1123
1124		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1125			trace_act("ignore multicast %s", str);
1126			continue;
1127		}
1128
1129		if (INFO_GATE(&info) != 0
1130		    && INFO_GATE(&info)->sa_family == AF_INET)
1131			strp += sprintf(strp, " --> %s",
1132					saddr_ntoa(INFO_GATE(&info)));
1133
1134		if (INFO_AUTHOR(&info) != 0)
1135			strp += sprintf(strp, " by authority of %s",
1136					saddr_ntoa(INFO_AUTHOR(&info)));
1137
1138		switch (m.r.rtm.rtm_type) {
1139		case RTM_ADD:
1140		case RTM_CHANGE:
1141		case RTM_REDIRECT:
1142			if (m.r.rtm.rtm_errno != 0) {
1143				trace_act("ignore %s with \"%s\" error",
1144					  str, strerror(m.r.rtm.rtm_errno));
1145			} else {
1146				trace_act("%s", str);
1147				rtm_add(&m.r.rtm,&info,0);
1148			}
1149			break;
1150
1151		case RTM_DELETE:
1152			if (m.r.rtm.rtm_errno != 0) {
1153				trace_act("ignore %s with \"%s\" error",
1154					  str, strerror(m.r.rtm.rtm_errno));
1155			} else {
1156				trace_act("%s", str);
1157				del_static(S_ADDR(INFO_DST(&info)), mask, 1);
1158			}
1159			break;
1160
1161		case RTM_LOSING:
1162			trace_act("%s", str);
1163			rtm_lose(&m.r.rtm,&info);
1164			break;
1165
1166		default:
1167			trace_act("ignore %s", str);
1168			break;
1169		}
1170	}
1171}
1172
1173
1174/* after aggregating, note routes that belong in the kernel
1175 */
1176static void
1177kern_out(struct ag_info *ag)
1178{
1179	struct khash *k;
1180
1181
1182	/* Do not install bad routes if they are not already present.
1183	 * This includes routes that had RS_NET_SYN for interfaces that
1184	 * recently died.
1185	 */
1186	if (ag->ag_metric == HOPCNT_INFINITY) {
1187		k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1188		if (k == 0)
1189			return;
1190	} else {
1191		k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1192	}
1193
1194	if (k->k_state & KS_NEW) {
1195		/* will need to add new entry to the kernel table */
1196		k->k_state = KS_ADD;
1197		if (ag->ag_state & AGS_GATEWAY)
1198			k->k_state |= KS_GATEWAY;
1199		k->k_gate = ag->ag_gate;
1200		k->k_metric = ag->ag_metric;
1201		return;
1202	}
1203
1204	if (k->k_state & KS_STATIC)
1205		return;
1206
1207	/* modify existing kernel entry if necessary */
1208	if (k->k_gate != ag->ag_gate
1209	    || k->k_metric != ag->ag_metric) {
1210		k->k_gate = ag->ag_gate;
1211		k->k_metric = ag->ag_metric;
1212		k->k_state |= KS_CHANGE;
1213	}
1214
1215	if (k->k_state & KS_DYNAMIC) {
1216		k->k_state &= ~KS_DYNAMIC;
1217		k->k_state |= (KS_ADD | KS_DEL_ADD);
1218	}
1219
1220	if ((k->k_state & KS_GATEWAY)
1221	    && !(ag->ag_state & AGS_GATEWAY)) {
1222		k->k_state &= ~KS_GATEWAY;
1223		k->k_state |= (KS_ADD | KS_DEL_ADD);
1224	} else if (!(k->k_state & KS_GATEWAY)
1225		   && (ag->ag_state & AGS_GATEWAY)) {
1226		k->k_state |= KS_GATEWAY;
1227		k->k_state |= (KS_ADD | KS_DEL_ADD);
1228	}
1229
1230	/* Deleting-and-adding is necessary to change aspects of a route.
1231	 * Just delete instead of deleting and then adding a bad route.
1232	 * Otherwise, we want to keep the route in the kernel.
1233	 */
1234	if (k->k_metric == HOPCNT_INFINITY
1235	    && (k->k_state & KS_DEL_ADD))
1236		k->k_state |= KS_DELETE;
1237	else
1238		k->k_state &= ~KS_DELETE;
1239#undef RT
1240}
1241
1242
1243/* ARGSUSED */
1244static int
1245walk_kern(struct radix_node *rn,
1246	  struct walkarg *w)
1247{
1248#define RT ((struct rt_entry *)rn)
1249	char metric, pref;
1250	u_int ags = 0;
1251
1252
1253	/* Do not install synthetic routes */
1254	if (RT->rt_state & RS_NET_SYN)
1255		return 0;
1256
1257	if (!(RT->rt_state & RS_IF)) {
1258		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_PROMOTE);
1259
1260	} else {
1261		/* Do not install routes for "external" remote interfaces.
1262		 */
1263		if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1264			return 0;
1265
1266		ags |= AGS_IF;
1267
1268		/* If it is not an interface, or an alias for an interface,
1269		 * it must be a "gateway."
1270		 *
1271		 * If it is a "remote" interface, it is also a "gateway" to
1272		 * the kernel if is not a alias.
1273		 */
1274		if (RT->rt_ifp == 0
1275		    || (RT->rt_ifp->int_state & IS_REMOTE))
1276			ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_PROMOTE);
1277	}
1278
1279	if (RT->rt_state & RS_RDISC)
1280		ags |= AGS_CORS_GATE;
1281
1282	/* aggregate good routes without regard to their metric */
1283	pref = 1;
1284	metric = RT->rt_metric;
1285	if (metric == HOPCNT_INFINITY) {
1286		/* if the route is dead, so try hard to aggregate. */
1287		pref = HOPCNT_INFINITY;
1288		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1289	}
1290
1291	ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1292		 metric,pref, 0, 0, ags, kern_out);
1293	return 0;
1294#undef RT
1295}
1296
1297
1298/* Update the kernel table to match the daemon table.
1299 */
1300static void
1301fix_kern(void)
1302{
1303	int i, flags;
1304	struct khash *k, **pk;
1305
1306
1307	need_kern = age_timer;
1308
1309	/* Walk daemon table, updating the copy of the kernel table.
1310	 */
1311	(void)rn_walktree(rhead, walk_kern, 0);
1312	ag_flush(0,0,kern_out);
1313
1314	for (i = 0; i < KHASH_SIZE; i++) {
1315		for (pk = &khash_bins[i]; (k = *pk) != 0; ) {
1316			/* Do not touch static routes */
1317			if (k->k_state & KS_STATIC) {
1318				kern_check_static(k,0);
1319				pk = &k->k_next;
1320				continue;
1321			}
1322
1323			/* check hold on routes deleted by the operator */
1324			if (k->k_keep > now.tv_sec) {
1325				LIM_SEC(need_kern, k->k_keep);
1326				k->k_state |= KS_DELETE;
1327				pk = &k->k_next;
1328				continue;
1329			}
1330
1331			if ((k->k_state & (KS_DELETE | KS_DYNAMIC))
1332			    == KS_DELETE) {
1333				if (!(k->k_state & KS_DELETED))
1334					rtioctl(RTM_DELETE,
1335						k->k_dst, k->k_gate, k->k_mask,
1336						0, 0);
1337				*pk = k->k_next;
1338				free(k);
1339				continue;
1340			}
1341
1342			if (0 != (k->k_state&(KS_ADD|KS_CHANGE|KS_DEL_ADD))) {
1343				if (k->k_state & KS_DEL_ADD) {
1344					rtioctl(RTM_DELETE,
1345						k->k_dst,k->k_gate,k->k_mask,
1346						0, 0);
1347					k->k_state &= ~KS_DYNAMIC;
1348				}
1349
1350				flags = 0;
1351				if (0 != (k->k_state&(KS_GATEWAY|KS_DYNAMIC)))
1352					flags |= RTF_GATEWAY;
1353
1354				if (k->k_state & KS_ADD) {
1355					rtioctl(RTM_ADD,
1356						k->k_dst, k->k_gate, k->k_mask,
1357						k->k_metric, flags);
1358				} else if (k->k_state & KS_CHANGE) {
1359					rtioctl(RTM_CHANGE,
1360						k->k_dst,k->k_gate,k->k_mask,
1361						k->k_metric, flags);
1362				}
1363				k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1364			}
1365
1366			/* Mark this route to be deleted in the next cycle.
1367			 * This deletes routes that disappear from the
1368			 * daemon table, since the normal aging code
1369			 * will clear the bit for routes that have not
1370			 * disappeared from the daemon table.
1371			 */
1372			k->k_state |= KS_DELETE;
1373			pk = &k->k_next;
1374		}
1375	}
1376}
1377
1378
1379/* Delete a static route in the image of the kernel table.
1380 */
1381void
1382del_static(naddr dst,
1383	   naddr mask,
1384	   int gone)
1385{
1386	struct khash *k;
1387	struct rt_entry *rt;
1388
1389	/* Just mark it in the table to be deleted next time the kernel
1390	 * table is updated.
1391	 * If it has already been deleted, mark it as such, and set its
1392	 * keep-timer so that it will not be deleted again for a while.
1393	 * This lets the operator delete a route added by the daemon
1394	 * and add a replacement.
1395	 */
1396	k = kern_find(dst, mask, 0);
1397	if (k != 0) {
1398		k->k_state &= ~(KS_STATIC | KS_DYNAMIC);
1399		k->k_state |= KS_DELETE;
1400		if (gone) {
1401			k->k_state |= KS_DELETED;
1402			k->k_keep = now.tv_sec + K_KEEP_LIM;
1403		}
1404	}
1405
1406	rt = rtget(dst, mask);
1407	if (rt != 0 && (rt->rt_state & RS_STATIC))
1408		rtbad(rt);
1409}
1410
1411
1412/* Delete all routes generated from ICMP Redirects that use a given gateway,
1413 * as well as old redirected routes.
1414 */
1415void
1416del_redirects(naddr bad_gate,
1417	      time_t old)
1418{
1419	int i;
1420	struct khash *k;
1421
1422
1423	for (i = 0; i < KHASH_SIZE; i++) {
1424		for (k = khash_bins[i]; k != 0; k = k->k_next) {
1425			if (!(k->k_state & KS_DYNAMIC)
1426			    || (k->k_state & KS_STATIC))
1427				continue;
1428
1429			if (k->k_gate != bad_gate
1430			    && k->k_redirect_time > old
1431			    && !supplier)
1432				continue;
1433
1434			k->k_state |= KS_DELETE;
1435			k->k_state &= ~KS_DYNAMIC;
1436			need_kern.tv_sec = now.tv_sec;
1437			trace_act("mark redirected %s --> %s for deletion",
1438				  addrname(k->k_dst, k->k_mask, 0),
1439				  naddr_ntoa(k->k_gate));
1440		}
1441	}
1442}
1443
1444
1445/* Start the daemon tables.
1446 */
1447void
1448rtinit(void)
1449{
1450	extern int max_keylen;
1451	int i;
1452	struct ag_info *ag;
1453
1454	/* Initialize the radix trees */
1455	max_keylen = sizeof(struct sockaddr_in);
1456	rn_init();
1457	rn_inithead((void**)&rhead, 32);
1458
1459	/* mark all of the slots in the table free */
1460	ag_avail = ag_slots;
1461	for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1462		ag->ag_fine = ag+1;
1463		ag++;
1464	}
1465}
1466
1467
1468#ifdef _HAVE_SIN_LEN
1469static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET};
1470static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET};
1471#else
1472static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1473static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1474#endif
1475
1476
1477void
1478set_need_flash(void)
1479{
1480	if (!need_flash) {
1481		need_flash = 1;
1482		/* Do not send the flash update immediately.  Wait a little
1483		 * while to hear from other routers.
1484		 */
1485		no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1486	}
1487}
1488
1489
1490/* Get a particular routing table entry
1491 */
1492struct rt_entry *
1493rtget(naddr dst, naddr mask)
1494{
1495	struct rt_entry *rt;
1496
1497	dst_sock.sin_addr.s_addr = dst;
1498	mask_sock.sin_addr.s_addr = mask;
1499	masktrim(&mask_sock);
1500	rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1501	if (!rt
1502	    || rt->rt_dst != dst
1503	    || rt->rt_mask != mask)
1504		return 0;
1505
1506	return rt;
1507}
1508
1509
1510/* Find a route to dst as the kernel would.
1511 */
1512struct rt_entry *
1513rtfind(naddr dst)
1514{
1515	dst_sock.sin_addr.s_addr = dst;
1516	return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1517}
1518
1519
1520/* add a route to the table
1521 */
1522void
1523rtadd(naddr	dst,
1524      naddr	mask,
1525      naddr	gate,			/* forward packets here */
1526      naddr	router,			/* on the authority of this router */
1527      int	metric,
1528      u_short	tag,
1529      u_int	state,			/* rs_state for the entry */
1530      struct interface *ifp)
1531{
1532	struct rt_entry *rt;
1533	naddr smask;
1534	int i;
1535	struct rt_spare *rts;
1536
1537	rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1538	bzero(rt, sizeof(*rt));
1539	for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1540		rts->rts_metric = HOPCNT_INFINITY;
1541
1542	rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1543	rt->rt_dst = dst;
1544	rt->rt_dst_sock.sin_family = AF_INET;
1545#ifdef _HAVE_SIN_LEN
1546	rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1547#endif
1548	if (mask != HOST_MASK) {
1549		smask = std_mask(dst);
1550		if ((smask & ~mask) == 0 && mask > smask)
1551			state |= RS_SUBNET;
1552	}
1553	mask_sock.sin_addr.s_addr = mask;
1554	masktrim(&mask_sock);
1555	rt->rt_mask = mask;
1556	rt->rt_state = state;
1557	rt->rt_gate = gate;
1558	rt->rt_router = router;
1559	rt->rt_time = now.tv_sec;
1560	rt->rt_metric = metric;
1561	rt->rt_poison_metric = HOPCNT_INFINITY;
1562	rt->rt_tag = tag;
1563	rt->rt_ifp = ifp;
1564	rt->rt_seqno = update_seqno;
1565
1566	if (++total_routes == MAX_ROUTES)
1567		msglog("have maximum (%d) routes", total_routes);
1568	if (TRACEACTIONS)
1569		trace_add_del("Add", rt);
1570
1571	need_kern.tv_sec = now.tv_sec;
1572	set_need_flash();
1573
1574	if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1575				    rhead, rt->rt_nodes)) {
1576/*
1577 * This will happen if RIP1 and RIP2 routeds talk to one another and
1578 * there are variable subnets.  This is only good for filling up your
1579 * syslog. -jkh
1580 */
1581#if 0
1582		msglog("rnh_addaddr() failed for %s mask=%#x",
1583		       naddr_ntoa(dst), mask);
1584#endif
1585	}
1586}
1587
1588
1589/* notice a changed route
1590 */
1591void
1592rtchange(struct rt_entry *rt,
1593	 u_int	state,			/* new state bits */
1594	 naddr	gate,			/* now forward packets here */
1595	 naddr	router,			/* on the authority of this router */
1596	 int	metric,			/* new metric */
1597	 u_short tag,
1598	 struct interface *ifp,
1599	 time_t	new_time,
1600	 char	*label)
1601{
1602	if (rt->rt_metric != metric) {
1603		/* Fix the kernel immediately if it seems the route
1604		 * has gone bad, since there may be a working route that
1605		 * aggregates this route.
1606		 */
1607		if (metric == HOPCNT_INFINITY) {
1608			need_kern.tv_sec = now.tv_sec;
1609			if (new_time >= now.tv_sec - EXPIRE_TIME)
1610				new_time = now.tv_sec - EXPIRE_TIME;
1611		}
1612		rt->rt_seqno = update_seqno;
1613		set_need_flash();
1614	}
1615
1616	if (rt->rt_gate != gate) {
1617		need_kern.tv_sec = now.tv_sec;
1618		rt->rt_seqno = update_seqno;
1619		set_need_flash();
1620	}
1621
1622	state |= (rt->rt_state & RS_SUBNET);
1623
1624	/* Keep various things from deciding ageless routes are stale.
1625	 */
1626	if (!AGE_RT(state, ifp))
1627		new_time = now.tv_sec;
1628
1629	if (TRACEACTIONS)
1630		trace_change(rt, state, gate, router, metric, tag, ifp,
1631			     new_time,
1632			     label ? label : "Chg   ");
1633
1634	rt->rt_state = state;
1635	rt->rt_gate = gate;
1636	rt->rt_router = router;
1637	rt->rt_metric = metric;
1638	rt->rt_tag = tag;
1639	rt->rt_ifp = ifp;
1640	rt->rt_time = new_time;
1641}
1642
1643
1644/* check for a better route among the spares
1645 */
1646static struct rt_spare *
1647rts_better(struct rt_entry *rt)
1648{
1649	struct rt_spare *rts, *rts1;
1650	int i;
1651
1652	/* find the best alternative among the spares */
1653	rts = rt->rt_spares+1;
1654	for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1655		if (BETTER_LINK(rt,rts1,rts))
1656			rts = rts1;
1657	}
1658
1659	return rts;
1660}
1661
1662
1663/* switch to a backup route
1664 */
1665void
1666rtswitch(struct rt_entry *rt,
1667	 struct rt_spare *rts)
1668{
1669	struct rt_spare swap;
1670	char label[10];
1671
1672
1673	/* Do not change permanent routes */
1674	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1675				  | RS_NET_SYN | RS_IF)))
1676		return;
1677
1678	/* find the best alternative among the spares */
1679	if (rts == 0)
1680		rts = rts_better(rt);
1681
1682	/* Do not bother if it is not worthwhile.
1683	 */
1684	if (!BETTER_LINK(rt, rts, rt->rt_spares))
1685		return;
1686
1687	swap = rt->rt_spares[0];
1688	(void)sprintf(label, "Use #%d", rts - rt->rt_spares);
1689	rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC),
1690		 rts->rts_gate, rts->rts_router, rts->rts_metric,
1691		 rts->rts_tag, rts->rts_ifp, rts->rts_time, label);
1692	*rts = swap;
1693}
1694
1695
1696void
1697rtdelete(struct rt_entry *rt)
1698{
1699	struct khash *k;
1700
1701
1702	if (TRACEACTIONS)
1703		trace_add_del("Del", rt);
1704
1705	k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1706	if (k != 0) {
1707		k->k_state |= KS_DELETE;
1708		need_kern.tv_sec = now.tv_sec;
1709	}
1710
1711	dst_sock.sin_addr.s_addr = rt->rt_dst;
1712	mask_sock.sin_addr.s_addr = rt->rt_mask;
1713	masktrim(&mask_sock);
1714	if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1715							rhead)) {
1716		msglog("rnh_deladdr() failed");
1717	} else {
1718		free(rt);
1719		total_routes--;
1720	}
1721}
1722
1723
1724/* Get rid of a bad route, and try to switch to a replacement.
1725 */
1726void
1727rtbad(struct rt_entry *rt)
1728{
1729	/* Poison the route */
1730	rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC),
1731		 rt->rt_gate, rt->rt_router, HOPCNT_INFINITY, rt->rt_tag,
1732		 0, rt->rt_time, 0);
1733
1734	rtswitch(rt, 0);
1735}
1736
1737
1738/* Junk a RS_NET_SYN or RS_LOCAL route,
1739 *	unless it is needed by another interface.
1740 */
1741void
1742rtbad_sub(struct rt_entry *rt)
1743{
1744	struct interface *ifp, *ifp1;
1745	struct intnet *intnetp;
1746	u_int state;
1747
1748
1749	ifp1 = 0;
1750	state = 0;
1751
1752	if (rt->rt_state & RS_LOCAL) {
1753		/* Is this the route through loopback for the interface?
1754		 * If so, see if it is used by any other interfaces, such
1755		 * as a point-to-point interface with the same local address.
1756		 */
1757		for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1758			/* Retain it if another interface needs it.
1759			 */
1760			if (ifp->int_addr == rt->rt_ifp->int_addr) {
1761				state |= RS_LOCAL;
1762				ifp1 = ifp;
1763				break;
1764			}
1765		}
1766
1767	}
1768
1769	if (!(state & RS_LOCAL)) {
1770		/* Retain RIPv1 logical network route if there is another
1771		 * interface that justifies it.
1772		 */
1773		if (rt->rt_state & RS_NET_SYN) {
1774			for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1775				if ((ifp->int_state & IS_NEED_NET_SYN)
1776				    && rt->rt_mask == ifp->int_std_mask
1777				    && rt->rt_dst == ifp->int_std_addr) {
1778					state |= RS_NET_SYN;
1779					ifp1 = ifp;
1780					break;
1781				}
1782			}
1783		}
1784
1785		/* or if there is an authority route that needs it. */
1786		for (intnetp = intnets;
1787		     intnetp != 0;
1788		     intnetp = intnetp->intnet_next) {
1789			if (intnetp->intnet_addr == rt->rt_dst
1790			    && intnetp->intnet_mask == rt->rt_mask) {
1791				state |= (RS_NET_SYN | RS_NET_INT);
1792				break;
1793			}
1794		}
1795	}
1796
1797	if (ifp1 != 0 || (state & RS_NET_SYN)) {
1798		rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN | RS_LOCAL))
1799			      | state),
1800			 rt->rt_gate, rt->rt_router, rt->rt_metric,
1801			 rt->rt_tag, ifp1, rt->rt_time, 0);
1802	} else {
1803		rtbad(rt);
1804	}
1805}
1806
1807
1808/* Called while walking the table looking for sick interfaces
1809 * or after a time change.
1810 */
1811/* ARGSUSED */
1812int
1813walk_bad(struct radix_node *rn,
1814	 struct walkarg *w)
1815{
1816#define RT ((struct rt_entry *)rn)
1817	struct rt_spare *rts;
1818	int i;
1819	time_t new_time;
1820
1821
1822	/* fix any spare routes through the interface
1823	 */
1824	rts = RT->rt_spares;
1825	for (i = NUM_SPARES; i != 1; i--) {
1826		rts++;
1827
1828		if (rts->rts_ifp != 0
1829		    && (rts->rts_ifp->int_state & IS_BROKE)) {
1830			/* mark the spare route to be deleted immediately */
1831			new_time = rts->rts_time;
1832			if (new_time >= now_garbage)
1833				new_time = now_garbage-1;
1834			trace_upslot(RT, rts, rts->rts_gate,
1835				     rts->rts_router, 0,
1836				     HOPCNT_INFINITY, rts->rts_tag,
1837				     new_time);
1838			rts->rts_ifp = 0;
1839			rts->rts_metric = HOPCNT_INFINITY;
1840			rts->rts_time = new_time;
1841		}
1842	}
1843
1844	/* Deal with the main route
1845	 */
1846	/* finished if it has been handled before or if its interface is ok
1847	 */
1848	if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
1849		return 0;
1850
1851	/* Bad routes for other than interfaces are easy.
1852	 */
1853	if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
1854		rtbad(RT);
1855		return 0;
1856	}
1857
1858	rtbad_sub(RT);
1859	return 0;
1860#undef RT
1861}
1862
1863
1864/* Check the age of an individual route.
1865 */
1866/* ARGSUSED */
1867static int
1868walk_age(struct radix_node *rn,
1869	   struct walkarg *w)
1870{
1871#define RT ((struct rt_entry *)rn)
1872	struct interface *ifp;
1873	struct rt_spare *rts;
1874	int i;
1875
1876
1877	/* age all of the spare routes, including the primary route
1878	 * currently in use
1879	 */
1880	rts = RT->rt_spares;
1881	for (i = NUM_SPARES; i != 0; i--, rts++) {
1882
1883		ifp = rts->rts_ifp;
1884		if (i == NUM_SPARES) {
1885			if (!AGE_RT(RT->rt_state, ifp)) {
1886				/* Keep various things from deciding ageless
1887				 * routes are stale
1888				 */
1889				rts->rts_time = now.tv_sec;
1890				continue;
1891			}
1892
1893			/* forget RIP routes after RIP has been turned off.
1894			 */
1895			if (rip_sock < 0) {
1896				rtdelete(RT);
1897				return 0;
1898			}
1899		}
1900
1901		/* age failing routes
1902		 */
1903		if (age_bad_gate == rts->rts_gate
1904		    && rts->rts_time >= now_stale) {
1905			rts->rts_time -= SUPPLY_INTERVAL;
1906		}
1907
1908		/* trash the spare routes when they go bad */
1909		if (rts->rts_metric < HOPCNT_INFINITY
1910		    && now_garbage > rts->rts_time) {
1911			trace_upslot(RT, rts, rts->rts_gate,
1912				     rts->rts_router, rts->rts_ifp,
1913				     HOPCNT_INFINITY, rts->rts_tag,
1914				     rts->rts_time);
1915			rts->rts_metric = HOPCNT_INFINITY;
1916		}
1917	}
1918
1919
1920	/* finished if the active route is still fresh */
1921	if (now_stale <= RT->rt_time)
1922		return 0;
1923
1924	/* try to switch to an alternative */
1925	rtswitch(RT, 0);
1926
1927	/* Delete a dead route after it has been publically mourned. */
1928	if (now_garbage > RT->rt_time) {
1929		rtdelete(RT);
1930		return 0;
1931	}
1932
1933	/* Start poisoning a bad route before deleting it. */
1934	if (now.tv_sec - RT->rt_time > EXPIRE_TIME)
1935		rtchange(RT, RT->rt_state, RT->rt_gate, RT->rt_router,
1936			 HOPCNT_INFINITY, RT->rt_tag, RT->rt_ifp,
1937			 RT->rt_time, 0);
1938	return 0;
1939}
1940
1941
1942/* Watch for dead routes and interfaces.
1943 */
1944void
1945age(naddr bad_gate)
1946{
1947	struct interface *ifp;
1948	int need_query = 0;
1949
1950	/* If not listening to RIP, there is no need to age the routes in
1951	 * the table.
1952	 */
1953	age_timer.tv_sec = (now.tv_sec
1954			    + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
1955
1956	/* Check for dead IS_REMOTE interfaces by timing their
1957	 * transmissions.
1958	 */
1959	for (ifp = ifnet; ifp; ifp = ifp->int_next) {
1960		if (!(ifp->int_state & IS_REMOTE))
1961			continue;
1962
1963		/* ignore unreachable remote interfaces */
1964		if (!check_remote(ifp))
1965			continue;
1966		/* Restore remote interface that has become reachable
1967		 */
1968		if (ifp->int_state & IS_BROKE)
1969			if_ok(ifp, "remote ");
1970
1971		if (ifp->int_act_time != NEVER
1972		    && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
1973			msglog("remote interface %s to %s timed out after"
1974			       " %d:%d",
1975			       ifp->int_name,
1976			       naddr_ntoa(ifp->int_dstaddr),
1977			       (now.tv_sec - ifp->int_act_time)/60,
1978			       (now.tv_sec - ifp->int_act_time)%60);
1979			if_sick(ifp);
1980		}
1981
1982		/* If we have not heard from the other router
1983		 * recently, ask it.
1984		 */
1985		if (now.tv_sec >= ifp->int_query_time) {
1986			ifp->int_query_time = NEVER;
1987			need_query = 1;
1988		}
1989	}
1990
1991	/* Age routes. */
1992	age_bad_gate = bad_gate;
1993	(void)rn_walktree(rhead, walk_age, 0);
1994
1995	/* Update the kernel routing table. */
1996	fix_kern();
1997
1998	/* poke reticent remote gateways */
1999	if (need_query)
2000		rip_query();
2001}
2002