radix.c revision 1.38
1/*	$NetBSD: radix.c,v 1.38 2007/07/12 04:28:59 dyoung Exp $	*/
2
3/*
4 * Copyright (c) 1988, 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)radix.c	8.6 (Berkeley) 10/17/95
32 */
33
34/*
35 * Routines to build and maintain radix trees for routing lookups.
36 */
37
38#include <sys/cdefs.h>
39__KERNEL_RCSID(0, "$NetBSD: radix.c,v 1.38 2007/07/12 04:28:59 dyoung Exp $");
40
41#ifndef _NET_RADIX_H_
42#include <sys/param.h>
43#ifdef	_KERNEL
44#include "opt_inet.h"
45
46#include <sys/systm.h>
47#include <sys/malloc.h>
48#define	M_DONTWAIT M_NOWAIT
49#include <sys/domain.h>
50#include <netinet/ip_encap.h>
51#else
52#include <stdlib.h>
53#endif
54#include <machine/stdarg.h>
55#include <sys/syslog.h>
56#include <net/radix.h>
57#endif
58
59typedef void (*rn_printer_t)(void *, const char *fmt, ...);
60
61int	max_keylen;
62struct radix_mask *rn_mkfreelist;
63struct radix_node_head *mask_rnhead;
64static char *addmask_key;
65static const char normal_chars[] =
66    {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
67static char *rn_zeros, *rn_ones;
68
69#define rn_masktop (mask_rnhead->rnh_treetop)
70
71static int rn_satisfies_leaf(const char *, struct radix_node *, int);
72static int rn_lexobetter(const void *, const void *);
73static struct radix_mask *rn_new_radix_mask(struct radix_node *,
74    struct radix_mask *);
75static struct radix_node *rn_walknext(struct radix_node *, rn_printer_t,
76    void *);
77static struct radix_node *rn_walkfirst(struct radix_node *, rn_printer_t,
78    void *);
79static void rn_nodeprint(struct radix_node *, rn_printer_t, void *,
80    const char *);
81
82#define	SUBTREE_OPEN	"[ "
83#define	SUBTREE_CLOSE	" ]"
84
85#ifdef RN_DEBUG
86static void rn_treeprint(struct radix_node_head *, rn_printer_t, void *);
87#endif /* RN_DEBUG */
88
89/*
90 * The data structure for the keys is a radix tree with one way
91 * branching removed.  The index rn_b at an internal node n represents a bit
92 * position to be tested.  The tree is arranged so that all descendants
93 * of a node n have keys whose bits all agree up to position rn_b - 1.
94 * (We say the index of n is rn_b.)
95 *
96 * There is at least one descendant which has a one bit at position rn_b,
97 * and at least one with a zero there.
98 *
99 * A route is determined by a pair of key and mask.  We require that the
100 * bit-wise logical and of the key and mask to be the key.
101 * We define the index of a route to associated with the mask to be
102 * the first bit number in the mask where 0 occurs (with bit number 0
103 * representing the highest order bit).
104 *
105 * We say a mask is normal if every bit is 0, past the index of the mask.
106 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
107 * and m is a normal mask, then the route applies to every descendant of n.
108 * If the index(m) < rn_b, this implies the trailing last few bits of k
109 * before bit b are all 0, (and hence consequently true of every descendant
110 * of n), so the route applies to all descendants of the node as well.
111 *
112 * Similar logic shows that a non-normal mask m such that
113 * index(m) <= index(n) could potentially apply to many children of n.
114 * Thus, for each non-host route, we attach its mask to a list at an internal
115 * node as high in the tree as we can go.
116 *
117 * The present version of the code makes use of normal routes in short-
118 * circuiting an explicit mask and compare operation when testing whether
119 * a key satisfies a normal route, and also in remembering the unique leaf
120 * that governs a subtree.
121 */
122
123struct radix_node *
124rn_search(
125	const void *v_arg,
126	struct radix_node *head)
127{
128	const u_char * const v = v_arg;
129	struct radix_node *x;
130
131	for (x = head; x->rn_b >= 0;) {
132		if (x->rn_bmask & v[x->rn_off])
133			x = x->rn_r;
134		else
135			x = x->rn_l;
136	}
137	return x;
138}
139
140struct radix_node *
141rn_search_m(
142	const void *v_arg,
143	struct radix_node *head,
144	const void *m_arg)
145{
146	struct radix_node *x;
147	const u_char * const v = v_arg;
148	const u_char * const m = m_arg;
149
150	for (x = head; x->rn_b >= 0;) {
151		if ((x->rn_bmask & m[x->rn_off]) &&
152		    (x->rn_bmask & v[x->rn_off]))
153			x = x->rn_r;
154		else
155			x = x->rn_l;
156	}
157	return x;
158}
159
160int
161rn_refines(
162	const void *m_arg,
163	const void *n_arg)
164{
165	const char *m = m_arg;
166	const char *n = n_arg;
167	const char *lim = n + *(const u_char *)n;
168	const char *lim2 = lim;
169	int longer = (*(const u_char *)n++) - (int)(*(const u_char *)m++);
170	int masks_are_equal = 1;
171
172	if (longer > 0)
173		lim -= longer;
174	while (n < lim) {
175		if (*n & ~(*m))
176			return 0;
177		if (*n++ != *m++)
178			masks_are_equal = 0;
179	}
180	while (n < lim2)
181		if (*n++)
182			return 0;
183	if (masks_are_equal && (longer < 0))
184		for (lim2 = m - longer; m < lim2; )
185			if (*m++)
186				return 1;
187	return !masks_are_equal;
188}
189
190struct radix_node *
191rn_lookup(
192	const void *v_arg,
193	const void *m_arg,
194	struct radix_node_head *head)
195{
196	struct radix_node *x;
197	const char *netmask = NULL;
198
199	if (m_arg) {
200		if ((x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off)) == 0)
201			return NULL;
202		netmask = x->rn_key;
203	}
204	x = rn_match(v_arg, head);
205	if (x != NULL && netmask != NULL) {
206		while (x != NULL && x->rn_mask != netmask)
207			x = x->rn_dupedkey;
208	}
209	return x;
210}
211
212static int
213rn_satisfies_leaf(
214	const char *trial,
215	struct radix_node *leaf,
216	int skip)
217{
218	const char *cp = trial;
219	const char *cp2 = leaf->rn_key;
220	const char *cp3 = leaf->rn_mask;
221	const char *cplim;
222	int length = min(*(const u_char *)cp, *(const u_char *)cp2);
223
224	if (cp3 == 0)
225		cp3 = rn_ones;
226	else
227		length = min(length, *(const u_char *)cp3);
228	cplim = cp + length; cp3 += skip; cp2 += skip;
229	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
230		if ((*cp ^ *cp2) & *cp3)
231			return 0;
232	return 1;
233}
234
235struct radix_node *
236rn_match(
237	const void *v_arg,
238	struct radix_node_head *head)
239{
240	const char * const v = v_arg;
241	struct radix_node *t = head->rnh_treetop;
242	struct radix_node *top = t;
243	struct radix_node *x;
244	struct radix_node *saved_t;
245	const char *cp = v;
246	const char *cp2;
247	const char *cplim;
248	int off = t->rn_off;
249	int vlen = *(const u_char *)cp;
250	int matched_off;
251	int test, b, rn_b;
252
253	/*
254	 * Open code rn_search(v, top) to avoid overhead of extra
255	 * subroutine call.
256	 */
257	for (; t->rn_b >= 0; ) {
258		if (t->rn_bmask & cp[t->rn_off])
259			t = t->rn_r;
260		else
261			t = t->rn_l;
262	}
263	/*
264	 * See if we match exactly as a host destination
265	 * or at least learn how many bits match, for normal mask finesse.
266	 *
267	 * It doesn't hurt us to limit how many bytes to check
268	 * to the length of the mask, since if it matches we had a genuine
269	 * match and the leaf we have is the most specific one anyway;
270	 * if it didn't match with a shorter length it would fail
271	 * with a long one.  This wins big for class B&C netmasks which
272	 * are probably the most common case...
273	 */
274	if (t->rn_mask)
275		vlen = *(const u_char *)t->rn_mask;
276	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
277	for (; cp < cplim; cp++, cp2++)
278		if (*cp != *cp2)
279			goto on1;
280	/*
281	 * This extra grot is in case we are explicitly asked
282	 * to look up the default.  Ugh!
283	 */
284	if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
285		t = t->rn_dupedkey;
286	return t;
287on1:
288	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
289	for (b = 7; (test >>= 1) > 0;)
290		b--;
291	matched_off = cp - v;
292	b += matched_off << 3;
293	rn_b = -1 - b;
294	/*
295	 * If there is a host route in a duped-key chain, it will be first.
296	 */
297	if ((saved_t = t)->rn_mask == 0)
298		t = t->rn_dupedkey;
299	for (; t; t = t->rn_dupedkey)
300		/*
301		 * Even if we don't match exactly as a host,
302		 * we may match if the leaf we wound up at is
303		 * a route to a net.
304		 */
305		if (t->rn_flags & RNF_NORMAL) {
306			if (rn_b <= t->rn_b)
307				return t;
308		} else if (rn_satisfies_leaf(v, t, matched_off))
309				return t;
310	t = saved_t;
311	/* start searching up the tree */
312	do {
313		struct radix_mask *m;
314		t = t->rn_p;
315		m = t->rn_mklist;
316		if (m) {
317			/*
318			 * If non-contiguous masks ever become important
319			 * we can restore the masking and open coding of
320			 * the search and satisfaction test and put the
321			 * calculation of "off" back before the "do".
322			 */
323			do {
324				if (m->rm_flags & RNF_NORMAL) {
325					if (rn_b <= m->rm_b)
326						return m->rm_leaf;
327				} else {
328					off = min(t->rn_off, matched_off);
329					x = rn_search_m(v, t, m->rm_mask);
330					while (x && x->rn_mask != m->rm_mask)
331						x = x->rn_dupedkey;
332					if (x && rn_satisfies_leaf(v, x, off))
333						return x;
334				}
335				m = m->rm_mklist;
336			} while (m);
337		}
338	} while (t != top);
339	return NULL;
340}
341
342static void
343rn_nodeprint(struct radix_node *rn, rn_printer_t printer, void *arg,
344    const char *delim)
345{
346	(*printer)(arg, "%s(%s%p: p<%p> l<%p> r<%p>)",
347	    delim, ((void *)rn == arg) ? "*" : "", rn, rn->rn_p,
348	    rn->rn_l, rn->rn_r);
349}
350
351#ifdef RN_DEBUG
352int	rn_debug =  1;
353
354static void
355rn_dbg_print(void *arg, const char *fmt, ...)
356{
357	va_list ap;
358
359	va_start(ap, fmt);
360	vlog(LOG_DEBUG, fmt, ap);
361	va_end(ap);
362}
363
364static void
365rn_treeprint(struct radix_node_head *h, rn_printer_t printer, void *arg)
366{
367	struct radix_node *dup, *rn;
368	const char *delim;
369
370	if (printer == NULL)
371		return;
372
373	rn = rn_walkfirst(h->rnh_treetop, printer, arg);
374	for (;;) {
375		/* Process leaves */
376		delim = "";
377		for (dup = rn; dup != NULL; dup = dup->rn_dupedkey) {
378			if ((dup->rn_flags & RNF_ROOT) != 0)
379				continue;
380			rn_nodeprint(dup, printer, arg, delim);
381			delim = ", ";
382		}
383		rn = rn_walknext(rn, printer, arg);
384		if (rn->rn_flags & RNF_ROOT)
385			return;
386	}
387	/* NOTREACHED */
388}
389
390#define	traverse(__head, __rn)	rn_treeprint((__head), rn_dbg_print, (__rn))
391#endif /* RN_DEBUG */
392
393struct radix_node *
394rn_newpair(
395	const void *v,
396	int b,
397	struct radix_node nodes[2])
398{
399	struct radix_node *tt = nodes;
400	struct radix_node *t = tt + 1;
401	t->rn_b = b; t->rn_bmask = 0x80 >> (b & 7);
402	t->rn_l = tt; t->rn_off = b >> 3;
403	tt->rn_b = -1; tt->rn_key = v; tt->rn_p = t;
404	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
405	return t;
406}
407
408struct radix_node *
409rn_insert(
410	const void *v_arg,
411	struct radix_node_head *head,
412	int *dupentry,
413	struct radix_node nodes[2])
414{
415	struct radix_node *top = head->rnh_treetop;
416	struct radix_node *t = rn_search(v_arg, top);
417	struct radix_node *tt;
418	const char *v = v_arg;
419	int head_off = top->rn_off;
420	int vlen = *((const u_char *)v);
421	const char *cp = v + head_off;
422	int b;
423    	/*
424	 * Find first bit at which v and t->rn_key differ
425	 */
426    {
427	const char *cp2 = t->rn_key + head_off;
428	const char *cplim = v + vlen;
429	int cmp_res;
430
431	while (cp < cplim)
432		if (*cp2++ != *cp++)
433			goto on1;
434	*dupentry = 1;
435	return t;
436on1:
437	*dupentry = 0;
438	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
439	for (b = (cp - v) << 3; cmp_res; b--)
440		cmp_res >>= 1;
441    }
442    {
443	struct radix_node *p, *x = top;
444	cp = v;
445	do {
446		p = x;
447		if (cp[x->rn_off] & x->rn_bmask)
448			x = x->rn_r;
449		else x = x->rn_l;
450	} while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
451#ifdef RN_DEBUG
452	if (rn_debug)
453		log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, p);
454#endif
455	t = rn_newpair(v_arg, b, nodes); tt = t->rn_l;
456	if ((cp[p->rn_off] & p->rn_bmask) == 0)
457		p->rn_l = t;
458	else
459		p->rn_r = t;
460	x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */
461	if ((cp[t->rn_off] & t->rn_bmask) == 0) {
462		t->rn_r = x;
463	} else {
464		t->rn_r = tt; t->rn_l = x;
465	}
466#ifdef RN_DEBUG
467	if (rn_debug) {
468		log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
469		    traverse(head, p);
470	}
471#endif /* RN_DEBUG */
472    }
473	return tt;
474}
475
476struct radix_node *
477rn_addmask(
478	const void *n_arg,
479	int search,
480	int skip)
481{
482	const char *netmask = n_arg;
483	const char *cp;
484	const char *cplim;
485	struct radix_node *x;
486	struct radix_node *saved_x;
487	int b = 0, mlen, j;
488	int maskduplicated, m0, isnormal;
489	static int last_zeroed = 0;
490
491	if ((mlen = *(const u_char *)netmask) > max_keylen)
492		mlen = max_keylen;
493	if (skip == 0)
494		skip = 1;
495	if (mlen <= skip)
496		return mask_rnhead->rnh_nodes;
497	if (skip > 1)
498		Bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
499	if ((m0 = mlen) > skip)
500		Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
501	/*
502	 * Trim trailing zeroes.
503	 */
504	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
505		cp--;
506	mlen = cp - addmask_key;
507	if (mlen <= skip) {
508		if (m0 >= last_zeroed)
509			last_zeroed = mlen;
510		return mask_rnhead->rnh_nodes;
511	}
512	if (m0 < last_zeroed)
513		Bzero(addmask_key + m0, last_zeroed - m0);
514	*addmask_key = last_zeroed = mlen;
515	x = rn_search(addmask_key, rn_masktop);
516	if (Bcmp(addmask_key, x->rn_key, mlen) != 0)
517		x = 0;
518	if (x || search)
519		return x;
520	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
521	if ((saved_x = x) == NULL)
522		return NULL;
523	Bzero(x, max_keylen + 2 * sizeof (*x));
524	cp = netmask = (void *)(x + 2);
525	Bcopy(addmask_key, (void *)(x + 2), mlen);
526	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
527	if (maskduplicated) {
528		log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n");
529		Free(saved_x);
530		return x;
531	}
532	/*
533	 * Calculate index of mask, and check for normalcy.
534	 */
535	cplim = netmask + mlen; isnormal = 1;
536	for (cp = netmask + skip; (cp < cplim) && *(const u_char *)cp == 0xff;)
537		cp++;
538	if (cp != cplim) {
539		for (j = 0x80; (j & *cp) != 0; j >>= 1)
540			b++;
541		if (*cp != normal_chars[b] || cp != (cplim - 1))
542			isnormal = 0;
543	}
544	b += (cp - netmask) << 3;
545	x->rn_b = -1 - b;
546	if (isnormal)
547		x->rn_flags |= RNF_NORMAL;
548	return x;
549}
550
551static int	/* XXX: arbitrary ordering for non-contiguous masks */
552rn_lexobetter(
553	const void *m_arg,
554	const void *n_arg)
555{
556	const u_char *mp = m_arg;
557	const u_char *np = n_arg;
558	const u_char *lim;
559
560	if (*mp > *np)
561		return 1;  /* not really, but need to check longer one first */
562	if (*mp == *np)
563		for (lim = mp + *mp; mp < lim;)
564			if (*mp++ > *np++)
565				return 1;
566	return 0;
567}
568
569static struct radix_mask *
570rn_new_radix_mask(
571	struct radix_node *tt,
572	struct radix_mask *next)
573{
574	struct radix_mask *m;
575
576	MKGet(m);
577	if (m == NULL) {
578		log(LOG_ERR, "Mask for route not entered\n");
579		return NULL;
580	}
581	Bzero(m, sizeof *m);
582	m->rm_b = tt->rn_b;
583	m->rm_flags = tt->rn_flags;
584	if (tt->rn_flags & RNF_NORMAL)
585		m->rm_leaf = tt;
586	else
587		m->rm_mask = tt->rn_mask;
588	m->rm_mklist = next;
589	tt->rn_mklist = m;
590	return m;
591}
592
593struct radix_node *
594rn_addroute(
595	const void *v_arg,
596	const void *n_arg,
597	struct radix_node_head *head,
598	struct radix_node treenodes[2])
599{
600	const char *v = v_arg, *netmask = n_arg;
601	struct radix_node *t, *x = NULL, *tt;
602	struct radix_node *saved_tt, *top = head->rnh_treetop;
603	short b = 0, b_leaf = 0;
604	int keyduplicated;
605	const char *mmask;
606	struct radix_mask *m, **mp;
607
608	/*
609	 * In dealing with non-contiguous masks, there may be
610	 * many different routes which have the same mask.
611	 * We will find it useful to have a unique pointer to
612	 * the mask to speed avoiding duplicate references at
613	 * nodes and possibly save time in calculating indices.
614	 */
615	if (netmask != NULL) {
616		if ((x = rn_addmask(netmask, 0, top->rn_off)) == NULL)
617			return NULL;
618		b_leaf = x->rn_b;
619		b = -1 - x->rn_b;
620		netmask = x->rn_key;
621	}
622	/*
623	 * Deal with duplicated keys: attach node to previous instance
624	 */
625	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
626	if (keyduplicated) {
627		for (t = tt; tt != NULL; t = tt, tt = tt->rn_dupedkey) {
628			if (tt->rn_mask == netmask)
629				return NULL;
630			if (netmask == NULL ||
631			    (tt->rn_mask != NULL &&
632			     (b_leaf < tt->rn_b || /* index(netmask) > node */
633			       rn_refines(netmask, tt->rn_mask) ||
634			       rn_lexobetter(netmask, tt->rn_mask))))
635				break;
636		}
637		/*
638		 * If the mask is not duplicated, we wouldn't
639		 * find it among possible duplicate key entries
640		 * anyway, so the above test doesn't hurt.
641		 *
642		 * We sort the masks for a duplicated key the same way as
643		 * in a masklist -- most specific to least specific.
644		 * This may require the unfortunate nuisance of relocating
645		 * the head of the list.
646		 *
647		 * We also reverse, or doubly link the list through the
648		 * parent pointer.
649		 */
650		if (tt == saved_tt) {
651			struct	radix_node *xx = x;
652			/* link in at head of list */
653			(tt = treenodes)->rn_dupedkey = t;
654			tt->rn_flags = t->rn_flags;
655			tt->rn_p = x = t->rn_p;
656			t->rn_p = tt;
657			if (x->rn_l == t)
658				x->rn_l = tt;
659			else
660				x->rn_r = tt;
661			saved_tt = tt;
662			x = xx;
663		} else {
664			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
665			t->rn_dupedkey = tt;
666			tt->rn_p = t;
667			if (tt->rn_dupedkey)
668				tt->rn_dupedkey->rn_p = tt;
669		}
670		tt->rn_key = v;
671		tt->rn_b = -1;
672		tt->rn_flags = RNF_ACTIVE;
673	}
674	/*
675	 * Put mask in tree.
676	 */
677	if (netmask != NULL) {
678		tt->rn_mask = netmask;
679		tt->rn_b = x->rn_b;
680		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
681	}
682	t = saved_tt->rn_p;
683	if (keyduplicated)
684		goto on2;
685	b_leaf = -1 - t->rn_b;
686	if (t->rn_r == saved_tt)
687		x = t->rn_l;
688	else
689		x = t->rn_r;
690	/* Promote general routes from below */
691	if (x->rn_b < 0) {
692		for (mp = &t->rn_mklist; x != NULL; x = x->rn_dupedkey) {
693			if (x->rn_mask != NULL && x->rn_b >= b_leaf &&
694			    x->rn_mklist == NULL) {
695				*mp = m = rn_new_radix_mask(x, NULL);
696				if (m != NULL)
697					mp = &m->rm_mklist;
698			}
699		}
700	} else if (x->rn_mklist != NULL) {
701		/*
702		 * Skip over masks whose index is > that of new node
703		 */
704		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
705			if (m->rm_b >= b_leaf)
706				break;
707		t->rn_mklist = m;
708		*mp = NULL;
709	}
710on2:
711	/* Add new route to highest possible ancestor's list */
712	if (netmask == NULL || b > t->rn_b)
713		return tt; /* can't lift at all */
714	b_leaf = tt->rn_b;
715	do {
716		x = t;
717		t = t->rn_p;
718	} while (b <= t->rn_b && x != top);
719	/*
720	 * Search through routes associated with node to
721	 * insert new route according to index.
722	 * Need same criteria as when sorting dupedkeys to avoid
723	 * double loop on deletion.
724	 */
725	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
726		if (m->rm_b < b_leaf)
727			continue;
728		if (m->rm_b > b_leaf)
729			break;
730		if (m->rm_flags & RNF_NORMAL) {
731			mmask = m->rm_leaf->rn_mask;
732			if (tt->rn_flags & RNF_NORMAL) {
733				log(LOG_ERR, "Non-unique normal route,"
734				    " mask not entered\n");
735				return tt;
736			}
737		} else
738			mmask = m->rm_mask;
739		if (mmask == netmask) {
740			m->rm_refs++;
741			tt->rn_mklist = m;
742			return tt;
743		}
744		if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask))
745			break;
746	}
747	*mp = rn_new_radix_mask(tt, *mp);
748	return tt;
749}
750
751struct radix_node *
752rn_delete1(
753	const void *v_arg,
754	const void *netmask_arg,
755	struct radix_node_head *head,
756	struct radix_node *rn)
757{
758	struct radix_node *t, *p, *x, *tt;
759	struct radix_mask *m, *saved_m, **mp;
760	struct radix_node *dupedkey, *saved_tt, *top;
761	const char *v, *netmask;
762	int b, head_off, vlen;
763
764	v = v_arg;
765	netmask = netmask_arg;
766	x = head->rnh_treetop;
767	tt = rn_search(v, x);
768	head_off = x->rn_off;
769	vlen =  *(const u_char *)v;
770	saved_tt = tt;
771	top = x;
772	if (tt == NULL ||
773	    Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
774		return NULL;
775	/*
776	 * Delete our route from mask lists.
777	 */
778	if (netmask != NULL) {
779		if ((x = rn_addmask(netmask, 1, head_off)) == NULL)
780			return NULL;
781		netmask = x->rn_key;
782		while (tt->rn_mask != netmask)
783			if ((tt = tt->rn_dupedkey) == NULL)
784				return NULL;
785	}
786	if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL)
787		goto on1;
788	if (tt->rn_flags & RNF_NORMAL) {
789		if (m->rm_leaf != tt || m->rm_refs > 0) {
790			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
791			return NULL;  /* dangling ref could cause disaster */
792		}
793	} else {
794		if (m->rm_mask != tt->rn_mask) {
795			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
796			goto on1;
797		}
798		if (--m->rm_refs >= 0)
799			goto on1;
800	}
801	b = -1 - tt->rn_b;
802	t = saved_tt->rn_p;
803	if (b > t->rn_b)
804		goto on1; /* Wasn't lifted at all */
805	do {
806		x = t;
807		t = t->rn_p;
808	} while (b <= t->rn_b && x != top);
809	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
810		if (m == saved_m) {
811			*mp = m->rm_mklist;
812			MKFree(m);
813			break;
814		}
815	}
816	if (m == NULL) {
817		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
818		if (tt->rn_flags & RNF_NORMAL)
819			return NULL; /* Dangling ref to us */
820	}
821on1:
822	/*
823	 * Eliminate us from tree
824	 */
825	if (tt->rn_flags & RNF_ROOT)
826		return NULL;
827#ifdef RN_DEBUG
828	if (rn_debug)
829		log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, tt);
830#endif
831	t = tt->rn_p;
832	dupedkey = saved_tt->rn_dupedkey;
833	if (dupedkey != NULL) {
834		/*
835		 * Here, tt is the deletion target, and
836		 * saved_tt is the head of the dupedkey chain.
837		 */
838		if (tt == saved_tt) {
839			x = dupedkey;
840			x->rn_p = t;
841			if (t->rn_l == tt)
842				t->rn_l = x;
843			else
844				t->rn_r = x;
845		} else {
846			/* find node in front of tt on the chain */
847			for (x = p = saved_tt;
848			     p != NULL && p->rn_dupedkey != tt;)
849				p = p->rn_dupedkey;
850			if (p != NULL) {
851				p->rn_dupedkey = tt->rn_dupedkey;
852				if (tt->rn_dupedkey != NULL)
853					tt->rn_dupedkey->rn_p = p;
854			} else
855				log(LOG_ERR, "rn_delete: couldn't find us\n");
856		}
857		t = tt + 1;
858		if  (t->rn_flags & RNF_ACTIVE) {
859			*++x = *t;
860			p = t->rn_p;
861			if (p->rn_l == t)
862				p->rn_l = x;
863			else
864				p->rn_r = x;
865			x->rn_l->rn_p = x;
866			x->rn_r->rn_p = x;
867		}
868		goto out;
869	}
870	if (t->rn_l == tt)
871		x = t->rn_r;
872	else
873		x = t->rn_l;
874	p = t->rn_p;
875	if (p->rn_r == t)
876		p->rn_r = x;
877	else
878		p->rn_l = x;
879	x->rn_p = p;
880	/*
881	 * Demote routes attached to us.
882	 */
883	if (t->rn_mklist == NULL)
884		;
885	else if (x->rn_b >= 0) {
886		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
887			;
888		*mp = t->rn_mklist;
889	} else {
890		/* If there are any key,mask pairs in a sibling
891		   duped-key chain, some subset will appear sorted
892		   in the same order attached to our mklist */
893		for (m = t->rn_mklist;
894		     m != NULL && x != NULL;
895		     x = x->rn_dupedkey) {
896			if (m == x->rn_mklist) {
897				struct radix_mask *mm = m->rm_mklist;
898				x->rn_mklist = NULL;
899				if (--(m->rm_refs) < 0)
900					MKFree(m);
901				m = mm;
902			}
903		}
904		if (m != NULL) {
905			log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n",
906			    m, x);
907		}
908	}
909	/*
910	 * We may be holding an active internal node in the tree.
911	 */
912	x = tt + 1;
913	if (t != x) {
914		*t = *x;
915		t->rn_l->rn_p = t;
916		t->rn_r->rn_p = t;
917		p = x->rn_p;
918		if (p->rn_l == x)
919			p->rn_l = t;
920		else
921			p->rn_r = t;
922	}
923out:
924#ifdef RN_DEBUG
925	if (rn_debug) {
926		log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
927		    traverse(head, tt);
928	}
929#endif /* RN_DEBUG */
930	tt->rn_flags &= ~RNF_ACTIVE;
931	tt[1].rn_flags &= ~RNF_ACTIVE;
932	return tt;
933}
934
935struct radix_node *
936rn_delete(
937	const void *v_arg,
938	const void *netmask_arg,
939	struct radix_node_head *head)
940{
941	return rn_delete1(v_arg, netmask_arg, head, NULL);
942}
943
944static struct radix_node *
945rn_walknext(struct radix_node *rn, rn_printer_t printer, void *arg)
946{
947	/* If at right child go back up, otherwise, go right */
948	while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) {
949		if (printer != NULL)
950			(*printer)(arg, SUBTREE_CLOSE);
951		rn = rn->rn_p;
952	}
953	if (printer)
954		rn_nodeprint(rn->rn_p, printer, arg, "");
955	/* Find the next *leaf* since next node might vanish, too */
956	for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) {
957		if (printer != NULL)
958			(*printer)(arg, SUBTREE_OPEN);
959		rn = rn->rn_l;
960	}
961	return rn;
962}
963
964static struct radix_node *
965rn_walkfirst(struct radix_node *rn, rn_printer_t printer, void *arg)
966{
967	/* First time through node, go left */
968	while (rn->rn_b >= 0) {
969		if (printer != NULL)
970			(*printer)(arg, SUBTREE_OPEN);
971		rn = rn->rn_l;
972	}
973	return rn;
974}
975
976int
977rn_walktree(
978	struct radix_node_head *h,
979	int (*f)(struct radix_node *, void *),
980	void *w)
981{
982	int error;
983	struct radix_node *base, *next, *rn;
984	/*
985	 * This gets complicated because we may delete the node
986	 * while applying the function f to it, so we need to calculate
987	 * the successor node in advance.
988	 */
989	rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
990	for (;;) {
991		base = rn;
992		next = rn_walknext(rn, NULL, NULL);
993		/* Process leaves */
994		while ((rn = base) != NULL) {
995			base = rn->rn_dupedkey;
996			if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
997				return error;
998		}
999		rn = next;
1000		if (rn->rn_flags & RNF_ROOT)
1001			return 0;
1002	}
1003	/* NOTREACHED */
1004}
1005
1006int
1007rn_inithead(head, off)
1008	void **head;
1009	int off;
1010{
1011	struct radix_node_head *rnh;
1012
1013	if (*head != NULL)
1014		return 1;
1015	R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
1016	if (rnh == NULL)
1017		return 0;
1018	*head = rnh;
1019	return rn_inithead0(rnh, off);
1020}
1021
1022int
1023rn_inithead0(rnh, off)
1024	struct radix_node_head *rnh;
1025	int off;
1026{
1027	struct radix_node *t;
1028	struct radix_node *tt;
1029	struct radix_node *ttt;
1030
1031	Bzero(rnh, sizeof (*rnh));
1032	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
1033	ttt = rnh->rnh_nodes + 2;
1034	t->rn_r = ttt;
1035	t->rn_p = t;
1036	tt = t->rn_l;
1037	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1038	tt->rn_b = -1 - off;
1039	*ttt = *tt;
1040	ttt->rn_key = rn_ones;
1041	rnh->rnh_addaddr = rn_addroute;
1042	rnh->rnh_deladdr = rn_delete;
1043	rnh->rnh_matchaddr = rn_match;
1044	rnh->rnh_lookup = rn_lookup;
1045	rnh->rnh_treetop = t;
1046	return 1;
1047}
1048
1049void
1050rn_init()
1051{
1052	char *cp, *cplim;
1053#ifdef _KERNEL
1054	static int initialized;
1055	__link_set_decl(domains, struct domain);
1056	struct domain *const *dpp;
1057
1058	if (initialized)
1059		return;
1060	initialized = 1;
1061
1062	__link_set_foreach(dpp, domains) {
1063		if ((*dpp)->dom_maxrtkey > max_keylen)
1064			max_keylen = (*dpp)->dom_maxrtkey;
1065	}
1066#ifdef INET
1067	encap_setkeylen();
1068#endif
1069#endif
1070	if (max_keylen == 0) {
1071		log(LOG_ERR,
1072		    "rn_init: radix functions require max_keylen be set\n");
1073		return;
1074	}
1075	R_Malloc(rn_zeros, char *, 3 * max_keylen);
1076	if (rn_zeros == NULL)
1077		panic("rn_init");
1078	Bzero(rn_zeros, 3 * max_keylen);
1079	rn_ones = cp = rn_zeros + max_keylen;
1080	addmask_key = cplim = rn_ones + max_keylen;
1081	while (cp < cplim)
1082		*cp++ = -1;
1083	if (rn_inithead((void *)&mask_rnhead, 0) == 0)
1084		panic("rn_init 2");
1085}
1086