1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1988, 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * Routines to build and maintain radix trees for routing lookups.
34 */
35#include <sys/param.h>
36#ifdef	_KERNEL
37#include <sys/lock.h>
38#include <sys/mutex.h>
39#include <sys/rmlock.h>
40#include <sys/systm.h>
41#include <sys/malloc.h>
42#include <sys/syslog.h>
43#include <net/radix.h>
44#else /* !_KERNEL */
45#include <stdio.h>
46#include <strings.h>
47#include <stdlib.h>
48#define log(x, arg...)	fprintf(stderr, ## arg)
49#define panic(x)	fprintf(stderr, "PANIC: %s", x), exit(1)
50#define min(a, b) ((a) < (b) ? (a) : (b) )
51#include <net/radix.h>
52#endif /* !_KERNEL */
53
/*
 * Forward declarations of the file-local tree primitives defined below.
 */
static struct radix_node
	 *rn_insert(void *, struct radix_head *, int *,
	     struct radix_node [2]),
	 *rn_newpair(void *, int, struct radix_node[2]),
	 *rn_search(const void *, struct radix_node *),
	 *rn_search_m(const void *, struct radix_node *, void *);
static struct radix_node *rn_addmask(const void *, struct radix_mask_head *, int,int);

static void rn_detachhead_internal(struct radix_head *);

/*
 * Size in bytes of the scratch key built by rn_addmask() and of the
 * constant keys below; rn_addmask() truncates longer masks to this length.
 */
#define	RADIX_MAX_KEY_LEN	32

/*
 * rn_zeros has static storage and no initializer, so it is all zero bytes;
 * rn_inithead_internal() uses it as the key of the left-most root leaf.
 */
static char rn_zeros[RADIX_MAX_KEY_LEN];
/*
 * rn_ones has every bit set.  It is the key of the right-most root leaf,
 * the stand-in mask used by rn_satisfies_leaf() for leaves without a mask,
 * and the source of the leading all-ones bytes in rn_addmask().
 */
static char rn_ones[RADIX_MAX_KEY_LEN] = {
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
};

/* Forward declarations of the mask-ordering and mask-list helpers. */
static int	rn_lexobetter(const void *m_arg, const void *n_arg);
static struct radix_mask *
		rn_new_radix_mask(struct radix_node *tt,
		    struct radix_mask *next);
static int	rn_satisfies_leaf(const char *trial, struct radix_node *leaf,
		    int skip);
80
81/*
82 * The data structure for the keys is a radix tree with one way
83 * branching removed.  The index rn_bit at an internal node n represents a bit
84 * position to be tested.  The tree is arranged so that all descendants
85 * of a node n have keys whose bits all agree up to position rn_bit - 1.
86 * (We say the index of n is rn_bit.)
87 *
88 * There is at least one descendant which has a one bit at position rn_bit,
89 * and at least one with a zero there.
90 *
 * A route is determined by a pair of key and mask.  We require the
 * bit-wise logical AND of the key and mask to be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
96 *
97 * We say a mask is normal if every bit is 0, past the index of the mask.
98 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
99 * and m is a normal mask, then the route applies to every descendant of n.
 * If the index(m) < rn_bit, this implies the trailing last few bits of k
 * before bit rn_bit are all 0 (and hence the same is true of every
 * descendant of n), so the route applies to all descendants of the node
 * as well.
103 *
104 * Similar logic shows that a non-normal mask m such that
105 * index(m) <= index(n) could potentially apply to many children of n.
106 * Thus, for each non-host route, we attach its mask to a list at an internal
107 * node as high in the tree as we can go.
108 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
113 */
114
/*
 * Most of the functions in this code assume that the key/mask arguments
 * are sockaddr-like structures, where the first byte is a u_char
 * indicating the size of the entire structure.
 *
 * To make the assumption more explicit, we use the LEN() macro to access
 * this field. It is safe to pass an expression with side effects
 * to LEN() as the argument is evaluated only once.
 * We cast the result to int as this is the dominant usage.
 *
 * LEN() only reads the first byte through a 'const u_char *'; it does
 * not check that the pointer is non-NULL.
 */
#define LEN(x) ( (int) (*(const u_char *)(x)) )
126
/*
 * XXX THIS NEEDS TO BE FIXED
 * In the code, pointers to keys and masks are passed as either
 * 'void *' (because callers used to pass pointers of various kinds), or
 * 'caddr_t' (which is fine for pointer arithmetic, but not very
 * clean when you dereference it to access data). Furthermore, caddr_t
 * is really 'char *', while the natural type to operate on keys and
 * masks would be 'u_char'. This mismatch requires a lot of casts and
 * intermediate variables to adapt types, which clutter the code.
 */
137
138/*
139 * Search a node in the tree matching the key.
140 */
141static struct radix_node *
142rn_search(const void *v_arg, struct radix_node *head)
143{
144	struct radix_node *x;
145	c_caddr_t v;
146
147	for (x = head, v = v_arg; x->rn_bit >= 0;) {
148		if (x->rn_bmask & v[x->rn_offset])
149			x = x->rn_right;
150		else
151			x = x->rn_left;
152	}
153	return (x);
154}
155
156/*
157 * Same as above, but with an additional mask.
158 * XXX note this function is used only once.
159 */
160static struct radix_node *
161rn_search_m(const void *v_arg, struct radix_node *head, void *m_arg)
162{
163	struct radix_node *x;
164	c_caddr_t v = v_arg, m = m_arg;
165
166	for (x = head; x->rn_bit >= 0;) {
167		if ((x->rn_bmask & m[x->rn_offset]) &&
168		    (x->rn_bmask & v[x->rn_offset]))
169			x = x->rn_right;
170		else
171			x = x->rn_left;
172	}
173	return (x);
174}
175
176int
177rn_refines(const void *m_arg, const void *n_arg)
178{
179	c_caddr_t m = m_arg, n = n_arg;
180	c_caddr_t lim, lim2 = lim = n + LEN(n);
181	int longer = LEN(n++) - LEN(m++);
182	int masks_are_equal = 1;
183
184	if (longer > 0)
185		lim -= longer;
186	while (n < lim) {
187		if (*n & ~(*m))
188			return (0);
189		if (*n++ != *m++)
190			masks_are_equal = 0;
191	}
192	while (n < lim2)
193		if (*n++)
194			return (0);
195	if (masks_are_equal && (longer < 0))
196		for (lim2 = m - longer; m < lim2; )
197			if (*m++)
198				return (1);
199	return (!masks_are_equal);
200}
201
202/*
203 * Search for exact match in given @head.
204 * Assume host bits are cleared in @v_arg if @m_arg is not NULL
205 * Note that prefixes with /32 or /128 masks are treated differently
206 * from host routes.
207 */
208struct radix_node *
209rn_lookup(const void *v_arg, const void *m_arg, struct radix_head *head)
210{
211	struct radix_node *x;
212	caddr_t netmask;
213
214	if (m_arg != NULL) {
215		/*
216		 * Most common case: search exact prefix/mask
217		 */
218		x = rn_addmask(m_arg, head->rnh_masks, 1,
219		    head->rnh_treetop->rn_offset);
220		if (x == NULL)
221			return (NULL);
222		netmask = x->rn_key;
223
224		x = rn_match(v_arg, head);
225
226		while (x != NULL && x->rn_mask != netmask)
227			x = x->rn_dupedkey;
228
229		return (x);
230	}
231
232	/*
233	 * Search for host address.
234	 */
235	if ((x = rn_match(v_arg, head)) == NULL)
236		return (NULL);
237
238	/* Check if found key is the same */
239	if (LEN(x->rn_key) != LEN(v_arg) || bcmp(x->rn_key, v_arg, LEN(v_arg)))
240		return (NULL);
241
242	/* Check if this is not host route */
243	if (x->rn_mask != NULL)
244		return (NULL);
245
246	return (x);
247}
248
249static int
250rn_satisfies_leaf(const char *trial, struct radix_node *leaf, int skip)
251{
252	const char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
253	const char *cplim;
254	int length = min(LEN(cp), LEN(cp2));
255
256	if (cp3 == NULL)
257		cp3 = rn_ones;
258	else
259		length = min(length, LEN(cp3));
260	cplim = cp + length; cp3 += skip; cp2 += skip;
261	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
262		if ((*cp ^ *cp2) & *cp3)
263			return (0);
264	return (1);
265}
266
/*
 * Search for longest-prefix match in given @head
 *
 * The lookup runs in three stages:
 *  1. descend to a leaf using the bits of the key and compare the key
 *     bytes against that leaf's key;
 *  2. on a mismatch, try the leaves on that leaf's duped-key chain
 *     (skipping the first one when it carries no mask);
 *  3. walk up through the parents, trying every mask annotation on each
 *     node's rn_mklist, until the top of the tree has been examined.
 *
 * Returns the matching leaf, or 0 when nothing matches.  When stage 1
 * matches a leaf flagged RNF_ROOT, that leaf's rn_dupedkey is returned
 * instead, which may be NULL.
 */
struct radix_node *
rn_match(const void *v_arg, struct radix_head *head)
{
	c_caddr_t v = v_arg;
	struct radix_node *t = head->rnh_treetop, *x;
	c_caddr_t cp = v, cp2;
	c_caddr_t cplim;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, vlen = LEN(cp), matched_off;
	int test, b, rn_bit;

	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0; ) {
		if (t->rn_bmask & cp[t->rn_offset])
			t = t->rn_right;
		else
			t = t->rn_left;
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask)
		vlen = *(u_char *)t->rn_mask;
	/* Compare bytes [off, vlen) of the search key and the leaf key. */
	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto on1;
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;
	return (t);
on1:
	/*
	 * cp/cp2 point at the first differing byte.  After the loop, b is
	 * the position (0 = most significant) of the highest set bit of
	 * the XOR, i.e. of the first differing bit within that byte.
	 */
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;)
		b--;
	matched_off = cp - v;
	b += matched_off << 3;
	/*
	 * Encode the bit number the same way leaves and masks do
	 * (-1 - index), so it can be compared directly with rn_bit/rm_bit.
	 */
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	if ((saved_t = t)->rn_mask == 0)
		t = t->rn_dupedkey;
	for (; t; t = t->rn_dupedkey)
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			/*
			 * Normal mask: it matches when the first differing
			 * bit is not before the index stored in the leaf.
			 */
			if (rn_bit <= t->rn_bit)
				return (t);
		} else if (rn_satisfies_leaf(v, t, matched_off))
				return (t);
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				/* Same index test as for the leaves above. */
				if (rn_bit <= m->rm_bit)
					return (m->rm_leaf);
			} else {
				/*
				 * Non-normal mask: search the subtree below
				 * t under this mask, pick the leaf on the
				 * duped-key chain that uses exactly this
				 * mask, and verify it byte by byte.
				 */
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x && rn_satisfies_leaf(v, x, off))
					return (x);
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return (0);
}
370
371/*
372 * Returns the next (wider) prefix for the key defined by @rn
373 *  if exists.
374 */
375struct radix_node *
376rn_nextprefix(struct radix_node *rn)
377{
378	for (rn = rn->rn_dupedkey; rn != NULL; rn = rn->rn_dupedkey) {
379		if (!(rn->rn_flags & RNF_ROOT))
380			return (rn);
381	}
382	return (NULL);
383}
384
#ifdef RN_DEBUG
/* Counter handed out as rn_info to every node created under RN_DEBUG. */
int	rn_nodenum;
/* Head of the list of created leaves, linked through rn_ybro. */
struct	radix_node *rn_clist;
/* Not referenced by the code visible in this file. */
int	rn_saveinfo;
/* When non-zero, rn_insert() logs and traverses the tree around an insert. */
int	rn_debug =  1;
#endif
391
392/*
393 * Whenever we add a new leaf to the tree, we also add a parent node,
394 * so we allocate them as an array of two elements: the first one must be
395 * the leaf (see RNTORT() in route.c), the second one is the parent.
396 * This routine initializes the relevant fields of the nodes, so that
 * the leaf is the left child of the parent node, and both nodes have
 * (almost) all fields filled as appropriate.
399 * (XXX some fields are left unset, see the '#if 0' section).
400 * The function returns a pointer to the parent node.
401 */
402
403static struct radix_node *
404rn_newpair(void *v, int b, struct radix_node nodes[2])
405{
406	struct radix_node *tt = nodes, *t = tt + 1;
407	t->rn_bit = b;
408	t->rn_bmask = 0x80 >> (b & 7);
409	t->rn_left = tt;
410	t->rn_offset = b >> 3;
411
412#if 0  /* XXX perhaps we should fill these fields as well. */
413	t->rn_parent = t->rn_right = NULL;
414
415	tt->rn_mask = NULL;
416	tt->rn_dupedkey = NULL;
417	tt->rn_bmask = 0;
418#endif
419	tt->rn_bit = -1;
420	tt->rn_key = (caddr_t)v;
421	tt->rn_parent = t;
422	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
423	tt->rn_mklist = t->rn_mklist = 0;
424#ifdef RN_DEBUG
425	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
426	tt->rn_twin = t;
427	tt->rn_ybro = rn_clist;
428	rn_clist = tt;
429#endif
430	return (t);
431}
432
/*
 * Insert the key v_arg into the tree under 'head', unless a leaf whose
 * key has the same bytes (from the head's offset up to the key length)
 * is already there.
 *
 * v_arg	key to insert; the pointer itself is stored in the new leaf.
 * head		tree to insert into.
 * dupentry	set to 1 when an equal key was found, to 0 otherwise.
 * nodes	storage for the new leaf (nodes[0]) and internal node
 *		(nodes[1]); untouched when the key is a duplicate.
 *
 * Returns the existing leaf when *dupentry is 1, the new leaf otherwise.
 */
static struct radix_node *
rn_insert(void *v_arg, struct radix_head *head, int *dupentry,
    struct radix_node nodes[2])
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset, vlen = LEN(v);
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	unsigned b;
	struct radix_node *p, *tt, *x;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	caddr_t cp2 = t->rn_key + head_off;
	int cmp_res;
	caddr_t cplim = v + vlen;

	while (cp < cplim)
		if (*cp2++ != *cp++)
			goto on1;
	/* Every byte agreed: the key is already in the tree. */
	*dupentry = 1;
	return (t);
on1:
	*dupentry = 0;
	/*
	 * cp and cp2 are one past the differing byte.  Start b at the first
	 * bit of the following byte and step back once for every
	 * significant bit of the XOR; b ends up as the index of the first
	 * differing bit.
	 */
	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
	for (b = (cp - v) << 3; cmp_res; b--)
		cmp_res >>= 1;

	/*
	 * Descend again from the top until reaching a node that tests a bit
	 * at or beyond b.  A leaf has a negative rn_bit, which the unsigned
	 * cast turns into a huge value, so the loop also stops at a leaf.
	 * On exit p is the parent of the insertion point and x the subtree
	 * that will hang below the new internal node.
	 */
	x = top;
	cp = v;
	do {
		p = x;
		if (cp[x->rn_offset] & x->rn_bmask)
			x = x->rn_right;
		else
			x = x->rn_left;
	} while (b > (unsigned) x->rn_bit);
				/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
	if (rn_debug)
		log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
#endif
	/* t is the new internal node testing bit b, tt the new leaf. */
	t = rn_newpair(v_arg, b, nodes);
	tt = t->rn_left;
	/* Hook t in on the side of p that x used to occupy. */
	if ((cp[p->rn_offset] & p->rn_bmask) == 0)
		p->rn_left = t;
	else
		p->rn_right = t;
	x->rn_parent = t;
	t->rn_parent = p; /* frees x, p as temp vars below */
	/*
	 * rn_newpair() put the leaf on the left.  If the new key has bit b
	 * set, the leaf belongs on the right and x on the left.
	 */
	if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
		t->rn_right = x;
	} else {
		t->rn_right = tt;
		t->rn_left = x;
	}
#ifdef RN_DEBUG
	if (rn_debug)
		log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
#endif
	return (tt);
}
496
497static struct radix_node *
498rn_addmask(const void *n_arg, struct radix_mask_head *maskhead, int search, int skip)
499{
500	const unsigned char *netmask = n_arg;
501	const unsigned char *c, *clim;
502	unsigned char *cp;
503	struct radix_node *x;
504	int b = 0, mlen, j;
505	int maskduplicated, isnormal;
506	struct radix_node *saved_x;
507	unsigned char addmask_key[RADIX_MAX_KEY_LEN];
508
509	if ((mlen = LEN(netmask)) > RADIX_MAX_KEY_LEN)
510		mlen = RADIX_MAX_KEY_LEN;
511	if (skip == 0)
512		skip = 1;
513	if (mlen <= skip)
514		return (maskhead->mask_nodes);
515
516	bzero(addmask_key, RADIX_MAX_KEY_LEN);
517	if (skip > 1)
518		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
519	bcopy(netmask + skip, addmask_key + skip, mlen - skip);
520	/*
521	 * Trim trailing zeroes.
522	 */
523	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
524		cp--;
525	mlen = cp - addmask_key;
526	if (mlen <= skip)
527		return (maskhead->mask_nodes);
528	*addmask_key = mlen;
529	x = rn_search(addmask_key, maskhead->head.rnh_treetop);
530	if (bcmp(addmask_key, x->rn_key, mlen) != 0)
531		x = NULL;
532	if (x || search)
533		return (x);
534	R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x));
535	if ((saved_x = x) == NULL)
536		return (0);
537	netmask = cp = (unsigned char *)(x + 2);
538	bcopy(addmask_key, cp, mlen);
539	x = rn_insert(cp, &maskhead->head, &maskduplicated, x);
540	if (maskduplicated) {
541		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
542		R_Free(saved_x);
543		return (x);
544	}
545	/*
546	 * Calculate index of mask, and check for normalcy.
547	 * First find the first byte with a 0 bit, then if there are
548	 * more bits left (remember we already trimmed the trailing 0's),
549	 * the bits should be contiguous, otherwise we have got
550	 * a non-contiguous mask.
551	 */
552#define	CONTIG(_c)	(((~(_c) + 1) & (_c)) == (unsigned char)(~(_c) + 1))
553	clim = netmask + mlen;
554	isnormal = 1;
555	for (c = netmask + skip; (c < clim) && *(const u_char *)c == 0xff;)
556		c++;
557	if (c != clim) {
558		for (j = 0x80; (j & *c) != 0; j >>= 1)
559			b++;
560		if (!CONTIG(*c) || c != (clim - 1))
561			isnormal = 0;
562	}
563	b += (c - netmask) << 3;
564	x->rn_bit = -1 - b;
565	if (isnormal)
566		x->rn_flags |= RNF_NORMAL;
567	return (x);
568}
569
570static int	/* XXX: arbitrary ordering for non-contiguous masks */
571rn_lexobetter(const void *m_arg, const void *n_arg)
572{
573	const u_char *mp = m_arg, *np = n_arg, *lim;
574
575	if (LEN(mp) > LEN(np))
576		return (1);  /* not really, but need to check longer one first */
577	if (LEN(mp) == LEN(np))
578		for (lim = mp + LEN(mp); mp < lim;)
579			if (*mp++ > *np++)
580				return (1);
581	return (0);
582}
583
584static struct radix_mask *
585rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
586{
587	struct radix_mask *m;
588
589	R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask));
590	if (m == NULL) {
591		log(LOG_ERR, "Failed to allocate route mask\n");
592		return (0);
593	}
594	bzero(m, sizeof(*m));
595	m->rm_bit = tt->rn_bit;
596	m->rm_flags = tt->rn_flags;
597	if (tt->rn_flags & RNF_NORMAL)
598		m->rm_leaf = tt;
599	else
600		m->rm_mask = tt->rn_mask;
601	m->rm_mklist = next;
602	tt->rn_mklist = m;
603	return (m);
604}
605
606struct radix_node *
607rn_addroute(void *v_arg, const void *n_arg, struct radix_head *head,
608    struct radix_node treenodes[2])
609{
610	caddr_t v = (caddr_t)v_arg, netmask = NULL;
611	struct radix_node *t, *x = NULL, *tt;
612	struct radix_node *saved_tt, *top = head->rnh_treetop;
613	short b = 0, b_leaf = 0;
614	int keyduplicated;
615	caddr_t mmask;
616	struct radix_mask *m, **mp;
617
618	/*
619	 * In dealing with non-contiguous masks, there may be
620	 * many different routes which have the same mask.
621	 * We will find it useful to have a unique pointer to
622	 * the mask to speed avoiding duplicate references at
623	 * nodes and possibly save time in calculating indices.
624	 */
625	if (n_arg)  {
626		x = rn_addmask(n_arg, head->rnh_masks, 0, top->rn_offset);
627		if (x == NULL)
628			return (0);
629		b_leaf = x->rn_bit;
630		b = -1 - x->rn_bit;
631		netmask = x->rn_key;
632	}
633	/*
634	 * Deal with duplicated keys: attach node to previous instance
635	 */
636	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
637	if (keyduplicated) {
638		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
639			if (tt->rn_mask == netmask)
640				return (0);
641			if (netmask == 0 ||
642			    (tt->rn_mask &&
643			     ((b_leaf < tt->rn_bit) /* index(netmask) > node */
644			      || rn_refines(netmask, tt->rn_mask)
645			      || rn_lexobetter(netmask, tt->rn_mask))))
646				break;
647		}
648		/*
649		 * If the mask is not duplicated, we wouldn't
650		 * find it among possible duplicate key entries
651		 * anyway, so the above test doesn't hurt.
652		 *
653		 * We sort the masks for a duplicated key the same way as
654		 * in a masklist -- most specific to least specific.
655		 * This may require the unfortunate nuisance of relocating
656		 * the head of the list.
657		 *
658		 * We also reverse, or doubly link the list through the
659		 * parent pointer.
660		 */
661		if (tt == saved_tt) {
662			struct	radix_node *xx = x;
663			/* link in at head of list */
664			(tt = treenodes)->rn_dupedkey = t;
665			tt->rn_flags = t->rn_flags;
666			tt->rn_parent = x = t->rn_parent;
667			t->rn_parent = tt;	 		/* parent */
668			if (x->rn_left == t)
669				x->rn_left = tt;
670			else
671				x->rn_right = tt;
672			saved_tt = tt; x = xx;
673		} else {
674			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
675			t->rn_dupedkey = tt;
676			tt->rn_parent = t;			/* parent */
677			if (tt->rn_dupedkey)			/* parent */
678				tt->rn_dupedkey->rn_parent = tt; /* parent */
679		}
680#ifdef RN_DEBUG
681		t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
682		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
683#endif
684		tt->rn_key = (caddr_t) v;
685		tt->rn_bit = -1;
686		tt->rn_flags = RNF_ACTIVE;
687	}
688	/*
689	 * Put mask in tree.
690	 */
691	if (netmask) {
692		tt->rn_mask = netmask;
693		tt->rn_bit = x->rn_bit;
694		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
695	}
696	t = saved_tt->rn_parent;
697	if (keyduplicated)
698		goto on2;
699	b_leaf = -1 - t->rn_bit;
700	if (t->rn_right == saved_tt)
701		x = t->rn_left;
702	else
703		x = t->rn_right;
704	/* Promote general routes from below */
705	if (x->rn_bit < 0) {
706	    for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
707		if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
708			*mp = m = rn_new_radix_mask(x, 0);
709			if (m)
710				mp = &m->rm_mklist;
711		}
712	} else if (x->rn_mklist) {
713		/*
714		 * Skip over masks whose index is > that of new node
715		 */
716		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
717			if (m->rm_bit >= b_leaf)
718				break;
719		t->rn_mklist = m; *mp = NULL;
720	}
721on2:
722	/* Add new route to highest possible ancestor's list */
723	if ((netmask == 0) || (b > t->rn_bit ))
724		return (tt); /* can't lift at all */
725	b_leaf = tt->rn_bit;
726	do {
727		x = t;
728		t = t->rn_parent;
729	} while (b <= t->rn_bit && x != top);
730	/*
731	 * Search through routes associated with node to
732	 * insert new route according to index.
733	 * Need same criteria as when sorting dupedkeys to avoid
734	 * double loop on deletion.
735	 */
736	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
737		if (m->rm_bit < b_leaf)
738			continue;
739		if (m->rm_bit > b_leaf)
740			break;
741		if (m->rm_flags & RNF_NORMAL) {
742			mmask = m->rm_leaf->rn_mask;
743			if (tt->rn_flags & RNF_NORMAL) {
744			    log(LOG_ERR,
745			        "Non-unique normal route, mask not entered\n");
746				return (tt);
747			}
748		} else
749			mmask = m->rm_mask;
750		if (mmask == netmask) {
751			m->rm_refs++;
752			tt->rn_mklist = m;
753			return (tt);
754		}
755		if (rn_refines(netmask, mmask)
756		    || rn_lexobetter(netmask, mmask))
757			break;
758	}
759	*mp = rn_new_radix_mask(tt, *mp);
760	return (tt);
761}
762
/*
 * Remove the leaf with key v_arg and mask netmask_arg (NULL for a route
 * without a mask) from the tree under 'head'.
 *
 * Returns the removed leaf, with RNF_ACTIVE cleared on it and on the node
 * that follows it in memory (tt[1]); nothing is freed here except mask
 * annotations.  Returns 0 when no leaf with that key and mask is found,
 * when the mask annotation of a RNF_NORMAL leaf is inconsistent or
 * cannot be found, or when the leaf is flagged RNF_ROOT.
 */
struct radix_node *
rn_delete(const void *v_arg, const void *netmask_arg, struct radix_head *head)
{
	struct radix_node *t, *p, *x, *tt;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	c_caddr_t v;
	c_caddr_t netmask;
	int b, head_off, vlen;

	v = v_arg;
	netmask = netmask_arg;
	x = head->rnh_treetop;
	tt = rn_search(v, x);
	head_off = x->rn_offset;
	vlen =  LEN(v);
	saved_tt = tt;
	top = x;
	/* The leaf reached must hold exactly the key being deleted. */
	if (tt == NULL ||
	    bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
		return (0);
	/*
	 * Delete our route from mask lists.
	 */
	if (netmask) {
		/* Look up (never create) the canonical copy of the mask. */
		x = rn_addmask(netmask, head->rnh_masks, 1, head_off);
		if (x == NULL)
			return (0);
		netmask = x->rn_key;
		/* Find the leaf on the duped-key chain that uses this mask. */
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == NULL)
				return (0);
	}
	/* No mask, or no annotation: nothing to remove from a mask list. */
	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == NULL)
		goto on1;
	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt || m->rm_refs > 0) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return (0);  /* dangling ref could cause disaster */
		}
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			goto on1;
		}
		/* Other leaves still share this annotation: keep it. */
		if (--m->rm_refs >= 0)
			goto on1;
	}
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit)
		goto on1; /* Wasn't lifted at all */
	/* Climb to the ancestor whose list holds the annotation. */
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/* Unlink and free the annotation. */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
		if (m == saved_m) {
			*mp = m->rm_mklist;
			R_Free(m);
			break;
		}
	if (m == NULL) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL)
			return (0); /* Dangling ref to us */
	}
on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT)
		return (0);
#ifdef RN_DEBUG
	/* Get us out of the creation list */
	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
	if (t) t->rn_ybro = tt->rn_ybro;
#endif
	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey) {
		/*
		 * Here, tt is the deletion target and
		 * saved_tt is the head of the dupekey chain.
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey; x->rn_parent = t;
			if (t->rn_left == tt)
				t->rn_left = x;
			else
				t->rn_right = x;
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
				p = p->rn_dupedkey;
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey)		/* parent */
					tt->rn_dupedkey->rn_parent = p;
								/* parent */
			} else log(LOG_ERR, "rn_delete: couldn't find us\n");
		}
		/*
		 * If the node that follows tt in memory (tt + 1) is still
		 * active in the tree, move its contents into the node that
		 * follows x and repoint its parent and both children.
		 */
		t = tt + 1;
		if  (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
			*++x = *t;
			p = t->rn_parent;
#else
			b = t->rn_info;
			*++x = *t;
			t->rn_info = b;
			p = t->rn_parent;
#endif
			if (p->rn_left == t)
				p->rn_left = x;
			else
				p->rn_right = x;
			x->rn_left->rn_parent = x;
			x->rn_right->rn_parent = x;
		}
		goto out;
	}
	/*
	 * No duplicates: the parent t is left with a single child, so
	 * splice the sibling x into t's place under the grandparent p.
	 */
	if (t->rn_left == tt)
		x = t->rn_right;
	else
		x = t->rn_left;
	p = t->rn_parent;
	if (p->rn_right == t)
		p->rn_right = x;
	else
		p->rn_left = x;
	x->rn_parent = p;
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist) {
		if (x->rn_bit >= 0) {
			/* Internal sibling: append t's list to its list. */
			for (mp = &x->rn_mklist; (m = *mp);)
				mp = &m->rm_mklist;
			*mp = t->rn_mklist;
		} else {
			/* If there are any key,mask pairs in a sibling
			   duped-key chain, some subset will appear sorted
			   in the same order attached to our mklist */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = 0;
					if (--(m->rm_refs) < 0)
						R_Free(m);
					m = mm;
				}
			if (m)
				log(LOG_ERR,
				    "rn_delete: Orphaned Mask %p at %p\n",
				    m, x);
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = tt + 1;
	if (t != x) {
		/*
		 * The node following tt in memory (x) is in use elsewhere
		 * in the tree, while t has just been unlinked: copy x into
		 * t and repoint x's children and parent at t.
		 */
#ifndef RN_DEBUG
		*t = *x;
#else
		b = t->rn_info;
		*t = *x;
		t->rn_info = b;
#endif
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x)
			p->rn_left = t;
		else
			p->rn_right = t;
	}
out:
	tt->rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}
947
/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit.
 *
 * h	tree to walk.
 * a	key that selects the starting subtree.
 * m	mask applied to 'a' while descending; must not be NULL.
 * f	callback invoked as (*f)(leaf, w) for every leaf visited that is
 *	not flagged RNF_ROOT, including the leaves on duped-key chains.
 * w	opaque argument handed to f.
 *
 * Returns the first non-zero value returned by f, or 0 when the walk
 * finishes.
 */
int
rn_walktree_from(struct radix_head *h, void *a, void *m,
    walktree_f_t *f, void *w)
{
	int error;
	struct radix_node *base, *next;
	u_char *xa = (u_char *)a;
	u_char *xm = (u_char *)m;
	struct radix_node *rn, *last = NULL; /* shut up gcc */
	int stopping = 0;
	int lastb;

	KASSERT(m != NULL, ("%s: mask needs to be specified", __func__));

	/*
	 * rn_search_m is sort-of-open-coded here. We cannot use the
	 * function because we need to keep track of the last node seen.
	 */
	/* printf("about to search\n"); */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
		last = rn;
		/* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
		       rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
		/* Stop at the first node that tests a bit outside the mask. */
		if (!(rn->rn_bmask & xm[rn->rn_offset])) {
			break;
		}
		if (rn->rn_bmask & xa[rn->rn_offset]) {
			rn = rn->rn_right;
		} else {
			rn = rn->rn_left;
		}
	}
	/* printf("done searching\n"); */

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the leaf.
	 */
	if (rn->rn_bit >= 0)
		rn = last;
	/* Bit tested by the last internal node seen on the way down. */
	lastb = last->rn_bit;

	/* printf("rn %p, lastb %d\n", rn, lastb);*/

	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	/* Start at the left-most leaf below rn. */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;

	while (!stopping) {
		/* printf("node %p (%d)\n", rn, rn->rn_bit); */
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		       && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/* printf("up too far\n"); */
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used. Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * At the top of the tree, no need to traverse the right
		 * half, prevent the traversal of the entire tree in the
		 * case of default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT)
			stopping = 1;

		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			/* Fetch the chain successor before f can touch rn. */
			base = rn->rn_dupedkey;
			/* printf("leaf %p\n", rn); */
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		rn = next;

		if (rn->rn_flags & RNF_ROOT) {
			/* printf("root, stopping"); */
			stopping = 1;
		}
	}
	return (0);
}
1056
1057int
1058rn_walktree(struct radix_head *h, walktree_f_t *f, void *w)
1059{
1060	int error;
1061	struct radix_node *base, *next;
1062	struct radix_node *rn = h->rnh_treetop;
1063	/*
1064	 * This gets complicated because we may delete the node
1065	 * while applying the function f to it, so we need to calculate
1066	 * the successor node in advance.
1067	 */
1068
1069	/* First time through node, go left */
1070	while (rn->rn_bit >= 0)
1071		rn = rn->rn_left;
1072	for (;;) {
1073		base = rn;
1074		/* If at right child go back up, otherwise, go right */
1075		while (rn->rn_parent->rn_right == rn
1076		       && (rn->rn_flags & RNF_ROOT) == 0)
1077			rn = rn->rn_parent;
1078		/* Find the next *leaf* since next node might vanish, too */
1079		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
1080			rn = rn->rn_left;
1081		next = rn;
1082		/* Process leaves */
1083		while ((rn = base)) {
1084			base = rn->rn_dupedkey;
1085			if (!(rn->rn_flags & RNF_ROOT)
1086			    && (error = (*f)(rn, w)))
1087				return (error);
1088		}
1089		rn = next;
1090		if (rn->rn_flags & RNF_ROOT)
1091			return (0);
1092	}
1093	/* NOTREACHED */
1094}
1095
1096/*
1097 * Initialize an empty tree. This has 3 nodes, which are passed
1098 * via base_nodes (in the order <left,root,right>) and are
1099 * marked RNF_ROOT so they cannot be freed.
1100 * The leaves have all-zero and all-one keys, with significant
1101 * bits starting at 'off'.
1102 */
1103void
1104rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, int off)
1105{
1106	struct radix_node *t, *tt, *ttt;
1107
1108	t = rn_newpair(rn_zeros, off, base_nodes);
1109	ttt = base_nodes + 2;
1110	t->rn_right = ttt;
1111	t->rn_parent = t;
1112	tt = t->rn_left;	/* ... which in turn is base_nodes */
1113	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1114	tt->rn_bit = -1 - off;
1115	*ttt = *tt;
1116	ttt->rn_key = rn_ones;
1117
1118	rh->rnh_treetop = t;
1119}
1120
1121static void
1122rn_detachhead_internal(struct radix_head *head)
1123{
1124
1125	KASSERT((head != NULL),
1126	    ("%s: head already freed", __func__));
1127
1128	/* Free <left,root,right> nodes. */
1129	R_Free(head);
1130}
1131
1132/* Functions used by 'struct radix_node_head' users */
1133
1134int
1135rn_inithead(void **head, int off)
1136{
1137	struct radix_node_head *rnh;
1138	struct radix_mask_head *rmh;
1139
1140	rnh = *head;
1141	rmh = NULL;
1142
1143	if (*head != NULL)
1144		return (1);
1145
1146	R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
1147	R_Zalloc(rmh, struct radix_mask_head *, sizeof (*rmh));
1148	if (rnh == NULL || rmh == NULL) {
1149		if (rnh != NULL)
1150			R_Free(rnh);
1151		if (rmh != NULL)
1152			R_Free(rmh);
1153		return (0);
1154	}
1155
1156	/* Init trees */
1157	rn_inithead_internal(&rnh->rh, rnh->rnh_nodes, off);
1158	rn_inithead_internal(&rmh->head, rmh->mask_nodes, 0);
1159	*head = rnh;
1160	rnh->rh.rnh_masks = rmh;
1161
1162	/* Finally, set base callbacks */
1163	rnh->rnh_addaddr = rn_addroute;
1164	rnh->rnh_deladdr = rn_delete;
1165	rnh->rnh_matchaddr = rn_match;
1166	rnh->rnh_lookup = rn_lookup;
1167	rnh->rnh_walktree = rn_walktree;
1168	rnh->rnh_walktree_from = rn_walktree_from;
1169
1170	return (1);
1171}
1172
1173static int
1174rn_freeentry(struct radix_node *rn, void *arg)
1175{
1176	struct radix_head * const rnh = arg;
1177	struct radix_node *x;
1178
1179	x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
1180	if (x != NULL)
1181		R_Free(x);
1182	return (0);
1183}
1184
1185int
1186rn_detachhead(void **head)
1187{
1188	struct radix_node_head *rnh;
1189
1190	KASSERT((head != NULL && *head != NULL),
1191	    ("%s: head already freed", __func__));
1192
1193	rnh = (struct radix_node_head *)(*head);
1194
1195	rn_walktree(&rnh->rh.rnh_masks->head, rn_freeentry, rnh->rh.rnh_masks);
1196	rn_detachhead_internal(&rnh->rh.rnh_masks->head);
1197	rn_detachhead_internal(&rnh->rh);
1198
1199	*head = NULL;
1200
1201	return (1);
1202}
1203