1250551Sjeff/*
2250551Sjeff * Copyright (c) 2013 EMC Corp.
3250551Sjeff * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4250551Sjeff * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5250551Sjeff * All rights reserved.
6250551Sjeff *
7250551Sjeff * Redistribution and use in source and binary forms, with or without
8250551Sjeff * modification, are permitted provided that the following conditions
9250551Sjeff * are met:
10250551Sjeff * 1. Redistributions of source code must retain the above copyright
11250551Sjeff *    notice, this list of conditions and the following disclaimer.
12250551Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13250551Sjeff *    notice, this list of conditions and the following disclaimer in the
14250551Sjeff *    documentation and/or other materials provided with the distribution.
15250551Sjeff *
16250551Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17250551Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18250551Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19250551Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20250551Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21250551Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22250551Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23250551Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24250551Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25250551Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26250551Sjeff * SUCH DAMAGE.
27250551Sjeff *
28250551Sjeff */
29250551Sjeff
30250551Sjeff/*
31250551Sjeff * Path-compressed radix trie implementation.
32250551Sjeff *
33250551Sjeff * The implementation takes into account the following rationale:
34250551Sjeff * - Size of the nodes should be as small as possible but still big enough
35250551Sjeff *   to avoid a large maximum depth for the trie.  This is a balance
36250551Sjeff *   between the necessity to not wire too much physical memory for the nodes
37250551Sjeff *   and the necessity to avoid too much cache pollution during the trie
38250551Sjeff *   operations.
39250551Sjeff * - There is not a huge bias toward the number of lookup operations over
40250551Sjeff *   the number of insert and remove operations.  This basically implies
41250551Sjeff *   that optimizations supposedly helping one operation but hurting the
42250551Sjeff *   other might be carefully evaluated.
43250551Sjeff * - On average not many nodes are expected to be fully populated, hence
44250551Sjeff *   level compression may just complicate things.
45250551Sjeff */
46250551Sjeff
47250551Sjeff#include <sys/cdefs.h>
48250551Sjeff__FBSDID("$FreeBSD$");
49250551Sjeff
50250551Sjeff#include "opt_ddb.h"
51250551Sjeff
52250551Sjeff#include <sys/param.h>
53250551Sjeff#include <sys/systm.h>
54250551Sjeff#include <sys/kernel.h>
55250551Sjeff#include <sys/pctrie.h>
56250551Sjeff
57250551Sjeff#ifdef DDB
58250551Sjeff#include <ddb/ddb.h>
59250551Sjeff#endif
60250551Sjeff
61250551Sjeff/*
62250551Sjeff * These widths should allow the pointers to a node's children to fit within
63250551Sjeff * a single cache line.  The extra levels from a narrow width should not be
64250551Sjeff * a problem thanks to path compression.
65250551Sjeff */
66250551Sjeff#ifdef __LP64__
67250551Sjeff#define	PCTRIE_WIDTH	4
68250551Sjeff#else
69250551Sjeff#define	PCTRIE_WIDTH	3
70250551Sjeff#endif
71250551Sjeff
72250551Sjeff#define	PCTRIE_COUNT	(1 << PCTRIE_WIDTH)
73250551Sjeff#define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
74250551Sjeff#define	PCTRIE_LIMIT	(howmany((sizeof(uint64_t) * NBBY), PCTRIE_WIDTH) - 1)
75250551Sjeff
76250551Sjeff/* Flag bits stored in node pointers. */
77250551Sjeff#define	PCTRIE_ISLEAF	0x1
78250551Sjeff#define	PCTRIE_FLAGS	0x1
79250551Sjeff#define	PCTRIE_PAD	PCTRIE_FLAGS
80250551Sjeff
81250551Sjeff/* Returns one unit associated with specified level. */
82250551Sjeff#define	PCTRIE_UNITLEVEL(lev)						\
83250551Sjeff	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
84250551Sjeff
85250551Sjeffstruct pctrie_node {
86250551Sjeff	uint64_t	 pn_owner;			/* Owner of record. */
87250551Sjeff	uint16_t	 pn_count;			/* Valid children. */
88250551Sjeff	uint16_t	 pn_clev;			/* Current level. */
89250551Sjeff	void		*pn_child[PCTRIE_COUNT];	/* Child nodes. */
90250551Sjeff};
91250551Sjeff
92250551Sjeff/*
93250551Sjeff * Allocate a node.  Pre-allocation should ensure that the request
94250551Sjeff * will always be satisfied.
95250551Sjeff */
96250551Sjeffstatic __inline struct pctrie_node *
97250551Sjeffpctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t owner,
98250551Sjeff    uint16_t count, uint16_t clevel)
99250551Sjeff{
100250551Sjeff	struct pctrie_node *node;
101250551Sjeff
102250551Sjeff	node = allocfn(ptree);
103250551Sjeff	if (node == NULL)
104250551Sjeff		return (NULL);
105250551Sjeff	node->pn_owner = owner;
106250551Sjeff	node->pn_count = count;
107250551Sjeff	node->pn_clev = clevel;
108250551Sjeff
109250551Sjeff	return (node);
110250551Sjeff}
111250551Sjeff
112250551Sjeff/*
113250551Sjeff * Free radix node.
114250551Sjeff */
115250551Sjeffstatic __inline void
116250551Sjeffpctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
117250551Sjeff    pctrie_free_t freefn)
118250551Sjeff{
119250551Sjeff#ifdef INVARIANTS
120250551Sjeff	int slot;
121250551Sjeff
122250551Sjeff	KASSERT(node->pn_count == 0,
123250551Sjeff	    ("pctrie_node_put: node %p has %d children", node,
124250551Sjeff	    node->pn_count));
125250551Sjeff	for (slot = 0; slot < PCTRIE_COUNT; slot++)
126250551Sjeff		KASSERT(node->pn_child[slot] == NULL,
127250551Sjeff		    ("pctrie_node_put: node %p has a child", node));
128250551Sjeff#endif
129250551Sjeff	freefn(ptree, node);
130250551Sjeff}
131250551Sjeff
132250551Sjeff/*
133250551Sjeff * Return the position in the array for a given level.
134250551Sjeff */
135250551Sjeffstatic __inline int
136250551Sjeffpctrie_slot(uint64_t index, uint16_t level)
137250551Sjeff{
138250551Sjeff
139250551Sjeff	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
140250551Sjeff}
141250551Sjeff
142250551Sjeff/* Trims the key after the specified level. */
143250551Sjeffstatic __inline uint64_t
144250551Sjeffpctrie_trimkey(uint64_t index, uint16_t level)
145250551Sjeff{
146250551Sjeff	uint64_t ret;
147250551Sjeff
148250551Sjeff	ret = index;
149250551Sjeff	if (level > 0) {
150250551Sjeff		ret >>= level * PCTRIE_WIDTH;
151250551Sjeff		ret <<= level * PCTRIE_WIDTH;
152250551Sjeff	}
153250551Sjeff	return (ret);
154250551Sjeff}
155250551Sjeff
156250551Sjeff/*
157250551Sjeff * Get the root node for a tree.
158250551Sjeff */
159250551Sjeffstatic __inline struct pctrie_node *
160250551Sjeffpctrie_getroot(struct pctrie *ptree)
161250551Sjeff{
162250551Sjeff
163250551Sjeff	return ((struct pctrie_node *)ptree->pt_root);
164250551Sjeff}
165250551Sjeff
166250551Sjeff/*
167250551Sjeff * Set the root node for a tree.
168250551Sjeff */
169250551Sjeffstatic __inline void
170250551Sjeffpctrie_setroot(struct pctrie *ptree, struct pctrie_node *node)
171250551Sjeff{
172250551Sjeff
173250551Sjeff	ptree->pt_root = (uintptr_t)node;
174250551Sjeff}
175250551Sjeff
176250551Sjeff/*
177250551Sjeff * Returns TRUE if the specified node is a leaf and FALSE otherwise.
178250551Sjeff */
179250551Sjeffstatic __inline boolean_t
180250551Sjeffpctrie_isleaf(struct pctrie_node *node)
181250551Sjeff{
182250551Sjeff
183250551Sjeff	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
184250551Sjeff}
185250551Sjeff
186250551Sjeff/*
187250551Sjeff * Returns the associated val extracted from node.
188250551Sjeff */
189250551Sjeffstatic __inline uint64_t *
190250551Sjeffpctrie_toval(struct pctrie_node *node)
191250551Sjeff{
192250551Sjeff
193250551Sjeff	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
194250551Sjeff}
195250551Sjeff
196250551Sjeff/*
197250551Sjeff * Adds the val as a child of the provided node.
198250551Sjeff */
199250551Sjeffstatic __inline void
200250551Sjeffpctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev,
201250551Sjeff    uint64_t *val)
202250551Sjeff{
203250551Sjeff	int slot;
204250551Sjeff
205250551Sjeff	slot = pctrie_slot(index, clev);
206250551Sjeff	node->pn_child[slot] = (void *)((uintptr_t)val | PCTRIE_ISLEAF);
207250551Sjeff}
208250551Sjeff
209250551Sjeff/*
210250551Sjeff * Returns the slot where two keys differ.
211250551Sjeff * It cannot accept 2 equal keys.
212250551Sjeff */
213250551Sjeffstatic __inline uint16_t
214250551Sjeffpctrie_keydiff(uint64_t index1, uint64_t index2)
215250551Sjeff{
216250551Sjeff	uint16_t clev;
217250551Sjeff
218250551Sjeff	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
219250551Sjeff	    __func__, (uintmax_t)index1));
220250551Sjeff
221250551Sjeff	index1 ^= index2;
222250551Sjeff	for (clev = PCTRIE_LIMIT;; clev--)
223250551Sjeff		if (pctrie_slot(index1, clev) != 0)
224250551Sjeff			return (clev);
225250551Sjeff}
226250551Sjeff
227250551Sjeff/*
228250551Sjeff * Returns TRUE if it can be determined that key does not belong to the
229250551Sjeff * specified node.  Otherwise, returns FALSE.
230250551Sjeff */
231250551Sjeffstatic __inline boolean_t
232250551Sjeffpctrie_keybarr(struct pctrie_node *node, uint64_t idx)
233250551Sjeff{
234250551Sjeff
235250551Sjeff	if (node->pn_clev < PCTRIE_LIMIT) {
236250551Sjeff		idx = pctrie_trimkey(idx, node->pn_clev + 1);
237250551Sjeff		return (idx != node->pn_owner);
238250551Sjeff	}
239250551Sjeff	return (FALSE);
240250551Sjeff}
241250551Sjeff
242250551Sjeff/*
243250551Sjeff * Internal helper for pctrie_reclaim_allnodes().
244250551Sjeff * This function is recursive.
245250551Sjeff */
246250551Sjeffstatic void
247250551Sjeffpctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
248250551Sjeff    pctrie_free_t freefn)
249250551Sjeff{
250250551Sjeff	int slot;
251250551Sjeff
252250551Sjeff	KASSERT(node->pn_count <= PCTRIE_COUNT,
253250551Sjeff	    ("pctrie_reclaim_allnodes_int: bad count in node %p", node));
254250551Sjeff	for (slot = 0; node->pn_count != 0; slot++) {
255250551Sjeff		if (node->pn_child[slot] == NULL)
256250551Sjeff			continue;
257250551Sjeff		if (!pctrie_isleaf(node->pn_child[slot]))
258250551Sjeff			pctrie_reclaim_allnodes_int(ptree,
259250551Sjeff			    node->pn_child[slot], freefn);
260250551Sjeff		node->pn_child[slot] = NULL;
261250551Sjeff		node->pn_count--;
262250551Sjeff	}
263250551Sjeff	pctrie_node_put(ptree, node, freefn);
264250551Sjeff}
265250551Sjeff
266250551Sjeff/*
267250551Sjeff * pctrie node zone initializer.
268250551Sjeff */
269250551Sjeffint
270250551Sjeffpctrie_zone_init(void *mem, int size __unused, int flags __unused)
271250551Sjeff{
272250551Sjeff	struct pctrie_node *node;
273250551Sjeff
274250551Sjeff	node = mem;
275250551Sjeff	memset(node->pn_child, 0, sizeof(node->pn_child));
276250551Sjeff	return (0);
277250551Sjeff}
278250551Sjeff
279250551Sjeffsize_t
280250551Sjeffpctrie_node_size(void)
281250551Sjeff{
282250551Sjeff
283250551Sjeff	return (sizeof(struct pctrie_node));
284250551Sjeff}
285250551Sjeff
286250551Sjeff/*
287250551Sjeff * Inserts the key-value pair into the trie.
288250551Sjeff * Panics if the key already exists.
289250551Sjeff */
290250551Sjeffint
291250551Sjeffpctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
292250551Sjeff{
293250551Sjeff	uint64_t index, newind;
294250551Sjeff	void **parentp;
295250551Sjeff	struct pctrie_node *node, *tmp;
296250551Sjeff	uint64_t *m;
297250551Sjeff	int slot;
298250551Sjeff	uint16_t clev;
299250551Sjeff
300250551Sjeff	index = *val;
301250551Sjeff
302250551Sjeff	/*
303250551Sjeff	 * The owner of record for root is not really important because it
304250551Sjeff	 * will never be used.
305250551Sjeff	 */
306250551Sjeff	node = pctrie_getroot(ptree);
307250551Sjeff	if (node == NULL) {
308250551Sjeff		ptree->pt_root = (uintptr_t)val | PCTRIE_ISLEAF;
309250551Sjeff		return (0);
310250551Sjeff	}
311250551Sjeff	parentp = (void **)&ptree->pt_root;
312250551Sjeff	for (;;) {
313250551Sjeff		if (pctrie_isleaf(node)) {
314250551Sjeff			m = pctrie_toval(node);
315250551Sjeff			if (*m == index)
316250551Sjeff				panic("%s: key %jx is already present",
317250551Sjeff				    __func__, (uintmax_t)index);
318250551Sjeff			clev = pctrie_keydiff(*m, index);
319250551Sjeff			tmp = pctrie_node_get(ptree, allocfn,
320250551Sjeff			    pctrie_trimkey(index, clev + 1), 2, clev);
321250551Sjeff			if (tmp == NULL)
322250551Sjeff				return (ENOMEM);
323250551Sjeff			*parentp = tmp;
324250551Sjeff			pctrie_addval(tmp, index, clev, val);
325250551Sjeff			pctrie_addval(tmp, *m, clev, m);
326250551Sjeff			return (0);
327250551Sjeff		} else if (pctrie_keybarr(node, index))
328250551Sjeff			break;
329250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
330250551Sjeff		if (node->pn_child[slot] == NULL) {
331250551Sjeff			node->pn_count++;
332250551Sjeff			pctrie_addval(node, index, node->pn_clev, val);
333250551Sjeff			return (0);
334250551Sjeff		}
335250551Sjeff		parentp = &node->pn_child[slot];
336250551Sjeff		node = node->pn_child[slot];
337250551Sjeff	}
338250551Sjeff
339250551Sjeff	/*
340250551Sjeff	 * A new node is needed because the right insertion level is reached.
341250551Sjeff	 * Setup the new intermediate node and add the 2 children: the
342250551Sjeff	 * new object and the older edge.
343250551Sjeff	 */
344250551Sjeff	newind = node->pn_owner;
345250551Sjeff	clev = pctrie_keydiff(newind, index);
346250551Sjeff	tmp = pctrie_node_get(ptree, allocfn,
347250551Sjeff	    pctrie_trimkey(index, clev + 1), 2, clev);
348250551Sjeff	if (tmp == NULL)
349250551Sjeff		return (ENOMEM);
350250551Sjeff	*parentp = tmp;
351250551Sjeff	pctrie_addval(tmp, index, clev, val);
352250551Sjeff	slot = pctrie_slot(newind, clev);
353250551Sjeff	tmp->pn_child[slot] = node;
354250551Sjeff
355250551Sjeff	return (0);
356250551Sjeff}
357250551Sjeff
358250551Sjeff/*
359250551Sjeff * Returns the value stored at the index.  If the index is not present,
360250551Sjeff * NULL is returned.
361250551Sjeff */
362250551Sjeffuint64_t *
363250551Sjeffpctrie_lookup(struct pctrie *ptree, uint64_t index)
364250551Sjeff{
365250551Sjeff	struct pctrie_node *node;
366250551Sjeff	uint64_t *m;
367250551Sjeff	int slot;
368250551Sjeff
369250551Sjeff	node = pctrie_getroot(ptree);
370250551Sjeff	while (node != NULL) {
371250551Sjeff		if (pctrie_isleaf(node)) {
372250551Sjeff			m = pctrie_toval(node);
373250551Sjeff			if (*m == index)
374250551Sjeff				return (m);
375250551Sjeff			else
376250551Sjeff				break;
377250551Sjeff		} else if (pctrie_keybarr(node, index))
378250551Sjeff			break;
379250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
380250551Sjeff		node = node->pn_child[slot];
381250551Sjeff	}
382250551Sjeff	return (NULL);
383250551Sjeff}
384250551Sjeff
385250551Sjeff/*
386250551Sjeff * Look up the nearest entry at a position bigger than or equal to index.
387250551Sjeff */
388250551Sjeffuint64_t *
389250551Sjeffpctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
390250551Sjeff{
391250551Sjeff	struct pctrie_node *stack[PCTRIE_LIMIT];
392250551Sjeff	uint64_t inc;
393250551Sjeff	uint64_t *m;
394250551Sjeff	struct pctrie_node *child, *node;
395250551Sjeff#ifdef INVARIANTS
396250551Sjeff	int loops = 0;
397250551Sjeff#endif
398250551Sjeff	int slot, tos;
399250551Sjeff
400250551Sjeff	node = pctrie_getroot(ptree);
401250551Sjeff	if (node == NULL)
402250551Sjeff		return (NULL);
403250551Sjeff	else if (pctrie_isleaf(node)) {
404250551Sjeff		m = pctrie_toval(node);
405250551Sjeff		if (*m >= index)
406250551Sjeff			return (m);
407250551Sjeff		else
408250551Sjeff			return (NULL);
409250551Sjeff	}
410250551Sjeff	tos = 0;
411250551Sjeff	for (;;) {
412250551Sjeff		/*
413250551Sjeff		 * If the keys differ before the current bisection node,
414250551Sjeff		 * then the search key might rollback to the earliest
415250551Sjeff		 * available bisection node or to the smallest key
416250551Sjeff		 * in the current node (if the owner is bigger than the
417250551Sjeff		 * search key).
418250551Sjeff		 */
419250551Sjeff		if (pctrie_keybarr(node, index)) {
420250551Sjeff			if (index > node->pn_owner) {
421250551Sjeffascend:
422250551Sjeff				KASSERT(++loops < 1000,
423250551Sjeff				    ("pctrie_lookup_ge: too many loops"));
424250551Sjeff
425250551Sjeff				/*
426250551Sjeff				 * Pop nodes from the stack until either the
427250551Sjeff				 * stack is empty or a node that could have a
428250551Sjeff				 * matching descendant is found.
429250551Sjeff				 */
430250551Sjeff				do {
431250551Sjeff					if (tos == 0)
432250551Sjeff						return (NULL);
433250551Sjeff					node = stack[--tos];
434250551Sjeff				} while (pctrie_slot(index,
435250551Sjeff				    node->pn_clev) == (PCTRIE_COUNT - 1));
436250551Sjeff
437250551Sjeff				/*
438250551Sjeff				 * The following computation cannot overflow
439250551Sjeff				 * because index's slot at the current level
440250551Sjeff				 * is less than PCTRIE_COUNT - 1.
441250551Sjeff				 */
442250551Sjeff				index = pctrie_trimkey(index,
443250551Sjeff				    node->pn_clev);
444250551Sjeff				index += PCTRIE_UNITLEVEL(node->pn_clev);
445250551Sjeff			} else
446250551Sjeff				index = node->pn_owner;
447250551Sjeff			KASSERT(!pctrie_keybarr(node, index),
448250551Sjeff			    ("pctrie_lookup_ge: keybarr failed"));
449250551Sjeff		}
450250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
451250551Sjeff		child = node->pn_child[slot];
452250551Sjeff		if (pctrie_isleaf(child)) {
453250551Sjeff			m = pctrie_toval(child);
454250551Sjeff			if (*m >= index)
455250551Sjeff				return (m);
456250551Sjeff		} else if (child != NULL)
457250551Sjeff			goto descend;
458250551Sjeff
459250551Sjeff		/*
460250551Sjeff		 * Look for an available edge or val within the current
461250551Sjeff		 * bisection node.
462250551Sjeff		 */
463250551Sjeff                if (slot < (PCTRIE_COUNT - 1)) {
464250551Sjeff			inc = PCTRIE_UNITLEVEL(node->pn_clev);
465250551Sjeff			index = pctrie_trimkey(index, node->pn_clev);
466250551Sjeff			do {
467250551Sjeff				index += inc;
468250551Sjeff				slot++;
469250551Sjeff				child = node->pn_child[slot];
470250551Sjeff				if (pctrie_isleaf(child)) {
471250551Sjeff					m = pctrie_toval(child);
472250551Sjeff					if (*m >= index)
473250551Sjeff						return (m);
474250551Sjeff				} else if (child != NULL)
475250551Sjeff					goto descend;
476250551Sjeff			} while (slot < (PCTRIE_COUNT - 1));
477250551Sjeff		}
478250551Sjeff		KASSERT(child == NULL || pctrie_isleaf(child),
479250551Sjeff		    ("pctrie_lookup_ge: child is radix node"));
480250551Sjeff
481250551Sjeff		/*
482250551Sjeff		 * If a value or edge bigger than the search slot is not found
483250551Sjeff		 * in the current node, ascend to the next higher-level node.
484250551Sjeff		 */
485250551Sjeff		goto ascend;
486250551Sjeffdescend:
487250551Sjeff		KASSERT(node->pn_clev > 0,
488250551Sjeff		    ("pctrie_lookup_ge: pushing leaf's parent"));
489250551Sjeff		KASSERT(tos < PCTRIE_LIMIT,
490250551Sjeff		    ("pctrie_lookup_ge: stack overflow"));
491250551Sjeff		stack[tos++] = node;
492250551Sjeff		node = child;
493250551Sjeff	}
494250551Sjeff}
495250551Sjeff
496250551Sjeff/*
497250551Sjeff * Look up the nearest entry at a position less than or equal to index.
498250551Sjeff */
499250551Sjeffuint64_t *
500250551Sjeffpctrie_lookup_le(struct pctrie *ptree, uint64_t index)
501250551Sjeff{
502250551Sjeff	struct pctrie_node *stack[PCTRIE_LIMIT];
503250551Sjeff	uint64_t inc;
504250551Sjeff	uint64_t *m;
505250551Sjeff	struct pctrie_node *child, *node;
506250551Sjeff#ifdef INVARIANTS
507250551Sjeff	int loops = 0;
508250551Sjeff#endif
509250551Sjeff	int slot, tos;
510250551Sjeff
511250551Sjeff	node = pctrie_getroot(ptree);
512250551Sjeff	if (node == NULL)
513250551Sjeff		return (NULL);
514250551Sjeff	else if (pctrie_isleaf(node)) {
515250551Sjeff		m = pctrie_toval(node);
516250551Sjeff		if (*m <= index)
517250551Sjeff			return (m);
518250551Sjeff		else
519250551Sjeff			return (NULL);
520250551Sjeff	}
521250551Sjeff	tos = 0;
522250551Sjeff	for (;;) {
523250551Sjeff		/*
524250551Sjeff		 * If the keys differ before the current bisection node,
525250551Sjeff		 * then the search key might rollback to the earliest
526250551Sjeff		 * available bisection node or to the largest key
527250551Sjeff		 * in the current node (if the owner is smaller than the
528250551Sjeff		 * search key).
529250551Sjeff		 */
530250551Sjeff		if (pctrie_keybarr(node, index)) {
531250551Sjeff			if (index > node->pn_owner) {
532250551Sjeff				index = node->pn_owner + PCTRIE_COUNT *
533250551Sjeff				    PCTRIE_UNITLEVEL(node->pn_clev);
534250551Sjeff			} else {
535250551Sjeffascend:
536250551Sjeff				KASSERT(++loops < 1000,
537250551Sjeff				    ("pctrie_lookup_le: too many loops"));
538250551Sjeff
539250551Sjeff				/*
540250551Sjeff				 * Pop nodes from the stack until either the
541250551Sjeff				 * stack is empty or a node that could have a
542250551Sjeff				 * matching descendant is found.
543250551Sjeff				 */
544250551Sjeff				do {
545250551Sjeff					if (tos == 0)
546250551Sjeff						return (NULL);
547250551Sjeff					node = stack[--tos];
548250551Sjeff				} while (pctrie_slot(index,
549250551Sjeff				    node->pn_clev) == 0);
550250551Sjeff
551250551Sjeff				/*
552250551Sjeff				 * The following computation cannot overflow
553250551Sjeff				 * because index's slot at the current level
554250551Sjeff				 * is greater than 0.
555250551Sjeff				 */
556250551Sjeff				index = pctrie_trimkey(index,
557250551Sjeff				    node->pn_clev);
558250551Sjeff			}
559250551Sjeff			index--;
560250551Sjeff			KASSERT(!pctrie_keybarr(node, index),
561250551Sjeff			    ("pctrie_lookup_le: keybarr failed"));
562250551Sjeff		}
563250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
564250551Sjeff		child = node->pn_child[slot];
565250551Sjeff		if (pctrie_isleaf(child)) {
566250551Sjeff			m = pctrie_toval(child);
567250551Sjeff			if (*m <= index)
568250551Sjeff				return (m);
569250551Sjeff		} else if (child != NULL)
570250551Sjeff			goto descend;
571250551Sjeff
572250551Sjeff		/*
573250551Sjeff		 * Look for an available edge or value within the current
574250551Sjeff		 * bisection node.
575250551Sjeff		 */
576250551Sjeff		if (slot > 0) {
577250551Sjeff			inc = PCTRIE_UNITLEVEL(node->pn_clev);
578250551Sjeff			index |= inc - 1;
579250551Sjeff			do {
580250551Sjeff				index -= inc;
581250551Sjeff				slot--;
582250551Sjeff				child = node->pn_child[slot];
583250551Sjeff				if (pctrie_isleaf(child)) {
584250551Sjeff					m = pctrie_toval(child);
585250551Sjeff					if (*m <= index)
586250551Sjeff						return (m);
587250551Sjeff				} else if (child != NULL)
588250551Sjeff					goto descend;
589250551Sjeff			} while (slot > 0);
590250551Sjeff		}
591250551Sjeff		KASSERT(child == NULL || pctrie_isleaf(child),
592250551Sjeff		    ("pctrie_lookup_le: child is radix node"));
593250551Sjeff
594250551Sjeff		/*
595250551Sjeff		 * If a value or edge smaller than the search slot is not found
596250551Sjeff		 * in the current node, ascend to the next higher-level node.
597250551Sjeff		 */
598250551Sjeff		goto ascend;
599250551Sjeffdescend:
600250551Sjeff		KASSERT(node->pn_clev > 0,
601250551Sjeff		    ("pctrie_lookup_le: pushing leaf's parent"));
602250551Sjeff		KASSERT(tos < PCTRIE_LIMIT,
603250551Sjeff		    ("pctrie_lookup_le: stack overflow"));
604250551Sjeff		stack[tos++] = node;
605250551Sjeff		node = child;
606250551Sjeff	}
607250551Sjeff}
608250551Sjeff
609250551Sjeff/*
610250551Sjeff * Remove the specified index from the tree.
611250551Sjeff * Panics if the key is not present.
612250551Sjeff */
613250551Sjeffvoid
614250551Sjeffpctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
615250551Sjeff{
616250551Sjeff	struct pctrie_node *node, *parent;
617250551Sjeff	uint64_t *m;
618250551Sjeff	int i, slot;
619250551Sjeff
620250551Sjeff	node = pctrie_getroot(ptree);
621250551Sjeff	if (pctrie_isleaf(node)) {
622250551Sjeff		m = pctrie_toval(node);
623250551Sjeff		if (*m != index)
624250551Sjeff			panic("%s: invalid key found", __func__);
625250551Sjeff		pctrie_setroot(ptree, NULL);
626250551Sjeff		return;
627250551Sjeff	}
628250551Sjeff	parent = NULL;
629250551Sjeff	for (;;) {
630250551Sjeff		if (node == NULL)
631250551Sjeff			panic("pctrie_remove: impossible to locate the key");
632250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
633250551Sjeff		if (pctrie_isleaf(node->pn_child[slot])) {
634250551Sjeff			m = pctrie_toval(node->pn_child[slot]);
635250551Sjeff			if (*m != index)
636250551Sjeff				panic("%s: invalid key found", __func__);
637250551Sjeff			node->pn_child[slot] = NULL;
638250551Sjeff			node->pn_count--;
639250551Sjeff			if (node->pn_count > 1)
640250551Sjeff				break;
641250551Sjeff			for (i = 0; i < PCTRIE_COUNT; i++)
642250551Sjeff				if (node->pn_child[i] != NULL)
643250551Sjeff					break;
644250551Sjeff			KASSERT(i != PCTRIE_COUNT,
645250551Sjeff			    ("%s: invalid node configuration", __func__));
646250551Sjeff			if (parent == NULL)
647250551Sjeff				pctrie_setroot(ptree, node->pn_child[i]);
648250551Sjeff			else {
649250551Sjeff				slot = pctrie_slot(index, parent->pn_clev);
650250551Sjeff				KASSERT(parent->pn_child[slot] == node,
651250551Sjeff				    ("%s: invalid child value", __func__));
652250551Sjeff				parent->pn_child[slot] = node->pn_child[i];
653250551Sjeff			}
654250551Sjeff			node->pn_count--;
655250551Sjeff			node->pn_child[i] = NULL;
656250551Sjeff			pctrie_node_put(ptree, node, freefn);
657250551Sjeff			break;
658250551Sjeff		}
659250551Sjeff		parent = node;
660250551Sjeff		node = node->pn_child[slot];
661250551Sjeff	}
662250551Sjeff}
663250551Sjeff
664250551Sjeff/*
665250551Sjeff * Remove and free all the nodes from the tree.
666250551Sjeff * This function is recursive but there is a tight control on it as the
667250551Sjeff * maximum depth of the tree is fixed.
668250551Sjeff */
669250551Sjeffvoid
670250551Sjeffpctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
671250551Sjeff{
672250551Sjeff	struct pctrie_node *root;
673250551Sjeff
674250551Sjeff	root = pctrie_getroot(ptree);
675250551Sjeff	if (root == NULL)
676250551Sjeff		return;
677250551Sjeff	pctrie_setroot(ptree, NULL);
678250551Sjeff	if (!pctrie_isleaf(root))
679250551Sjeff		pctrie_reclaim_allnodes_int(ptree, root, freefn);
680250551Sjeff}
681250551Sjeff
682250551Sjeff#ifdef DDB
683250551Sjeff/*
684250551Sjeff * Show details about the given node.
685250551Sjeff */
686250551SjeffDB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
687250551Sjeff{
688250551Sjeff	struct pctrie_node *node;
689250551Sjeff	int i;
690250551Sjeff
691250551Sjeff        if (!have_addr)
692250551Sjeff                return;
693250551Sjeff	node = (struct pctrie_node *)addr;
694250551Sjeff	db_printf("node %p, owner %jx, children count %u, level %u:\n",
695250551Sjeff	    (void *)node, (uintmax_t)node->pn_owner, node->pn_count,
696250551Sjeff	    node->pn_clev);
697250551Sjeff	for (i = 0; i < PCTRIE_COUNT; i++)
698250551Sjeff		if (node->pn_child[i] != NULL)
699250551Sjeff			db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
700250551Sjeff			    i, (void *)node->pn_child[i],
701250551Sjeff			    pctrie_isleaf(node->pn_child[i]) ?
702250551Sjeff			    pctrie_toval(node->pn_child[i]) : NULL,
703250551Sjeff			    node->pn_clev);
704250551Sjeff}
705250551Sjeff#endif /* DDB */
706