1250551Sjeff/*
2250551Sjeff * Copyright (c) 2013 EMC Corp.
3250551Sjeff * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4250551Sjeff * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5250551Sjeff * All rights reserved.
6250551Sjeff *
7250551Sjeff * Redistribution and use in source and binary forms, with or without
8250551Sjeff * modification, are permitted provided that the following conditions
9250551Sjeff * are met:
10250551Sjeff * 1. Redistributions of source code must retain the above copyright
11250551Sjeff *    notice, this list of conditions and the following disclaimer.
12250551Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13250551Sjeff *    notice, this list of conditions and the following disclaimer in the
14250551Sjeff *    documentation and/or other materials provided with the distribution.
15250551Sjeff *
16250551Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17250551Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18250551Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19250551Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20250551Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21250551Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22250551Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23250551Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24250551Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25250551Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26250551Sjeff * SUCH DAMAGE.
27250551Sjeff *
28250551Sjeff */
29250551Sjeff
30250551Sjeff/*
31250551Sjeff * Path-compressed radix trie implementation.
32250551Sjeff *
33250551Sjeff * The implementation takes into account the following rationale:
34250551Sjeff * - Size of the nodes should be as small as possible but still big enough
35250551Sjeff *   to avoid a large maximum depth for the trie.  This is a balance
36250551Sjeff *   between the necessity to not wire too much physical memory for the nodes
37250551Sjeff *   and the necessity to avoid too much cache pollution during the trie
38250551Sjeff *   operations.
39250551Sjeff * - There is not a huge bias toward the number of lookup operations over
40250551Sjeff *   the number of insert and remove operations.  This basically implies
41250551Sjeff *   that optimizations supposedly helping one operation but hurting the
42250551Sjeff *   other might be carefully evaluated.
43250551Sjeff * - On average not many nodes are expected to be fully populated, hence
44250551Sjeff *   level compression may just complicate things.
45250551Sjeff */
46250551Sjeff
47250551Sjeff#include <sys/cdefs.h>
48250551Sjeff__FBSDID("$FreeBSD: stable/11/sys/kern/subr_pctrie.c 321977 2017-08-03 07:28:54Z kib $");
49250551Sjeff
50250551Sjeff#include "opt_ddb.h"
51250551Sjeff
52250551Sjeff#include <sys/param.h>
53250551Sjeff#include <sys/systm.h>
54250551Sjeff#include <sys/kernel.h>
55250551Sjeff#include <sys/pctrie.h>
56250551Sjeff
57250551Sjeff#ifdef DDB
58250551Sjeff#include <ddb/ddb.h>
59250551Sjeff#endif
60250551Sjeff
61250551Sjeff#define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
62298649Spfg#define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
63250551Sjeff
64250551Sjeff/* Flag bits stored in node pointers. */
65250551Sjeff#define	PCTRIE_ISLEAF	0x1
66250551Sjeff#define	PCTRIE_FLAGS	0x1
67250551Sjeff#define	PCTRIE_PAD	PCTRIE_FLAGS
68250551Sjeff
69250551Sjeff/* Returns one unit associated with specified level. */
70250551Sjeff#define	PCTRIE_UNITLEVEL(lev)						\
71250551Sjeff	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
72250551Sjeff
73250551Sjeffstruct pctrie_node {
74250551Sjeff	uint64_t	 pn_owner;			/* Owner of record. */
75250551Sjeff	uint16_t	 pn_count;			/* Valid children. */
76250551Sjeff	uint16_t	 pn_clev;			/* Current level. */
77250551Sjeff	void		*pn_child[PCTRIE_COUNT];	/* Child nodes. */
78250551Sjeff};
79250551Sjeff
80250551Sjeff/*
81250551Sjeff * Allocate a node.  Pre-allocation should ensure that the request
82250551Sjeff * will always be satisfied.
83250551Sjeff */
84250551Sjeffstatic __inline struct pctrie_node *
85250551Sjeffpctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t owner,
86250551Sjeff    uint16_t count, uint16_t clevel)
87250551Sjeff{
88250551Sjeff	struct pctrie_node *node;
89250551Sjeff
90250551Sjeff	node = allocfn(ptree);
91250551Sjeff	if (node == NULL)
92250551Sjeff		return (NULL);
93250551Sjeff	node->pn_owner = owner;
94250551Sjeff	node->pn_count = count;
95250551Sjeff	node->pn_clev = clevel;
96250551Sjeff
97250551Sjeff	return (node);
98250551Sjeff}
99250551Sjeff
100250551Sjeff/*
101250551Sjeff * Free radix node.
102250551Sjeff */
103250551Sjeffstatic __inline void
104250551Sjeffpctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
105250551Sjeff    pctrie_free_t freefn)
106250551Sjeff{
107250551Sjeff#ifdef INVARIANTS
108250551Sjeff	int slot;
109250551Sjeff
110250551Sjeff	KASSERT(node->pn_count == 0,
111250551Sjeff	    ("pctrie_node_put: node %p has %d children", node,
112250551Sjeff	    node->pn_count));
113250551Sjeff	for (slot = 0; slot < PCTRIE_COUNT; slot++)
114250551Sjeff		KASSERT(node->pn_child[slot] == NULL,
115250551Sjeff		    ("pctrie_node_put: node %p has a child", node));
116250551Sjeff#endif
117250551Sjeff	freefn(ptree, node);
118250551Sjeff}
119250551Sjeff
120250551Sjeff/*
121250551Sjeff * Return the position in the array for a given level.
122250551Sjeff */
123250551Sjeffstatic __inline int
124250551Sjeffpctrie_slot(uint64_t index, uint16_t level)
125250551Sjeff{
126250551Sjeff
127250551Sjeff	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
128250551Sjeff}
129250551Sjeff
130250551Sjeff/* Trims the key after the specified level. */
131250551Sjeffstatic __inline uint64_t
132250551Sjeffpctrie_trimkey(uint64_t index, uint16_t level)
133250551Sjeff{
134250551Sjeff	uint64_t ret;
135250551Sjeff
136250551Sjeff	ret = index;
137250551Sjeff	if (level > 0) {
138250551Sjeff		ret >>= level * PCTRIE_WIDTH;
139250551Sjeff		ret <<= level * PCTRIE_WIDTH;
140250551Sjeff	}
141250551Sjeff	return (ret);
142250551Sjeff}
143250551Sjeff
144250551Sjeff/*
145250551Sjeff * Get the root node for a tree.
146250551Sjeff */
147250551Sjeffstatic __inline struct pctrie_node *
148250551Sjeffpctrie_getroot(struct pctrie *ptree)
149250551Sjeff{
150250551Sjeff
151250551Sjeff	return ((struct pctrie_node *)ptree->pt_root);
152250551Sjeff}
153250551Sjeff
154250551Sjeff/*
155250551Sjeff * Set the root node for a tree.
156250551Sjeff */
157250551Sjeffstatic __inline void
158250551Sjeffpctrie_setroot(struct pctrie *ptree, struct pctrie_node *node)
159250551Sjeff{
160250551Sjeff
161250551Sjeff	ptree->pt_root = (uintptr_t)node;
162250551Sjeff}
163250551Sjeff
164250551Sjeff/*
165250551Sjeff * Returns TRUE if the specified node is a leaf and FALSE otherwise.
166250551Sjeff */
167250551Sjeffstatic __inline boolean_t
168250551Sjeffpctrie_isleaf(struct pctrie_node *node)
169250551Sjeff{
170250551Sjeff
171250551Sjeff	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
172250551Sjeff}
173250551Sjeff
174250551Sjeff/*
175250551Sjeff * Returns the associated val extracted from node.
176250551Sjeff */
177250551Sjeffstatic __inline uint64_t *
178250551Sjeffpctrie_toval(struct pctrie_node *node)
179250551Sjeff{
180250551Sjeff
181250551Sjeff	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
182250551Sjeff}
183250551Sjeff
184250551Sjeff/*
185250551Sjeff * Adds the val as a child of the provided node.
186250551Sjeff */
187250551Sjeffstatic __inline void
188250551Sjeffpctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev,
189250551Sjeff    uint64_t *val)
190250551Sjeff{
191250551Sjeff	int slot;
192250551Sjeff
193250551Sjeff	slot = pctrie_slot(index, clev);
194250551Sjeff	node->pn_child[slot] = (void *)((uintptr_t)val | PCTRIE_ISLEAF);
195250551Sjeff}
196250551Sjeff
197250551Sjeff/*
198250551Sjeff * Returns the slot where two keys differ.
199250551Sjeff * It cannot accept 2 equal keys.
200250551Sjeff */
201250551Sjeffstatic __inline uint16_t
202250551Sjeffpctrie_keydiff(uint64_t index1, uint64_t index2)
203250551Sjeff{
204250551Sjeff	uint16_t clev;
205250551Sjeff
206250551Sjeff	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
207250551Sjeff	    __func__, (uintmax_t)index1));
208250551Sjeff
209250551Sjeff	index1 ^= index2;
210250551Sjeff	for (clev = PCTRIE_LIMIT;; clev--)
211250551Sjeff		if (pctrie_slot(index1, clev) != 0)
212250551Sjeff			return (clev);
213250551Sjeff}
214250551Sjeff
215250551Sjeff/*
216250551Sjeff * Returns TRUE if it can be determined that key does not belong to the
217250551Sjeff * specified node.  Otherwise, returns FALSE.
218250551Sjeff */
219250551Sjeffstatic __inline boolean_t
220250551Sjeffpctrie_keybarr(struct pctrie_node *node, uint64_t idx)
221250551Sjeff{
222250551Sjeff
223250551Sjeff	if (node->pn_clev < PCTRIE_LIMIT) {
224250551Sjeff		idx = pctrie_trimkey(idx, node->pn_clev + 1);
225250551Sjeff		return (idx != node->pn_owner);
226250551Sjeff	}
227250551Sjeff	return (FALSE);
228250551Sjeff}
229250551Sjeff
230250551Sjeff/*
231250551Sjeff * Internal helper for pctrie_reclaim_allnodes().
232250551Sjeff * This function is recursive.
233250551Sjeff */
234250551Sjeffstatic void
235250551Sjeffpctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
236250551Sjeff    pctrie_free_t freefn)
237250551Sjeff{
238250551Sjeff	int slot;
239250551Sjeff
240250551Sjeff	KASSERT(node->pn_count <= PCTRIE_COUNT,
241250551Sjeff	    ("pctrie_reclaim_allnodes_int: bad count in node %p", node));
242250551Sjeff	for (slot = 0; node->pn_count != 0; slot++) {
243250551Sjeff		if (node->pn_child[slot] == NULL)
244250551Sjeff			continue;
245250551Sjeff		if (!pctrie_isleaf(node->pn_child[slot]))
246250551Sjeff			pctrie_reclaim_allnodes_int(ptree,
247250551Sjeff			    node->pn_child[slot], freefn);
248250551Sjeff		node->pn_child[slot] = NULL;
249250551Sjeff		node->pn_count--;
250250551Sjeff	}
251250551Sjeff	pctrie_node_put(ptree, node, freefn);
252250551Sjeff}
253250551Sjeff
254250551Sjeff/*
255250551Sjeff * pctrie node zone initializer.
256250551Sjeff */
257250551Sjeffint
258250551Sjeffpctrie_zone_init(void *mem, int size __unused, int flags __unused)
259250551Sjeff{
260250551Sjeff	struct pctrie_node *node;
261250551Sjeff
262250551Sjeff	node = mem;
263250551Sjeff	memset(node->pn_child, 0, sizeof(node->pn_child));
264250551Sjeff	return (0);
265250551Sjeff}
266250551Sjeff
267250551Sjeffsize_t
268250551Sjeffpctrie_node_size(void)
269250551Sjeff{
270250551Sjeff
271250551Sjeff	return (sizeof(struct pctrie_node));
272250551Sjeff}
273250551Sjeff
274250551Sjeff/*
275250551Sjeff * Inserts the key-value pair into the trie.
276250551Sjeff * Panics if the key already exists.
277250551Sjeff */
278250551Sjeffint
279250551Sjeffpctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
280250551Sjeff{
281250551Sjeff	uint64_t index, newind;
282250551Sjeff	void **parentp;
283250551Sjeff	struct pctrie_node *node, *tmp;
284250551Sjeff	uint64_t *m;
285250551Sjeff	int slot;
286250551Sjeff	uint16_t clev;
287250551Sjeff
288250551Sjeff	index = *val;
289250551Sjeff
290250551Sjeff	/*
291250551Sjeff	 * The owner of record for root is not really important because it
292250551Sjeff	 * will never be used.
293250551Sjeff	 */
294250551Sjeff	node = pctrie_getroot(ptree);
295250551Sjeff	if (node == NULL) {
296250551Sjeff		ptree->pt_root = (uintptr_t)val | PCTRIE_ISLEAF;
297250551Sjeff		return (0);
298250551Sjeff	}
299250551Sjeff	parentp = (void **)&ptree->pt_root;
300250551Sjeff	for (;;) {
301250551Sjeff		if (pctrie_isleaf(node)) {
302250551Sjeff			m = pctrie_toval(node);
303250551Sjeff			if (*m == index)
304250551Sjeff				panic("%s: key %jx is already present",
305250551Sjeff				    __func__, (uintmax_t)index);
306250551Sjeff			clev = pctrie_keydiff(*m, index);
307250551Sjeff			tmp = pctrie_node_get(ptree, allocfn,
308250551Sjeff			    pctrie_trimkey(index, clev + 1), 2, clev);
309250551Sjeff			if (tmp == NULL)
310250551Sjeff				return (ENOMEM);
311250551Sjeff			*parentp = tmp;
312250551Sjeff			pctrie_addval(tmp, index, clev, val);
313250551Sjeff			pctrie_addval(tmp, *m, clev, m);
314250551Sjeff			return (0);
315250551Sjeff		} else if (pctrie_keybarr(node, index))
316250551Sjeff			break;
317250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
318250551Sjeff		if (node->pn_child[slot] == NULL) {
319250551Sjeff			node->pn_count++;
320250551Sjeff			pctrie_addval(node, index, node->pn_clev, val);
321250551Sjeff			return (0);
322250551Sjeff		}
323250551Sjeff		parentp = &node->pn_child[slot];
324250551Sjeff		node = node->pn_child[slot];
325250551Sjeff	}
326250551Sjeff
327250551Sjeff	/*
328250551Sjeff	 * A new node is needed because the right insertion level is reached.
329250551Sjeff	 * Setup the new intermediate node and add the 2 children: the
330250551Sjeff	 * new object and the older edge.
331250551Sjeff	 */
332250551Sjeff	newind = node->pn_owner;
333250551Sjeff	clev = pctrie_keydiff(newind, index);
334250551Sjeff	tmp = pctrie_node_get(ptree, allocfn,
335250551Sjeff	    pctrie_trimkey(index, clev + 1), 2, clev);
336250551Sjeff	if (tmp == NULL)
337250551Sjeff		return (ENOMEM);
338250551Sjeff	*parentp = tmp;
339250551Sjeff	pctrie_addval(tmp, index, clev, val);
340250551Sjeff	slot = pctrie_slot(newind, clev);
341250551Sjeff	tmp->pn_child[slot] = node;
342250551Sjeff
343250551Sjeff	return (0);
344250551Sjeff}
345250551Sjeff
346250551Sjeff/*
347250551Sjeff * Returns the value stored at the index.  If the index is not present,
348250551Sjeff * NULL is returned.
349250551Sjeff */
350250551Sjeffuint64_t *
351250551Sjeffpctrie_lookup(struct pctrie *ptree, uint64_t index)
352250551Sjeff{
353250551Sjeff	struct pctrie_node *node;
354250551Sjeff	uint64_t *m;
355250551Sjeff	int slot;
356250551Sjeff
357250551Sjeff	node = pctrie_getroot(ptree);
358250551Sjeff	while (node != NULL) {
359250551Sjeff		if (pctrie_isleaf(node)) {
360250551Sjeff			m = pctrie_toval(node);
361250551Sjeff			if (*m == index)
362250551Sjeff				return (m);
363250551Sjeff			else
364250551Sjeff				break;
365250551Sjeff		} else if (pctrie_keybarr(node, index))
366250551Sjeff			break;
367250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
368250551Sjeff		node = node->pn_child[slot];
369250551Sjeff	}
370250551Sjeff	return (NULL);
371250551Sjeff}
372250551Sjeff
373250551Sjeff/*
374250551Sjeff * Look up the nearest entry at a position bigger than or equal to index.
375250551Sjeff */
376250551Sjeffuint64_t *
377250551Sjeffpctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
378250551Sjeff{
379250551Sjeff	struct pctrie_node *stack[PCTRIE_LIMIT];
380250551Sjeff	uint64_t inc;
381250551Sjeff	uint64_t *m;
382250551Sjeff	struct pctrie_node *child, *node;
383250551Sjeff#ifdef INVARIANTS
384250551Sjeff	int loops = 0;
385250551Sjeff#endif
386250551Sjeff	int slot, tos;
387250551Sjeff
388250551Sjeff	node = pctrie_getroot(ptree);
389250551Sjeff	if (node == NULL)
390250551Sjeff		return (NULL);
391250551Sjeff	else if (pctrie_isleaf(node)) {
392250551Sjeff		m = pctrie_toval(node);
393250551Sjeff		if (*m >= index)
394250551Sjeff			return (m);
395250551Sjeff		else
396250551Sjeff			return (NULL);
397250551Sjeff	}
398250551Sjeff	tos = 0;
399250551Sjeff	for (;;) {
400250551Sjeff		/*
401250551Sjeff		 * If the keys differ before the current bisection node,
402250551Sjeff		 * then the search key might rollback to the earliest
403250551Sjeff		 * available bisection node or to the smallest key
404250551Sjeff		 * in the current node (if the owner is bigger than the
405250551Sjeff		 * search key).
406250551Sjeff		 */
407250551Sjeff		if (pctrie_keybarr(node, index)) {
408250551Sjeff			if (index > node->pn_owner) {
409250551Sjeffascend:
410250551Sjeff				KASSERT(++loops < 1000,
411250551Sjeff				    ("pctrie_lookup_ge: too many loops"));
412250551Sjeff
413250551Sjeff				/*
414250551Sjeff				 * Pop nodes from the stack until either the
415250551Sjeff				 * stack is empty or a node that could have a
416250551Sjeff				 * matching descendant is found.
417250551Sjeff				 */
418250551Sjeff				do {
419250551Sjeff					if (tos == 0)
420250551Sjeff						return (NULL);
421250551Sjeff					node = stack[--tos];
422250551Sjeff				} while (pctrie_slot(index,
423250551Sjeff				    node->pn_clev) == (PCTRIE_COUNT - 1));
424250551Sjeff
425250551Sjeff				/*
426250551Sjeff				 * The following computation cannot overflow
427250551Sjeff				 * because index's slot at the current level
428250551Sjeff				 * is less than PCTRIE_COUNT - 1.
429250551Sjeff				 */
430250551Sjeff				index = pctrie_trimkey(index,
431250551Sjeff				    node->pn_clev);
432250551Sjeff				index += PCTRIE_UNITLEVEL(node->pn_clev);
433250551Sjeff			} else
434250551Sjeff				index = node->pn_owner;
435250551Sjeff			KASSERT(!pctrie_keybarr(node, index),
436250551Sjeff			    ("pctrie_lookup_ge: keybarr failed"));
437250551Sjeff		}
438250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
439250551Sjeff		child = node->pn_child[slot];
440250551Sjeff		if (pctrie_isleaf(child)) {
441250551Sjeff			m = pctrie_toval(child);
442250551Sjeff			if (*m >= index)
443250551Sjeff				return (m);
444250551Sjeff		} else if (child != NULL)
445250551Sjeff			goto descend;
446250551Sjeff
447250551Sjeff		/*
448250551Sjeff		 * Look for an available edge or val within the current
449250551Sjeff		 * bisection node.
450250551Sjeff		 */
451250551Sjeff                if (slot < (PCTRIE_COUNT - 1)) {
452250551Sjeff			inc = PCTRIE_UNITLEVEL(node->pn_clev);
453250551Sjeff			index = pctrie_trimkey(index, node->pn_clev);
454250551Sjeff			do {
455250551Sjeff				index += inc;
456250551Sjeff				slot++;
457250551Sjeff				child = node->pn_child[slot];
458250551Sjeff				if (pctrie_isleaf(child)) {
459250551Sjeff					m = pctrie_toval(child);
460250551Sjeff					if (*m >= index)
461250551Sjeff						return (m);
462250551Sjeff				} else if (child != NULL)
463250551Sjeff					goto descend;
464250551Sjeff			} while (slot < (PCTRIE_COUNT - 1));
465250551Sjeff		}
466250551Sjeff		KASSERT(child == NULL || pctrie_isleaf(child),
467250551Sjeff		    ("pctrie_lookup_ge: child is radix node"));
468250551Sjeff
469250551Sjeff		/*
470250551Sjeff		 * If a value or edge bigger than the search slot is not found
471250551Sjeff		 * in the current node, ascend to the next higher-level node.
472250551Sjeff		 */
473250551Sjeff		goto ascend;
474250551Sjeffdescend:
475250551Sjeff		KASSERT(node->pn_clev > 0,
476250551Sjeff		    ("pctrie_lookup_ge: pushing leaf's parent"));
477250551Sjeff		KASSERT(tos < PCTRIE_LIMIT,
478250551Sjeff		    ("pctrie_lookup_ge: stack overflow"));
479250551Sjeff		stack[tos++] = node;
480250551Sjeff		node = child;
481250551Sjeff	}
482250551Sjeff}
483250551Sjeff
484250551Sjeff/*
485250551Sjeff * Look up the nearest entry at a position less than or equal to index.
486250551Sjeff */
487250551Sjeffuint64_t *
488250551Sjeffpctrie_lookup_le(struct pctrie *ptree, uint64_t index)
489250551Sjeff{
490250551Sjeff	struct pctrie_node *stack[PCTRIE_LIMIT];
491250551Sjeff	uint64_t inc;
492250551Sjeff	uint64_t *m;
493250551Sjeff	struct pctrie_node *child, *node;
494250551Sjeff#ifdef INVARIANTS
495250551Sjeff	int loops = 0;
496250551Sjeff#endif
497250551Sjeff	int slot, tos;
498250551Sjeff
499250551Sjeff	node = pctrie_getroot(ptree);
500250551Sjeff	if (node == NULL)
501250551Sjeff		return (NULL);
502250551Sjeff	else if (pctrie_isleaf(node)) {
503250551Sjeff		m = pctrie_toval(node);
504250551Sjeff		if (*m <= index)
505250551Sjeff			return (m);
506250551Sjeff		else
507250551Sjeff			return (NULL);
508250551Sjeff	}
509250551Sjeff	tos = 0;
510250551Sjeff	for (;;) {
511250551Sjeff		/*
512250551Sjeff		 * If the keys differ before the current bisection node,
513250551Sjeff		 * then the search key might rollback to the earliest
514250551Sjeff		 * available bisection node or to the largest key
515250551Sjeff		 * in the current node (if the owner is smaller than the
516250551Sjeff		 * search key).
517250551Sjeff		 */
518250551Sjeff		if (pctrie_keybarr(node, index)) {
519250551Sjeff			if (index > node->pn_owner) {
520250551Sjeff				index = node->pn_owner + PCTRIE_COUNT *
521250551Sjeff				    PCTRIE_UNITLEVEL(node->pn_clev);
522250551Sjeff			} else {
523250551Sjeffascend:
524250551Sjeff				KASSERT(++loops < 1000,
525250551Sjeff				    ("pctrie_lookup_le: too many loops"));
526250551Sjeff
527250551Sjeff				/*
528250551Sjeff				 * Pop nodes from the stack until either the
529250551Sjeff				 * stack is empty or a node that could have a
530250551Sjeff				 * matching descendant is found.
531250551Sjeff				 */
532250551Sjeff				do {
533250551Sjeff					if (tos == 0)
534250551Sjeff						return (NULL);
535250551Sjeff					node = stack[--tos];
536250551Sjeff				} while (pctrie_slot(index,
537250551Sjeff				    node->pn_clev) == 0);
538250551Sjeff
539250551Sjeff				/*
540250551Sjeff				 * The following computation cannot overflow
541250551Sjeff				 * because index's slot at the current level
542250551Sjeff				 * is greater than 0.
543250551Sjeff				 */
544250551Sjeff				index = pctrie_trimkey(index,
545250551Sjeff				    node->pn_clev);
546250551Sjeff			}
547250551Sjeff			index--;
548250551Sjeff			KASSERT(!pctrie_keybarr(node, index),
549250551Sjeff			    ("pctrie_lookup_le: keybarr failed"));
550250551Sjeff		}
551250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
552250551Sjeff		child = node->pn_child[slot];
553250551Sjeff		if (pctrie_isleaf(child)) {
554250551Sjeff			m = pctrie_toval(child);
555250551Sjeff			if (*m <= index)
556250551Sjeff				return (m);
557250551Sjeff		} else if (child != NULL)
558250551Sjeff			goto descend;
559250551Sjeff
560250551Sjeff		/*
561250551Sjeff		 * Look for an available edge or value within the current
562250551Sjeff		 * bisection node.
563250551Sjeff		 */
564250551Sjeff		if (slot > 0) {
565250551Sjeff			inc = PCTRIE_UNITLEVEL(node->pn_clev);
566250551Sjeff			index |= inc - 1;
567250551Sjeff			do {
568250551Sjeff				index -= inc;
569250551Sjeff				slot--;
570250551Sjeff				child = node->pn_child[slot];
571250551Sjeff				if (pctrie_isleaf(child)) {
572250551Sjeff					m = pctrie_toval(child);
573250551Sjeff					if (*m <= index)
574250551Sjeff						return (m);
575250551Sjeff				} else if (child != NULL)
576250551Sjeff					goto descend;
577250551Sjeff			} while (slot > 0);
578250551Sjeff		}
579250551Sjeff		KASSERT(child == NULL || pctrie_isleaf(child),
580250551Sjeff		    ("pctrie_lookup_le: child is radix node"));
581250551Sjeff
582250551Sjeff		/*
583250551Sjeff		 * If a value or edge smaller than the search slot is not found
584250551Sjeff		 * in the current node, ascend to the next higher-level node.
585250551Sjeff		 */
586250551Sjeff		goto ascend;
587250551Sjeffdescend:
588250551Sjeff		KASSERT(node->pn_clev > 0,
589250551Sjeff		    ("pctrie_lookup_le: pushing leaf's parent"));
590250551Sjeff		KASSERT(tos < PCTRIE_LIMIT,
591250551Sjeff		    ("pctrie_lookup_le: stack overflow"));
592250551Sjeff		stack[tos++] = node;
593250551Sjeff		node = child;
594250551Sjeff	}
595250551Sjeff}
596250551Sjeff
597250551Sjeff/*
598250551Sjeff * Remove the specified index from the tree.
599250551Sjeff * Panics if the key is not present.
600250551Sjeff */
601250551Sjeffvoid
602250551Sjeffpctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
603250551Sjeff{
604250551Sjeff	struct pctrie_node *node, *parent;
605250551Sjeff	uint64_t *m;
606250551Sjeff	int i, slot;
607250551Sjeff
608250551Sjeff	node = pctrie_getroot(ptree);
609250551Sjeff	if (pctrie_isleaf(node)) {
610250551Sjeff		m = pctrie_toval(node);
611250551Sjeff		if (*m != index)
612250551Sjeff			panic("%s: invalid key found", __func__);
613250551Sjeff		pctrie_setroot(ptree, NULL);
614250551Sjeff		return;
615250551Sjeff	}
616250551Sjeff	parent = NULL;
617250551Sjeff	for (;;) {
618250551Sjeff		if (node == NULL)
619250551Sjeff			panic("pctrie_remove: impossible to locate the key");
620250551Sjeff		slot = pctrie_slot(index, node->pn_clev);
621250551Sjeff		if (pctrie_isleaf(node->pn_child[slot])) {
622250551Sjeff			m = pctrie_toval(node->pn_child[slot]);
623250551Sjeff			if (*m != index)
624250551Sjeff				panic("%s: invalid key found", __func__);
625250551Sjeff			node->pn_child[slot] = NULL;
626250551Sjeff			node->pn_count--;
627250551Sjeff			if (node->pn_count > 1)
628250551Sjeff				break;
629250551Sjeff			for (i = 0; i < PCTRIE_COUNT; i++)
630250551Sjeff				if (node->pn_child[i] != NULL)
631250551Sjeff					break;
632250551Sjeff			KASSERT(i != PCTRIE_COUNT,
633250551Sjeff			    ("%s: invalid node configuration", __func__));
634250551Sjeff			if (parent == NULL)
635250551Sjeff				pctrie_setroot(ptree, node->pn_child[i]);
636250551Sjeff			else {
637250551Sjeff				slot = pctrie_slot(index, parent->pn_clev);
638250551Sjeff				KASSERT(parent->pn_child[slot] == node,
639250551Sjeff				    ("%s: invalid child value", __func__));
640250551Sjeff				parent->pn_child[slot] = node->pn_child[i];
641250551Sjeff			}
642250551Sjeff			node->pn_count--;
643250551Sjeff			node->pn_child[i] = NULL;
644250551Sjeff			pctrie_node_put(ptree, node, freefn);
645250551Sjeff			break;
646250551Sjeff		}
647250551Sjeff		parent = node;
648250551Sjeff		node = node->pn_child[slot];
649250551Sjeff	}
650250551Sjeff}
651250551Sjeff
652250551Sjeff/*
653250551Sjeff * Remove and free all the nodes from the tree.
654250551Sjeff * This function is recursive but there is a tight control on it as the
655250551Sjeff * maximum depth of the tree is fixed.
656250551Sjeff */
657250551Sjeffvoid
658250551Sjeffpctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
659250551Sjeff{
660250551Sjeff	struct pctrie_node *root;
661250551Sjeff
662250551Sjeff	root = pctrie_getroot(ptree);
663250551Sjeff	if (root == NULL)
664250551Sjeff		return;
665250551Sjeff	pctrie_setroot(ptree, NULL);
666250551Sjeff	if (!pctrie_isleaf(root))
667250551Sjeff		pctrie_reclaim_allnodes_int(ptree, root, freefn);
668250551Sjeff}
669250551Sjeff
670250551Sjeff#ifdef DDB
671250551Sjeff/*
672250551Sjeff * Show details about the given node.
673250551Sjeff */
674250551SjeffDB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
675250551Sjeff{
676250551Sjeff	struct pctrie_node *node;
677250551Sjeff	int i;
678250551Sjeff
679250551Sjeff        if (!have_addr)
680250551Sjeff                return;
681250551Sjeff	node = (struct pctrie_node *)addr;
682250551Sjeff	db_printf("node %p, owner %jx, children count %u, level %u:\n",
683250551Sjeff	    (void *)node, (uintmax_t)node->pn_owner, node->pn_count,
684250551Sjeff	    node->pn_clev);
685250551Sjeff	for (i = 0; i < PCTRIE_COUNT; i++)
686250551Sjeff		if (node->pn_child[i] != NULL)
687250551Sjeff			db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
688250551Sjeff			    i, (void *)node->pn_child[i],
689250551Sjeff			    pctrie_isleaf(node->pn_child[i]) ?
690250551Sjeff			    pctrie_toval(node->pn_child[i]) : NULL,
691250551Sjeff			    node->pn_clev);
692250551Sjeff}
693250551Sjeff#endif /* DDB */
694