1/*	$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $	*/
2
3/*-
4 * Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29/*
30 * radixtree.c
31 *
32 * Overview:
33 *
34 * This is an implementation of radix tree, whose keys are uint64_t and leafs
35 * are user provided pointers.
36 *
37 * Leaf nodes are just void * and this implementation doesn't care about
38 * what they actually point to.  However, this implementation has an assumption
39 * about their alignment.  Specifically, this implementation assumes that their
40 * 2 LSBs are always zero and uses them for internal accounting.
41 *
42 * Intermediate nodes and memory allocation:
43 *
44 * Intermediate nodes are automatically allocated and freed internally and
45 * basically users don't need to care about them.  The allocation is done via
46 * kmem_zalloc(9) for _KERNEL, malloc(3) for userland, and alloc() for
47 * _STANDALONE environment.  Only radix_tree_insert_node function can allocate
48 * memory for intermediate nodes and thus can fail for ENOMEM.
49 *
50 * Memory Efficiency:
51 *
52 * It's designed to work efficiently with dense index distribution.
53 * The memory consumption (number of necessary intermediate nodes) heavily
54 * depends on the index distribution.  Basically, more dense index distribution
55 * consumes less nodes per item.  Approximately,
56 *
57 *  - the best case: about RADIX_TREE_PTR_PER_NODE items per intermediate node.
58 *    it would look like the following.
59 *
60 *     root (t_height=1)
61 *      |
62 *      v
63 *      [ | | | ]   (intermediate node.  RADIX_TREE_PTR_PER_NODE=4 in this fig)
64 *       | | | |
65 *       v v v v
66 *       p p p p    (items)
67 *
68 *  - the worst case: RADIX_TREE_MAX_HEIGHT intermediate nodes per item.
69 *    it would look like the following if RADIX_TREE_MAX_HEIGHT=3.
70 *
71 *     root (t_height=3)
72 *      |
73 *      v
74 *      [ | | | ]
75 *           |
76 *           v
77 *           [ | | | ]
78 *                |
79 *                v
80 *                [ | | | ]
81 *                   |
82 *                   v
83 *                   p
84 *
85 * The height of tree (t_height) is dynamic.  It's smaller if only small
86 * index values are used.  As an extreme case, if only index 0 is used,
87 * the corresponding value is directly stored in the root of the tree
88 * (struct radix_tree) without allocating any intermediate nodes.  In that
89 * case, t_height=0.
90 *
91 * Gang lookup:
92 *
93 * This implementation provides a way to scan many nodes quickly via
94 * radix_tree_gang_lookup_node function and its varients.
95 *
96 * Tags:
97 *
98 * This implementation provides tagging functionality, which allows quick
99 * scanning of a subset of leaf nodes.  Leaf nodes are untagged when inserted
100 * into the tree and can be tagged by radix_tree_set_tag function.
101 * radix_tree_gang_lookup_tagged_node function and its variants returns only
102 * leaf nodes with the given tag.  To reduce amount of nodes to visit for
103 * these functions, this implementation keeps tagging information in internal
104 * intermediate nodes and quickly skips uninterested parts of a tree.
105 *
106 * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which are
107 * identified by a zero-origin numbers, tagid.  For the current implementation,
108 * RADIX_TREE_TAG_ID_MAX is 2.  A set of tags is described as a bitmask tagmask,
109 * which is a bitwise OR of (1 << tagid).
110 */
111
112#include <sys/cdefs.h>
113
114#if defined(_KERNEL) || defined(_STANDALONE)
115__KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
116#include <sys/param.h>
117#include <sys/errno.h>
118#include <sys/kmem.h>
119#include <sys/radixtree.h>
120#include <lib/libkern/libkern.h>
121#if defined(_STANDALONE)
122#include <lib/libsa/stand.h>
123#endif /* defined(_STANDALONE) */
124#else /* defined(_KERNEL) || defined(_STANDALONE) */
125__RCSID("$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
126#include <assert.h>
127#include <errno.h>
128#include <stdbool.h>
129#include <stdlib.h>
130#include <string.h>
131#if 1
132#define KASSERT assert
133#else
134#define KASSERT(a)	/* nothing */
135#endif
136#endif /* defined(_KERNEL) || defined(_STANDALONE) */
137
138#include <sys/radixtree.h>
139
140#define	RADIX_TREE_BITS_PER_HEIGHT	4	/* XXX tune */
141#define	RADIX_TREE_PTR_PER_NODE		(1 << RADIX_TREE_BITS_PER_HEIGHT)
142#define	RADIX_TREE_MAX_HEIGHT		(64 / RADIX_TREE_BITS_PER_HEIGHT)
143#define	RADIX_TREE_INVALID_HEIGHT	(RADIX_TREE_MAX_HEIGHT + 1)
144__CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
145
146__CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
147#define	RADIX_TREE_TAG_MASK	((1 << RADIX_TREE_TAG_ID_MAX) - 1)
148
149static inline void *
150entry_ptr(void *p)
151{
152
153	return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
154}
155
156static inline unsigned int
157entry_tagmask(void *p)
158{
159
160	return (uintptr_t)p & RADIX_TREE_TAG_MASK;
161}
162
163static inline void *
164entry_compose(void *p, unsigned int tagmask)
165{
166
167	return (void *)((uintptr_t)p | tagmask);
168}
169
170static inline bool
171entry_match_p(void *p, unsigned int tagmask)
172{
173
174	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
175	if (p == NULL) {
176		return false;
177	}
178	if (tagmask == 0) {
179		return true;
180	}
181	return (entry_tagmask(p) & tagmask) != 0;
182}
183
184/*
185 * radix_tree_node: an intermediate node
186 *
187 * we don't care the type of leaf nodes.  they are just void *.
188 *
189 * we used to maintain a count of non-NULL nodes in this structure, but it
190 * prevented it from being aligned to a cache line boundary; the performance
191 * benefit from being cache friendly is greater than the benefit of having
192 * a dedicated count value, especially in multi-processor situations where
193 * we need to avoid intra-pool-page false sharing.
194 */
195
196struct radix_tree_node {
197	void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
198};
199
200/*
201 * p_refs[0].pptr == &t->t_root
202 *	:
203 * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
204 *	:
205 *	:
206 * p_refs[t->t_height].pptr == &leaf_pointer
207 */
208
209struct radix_tree_path {
210	struct radix_tree_node_ref {
211		void **pptr;
212	} p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
213	/*
214	 * p_lastidx is either the index of the last valid element of p_refs[]
215	 * or RADIX_TREE_INVALID_HEIGHT.
216	 * RADIX_TREE_INVALID_HEIGHT means that radix_tree_lookup_ptr found
217	 * that the height of the tree is not enough to cover the given index.
218	 */
219	unsigned int p_lastidx;
220};
221
222static inline void **
223path_pptr(const struct radix_tree *t, const struct radix_tree_path *p,
224    unsigned int height)
225{
226
227	KASSERT(height <= t->t_height);
228	return p->p_refs[height].pptr;
229}
230
231static inline struct radix_tree_node *
232path_node(const struct radix_tree * t, const struct radix_tree_path *p,
233    unsigned int height)
234{
235
236	KASSERT(height <= t->t_height);
237	return entry_ptr(*path_pptr(t, p, height));
238}
239
240/*
241 * radix_tree_init_tree:
242 *
243 * Initialize a tree.
244 */
245
246void
247radix_tree_init_tree(struct radix_tree *t)
248{
249
250	t->t_height = 0;
251	t->t_root = NULL;
252}
253
254/*
255 * radix_tree_fini_tree:
256 *
257 * Finish using a tree.
258 */
259
260void
261radix_tree_fini_tree(struct radix_tree *t)
262{
263
264	KASSERT(t->t_root == NULL);
265	KASSERT(t->t_height == 0);
266}
267
268/*
269 * radix_tree_empty_tree_p:
270 *
271 * Return if the tree is empty.
272 */
273
274bool
275radix_tree_empty_tree_p(struct radix_tree *t)
276{
277
278	return t->t_root == NULL;
279}
280
281/*
282 * radix_tree_empty_tree_p:
283 *
284 * Return true if the tree has any nodes with the given tag.  Otherwise
285 * return false.
286 *
287 * It's illegal to call this function with tagmask 0.
288 */
289
290bool
291radix_tree_empty_tagged_tree_p(struct radix_tree *t, unsigned int tagmask)
292{
293
294	KASSERT(tagmask != 0);
295	return (entry_tagmask(t->t_root) & tagmask) == 0;
296}
297
298static void
299radix_tree_node_init(struct radix_tree_node *n)
300{
301
302	memset(n, 0, sizeof(*n));
303}
304
305#if defined(_KERNEL)
306/*
307 * radix_tree_init:
308 *
309 * initialize the subsystem.
310 */
311
312void
313radix_tree_init(void)
314{
315
316	/* nothing right now */
317}
318
319/*
320 * radix_tree_await_memory:
321 *
322 * after an insert has failed with ENOMEM, wait for memory to become
323 * available, so the caller can retry.  this needs to ensure that the
324 * maximum possible required number of nodes is available.
325 */
326
327void
328radix_tree_await_memory(void)
329{
330	struct radix_tree_node *nodes[RADIX_TREE_MAX_HEIGHT];
331	int i;
332
333	for (i = 0; i < __arraycount(nodes); i++) {
334		nodes[i] = kmem_intr_alloc(sizeof(struct radix_tree_node),
335		    KM_SLEEP);
336	}
337	while (--i >= 0) {
338		kmem_intr_free(nodes[i], sizeof(struct radix_tree_node));
339	}
340}
341
342#endif /* defined(_KERNEL) */
343
344/*
345 * radix_tree_sum_node:
346 *
347 * return the logical sum of all entries in the given node.  used to quickly
348 * check for tag masks or empty nodes.
349 */
350
351static uintptr_t
352radix_tree_sum_node(const struct radix_tree_node *n)
353{
354#if RADIX_TREE_PTR_PER_NODE > 16
355	unsigned int i;
356	uintptr_t sum;
357
358	for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
359		sum |= (uintptr_t)n->n_ptrs[i];
360	}
361	return sum;
362#else /* RADIX_TREE_PTR_PER_NODE > 16 */
363	uintptr_t sum;
364
365	/*
366	 * Unrolling the above is much better than a tight loop with two
367	 * test+branch pairs.  On x86 with gcc 5.5.0 this compiles into 19
368	 * deterministic instructions including the "return" and prologue &
369	 * epilogue.
370	 */
371	sum = (uintptr_t)n->n_ptrs[0];
372	sum |= (uintptr_t)n->n_ptrs[1];
373	sum |= (uintptr_t)n->n_ptrs[2];
374	sum |= (uintptr_t)n->n_ptrs[3];
375#if RADIX_TREE_PTR_PER_NODE > 4
376	sum |= (uintptr_t)n->n_ptrs[4];
377	sum |= (uintptr_t)n->n_ptrs[5];
378	sum |= (uintptr_t)n->n_ptrs[6];
379	sum |= (uintptr_t)n->n_ptrs[7];
380#endif
381#if RADIX_TREE_PTR_PER_NODE > 8
382	sum |= (uintptr_t)n->n_ptrs[8];
383	sum |= (uintptr_t)n->n_ptrs[9];
384	sum |= (uintptr_t)n->n_ptrs[10];
385	sum |= (uintptr_t)n->n_ptrs[11];
386	sum |= (uintptr_t)n->n_ptrs[12];
387	sum |= (uintptr_t)n->n_ptrs[13];
388	sum |= (uintptr_t)n->n_ptrs[14];
389	sum |= (uintptr_t)n->n_ptrs[15];
390#endif
391	return sum;
392#endif /* RADIX_TREE_PTR_PER_NODE > 16 */
393}
394
395static int __unused
396radix_tree_node_count_ptrs(const struct radix_tree_node *n)
397{
398	unsigned int i, c;
399
400	for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
401		c += (n->n_ptrs[i] != NULL);
402	}
403	return c;
404}
405
406static struct radix_tree_node *
407radix_tree_alloc_node(void)
408{
409	struct radix_tree_node *n;
410
411#if defined(_KERNEL)
412	/*
413	 * We must not block waiting for memory because this function
414	 * can be called in contexts where waiting for memory is illegal.
415	 */
416	n = kmem_intr_alloc(sizeof(struct radix_tree_node), KM_NOSLEEP);
417#elif defined(_STANDALONE)
418	n = alloc(sizeof(*n));
419#else /* defined(_STANDALONE) */
420	n = malloc(sizeof(*n));
421#endif /* defined(_STANDALONE) */
422	if (n != NULL) {
423		radix_tree_node_init(n);
424	}
425	KASSERT(n == NULL || radix_tree_sum_node(n) == 0);
426	return n;
427}
428
429static void
430radix_tree_free_node(struct radix_tree_node *n)
431{
432
433	KASSERT(radix_tree_sum_node(n) == 0);
434#if defined(_KERNEL)
435	kmem_intr_free(n, sizeof(struct radix_tree_node));
436#elif defined(_STANDALONE)
437	dealloc(n, sizeof(*n));
438#else
439	free(n);
440#endif
441}
442
443/*
444 * radix_tree_grow:
445 *
446 * increase the height of the tree.
447 */
448
449static __noinline int
450radix_tree_grow(struct radix_tree *t, unsigned int newheight)
451{
452	const unsigned int tagmask = entry_tagmask(t->t_root);
453	struct radix_tree_node *newnodes[RADIX_TREE_MAX_HEIGHT];
454	void *root;
455	int h;
456
457	KASSERT(newheight <= RADIX_TREE_MAX_HEIGHT);
458	if ((root = t->t_root) == NULL) {
459		t->t_height = newheight;
460		return 0;
461	}
462	for (h = t->t_height; h < newheight; h++) {
463		newnodes[h] = radix_tree_alloc_node();
464		if (__predict_false(newnodes[h] == NULL)) {
465			while (--h >= (int)t->t_height) {
466				newnodes[h]->n_ptrs[0] = NULL;
467				radix_tree_free_node(newnodes[h]);
468			}
469			return ENOMEM;
470		}
471		newnodes[h]->n_ptrs[0] = root;
472		root = entry_compose(newnodes[h], tagmask);
473	}
474	t->t_root = root;
475	t->t_height = h;
476	return 0;
477}
478
479/*
480 * radix_tree_lookup_ptr:
481 *
482 * an internal helper function used for various exported functions.
483 *
484 * return the pointer to store the node for the given index.
485 *
486 * if alloc is true, try to allocate the storage.  (note for _KERNEL:
487 * in that case, this function can block.)  if the allocation failed or
488 * alloc is false, return NULL.
489 *
490 * if path is not NULL, fill it for the caller's investigation.
491 *
492 * if tagmask is not zero, search only for nodes with the tag set.
493 * note that, however, this function doesn't check the tagmask for the leaf
494 * pointer.  it's a caller's responsibility to investigate the value which
495 * is pointed by the returned pointer if necessary.
496 *
497 * while this function is a bit large, as it's called with some constant
498 * arguments, inlining might have benefits.  anyway, a compiler will decide.
499 */
500
501static inline void **
502radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
503    struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
504{
505	struct radix_tree_node *n;
506	int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
507	int shift;
508	void **vpp;
509	const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
510	struct radix_tree_node_ref *refs = NULL;
511
512	/*
513	 * check unsupported combinations
514	 */
515	KASSERT(tagmask == 0 || !alloc);
516	KASSERT(path == NULL || !alloc);
517	vpp = &t->t_root;
518	if (path != NULL) {
519		refs = path->p_refs;
520		refs->pptr = vpp;
521	}
522	n = NULL;
523	for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
524		struct radix_tree_node *c;
525		void *entry;
526		const uint64_t i = (idx >> shift) & mask;
527
528		if (shift >= hshift) {
529			unsigned int newheight;
530
531			KASSERT(vpp == &t->t_root);
532			if (i == 0) {
533				shift -= RADIX_TREE_BITS_PER_HEIGHT;
534				continue;
535			}
536			if (!alloc) {
537				if (path != NULL) {
538					KASSERT((refs - path->p_refs) == 0);
539					path->p_lastidx =
540					    RADIX_TREE_INVALID_HEIGHT;
541				}
542				return NULL;
543			}
544			newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
545			if (radix_tree_grow(t, newheight)) {
546				return NULL;
547			}
548			hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
549		}
550		entry = *vpp;
551		c = entry_ptr(entry);
552		if (c == NULL ||
553		    (tagmask != 0 &&
554		    (entry_tagmask(entry) & tagmask) == 0)) {
555			if (!alloc) {
556				if (path != NULL) {
557					path->p_lastidx = refs - path->p_refs;
558				}
559				return NULL;
560			}
561			c = radix_tree_alloc_node();
562			if (c == NULL) {
563				return NULL;
564			}
565			*vpp = c;
566		}
567		n = c;
568		vpp = &n->n_ptrs[i];
569		if (path != NULL) {
570			refs++;
571			refs->pptr = vpp;
572		}
573		shift -= RADIX_TREE_BITS_PER_HEIGHT;
574	}
575	if (alloc) {
576		KASSERT(*vpp == NULL);
577	}
578	if (path != NULL) {
579		path->p_lastidx = refs - path->p_refs;
580	}
581	return vpp;
582}
583
584/*
585 * radix_tree_undo_insert_node:
586 *
587 * Undo the effects of a failed insert.  The conditions that led to the
588 * insert may change and it may not be retried.  If the insert is not
589 * retried, there will be no corresponding radix_tree_remove_node() for
590 * this index in the future.  Therefore any adjustments made to the tree
591 * before memory was exhausted must be reverted.
592 */
593
594static __noinline void
595radix_tree_undo_insert_node(struct radix_tree *t, uint64_t idx)
596{
597	struct radix_tree_path path;
598	int i;
599
600	(void)radix_tree_lookup_ptr(t, idx, &path, false, 0);
601	if (path.p_lastidx == RADIX_TREE_INVALID_HEIGHT) {
602		/*
603		 * no nodes were inserted.
604		 */
605		return;
606	}
607	for (i = path.p_lastidx - 1; i >= 0; i--) {
608		struct radix_tree_node ** const pptr =
609		    (struct radix_tree_node **)path_pptr(t, &path, i);
610		struct radix_tree_node *n;
611
612		KASSERT(pptr != NULL);
613		n = entry_ptr(*pptr);
614		KASSERT(n != NULL);
615		if (radix_tree_sum_node(n) != 0) {
616			break;
617		}
618		radix_tree_free_node(n);
619		*pptr = NULL;
620	}
621	/*
622	 * fix up height
623	 */
624	if (i < 0) {
625		KASSERT(t->t_root == NULL);
626		t->t_height = 0;
627	}
628}
629
630/*
631 * radix_tree_insert_node:
632 *
633 * Insert the node at the given index.
634 *
635 * It's illegal to insert NULL.  It's illegal to insert a non-aligned pointer.
636 *
637 * This function returns ENOMEM if necessary memory allocation failed.
638 * Otherwise, this function returns 0.
639 *
640 * Note that inserting a node can involves memory allocation for intermediate
641 * nodes.  If _KERNEL, it's done with no-sleep IPL_NONE memory allocation.
642 *
643 * For the newly inserted node, all tags are cleared.
644 */
645
646int
647radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
648{
649	void **vpp;
650
651	KASSERT(p != NULL);
652	KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
653	vpp = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
654	if (__predict_false(vpp == NULL)) {
655		radix_tree_undo_insert_node(t, idx);
656		return ENOMEM;
657	}
658	KASSERT(*vpp == NULL);
659	*vpp = p;
660	return 0;
661}
662
663/*
664 * radix_tree_replace_node:
665 *
666 * Replace a node at the given index with the given node and return the
667 * replaced one.
668 *
669 * It's illegal to try to replace a node which has not been inserted.
670 *
671 * This function keeps tags intact.
672 */
673
674void *
675radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
676{
677	void **vpp;
678	void *oldp;
679
680	KASSERT(p != NULL);
681	KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
682	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
683	KASSERT(vpp != NULL);
684	oldp = *vpp;
685	KASSERT(oldp != NULL);
686	*vpp = entry_compose(p, entry_tagmask(*vpp));
687	return entry_ptr(oldp);
688}
689
690/*
691 * radix_tree_remove_node:
692 *
693 * Remove the node at the given index.
694 *
695 * It's illegal to try to remove a node which has not been inserted.
696 */
697
698void *
699radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
700{
701	struct radix_tree_path path;
702	void **vpp;
703	void *oldp;
704	int i;
705
706	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
707	KASSERT(vpp != NULL);
708	oldp = *vpp;
709	KASSERT(oldp != NULL);
710	KASSERT(path.p_lastidx == t->t_height);
711	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
712	*vpp = NULL;
713	for (i = t->t_height - 1; i >= 0; i--) {
714		void *entry;
715		struct radix_tree_node ** const pptr =
716		    (struct radix_tree_node **)path_pptr(t, &path, i);
717		struct radix_tree_node *n;
718
719		KASSERT(pptr != NULL);
720		entry = *pptr;
721		n = entry_ptr(entry);
722		KASSERT(n != NULL);
723		if (radix_tree_sum_node(n) != 0) {
724			break;
725		}
726		radix_tree_free_node(n);
727		*pptr = NULL;
728	}
729	/*
730	 * fix up height
731	 */
732	if (i < 0) {
733		KASSERT(t->t_root == NULL);
734		t->t_height = 0;
735	}
736	/*
737	 * update tags
738	 */
739	for (; i >= 0; i--) {
740		void *entry;
741		struct radix_tree_node ** const pptr =
742		    (struct radix_tree_node **)path_pptr(t, &path, i);
743		struct radix_tree_node *n;
744		unsigned int newmask;
745
746		KASSERT(pptr != NULL);
747		entry = *pptr;
748		n = entry_ptr(entry);
749		KASSERT(n != NULL);
750		KASSERT(radix_tree_sum_node(n) != 0);
751		newmask = radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK;
752		if (newmask == entry_tagmask(entry)) {
753			break;
754		}
755		*pptr = entry_compose(n, newmask);
756	}
757	/*
758	 * XXX is it worth to try to reduce height?
759	 * if we do that, make radix_tree_grow rollback its change as well.
760	 */
761	return entry_ptr(oldp);
762}
763
764/*
765 * radix_tree_lookup_node:
766 *
767 * Returns the node at the given index.
768 * Returns NULL if nothing is found at the given index.
769 */
770
771void *
772radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
773{
774	void **vpp;
775
776	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
777	if (vpp == NULL) {
778		return NULL;
779	}
780	return entry_ptr(*vpp);
781}
782
783static inline void
784gang_lookup_init(struct radix_tree *t, uint64_t idx,
785    struct radix_tree_path *path, const unsigned int tagmask)
786{
787	void **vpp __unused;
788
789	vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
790	KASSERT(vpp == NULL ||
791	    vpp == path_pptr(t, path, path->p_lastidx));
792	KASSERT(&t->t_root == path_pptr(t, path, 0));
793	KASSERT(path->p_lastidx == RADIX_TREE_INVALID_HEIGHT ||
794	   path->p_lastidx == t->t_height ||
795	   !entry_match_p(*path_pptr(t, path, path->p_lastidx), tagmask));
796}
797
798/*
799 * gang_lookup_scan:
800 *
801 * a helper routine for radix_tree_gang_lookup_node and its variants.
802 */
803
804static inline unsigned int
805__attribute__((__always_inline__))
806gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
807    void **results, const unsigned int maxresults, const unsigned int tagmask,
808    const bool reverse, const bool dense)
809{
810
811	/*
812	 * we keep the path updated only for lastidx-1.
813	 * vpp is what path_pptr(t, path, lastidx) would be.
814	 */
815	void **vpp;
816	unsigned int nfound;
817	unsigned int lastidx;
818	/*
819	 * set up scan direction dependant constants so that we can iterate
820	 * n_ptrs as the following.
821	 *
822	 *	for (i = first; i != guard; i += step)
823	 *		visit n->n_ptrs[i];
824	 */
825	const int step = reverse ? -1 : 1;
826	const unsigned int first = reverse ? RADIX_TREE_PTR_PER_NODE - 1 : 0;
827	const unsigned int last = reverse ? 0 : RADIX_TREE_PTR_PER_NODE - 1;
828	const unsigned int guard = last + step;
829
830	KASSERT(maxresults > 0);
831	KASSERT(&t->t_root == path_pptr(t, path, 0));
832	lastidx = path->p_lastidx;
833	KASSERT(lastidx == RADIX_TREE_INVALID_HEIGHT ||
834	   lastidx == t->t_height ||
835	   !entry_match_p(*path_pptr(t, path, lastidx), tagmask));
836	nfound = 0;
837	if (lastidx == RADIX_TREE_INVALID_HEIGHT) {
838		/*
839		 * requested idx is beyond the right-most node.
840		 */
841		if (reverse && !dense) {
842			lastidx = 0;
843			vpp = path_pptr(t, path, lastidx);
844			goto descend;
845		}
846		return 0;
847	}
848	vpp = path_pptr(t, path, lastidx);
849	while (/*CONSTCOND*/true) {
850		struct radix_tree_node *n;
851		unsigned int i;
852
853		if (entry_match_p(*vpp, tagmask)) {
854			KASSERT(lastidx == t->t_height);
855			/*
856			 * record the matching non-NULL leaf.
857			 */
858			results[nfound] = entry_ptr(*vpp);
859			nfound++;
860			if (nfound == maxresults) {
861				return nfound;
862			}
863		} else if (dense) {
864			return nfound;
865		}
866scan_siblings:
867		/*
868		 * try to find the next matching non-NULL sibling.
869		 */
870		if (lastidx == 0) {
871			/*
872			 * the root has no siblings.
873			 * we've done.
874			 */
875			KASSERT(vpp == &t->t_root);
876			break;
877		}
878		n = path_node(t, path, lastidx - 1);
879		for (i = vpp - n->n_ptrs + step; i != guard; i += step) {
880			KASSERT(i < RADIX_TREE_PTR_PER_NODE);
881			if (entry_match_p(n->n_ptrs[i], tagmask)) {
882				vpp = &n->n_ptrs[i];
883				break;
884			} else if (dense) {
885				return nfound;
886			}
887		}
888		if (i == guard) {
889			/*
890			 * not found.  go to parent.
891			 */
892			lastidx--;
893			vpp = path_pptr(t, path, lastidx);
894			goto scan_siblings;
895		}
896descend:
897		/*
898		 * following the left-most (or right-most in the case of
899		 * reverse scan) child node, descend until reaching the leaf or
900		 * a non-matching entry.
901		 */
902		while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
903			/*
904			 * save vpp in the path so that we can come back to this
905			 * node after finishing visiting children.
906			 */
907			path->p_refs[lastidx].pptr = vpp;
908			n = entry_ptr(*vpp);
909			vpp = &n->n_ptrs[first];
910			lastidx++;
911		}
912	}
913	return nfound;
914}
915
916/*
917 * radix_tree_gang_lookup_node:
918 *
919 * Scan the tree starting from the given index in the ascending order and
920 * return found nodes.
921 *
922 * results should be an array large enough to hold maxresults pointers.
923 * This function returns the number of nodes found, up to maxresults.
924 * Returning less than maxresults means there are no more nodes in the tree.
925 *
926 * If dense == true, this function stops scanning when it founds a hole of
927 * indexes.  I.e. an index for which radix_tree_lookup_node would returns NULL.
928 * If dense == false, this function skips holes and continue scanning until
929 * maxresults nodes are found or it reaches the limit of the index range.
930 *
931 * The result of this function is semantically equivalent to what could be
932 * obtained by repeated calls of radix_tree_lookup_node with increasing index.
933 * but this function is expected to be computationally cheaper when looking up
934 * multiple nodes at once.  Especially, it's expected to be much cheaper when
935 * node indexes are distributed sparsely.
936 *
937 * Note that this function doesn't return index values of found nodes.
938 * Thus, in the case of dense == false, if index values are important for
939 * a caller, it's the caller's responsibility to check them, typically
940 * by examining the returned nodes using some caller-specific knowledge
941 * about them.
942 * In the case of dense == true, a node returned via results[N] is always for
943 * the index (idx + N).
944 */
945
946unsigned int
947radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
948    void **results, unsigned int maxresults, bool dense)
949{
950	struct radix_tree_path path;
951
952	gang_lookup_init(t, idx, &path, 0);
953	return gang_lookup_scan(t, &path, results, maxresults, 0, false, dense);
954}
955
956/*
957 * radix_tree_gang_lookup_node_reverse:
958 *
959 * Same as radix_tree_gang_lookup_node except that this one scans the
960 * tree in the reverse order.  I.e. descending index values.
961 */
962
963unsigned int
964radix_tree_gang_lookup_node_reverse(struct radix_tree *t, uint64_t idx,
965    void **results, unsigned int maxresults, bool dense)
966{
967	struct radix_tree_path path;
968
969	gang_lookup_init(t, idx, &path, 0);
970	return gang_lookup_scan(t, &path, results, maxresults, 0, true, dense);
971}
972
973/*
974 * radix_tree_gang_lookup_tagged_node:
975 *
976 * Same as radix_tree_gang_lookup_node except that this one only returns
977 * nodes tagged with tagid.
978 *
979 * It's illegal to call this function with tagmask 0.
980 */
981
982unsigned int
983radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
984    void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
985{
986	struct radix_tree_path path;
987
988	KASSERT(tagmask != 0);
989	gang_lookup_init(t, idx, &path, tagmask);
990	return gang_lookup_scan(t, &path, results, maxresults, tagmask, false,
991	    dense);
992}
993
994/*
995 * radix_tree_gang_lookup_tagged_node_reverse:
996 *
997 * Same as radix_tree_gang_lookup_tagged_node except that this one scans the
998 * tree in the reverse order.  I.e. descending index values.
999 */
1000
1001unsigned int
1002radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree *t, uint64_t idx,
1003    void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
1004{
1005	struct radix_tree_path path;
1006
1007	KASSERT(tagmask != 0);
1008	gang_lookup_init(t, idx, &path, tagmask);
1009	return gang_lookup_scan(t, &path, results, maxresults, tagmask, true,
1010	    dense);
1011}
1012
1013/*
1014 * radix_tree_get_tag:
1015 *
1016 * Return the tagmask for the node at the given index.
1017 *
1018 * It's illegal to call this function for a node which has not been inserted.
1019 */
1020
1021unsigned int
1022radix_tree_get_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1023{
1024	/*
1025	 * the following two implementations should behave same.
1026	 * the former one was chosen because it seems faster.
1027	 */
1028#if 1
1029	void **vpp;
1030
1031	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
1032	if (vpp == NULL) {
1033		return false;
1034	}
1035	KASSERT(*vpp != NULL);
1036	return (entry_tagmask(*vpp) & tagmask);
1037#else
1038	void **vpp;
1039
1040	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
1041	KASSERT(vpp != NULL);
1042	return (entry_tagmask(*vpp) & tagmask);
1043#endif
1044}
1045
1046/*
1047 * radix_tree_set_tag:
1048 *
1049 * Set the tag for the node at the given index.
1050 *
1051 * It's illegal to call this function for a node which has not been inserted.
1052 * It's illegal to call this function with tagmask 0.
1053 */
1054
1055void
1056radix_tree_set_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1057{
1058	struct radix_tree_path path;
1059	void **vpp __unused;
1060	int i;
1061
1062	KASSERT(tagmask != 0);
1063	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1064	KASSERT(vpp != NULL);
1065	KASSERT(*vpp != NULL);
1066	KASSERT(path.p_lastidx == t->t_height);
1067	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1068	for (i = t->t_height; i >= 0; i--) {
1069		void ** const pptr = (void **)path_pptr(t, &path, i);
1070		void *entry;
1071
1072		KASSERT(pptr != NULL);
1073		entry = *pptr;
1074		if ((entry_tagmask(entry) & tagmask) != 0) {
1075			break;
1076		}
1077		*pptr = (void *)((uintptr_t)entry | tagmask);
1078	}
1079}
1080
1081/*
1082 * radix_tree_clear_tag:
1083 *
1084 * Clear the tag for the node at the given index.
1085 *
1086 * It's illegal to call this function for a node which has not been inserted.
1087 * It's illegal to call this function with tagmask 0.
1088 */
1089
1090void
1091radix_tree_clear_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1092{
1093	struct radix_tree_path path;
1094	void **vpp;
1095	int i;
1096
1097	KASSERT(tagmask != 0);
1098	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1099	KASSERT(vpp != NULL);
1100	KASSERT(*vpp != NULL);
1101	KASSERT(path.p_lastidx == t->t_height);
1102	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1103	/*
1104	 * if already cleared, nothing to do
1105	 */
1106	if ((entry_tagmask(*vpp) & tagmask) == 0) {
1107		return;
1108	}
1109	/*
1110	 * clear the tag only if no children have the tag.
1111	 */
1112	for (i = t->t_height; i >= 0; i--) {
1113		void ** const pptr = (void **)path_pptr(t, &path, i);
1114		void *entry;
1115
1116		KASSERT(pptr != NULL);
1117		entry = *pptr;
1118		KASSERT((entry_tagmask(entry) & tagmask) != 0);
1119		*pptr = entry_compose(entry_ptr(entry),
1120		    entry_tagmask(entry) & ~tagmask);
1121		/*
1122		 * check if we should proceed to process the next level.
1123		 */
1124		if (0 < i) {
1125			struct radix_tree_node *n = path_node(t, &path, i - 1);
1126
1127			if ((radix_tree_sum_node(n) & tagmask) != 0) {
1128				break;
1129			}
1130		}
1131	}
1132}
1133
1134#if defined(UNITTEST)
1135
1136#include <inttypes.h>
1137#include <stdio.h>
1138
1139static void
1140radix_tree_dump_node(const struct radix_tree *t, void *vp,
1141    uint64_t offset, unsigned int height)
1142{
1143	struct radix_tree_node *n;
1144	unsigned int i;
1145
1146	for (i = 0; i < t->t_height - height; i++) {
1147		printf(" ");
1148	}
1149	if (entry_tagmask(vp) == 0) {
1150		printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
1151	} else {
1152		printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
1153		    entry_tagmask(vp));
1154	}
1155	if (height == 0) {
1156		printf(" (leaf)\n");
1157		return;
1158	}
1159	n = entry_ptr(vp);
1160	assert((radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK) ==
1161	    entry_tagmask(vp));
1162	printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
1163	for (i = 0; i < __arraycount(n->n_ptrs); i++) {
1164		void *c;
1165
1166		c = n->n_ptrs[i];
1167		if (c == NULL) {
1168			continue;
1169		}
1170		radix_tree_dump_node(t, c,
1171		    offset + i * (UINT64_C(1) <<
1172		    (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
1173	}
1174}
1175
1176void radix_tree_dump(const struct radix_tree *);
1177
1178void
1179radix_tree_dump(const struct radix_tree *t)
1180{
1181
1182	printf("tree %p height=%u\n", t, t->t_height);
1183	radix_tree_dump_node(t, t->t_root, 0, t->t_height);
1184}
1185
1186static void
1187test1(void)
1188{
1189	struct radix_tree s;
1190	struct radix_tree *t = &s;
1191	void *results[3];
1192
1193	radix_tree_init_tree(t);
1194	radix_tree_dump(t);
1195	assert(radix_tree_lookup_node(t, 0) == NULL);
1196	assert(radix_tree_lookup_node(t, 1000) == NULL);
1197	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 0);
1198	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1199	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1200	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1201	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1202	    0);
1203	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1204	    0);
1205	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1206	    == 0);
1207	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1208	    == 0);
1209	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1210	    == 0);
1211	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1212	    == 0);
1213	assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, false, 1)
1214	    == 0);
1215	assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, true, 1)
1216	    == 0);
1217	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1218	    false, 1) == 0);
1219	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1220	    true, 1) == 0);
1221	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1222	    false, 1) == 0);
1223	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1224	    true, 1) == 0);
1225	assert(radix_tree_empty_tree_p(t));
1226	assert(radix_tree_empty_tagged_tree_p(t, 1));
1227	assert(radix_tree_empty_tagged_tree_p(t, 2));
1228	assert(radix_tree_insert_node(t, 0, (void *)0xdeadbea0) == 0);
1229	assert(!radix_tree_empty_tree_p(t));
1230	assert(radix_tree_empty_tagged_tree_p(t, 1));
1231	assert(radix_tree_empty_tagged_tree_p(t, 2));
1232	assert(radix_tree_lookup_node(t, 0) == (void *)0xdeadbea0);
1233	assert(radix_tree_lookup_node(t, 1000) == NULL);
1234	memset(results, 0, sizeof(results));
1235	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1236	assert(results[0] == (void *)0xdeadbea0);
1237	memset(results, 0, sizeof(results));
1238	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1239	assert(results[0] == (void *)0xdeadbea0);
1240	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1241	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1242	memset(results, 0, sizeof(results));
1243	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1244	    1);
1245	assert(results[0] == (void *)0xdeadbea0);
1246	memset(results, 0, sizeof(results));
1247	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1248	    1);
1249	assert(results[0] == (void *)0xdeadbea0);
1250	memset(results, 0, sizeof(results));
1251	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1252	    == 1);
1253	assert(results[0] == (void *)0xdeadbea0);
1254	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1255	    == 0);
1256	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1257	    == 0);
1258	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1259	    == 0);
1260	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1261	    false, 1) == 0);
1262	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1263	    true, 1) == 0);
1264	assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
1265	assert(radix_tree_remove_node(t, 0) == (void *)0xdeadbea0);
1266	assert(!radix_tree_empty_tree_p(t));
1267	radix_tree_dump(t);
1268	assert(radix_tree_lookup_node(t, 0) == NULL);
1269	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1270	memset(results, 0, sizeof(results));
1271	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1272	assert(results[0] == (void *)0xdeadbea0);
1273	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1274	memset(results, 0, sizeof(results));
1275	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 1);
1276	assert(results[0] == (void *)0xdeadbea0);
1277	memset(results, 0, sizeof(results));
1278	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 1);
1279	assert(results[0] == (void *)0xdeadbea0);
1280	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false)
1281	    == 0);
1282	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true)
1283	    == 0);
1284	memset(results, 0, sizeof(results));
1285	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1286	    == 1);
1287	memset(results, 0, sizeof(results));
1288	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1289	    == 1);
1290	assert(results[0] == (void *)0xdeadbea0);
1291	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1292	    == 0);
1293	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1294	    == 0);
1295	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1296	    false, 1) == 0);
1297	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1298	    true, 1) == 0);
1299	assert(!radix_tree_get_tag(t, 1000, 1));
1300	assert(!radix_tree_get_tag(t, 1000, 2));
1301	assert(radix_tree_get_tag(t, 1000, 2 | 1) == 0);
1302	assert(radix_tree_empty_tagged_tree_p(t, 1));
1303	assert(radix_tree_empty_tagged_tree_p(t, 2));
1304	radix_tree_set_tag(t, 1000, 2);
1305	assert(!radix_tree_get_tag(t, 1000, 1));
1306	assert(radix_tree_get_tag(t, 1000, 2));
1307	assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1308	assert(radix_tree_empty_tagged_tree_p(t, 1));
1309	assert(!radix_tree_empty_tagged_tree_p(t, 2));
1310	radix_tree_dump(t);
1311	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1312	assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
1313	radix_tree_dump(t);
1314	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1315	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1316	assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
1317	    == 0);
1318	radix_tree_dump(t);
1319	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1320	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1321	assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
1322	    (void *)0xdea0);
1323	radix_tree_dump(t);
1324	assert(!radix_tree_get_tag(t, 0, 2));
1325	assert(radix_tree_get_tag(t, 1000, 2));
1326	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
1327	radix_tree_set_tag(t, 0, 2);
1328	radix_tree_set_tag(t, UINT64_C(10000000000), 2);
1329	radix_tree_dump(t);
1330	assert(radix_tree_get_tag(t, 0, 2));
1331	assert(radix_tree_get_tag(t, 1000, 2));
1332	assert(radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1333	radix_tree_clear_tag(t, 0, 2);
1334	radix_tree_clear_tag(t, UINT64_C(10000000000), 2);
1335	radix_tree_dump(t);
1336	assert(!radix_tree_get_tag(t, 0, 2));
1337	assert(radix_tree_get_tag(t, 1000, 2));
1338	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1339	radix_tree_dump(t);
1340	assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
1341	    (void *)0xdeadbea0);
1342	assert(!radix_tree_get_tag(t, 1000, 1));
1343	assert(radix_tree_get_tag(t, 1000, 2));
1344	assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1345	memset(results, 0, sizeof(results));
1346	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 3);
1347	assert(results[0] == (void *)0xbea0);
1348	assert(results[1] == (void *)0x12345678);
1349	assert(results[2] == (void *)0xdea0);
1350	memset(results, 0, sizeof(results));
1351	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1352	assert(results[0] == (void *)0xbea0);
1353	memset(results, 0, sizeof(results));
1354	assert(radix_tree_gang_lookup_node(t, 1, results, 3, false) == 2);
1355	assert(results[0] == (void *)0x12345678);
1356	assert(results[1] == (void *)0xdea0);
1357	assert(radix_tree_gang_lookup_node(t, 1, results, 3, true) == 0);
1358	memset(results, 0, sizeof(results));
1359	assert(radix_tree_gang_lookup_node(t, 1001, results, 3, false) == 1);
1360	assert(results[0] == (void *)0xdea0);
1361	assert(radix_tree_gang_lookup_node(t, 1001, results, 3, true) == 0);
1362	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1363	    false) == 0);
1364	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1365	    true) == 0);
1366	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1367	    3, false) == 0);
1368	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1369	    3, true) == 0);
1370	memset(results, 0, sizeof(results));
1371	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, false, 2)
1372	    == 1);
1373	assert(results[0] == (void *)0x12345678);
1374	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, true, 2)
1375	    == 0);
1376	assert(entry_tagmask(t->t_root) != 0);
1377	assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
1378	assert(entry_tagmask(t->t_root) == 0);
1379	radix_tree_dump(t);
1380	assert(radix_tree_insert_node(t, UINT64_C(10000000001), (void *)0xfff0)
1381	    == 0);
1382	memset(results, 0, sizeof(results));
1383	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1384	    false) == 2);
1385	assert(results[0] == (void *)0xdea0);
1386	assert(results[1] == (void *)0xfff0);
1387	memset(results, 0, sizeof(results));
1388	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1389	    true) == 2);
1390	assert(results[0] == (void *)0xdea0);
1391	assert(results[1] == (void *)0xfff0);
1392	memset(results, 0, sizeof(results));
1393	assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1394	    results, 3, false) == 3);
1395	assert(results[0] == (void *)0xfff0);
1396	assert(results[1] == (void *)0xdea0);
1397	assert(results[2] == (void *)0xbea0);
1398	memset(results, 0, sizeof(results));
1399	assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1400	    results, 3, true) == 2);
1401	assert(results[0] == (void *)0xfff0);
1402	assert(results[1] == (void *)0xdea0);
1403	assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
1404	    (void *)0xdea0);
1405	assert(radix_tree_remove_node(t, UINT64_C(10000000001)) ==
1406	    (void *)0xfff0);
1407	radix_tree_dump(t);
1408	assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
1409	radix_tree_dump(t);
1410	radix_tree_fini_tree(t);
1411}
1412
1413#include <sys/time.h>
1414
1415struct testnode {
1416	uint64_t idx;
1417	bool tagged[RADIX_TREE_TAG_ID_MAX];
1418};
1419
1420static void
1421printops(const char *title, const char *name, int tag, unsigned int n,
1422    const struct timeval *stv, const struct timeval *etv)
1423{
1424	uint64_t s = stv->tv_sec * 1000000 + stv->tv_usec;
1425	uint64_t e = etv->tv_sec * 1000000 + etv->tv_usec;
1426
1427	printf("RESULT %s %s %d %lf op/s\n", title, name, tag,
1428	    (double)n / (e - s) * 1000000);
1429}
1430
1431#define	TEST2_GANG_LOOKUP_NODES	16
1432
1433static bool
1434test2_should_tag(unsigned int i, unsigned int tagid)
1435{
1436
1437	if (tagid == 0) {
1438		return (i % 4) == 0;	/* 25% */
1439	} else {
1440		return (i % 7) == 0;	/* 14% */
1441	}
1442	return 1;
1443}
1444
1445static void
1446check_tag_count(const unsigned int *ntagged, unsigned int tagmask,
1447    unsigned int count)
1448{
1449	unsigned int tag;
1450
1451	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1452		if ((tagmask & (1 << tag)) == 0) {
1453			continue;
1454		}
1455		if (((tagmask - 1) & tagmask) == 0) {
1456			assert(count == ntagged[tag]);
1457		} else {
1458			assert(count >= ntagged[tag]);
1459		}
1460	}
1461}
1462
1463static void
1464test2(const char *title, bool dense)
1465{
1466	struct radix_tree s;
1467	struct radix_tree *t = &s;
1468	struct testnode *n;
1469	unsigned int i;
1470	unsigned int nnodes = 100000;
1471	unsigned int removed;
1472	unsigned int tag;
1473	unsigned int tagmask;
1474	unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
1475	struct testnode *nodes;
1476	struct timeval stv;
1477	struct timeval etv;
1478
1479	nodes = malloc(nnodes * sizeof(*nodes));
1480	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1481		ntagged[tag] = 0;
1482	}
1483	radix_tree_init_tree(t);
1484	for (i = 0; i < nnodes; i++) {
1485		n = &nodes[i];
1486		n->idx = random();
1487		if (sizeof(long) == 4) {
1488			n->idx <<= 32;
1489			n->idx |= (uint32_t)random();
1490		}
1491		if (dense) {
1492			n->idx %= nnodes * 2;
1493		}
1494		while (radix_tree_lookup_node(t, n->idx) != NULL) {
1495			n->idx++;
1496		}
1497		radix_tree_insert_node(t, n->idx, n);
1498		for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1499			tagmask = 1 << tag;
1500
1501			n->tagged[tag] = test2_should_tag(i, tag);
1502			if (n->tagged[tag]) {
1503				radix_tree_set_tag(t, n->idx, tagmask);
1504				ntagged[tag]++;
1505			}
1506			assert((n->tagged[tag] ? tagmask : 0) ==
1507			    radix_tree_get_tag(t, n->idx, tagmask));
1508		}
1509	}
1510
1511	gettimeofday(&stv, NULL);
1512	for (i = 0; i < nnodes; i++) {
1513		n = &nodes[i];
1514		assert(radix_tree_lookup_node(t, n->idx) == n);
1515	}
1516	gettimeofday(&etv, NULL);
1517	printops(title, "lookup", 0, nnodes, &stv, &etv);
1518
1519	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1520		unsigned int count = 0;
1521
1522		gettimeofday(&stv, NULL);
1523		for (i = 0; i < nnodes; i++) {
1524			unsigned int tagged;
1525
1526			n = &nodes[i];
1527			tagged = radix_tree_get_tag(t, n->idx, tagmask);
1528			assert((tagged & ~tagmask) == 0);
1529			for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1530				assert((tagmask & (1 << tag)) == 0 ||
1531				    n->tagged[tag] == !!(tagged & (1 << tag)));
1532			}
1533			if (tagged) {
1534				count++;
1535			}
1536		}
1537		gettimeofday(&etv, NULL);
1538		check_tag_count(ntagged, tagmask, count);
1539		printops(title, "get_tag", tagmask, nnodes, &stv, &etv);
1540	}
1541
1542	gettimeofday(&stv, NULL);
1543	for (i = 0; i < nnodes; i++) {
1544		n = &nodes[i];
1545		radix_tree_remove_node(t, n->idx);
1546	}
1547	gettimeofday(&etv, NULL);
1548	printops(title, "remove", 0, nnodes, &stv, &etv);
1549
1550	gettimeofday(&stv, NULL);
1551	for (i = 0; i < nnodes; i++) {
1552		n = &nodes[i];
1553		radix_tree_insert_node(t, n->idx, n);
1554	}
1555	gettimeofday(&etv, NULL);
1556	printops(title, "insert", 0, nnodes, &stv, &etv);
1557
1558	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1559		tagmask = 1 << tag;
1560
1561		ntagged[tag] = 0;
1562		gettimeofday(&stv, NULL);
1563		for (i = 0; i < nnodes; i++) {
1564			n = &nodes[i];
1565			if (n->tagged[tag]) {
1566				radix_tree_set_tag(t, n->idx, tagmask);
1567				ntagged[tag]++;
1568			}
1569		}
1570		gettimeofday(&etv, NULL);
1571		printops(title, "set_tag", tag, ntagged[tag], &stv, &etv);
1572	}
1573
1574	gettimeofday(&stv, NULL);
1575	{
1576		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1577		uint64_t nextidx;
1578		unsigned int nfound;
1579		unsigned int total;
1580
1581		nextidx = 0;
1582		total = 0;
1583		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1584		    (void *)results, __arraycount(results), false)) > 0) {
1585			nextidx = results[nfound - 1]->idx + 1;
1586			total += nfound;
1587			if (nextidx == 0) {
1588				break;
1589			}
1590		}
1591		assert(total == nnodes);
1592	}
1593	gettimeofday(&etv, NULL);
1594	printops(title, "ganglookup", 0, nnodes, &stv, &etv);
1595
1596	gettimeofday(&stv, NULL);
1597	{
1598		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1599		uint64_t nextidx;
1600		unsigned int nfound;
1601		unsigned int total;
1602
1603		nextidx = UINT64_MAX;
1604		total = 0;
1605		while ((nfound = radix_tree_gang_lookup_node_reverse(t, nextidx,
1606		    (void *)results, __arraycount(results), false)) > 0) {
1607			nextidx = results[nfound - 1]->idx - 1;
1608			total += nfound;
1609			if (nextidx == UINT64_MAX) {
1610				break;
1611			}
1612		}
1613		assert(total == nnodes);
1614	}
1615	gettimeofday(&etv, NULL);
1616	printops(title, "ganglookup_reverse", 0, nnodes, &stv, &etv);
1617
1618	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1619		unsigned int total = 0;
1620
1621		gettimeofday(&stv, NULL);
1622		{
1623			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1624			uint64_t nextidx;
1625			unsigned int nfound;
1626
1627			nextidx = 0;
1628			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1629			    nextidx, (void *)results, __arraycount(results),
1630			    false, tagmask)) > 0) {
1631				nextidx = results[nfound - 1]->idx + 1;
1632				total += nfound;
1633			}
1634		}
1635		gettimeofday(&etv, NULL);
1636		check_tag_count(ntagged, tagmask, total);
1637		assert(tagmask != 0 || total == 0);
1638		printops(title, "ganglookup_tag", tagmask, total, &stv, &etv);
1639	}
1640
1641	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1642		unsigned int total = 0;
1643
1644		gettimeofday(&stv, NULL);
1645		{
1646			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1647			uint64_t nextidx;
1648			unsigned int nfound;
1649
1650			nextidx = UINT64_MAX;
1651			while ((nfound =
1652			    radix_tree_gang_lookup_tagged_node_reverse(t,
1653			    nextidx, (void *)results, __arraycount(results),
1654			    false, tagmask)) > 0) {
1655				nextidx = results[nfound - 1]->idx - 1;
1656				total += nfound;
1657				if (nextidx == UINT64_MAX) {
1658					break;
1659				}
1660			}
1661		}
1662		gettimeofday(&etv, NULL);
1663		check_tag_count(ntagged, tagmask, total);
1664		assert(tagmask != 0 || total == 0);
1665		printops(title, "ganglookup_tag_reverse", tagmask, total,
1666		    &stv, &etv);
1667	}
1668
1669	removed = 0;
1670	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1671		unsigned int total;
1672
1673		total = 0;
1674		tagmask = 1 << tag;
1675		gettimeofday(&stv, NULL);
1676		{
1677			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1678			uint64_t nextidx;
1679			unsigned int nfound;
1680
1681			nextidx = 0;
1682			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1683			    nextidx, (void *)results, __arraycount(results),
1684			    false, tagmask)) > 0) {
1685				for (i = 0; i < nfound; i++) {
1686					radix_tree_remove_node(t,
1687					    results[i]->idx);
1688				}
1689				nextidx = results[nfound - 1]->idx + 1;
1690				total += nfound;
1691				if (nextidx == 0) {
1692					break;
1693				}
1694			}
1695		}
1696		gettimeofday(&etv, NULL);
1697		if (tag == 0) {
1698			check_tag_count(ntagged, tagmask, total);
1699		} else {
1700			assert(total <= ntagged[tag]);
1701		}
1702		printops(title, "ganglookup_tag+remove", tagmask, total, &stv,
1703		    &etv);
1704		removed += total;
1705	}
1706
1707	gettimeofday(&stv, NULL);
1708	{
1709		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1710		uint64_t nextidx;
1711		unsigned int nfound;
1712		unsigned int total;
1713
1714		nextidx = 0;
1715		total = 0;
1716		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1717		    (void *)results, __arraycount(results), false)) > 0) {
1718			for (i = 0; i < nfound; i++) {
1719				assert(results[i] == radix_tree_remove_node(t,
1720				    results[i]->idx));
1721			}
1722			nextidx = results[nfound - 1]->idx + 1;
1723			total += nfound;
1724			if (nextidx == 0) {
1725				break;
1726			}
1727		}
1728		assert(total == nnodes - removed);
1729	}
1730	gettimeofday(&etv, NULL);
1731	printops(title, "ganglookup+remove", 0, nnodes - removed, &stv, &etv);
1732
1733	assert(radix_tree_empty_tree_p(t));
1734	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1735		assert(radix_tree_empty_tagged_tree_p(t, tagmask));
1736	}
1737	radix_tree_fini_tree(t);
1738	free(nodes);
1739}
1740
1741int
1742main(int argc, char *argv[])
1743{
1744
1745	test1();
1746	test2("dense", true);
1747	test2("sparse", false);
1748	return 0;
1749}
1750
1751#endif /* defined(UNITTEST) */
1752