vm_radix.c revision 247742
/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general purpose library
 * because there are way too many parameters embedded that should really
 * be decided by the library consumers.  At the same time, consumers
 * of this code must achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible.
 * - There is no bias toward lookup operations over inserts or removes,
 *   and vice-versa.
 * - On average there are not many complete levels, so level compression
 *   would only complicate things.
 */

#include <sys/cdefs.h>

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the children of a node to fit in a single
 * cache line, or at least not to span many of them.
 * Sparse tries are compressed by path compression anyway, so the extra
 * levels implied by a narrow width are not a big deal.
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
#define	VM_RADIX_LIMIT							\
	(howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)

/* Flag bits stored in node pointers. */
#define	VM_RADIX_ISLEAF	0x1
#define	VM_RADIX_FLAGS	0x1
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

/* Returns one unit associated with specified level. */
#define	VM_RADIX_UNITLEVEL(lev)						\
	((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
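
/*
 * For illustration (assuming the usual 64-bit vm_pindex_t): on __LP64__
 * platforms VM_RADIX_WIDTH is 4, so VM_RADIX_COUNT is 16, VM_RADIX_MASK is
 * 0xf and VM_RADIX_LIMIT is 64 / 4 - 1 = 15, giving levels 0 (the most
 * significant nibble of a key) through 15 (the least significant nibble).
 * In that case VM_RADIX_UNITLEVEL(15) is 1 and VM_RADIX_UNITLEVEL(0) is
 * 1 << 60.
 */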

struct vm_radix_node {
	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
	vm_pindex_t	 rn_owner;			/* Owner of record. */
	uint16_t	 rn_count;			/* Valid children. */
	uint16_t	 rn_clev;			/* Current level. */
};
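
/*
 * A rough size sketch (again assuming a 64-bit vm_pindex_t): on __LP64__
 * the rn_child array alone takes 16 * 8 = 128 bytes, plus 8 bytes of
 * rn_owner and 4 bytes of counters before any compiler padding; on 32-bit
 * platforms the children take 8 * 4 = 32 bytes.  This is what the
 * cache-line comment above the VM_RADIX_WIDTH definition refers to.
 */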

static uma_zone_t vm_radix_node_zone;

/*
 * Allocate a radix node.  Pre-allocation ensures that the request will
 * always be successfully satisfied.
 */
static __inline struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);

	/*
	 * The required number of nodes should already be pre-allocated
	 * in vm_radix_init().  However, UMA can reserve a few nodes in
	 * per-cpu buckets, which will not be accessible from the current
	 * CPU.  The allocation could then return NULL when the
	 * pre-allocation pool is close to exhaustion.  In practice this
	 * should never be a problem because a new node is not always
	 * required for an insert, so the pre-allocation pool should
	 * already have some extra pages that indirectly handle this
	 * situation.
	 */
	if (rnode == NULL)
		panic("%s: uma_zalloc() returned NULL for a new node",
		    __func__);
	rnode->rn_owner = owner;
	rnode->rn_count = count;
	rnode->rn_clev = clevel;
	return (rnode);
}

/*
 * Free radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{

	uma_zfree(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

	return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
	    VM_RADIX_MASK);
}
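
/*
 * For example, with the __LP64__ parameters above (VM_RADIX_WIDTH 4,
 * VM_RADIX_LIMIT 15), vm_radix_slot(idx, 0) extracts the most significant
 * nibble of idx and vm_radix_slot(idx, 15) the least significant one:
 * vm_radix_slot(0x1230, 14) == 3 and vm_radix_slot(0x1230, 15) == 0.
 */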

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
	vm_pindex_t ret;

	ret = index;
	if (level < VM_RADIX_LIMIT) {
		ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
		ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
	}
	return (ret);
}
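
/*
 * For example (still with the __LP64__ parameters), vm_radix_trimkey()
 * keeps everything through the slot of the given level and zeroes the
 * slots past it: vm_radix_trimkey(0x1234, 13) clears the low
 * (15 - 13) * 4 = 8 bits and returns 0x1200, while
 * vm_radix_trimkey(idx, VM_RADIX_LIMIT) returns idx unchanged.
 */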

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_getroot(struct vm_radix *rtree)
{

	return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
{

	rtree->rt_root = (uintptr_t)rnode;
}

/*
 * Returns the associated page extracted from rnode if available,
 * NULL otherwise.
 */
static __inline vm_page_t
vm_radix_node_page(struct vm_radix_node *rnode)
{

	return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
	    (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
}

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page)
{
	int slot;

	slot = vm_radix_slot(index, clev);
	rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
}
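
/*
 * Leaves are thus distinguished from internal nodes by the low bit of the
 * child pointer: a page is stored as ((uintptr_t)page | VM_RADIX_ISLEAF)
 * and decoded again by vm_radix_node_page().  This relies on vm_page
 * structures (and radix nodes) being at least 2-byte aligned, so that
 * bit 0 of a real pointer is always clear.
 */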

/*
 * Returns the level where two keys first differ.
 * It cannot accept two equal keys.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
	uint16_t clev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	index1 ^= index2;
	for (clev = 0; clev <= VM_RADIX_LIMIT; clev++)
		if (vm_radix_slot(index1, clev))
			return (clev);
	panic("%s: cannot reach this point", __func__);
	return (0);
}
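
/*
 * For example, vm_radix_keydiff(0x1234, 0x1834) computes the XOR 0x0a00 and
 * scans it from the most significant slot (level 0) downwards; the first
 * non-zero slot is found at level 13, which is where a new branching node
 * has to be placed when both keys must live in the same trie.
 */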

/*
 * Returns TRUE if it can be determined that the key does not belong to
 * the specified rnode.  FALSE otherwise.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

	if (rnode->rn_clev > 0) {
		idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
		idx -= rnode->rn_owner;
		if (idx != 0)
			return (TRUE);
	}
	return (FALSE);
}
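
/*
 * In other words, keybarr() checks whether idx still carries the prefix
 * owned by the node.  For example, a node created with rn_clev == 13 and
 * rn_owner == 0x1000 covers the keys 0x1000-0x1fff: vm_radix_keybarr() on
 * 0x1abc trims it to 0x1000 and returns FALSE, while 0x2345 trims to
 * 0x2000 and returns TRUE.
 */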

/*
 * Adjusts the idx key to the first upper level available, based on a valid
 * initial level and a map of available levels.
 * Returns a value bigger than 0 to signal that there are no valid levels
 * available.
 */
static __inline int
vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
{
	vm_pindex_t wrapidx;

	for (; levels[ilev] == FALSE ||
	    vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
		if (ilev == 0)
			break;
	KASSERT(ilev > 0 || levels[0] == TRUE,
	    ("%s: levels back-scanning problem", __func__));
	if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
		return (1);
	wrapidx = *idx;
	*idx = vm_radix_trimkey(*idx, ilev);
	*idx += VM_RADIX_UNITLEVEL(ilev);
	return (*idx < wrapidx);
}
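
/*
 * For example, if level 14 is marked in the map and the level-14 slot of
 * *idx is not yet the last one, vm_radix_addlev(&idx, levels, 14) trims idx
 * at level 14 and adds VM_RADIX_UNITLEVEL(14): with the __LP64__ parameters
 * an idx of 0x1234 becomes 0x1240, i.e. the first key of the next level-14
 * subtree.  A non-zero return value signals that there is no greater key
 * to move to.
 */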

/*
 * Adjusts the idx key to the first lower level available, based on a valid
 * initial level and a map of available levels.
 * Returns a value bigger than 0 to signal that there are no valid levels
 * available.
 */
static __inline int
vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
{
	vm_pindex_t wrapidx;

	for (; levels[ilev] == FALSE ||
	    vm_radix_slot(*idx, ilev) == 0; ilev--)
		if (ilev == 0)
			break;
	KASSERT(ilev > 0 || levels[0] == TRUE,
	    ("%s: levels back-scanning problem", __func__));
	if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
		return (1);
	wrapidx = *idx;
	*idx = vm_radix_trimkey(*idx, ilev);
	*idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
	*idx -= VM_RADIX_UNITLEVEL(ilev);
	return (*idx > wrapidx);
}
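
/*
 * This is the mirror image of vm_radix_addlev(): with the same parameters,
 * vm_radix_declev(&idx, levels, 14) on an idx of 0x1234 trims it to 0x1230,
 * fills the lower slots to get 0x123f and subtracts VM_RADIX_UNITLEVEL(14),
 * ending at 0x122f, the last key of the previous level-14 subtree.  A
 * non-zero return value signals that there is no smaller key to move to.
 */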

/*
 * Internal helper for the vm_radix_reclaim_allnodes() primitive.
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	int slot;

	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
		if (rnode->rn_child[slot] == NULL)
			continue;
		if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
			vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
		rnode->rn_count--;
	}
	vm_radix_node_put(rnode);
}

#ifdef INVARIANTS
/*
 * Radix node zone destructor.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
{
	struct vm_radix_node *rnode;

	rnode = mem;
	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: Freeing node %p with %d children\n", mem,
	    rnode->rn_count));
}
#endif

/*
 * Pre-allocate intermediate nodes from the UMA slab zone.
 */
static void
vm_radix_prealloc(void *arg __unused)
{

	if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
		panic("%s: unable to create new zone", __func__);
	uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
}
SYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
    NULL);

/*
 * Initialize the UMA slab zone.
 * Until vm_radix_prealloc() is called, the zone will be served by the
 * UMA boot-time pre-allocated pool of pages.
 */
void
vm_radix_init(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL,
#ifdef INVARIANTS
	    vm_radix_node_zone_dtor,
#else
	    NULL,
#endif
	    NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE);
}
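
/*
 * Illustrative usage (a sketch only; the field and function names of the
 * consumer are hypothetical, not defined in this file): an all-zero
 * struct vm_radix is an empty trie (vm_radix_getroot() returns NULL for
 * it), and the caller is expected to serialize inserts and removes
 * externally, since this file provides no locking of its own:
 *
 *	vm_radix_insert(&obj->rtree, m->pindex, m);
 *	m = vm_radix_lookup(&obj->rtree, pindex);
 *	vm_radix_remove(&obj->rtree, m->pindex);
 */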

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
void
vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
{
	vm_pindex_t newind;
	struct vm_radix_node *rnode, *tmp, *tmp2;
	vm_page_t m;
	int slot;
	uint16_t clev;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_getroot(rtree);
	if (rnode == NULL) {
		rnode = vm_radix_node_get(0, 1, 0);
		vm_radix_setroot(rtree, rnode);
		vm_radix_addpage(rnode, index, 0, page);
		return;
	}
	while (rnode != NULL) {
		if (vm_radix_keybarr(rnode, index) == TRUE)
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL) {
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(vm_radix_trimkey(index,
			    clev - 1), 2, clev);
			rnode->rn_child[slot] = tmp;
			vm_radix_addpage(tmp, index, clev, page);
			vm_radix_addpage(tmp, m->pindex, clev, m);
			return;
		}
		if (rnode->rn_child[slot] == NULL) {
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page);
			return;
		}
		rnode = rnode->rn_child[slot];
	}
	if (rnode == NULL)
		panic("%s: path traversal ended unexpectedly", __func__);

	/*
	 * Scan the trie from the top and find the parent under which the
	 * new object must be inserted.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	slot = VM_RADIX_COUNT;
	for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
		KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
		    __func__));
		KASSERT(clev >= rnode->rn_clev,
		    ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
		    __func__, clev, rnode->rn_clev));
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = rnode->rn_child[slot];
		KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
		    ("%s: unexpected lookup interruption", __func__));
		if (tmp->rn_clev > clev)
			break;
	}
	KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
	    ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
	    __func__, (void *)rnode, (void *)tmp, slot));

	/*
	 * A new node is needed because the right insertion level was reached.
	 * Set up the new intermediate node and add the 2 children: the
	 * new object and the older edge.
	 */
	tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2,
	    clev);
	rnode->rn_child[slot] = tmp2;
	vm_radix_addpage(tmp2, index, clev, page);
	slot = vm_radix_slot(newind, clev);
	tmp2->rn_child[slot] = tmp;
}
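
/*
 * A worked example (with the __LP64__ parameters; the indices are made up
 * for illustration): inserting a page at index 0x1234 into an empty trie
 * creates a root node at level 0 and stores the page as a leaf in root
 * slot 0.  Inserting a second page at index 0x1834 then hits that leaf,
 * computes vm_radix_keydiff(0x1234, 0x1834) == 13 and replaces the leaf
 * with an intermediate node owning prefix 0x1000 at level 13, holding the
 * two pages in its slots 2 and 8.  Levels 1-12 are never materialized:
 * that is the path compression.
 */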

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		if (vm_radix_keybarr(rnode, index) == TRUE)
			return (NULL);
		slot = vm_radix_slot(index, rnode->rn_clev);
		rnode = rnode->rn_child[slot];
		m = vm_radix_node_page(rnode);
		if (m != NULL) {
			if (m->pindex == index)
				return (m);
			else
				return (NULL);
		}
	}
	return (NULL);
}
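
/*
 * Continuing the insert example above: a lookup of 0x1234 follows root
 * slot 0 to the level-13 node and finds the leaf in its slot 2, a lookup
 * of 0x1334 reaches the same node but finds slot 3 empty, and a lookup of
 * 0x2345 is cut off early because vm_radix_keybarr() reports that the
 * level-13 node only owns the 0x1000-0x1fff range.
 */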

/*
 * Look up any entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *rnode;
	int slot;
	uint16_t difflev;
	boolean_t maplevels[VM_RADIX_LIMIT + 1];
#ifdef INVARIANTS
	int loops = 0;
#endif

restart:
	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
		maplevels[difflev] = FALSE;
	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		maplevels[rnode->rn_clev] = TRUE;

		/*
		 * If the keys differ before the current bisection node,
		 * the search key might roll back to the earliest
		 * available bisection node, or to the smallest value
		 * in the current domain (if the owner is bigger than
		 * the search key).
		 * The search for a valid bisection node is helped by
		 * the maplevels array, which should immediately yield
		 * a lower useful level, skipping holes.
		 */
		if (vm_radix_keybarr(rnode, index) == TRUE) {
			difflev = vm_radix_keydiff(index, rnode->rn_owner);
			if (index > rnode->rn_owner) {
				if (vm_radix_addlev(&index, maplevels,
				    difflev) > 0)
					break;
			} else
				index = vm_radix_trimkey(rnode->rn_owner,
				    difflev);
			goto restart;
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL && m->pindex >= index)
			return (m);
		if (rnode->rn_child[slot] != NULL && m == NULL) {
			rnode = rnode->rn_child[slot];
			continue;
		}

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			index += inc;
			slot++;
			for (;; index += inc, slot++) {
				m = vm_radix_node_page(rnode->rn_child[slot]);
				if (m != NULL && m->pindex >= index)
					return (m);
				if ((rnode->rn_child[slot] != NULL &&
				    m == NULL) || slot == (VM_RADIX_COUNT - 1))
					break;
			}
		}

		/*
		 * If no valid page or edge greater than the search slot
		 * was found in the traversal, skip to the next
		 * higher-level key.
		 */
		if (slot == (VM_RADIX_COUNT - 1) &&
		    (rnode->rn_child[slot] == NULL || m != NULL)) {
			if (rnode->rn_clev == 0 || vm_radix_addlev(&index,
			    maplevels, rnode->rn_clev - 1) > 0)
				break;
			goto restart;
		}
		rnode = rnode->rn_child[slot];
	}
	return (NULL);
}
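
/*
 * A typical (sketched) use of the ordered lookups is a forward scan of all
 * the entries starting from a given index; the names below are purely
 * illustrative:
 *
 *	for (m = vm_radix_lookup_ge(rtree, start); m != NULL;
 *	    m = vm_radix_lookup_ge(rtree, m->pindex + 1))
 *		handle(m);
 */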

/*
 * Look up any entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *rnode;
	int slot;
	uint16_t difflev;
	boolean_t maplevels[VM_RADIX_LIMIT + 1];
#ifdef INVARIANTS
	int loops = 0;
#endif

restart:
	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
		maplevels[difflev] = FALSE;
	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		maplevels[rnode->rn_clev] = TRUE;

		/*
		 * If the keys differ before the current bisection node,
		 * the search key might roll back to the earliest
		 * available bisection node, or to the highest value
		 * in the current domain (if the owner is smaller than
		 * the search key).
		 * The search for a valid bisection node is helped by
		 * the maplevels array, which should immediately yield
		 * a lower useful level, skipping holes.
		 */
		if (vm_radix_keybarr(rnode, index) == TRUE) {
			difflev = vm_radix_keydiff(index, rnode->rn_owner);
			if (index > rnode->rn_owner) {
				index = vm_radix_trimkey(rnode->rn_owner,
				    difflev);
				index |= VM_RADIX_UNITLEVEL(difflev) - 1;
			} else if (vm_radix_declev(&index, maplevels,
			    difflev) > 0)
				break;
			goto restart;
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL && m->pindex <= index)
			return (m);
		if (rnode->rn_child[slot] != NULL && m == NULL) {
			rnode = rnode->rn_child[slot];
			continue;
		}

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			index |= inc - 1;
			index -= inc;
			slot--;
			for (;; index -= inc, slot--) {
				m = vm_radix_node_page(rnode->rn_child[slot]);
				if (m != NULL && m->pindex <= index)
					return (m);
				if ((rnode->rn_child[slot] != NULL &&
				    m == NULL) || slot == 0)
					break;
			}
		}

		/*
		 * If no valid page or edge smaller than the search slot
		 * was found in the traversal, step back to the previous
		 * higher-level key.
		 */
		if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
			if (rnode->rn_clev == 0 || vm_radix_declev(&index,
			    maplevels, rnode->rn_clev - 1) > 0)
				break;
			goto restart;
		}
		rnode = rnode->rn_child[slot];
	}
	return (NULL);
}

/*
 * Remove the specified index from the tree.
 * Panics if the key is not present.
 */
void
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent;
	vm_page_t m;
	int i, slot;

	parent = NULL;
	rnode = vm_radix_getroot(rtree);
	for (;;) {
		if (rnode == NULL)
			panic("vm_radix_remove: impossible to locate the key");
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL && m->pindex == index) {
			rnode->rn_child[slot] = NULL;
			rnode->rn_count--;
			if (rnode->rn_count > 1)
				break;
			if (parent == NULL) {
				if (rnode->rn_count == 0) {
					vm_radix_node_put(rnode);
					vm_radix_setroot(rtree, NULL);
				}
				break;
			}
			for (i = 0; i < VM_RADIX_COUNT; i++)
				if (rnode->rn_child[i] != NULL)
					break;
			KASSERT(i != VM_RADIX_COUNT,
			    ("%s: invalid node configuration", __func__));
			slot = vm_radix_slot(index, parent->rn_clev);
			KASSERT(parent->rn_child[slot] == rnode,
			    ("%s: invalid child value", __func__));
			parent->rn_child[slot] = rnode->rn_child[i];
			rnode->rn_count--;
			rnode->rn_child[i] = NULL;
			vm_radix_node_put(rnode);
			break;
		}
		if (m != NULL && m->pindex != index)
			panic("%s: invalid key found", __func__);
		parent = rnode;
		rnode = rnode->rn_child[slot];
	}
}
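
/*
 * Continuing the earlier example: removing index 0x1834 clears slot 8 of
 * the level-13 node, leaving it with a single child; that remaining leaf
 * (0x1234) is then promoted into the parent's slot and the now-useless
 * intermediate node is freed, undoing the split that the second insert
 * created.
 */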

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly bounded because
 * the maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_getroot(rtree);
	if (root == NULL)
		return;
	vm_radix_reclaim_allnodes_int(root);
	vm_radix_setroot(rtree, NULL);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode;
	int i;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++)
		if (rnode->rn_child[i] != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)rnode->rn_child[i],
			    (void *)vm_radix_node_page(rnode->rn_child[i]),
			    rnode->rn_clev);
}
#endif /* DDB */