vm_radix.c revision 247742
1226645Sattilio/*
2246726Sattilio * Copyright (c) 2013 EMC Corp.
3226930Sjeff * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4226645Sattilio * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5226645Sattilio * All rights reserved.
6226645Sattilio *
7226645Sattilio * Redistribution and use in source and binary forms, with or without
8226645Sattilio * modification, are permitted provided that the following conditions
9226645Sattilio * are met:
10226645Sattilio * 1. Redistributions of source code must retain the above copyright
11226645Sattilio *    notice, this list of conditions and the following disclaimer.
12226645Sattilio * 2. Redistributions in binary form must reproduce the above copyright
13226645Sattilio *    notice, this list of conditions and the following disclaimer in the
14226645Sattilio *    documentation and/or other materials provided with the distribution.
15226645Sattilio *
16226645Sattilio * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17226645Sattilio * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18226645Sattilio * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19226645Sattilio * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20226645Sattilio * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21226645Sattilio * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22226645Sattilio * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23226645Sattilio * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24226645Sattilio * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25226645Sattilio * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26226645Sattilio * SUCH DAMAGE.
27226645Sattilio *
28226645Sattilio */
29226645Sattilio
30226645Sattilio/*
31246726Sattilio * Path-compressed radix trie implementation.
32246726Sattilio * The following code is not generalized into a general purpose library
33246726Sattilio * because there are way too many parameters embedded that should really
34246726Sattilio * be decided by the library consumers.  At the same time, consumers
35246726Sattilio * of this code must achieve highest possible performance.
36246726Sattilio *
37246726Sattilio * The implementation takes into account the following rationale:
38246726Sattilio * - Size of the nodes might be as small as possible.
39246726Sattilio * - There is no bias toward lookup operations over inserts or removes,
40246726Sattilio *   and vice-versa.
41246726Sattilio * - In average there are not many complete levels, than level
42246726Sattilio *   compression may just complicate things.
43226645Sattilio */
44226645Sattilio
45226645Sattilio#include <sys/cdefs.h>
46226645Sattilio
47246726Sattilio#include "opt_ddb.h"
48246726Sattilio
49226645Sattilio#include <sys/param.h>
50226645Sattilio#include <sys/systm.h>
51226645Sattilio#include <sys/kernel.h>
52246840Sattilio#include <sys/vmmeter.h>
53246726Sattilio
54226873Sattilio#include <vm/uma.h>
55226645Sattilio#include <vm/vm.h>
56226876Sjeff#include <vm/vm_param.h>
57226873Sattilio#include <vm/vm_page.h>
58226645Sattilio#include <vm/vm_radix.h>
59226645Sattilio
60246726Sattilio#ifdef DDB
61246726Sattilio#include <ddb/ddb.h>
62246726Sattilio#endif
63226645Sattilio
64233034Sattilio/*
65246726Sattilio * Such sizes should permit to keep node children contained into a single
66246726Sattilio * cache-line, or to at least not span many of those.
67246726Sattilio * In particular, sparse tries should however be compressed properly and
68246726Sattilio * then make some extra-levels not a big deal.
69233034Sattilio */
70246726Sattilio#ifdef __LP64__
71246726Sattilio#define	VM_RADIX_WIDTH	4
72233034Sattilio#else
73246726Sattilio#define	VM_RADIX_WIDTH	3
74233034Sattilio#endif
75233034Sattilio
76232326Sattilio#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
77232326Sattilio#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
78246726Sattilio#define	VM_RADIX_LIMIT							\
79246726Sattilio	(howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
80232326Sattilio
81232326Sattilio/* Flag bits stored in node pointers. */
82246726Sattilio#define	VM_RADIX_ISLEAF	0x1
83246726Sattilio#define	VM_RADIX_FLAGS	0x1
84246726Sattilio#define	VM_RADIX_PAD	VM_RADIX_FLAGS
85232326Sattilio
86246726Sattilio/* Returns one unit associated with specified level. */
87246726Sattilio#define	VM_RADIX_UNITLEVEL(lev)						\
88246726Sattilio	((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
89232326Sattilio
90232326Sattiliostruct vm_radix_node {
91232326Sattilio	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
92246726Sattilio	vm_pindex_t	 rn_owner;			/* Owner of record. */
93246726Sattilio	uint16_t	 rn_count;			/* Valid children. */
94246726Sattilio	uint16_t	 rn_clev;			/* Current level. */
95232326Sattilio};
96232326Sattilio
97226873Sattiliostatic uma_zone_t vm_radix_node_zone;
98226645Sattilio
99246794Sattilio/*
100246726Sattilio * Allocate a radix node.  Pre-allocation ensures that the request will be
101246726Sattilio * always successfully satisfied.
102246726Sattilio */
103246726Sattiliostatic __inline struct vm_radix_node *
104246726Sattiliovm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
105226873Sattilio{
106246726Sattilio	struct vm_radix_node *rnode;
107226645Sattilio
108247742Sattilio	rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);
109226873Sattilio
110247742Sattilio	/*
111247742Sattilio	 * The required number of nodes might be already correctly
112247742Sattilio	 * pre-allocated in vm_radix_init().  However, UMA can reserve
113247742Sattilio	 * few nodes on per-cpu specific buckets, which will not be
114247742Sattilio	 * accessible from the curcpu.  The allocation could then
115247742Sattilio	 * return NULL when the pre-allocation pool is close to be
116247742Sattilio	 * exhausted.  Anyway, in practice this should never be a
117247742Sattilio	 * problem because a new node is not always required for
118247742Sattilio	 * insert, thus the pre-allocation pool should already have
119247742Sattilio	 * some extra-pages that indirectly deal with this situation.
120247742Sattilio	 */
121247742Sattilio	if (rnode == NULL)
122247742Sattilio		panic("%s: uma_zalloc() returned NULL for a new node",
123247742Sattilio		    __func__);
124246726Sattilio	rnode->rn_owner = owner;
125246726Sattilio	rnode->rn_count = count;
126246726Sattilio	rnode->rn_clev = clevel;
127246726Sattilio	return (rnode);
128246726Sattilio}
129226873Sattilio
130246726Sattilio/*
131246726Sattilio * Free radix node.
132246726Sattilio */
133246726Sattiliostatic __inline void
134246726Sattiliovm_radix_node_put(struct vm_radix_node *rnode)
135246726Sattilio{
136246726Sattilio
137246726Sattilio	uma_zfree(vm_radix_node_zone, rnode);
138226873Sattilio}
139226873Sattilio
140246726Sattilio/*
141246726Sattilio * Return the position in the array for a given level.
142246726Sattilio */
143246726Sattiliostatic __inline int
144246726Sattiliovm_radix_slot(vm_pindex_t index, uint16_t level)
145226873Sattilio{
146226873Sattilio
147246726Sattilio	return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
148246726Sattilio	    VM_RADIX_MASK);
149226873Sattilio}
150226873Sattilio
151246726Sattilio/* Trims the key after the specified level. */
152246726Sattiliostatic __inline vm_pindex_t
153246726Sattiliovm_radix_trimkey(vm_pindex_t index, uint16_t level)
154226873Sattilio{
155246726Sattilio	vm_pindex_t ret;
156226873Sattilio
157246726Sattilio	ret = index;
158246726Sattilio	if (level < VM_RADIX_LIMIT) {
159246726Sattilio		ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
160246726Sattilio		ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
161246726Sattilio	}
162246726Sattilio	return (ret);
163226873Sattilio}
164226873Sattilio
165226645Sattilio/*
166246726Sattilio * Get the root node for a radix tree.
167226873Sattilio */
168246726Sattiliostatic __inline struct vm_radix_node *
169246726Sattiliovm_radix_getroot(struct vm_radix *rtree)
170226873Sattilio{
171226873Sattilio
172246726Sattilio	return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
173226873Sattilio}
174226873Sattilio
175226873Sattilio/*
176246726Sattilio * Set the root node for a radix tree.
177226645Sattilio */
178246726Sattiliostatic __inline void
179246726Sattiliovm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
180226645Sattilio{
181226645Sattilio
182246726Sattilio	rtree->rt_root = (uintptr_t)rnode;
183226645Sattilio}
184226645Sattilio
185226645Sattilio/*
186246726Sattilio * Returns the associated page extracted from rnode if available,
187246726Sattilio * NULL otherwise.
188226645Sattilio */
189246726Sattiliostatic __inline vm_page_t
190246726Sattiliovm_radix_node_page(struct vm_radix_node *rnode)
191226645Sattilio{
192226645Sattilio
193246726Sattilio	return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
194246726Sattilio	    (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
195226645Sattilio}
196226645Sattilio
197226645Sattilio/*
198246726Sattilio * Adds the page as a child of provided node.
199226645Sattilio */
200246726Sattiliostatic __inline void
201246726Sattiliovm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
202246726Sattilio    vm_page_t page)
203226645Sattilio{
204246726Sattilio	int slot;
205226645Sattilio
206246726Sattilio	slot = vm_radix_slot(index, clev);
207246726Sattilio	rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
208226645Sattilio}
209226645Sattilio
210233034Sattilio/*
211246726Sattilio * Returns the slot where two keys differ.
212246726Sattilio * It cannot accept 2 equal keys.
213233034Sattilio */
214246726Sattiliostatic __inline uint16_t
215246726Sattiliovm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
216226873Sattilio{
217246726Sattilio	uint16_t clev;
218226873Sattilio
219246726Sattilio	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
220246726Sattilio	    __func__, (uintmax_t)index1));
221233034Sattilio
222246726Sattilio	index1 ^= index2;
223246726Sattilio	for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++)
224246726Sattilio		if (vm_radix_slot(index1, clev))
225246726Sattilio			return (clev);
226246726Sattilio	panic("%s: it might have not reached this point", __func__);
227246726Sattilio	return (0);
228226873Sattilio}
229226873Sattilio
230226645Sattilio/*
231246726Sattilio * Returns TRUE if it can be determined that key does not belong to the
232246726Sattilio * specified rnode. FALSE otherwise.
233226876Sjeff */
234246726Sattiliostatic __inline boolean_t
235246726Sattiliovm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
236226876Sjeff{
237226876Sjeff
238246726Sattilio	if (rnode->rn_clev > 0) {
239246726Sattilio		idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
240246726Sattilio		idx -= rnode->rn_owner;
241246726Sattilio		if (idx != 0)
242246726Sattilio			return (TRUE);
243246726Sattilio	}
244246726Sattilio	return (FALSE);
245226876Sjeff}
246226876Sjeff
247226876Sjeff/*
248246726Sattilio * Adjusts the idx key to the first upper level available, based on a valid
249246726Sattilio * initial level and map of available levels.
250246726Sattilio * Returns a value bigger than 0 to signal that there are not valid levels
251246726Sattilio * available.
252226876Sjeff */
253246726Sattiliostatic __inline int
254246726Sattiliovm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
255226876Sjeff{
256246726Sattilio	vm_pindex_t wrapidx;
257226876Sjeff
258246726Sattilio	for (; levels[ilev] == FALSE ||
259246726Sattilio	    vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
260246726Sattilio		if (ilev == 0)
261246726Sattilio			break;
262246726Sattilio	KASSERT(ilev > 0 || levels[0] == TRUE,
263246726Sattilio	    ("%s: levels back-scanning problem", __func__));
264246726Sattilio	if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
265246726Sattilio		return (1);
266246726Sattilio	wrapidx = *idx;
267246726Sattilio	*idx = vm_radix_trimkey(*idx, ilev);
268246726Sattilio	*idx += VM_RADIX_UNITLEVEL(ilev);
269247232Sattilio	return (*idx < wrapidx);
270226876Sjeff}
271226876Sjeff
272246726Sattilio/*
273246726Sattilio * Adjusts the idx key to the first lower level available, based on a valid
274246726Sattilio * initial level and map of available levels.
275246726Sattilio * Returns a value bigger than 0 to signal that there are not valid levels
276246726Sattilio * available.
277246726Sattilio */
278246726Sattiliostatic __inline int
279246726Sattiliovm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
280226930Sjeff{
281246726Sattilio	vm_pindex_t wrapidx;
282226930Sjeff
283246726Sattilio	for (; levels[ilev] == FALSE ||
284246726Sattilio	    vm_radix_slot(*idx, ilev) == 0; ilev--)
285246726Sattilio		if (ilev == 0)
286246726Sattilio			break;
287246726Sattilio	KASSERT(ilev > 0 || levels[0] == TRUE,
288246726Sattilio	    ("%s: levels back-scanning problem", __func__));
289246726Sattilio	if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
290246726Sattilio		return (1);
291246726Sattilio	wrapidx = *idx;
292246726Sattilio	*idx = vm_radix_trimkey(*idx, ilev);
293246726Sattilio	*idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
294246726Sattilio	*idx -= VM_RADIX_UNITLEVEL(ilev);
295247233Sattilio	return (*idx > wrapidx);
296226930Sjeff}
297226930Sjeff
298246726Sattilio/*
299246726Sattilio * Internal handwork for vm_radix_reclaim_allonodes() primitive.
300246726Sattilio * This function is recrusive.
301246726Sattilio */
302226980Sattiliostatic void
303246726Sattiliovm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
304226980Sattilio{
305226980Sattilio	int slot;
306226980Sattilio
307226980Sattilio	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
308226980Sattilio		if (rnode->rn_child[slot] == NULL)
309226980Sattilio			continue;
310246726Sattilio		if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
311246726Sattilio			vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
312226980Sattilio		rnode->rn_count--;
313226980Sattilio	}
314226980Sattilio	vm_radix_node_put(rnode);
315226980Sattilio}
316226980Sattilio
317246836Sattilio#ifdef INVARIANTS
318226876Sjeff/*
319246836Sattilio * Radix node zone destructor.
320246836Sattilio */
321246836Sattiliostatic void
322246836Sattiliovm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
323246836Sattilio{
324246836Sattilio	struct vm_radix_node *rnode;
325246836Sattilio
326246836Sattilio	rnode = mem;
327246836Sattilio	KASSERT(rnode->rn_count == 0,
328246836Sattilio	    ("vm_radix_node_put: Freeing node %p with %d children\n", mem,
329246836Sattilio	    rnode->rn_count));
330246836Sattilio}
331246836Sattilio#endif
332246836Sattilio
333246836Sattilio/*
334246726Sattilio * Pre-allocate intermediate nodes from the UMA slab zone.
335246726Sattilio */
336246794Sattiliostatic void
337247742Sattiliovm_radix_prealloc(void *arg __unused)
338246726Sattilio{
339246726Sattilio
340247742Sattilio	if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
341247742Sattilio		panic("%s: unable to create new zone", __func__);
342247742Sattilio	uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
343247742Sattilio}
344247742SattilioSYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
345247742Sattilio    NULL);
346247742Sattilio
347247742Sattilio/*
348247742Sattilio * Initialize the UMA slab zone.
349247742Sattilio * Until vm_radix_prealloc() is called, the zone will be served by the
350247742Sattilio * UMA boot-time pre-allocated pool of pages.
351247742Sattilio */
352247742Sattiliovoid
353247742Sattiliovm_radix_init(void)
354247742Sattilio{
355247742Sattilio
356246726Sattilio	vm_radix_node_zone = uma_zcreate("RADIX NODE",
357246726Sattilio	    sizeof(struct vm_radix_node), NULL,
358246726Sattilio#ifdef INVARIANTS
359246726Sattilio	    vm_radix_node_zone_dtor,
360246726Sattilio#else
361246726Sattilio	    NULL,
362246726Sattilio#endif
363246726Sattilio	    NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE);
364246726Sattilio}
365246726Sattilio
366246726Sattilio/*
367246726Sattilio * Inserts the key-value pair in to the trie.
368226645Sattilio * Panics if the key already exists.
369226645Sattilio */
370246430Sattiliovoid
371246430Sattiliovm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
372226645Sattilio{
373246726Sattilio	vm_pindex_t newind;
374246726Sattilio	struct vm_radix_node *rnode, *tmp, *tmp2;
375246726Sattilio	vm_page_t m;
376236763Sattilio	int slot;
377246726Sattilio	uint16_t clev;
378226645Sattilio
379226645Sattilio	/*
380246726Sattilio	 * The owner of record for root is not really important because it
381246726Sattilio	 * will never be used.
382226645Sattilio	 */
383246726Sattilio	rnode = vm_radix_getroot(rtree);
384246726Sattilio	if (rnode == NULL) {
385246726Sattilio		rnode = vm_radix_node_get(0, 1, 0);
386246726Sattilio		vm_radix_setroot(rtree, rnode);
387246726Sattilio		vm_radix_addpage(rnode, index, 0, page);
388246726Sattilio		return;
389246726Sattilio	}
390246795Sattilio	while (rnode != NULL) {
391246726Sattilio		if (vm_radix_keybarr(rnode, index) == TRUE)
392246726Sattilio			break;
393246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
394246726Sattilio		m = vm_radix_node_page(rnode->rn_child[slot]);
395246726Sattilio		if (m != NULL) {
396246726Sattilio			if (m->pindex == index)
397246726Sattilio				panic("%s: key %jx is already present",
398246726Sattilio				    __func__, (uintmax_t)index);
399246726Sattilio			clev = vm_radix_keydiff(m->pindex, index);
400246726Sattilio			tmp = vm_radix_node_get(vm_radix_trimkey(index,
401246726Sattilio			    clev - 1), 2, clev);
402246726Sattilio			rnode->rn_child[slot] = tmp;
403246726Sattilio			vm_radix_addpage(tmp, index, clev, page);
404246726Sattilio			vm_radix_addpage(tmp, m->pindex, clev, m);
405246726Sattilio			return;
406226645Sattilio		}
407226645Sattilio		if (rnode->rn_child[slot] == NULL) {
408236760Sattilio			rnode->rn_count++;
409246726Sattilio			vm_radix_addpage(rnode, index, rnode->rn_clev, page);
410246726Sattilio			return;
411246726Sattilio		}
412226645Sattilio		rnode = rnode->rn_child[slot];
413226645Sattilio	}
414246726Sattilio	if (rnode == NULL)
415246726Sattilio		panic("%s: path traversal ended unexpectedly", __func__);
416226645Sattilio
417246726Sattilio	/*
418246726Sattilio	 * Scan the trie from the top and find the parent to insert
419246726Sattilio	 * the new object.
420246726Sattilio	 */
421246726Sattilio	newind = rnode->rn_owner;
422246726Sattilio	clev = vm_radix_keydiff(newind, index);
423246726Sattilio	slot = VM_RADIX_COUNT;
424246726Sattilio	for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
425246726Sattilio		KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
426246726Sattilio		    __func__));
427246726Sattilio		KASSERT(clev >= rnode->rn_clev,
428246726Sattilio		    ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
429246726Sattilio		    __func__, clev, rnode->rn_clev));
430246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
431246726Sattilio		tmp = rnode->rn_child[slot];
432246726Sattilio		KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
433246726Sattilio		    ("%s: unexpected lookup interruption", __func__));
434246726Sattilio		if (tmp->rn_clev > clev)
435246726Sattilio			break;
436246726Sattilio	}
437246726Sattilio	KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
438246726Sattilio	    ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
439246726Sattilio	    __func__, (void *)rnode, (void *)tmp, slot));
440246726Sattilio
441246726Sattilio	/*
442246726Sattilio	 * A new node is needed because the right insertion level is reached.
443246726Sattilio	 * Setup the new intermediate node and add the 2 children: the
444246726Sattilio	 * new object and the older edge.
445246726Sattilio	 */
446246726Sattilio	tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2,
447246726Sattilio	    clev);
448246726Sattilio	rnode->rn_child[slot] = tmp2;
449246726Sattilio	vm_radix_addpage(tmp2, index, clev, page);
450246726Sattilio	slot = vm_radix_slot(newind, clev);
451246726Sattilio	tmp2->rn_child[slot] = tmp;
452226645Sattilio}
453226645Sattilio
454226645Sattilio/*
455226645Sattilio * Returns the value stored at the index.  If the index is not present
456226645Sattilio * NULL is returned.
457226645Sattilio */
458246430Sattiliovm_page_t
459238245Sattiliovm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
460226645Sattilio{
461226645Sattilio	struct vm_radix_node *rnode;
462246726Sattilio	vm_page_t m;
463226645Sattilio	int slot;
464226645Sattilio
465246726Sattilio	rnode = vm_radix_getroot(rtree);
466246795Sattilio	while (rnode != NULL) {
467246726Sattilio		if (vm_radix_keybarr(rnode, index) == TRUE)
468246726Sattilio			return (NULL);
469246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
470226645Sattilio		rnode = rnode->rn_child[slot];
471246726Sattilio		m = vm_radix_node_page(rnode);
472246726Sattilio		if (m != NULL) {
473246726Sattilio			if (m->pindex == index)
474246726Sattilio				return (m);
475246726Sattilio			else
476246726Sattilio				return (NULL);
477246726Sattilio		}
478226645Sattilio	}
479246726Sattilio	return (NULL);
480226645Sattilio}
481226645Sattilio
482226645Sattilio/*
483246726Sattilio * Look up any entry at a position bigger than or equal to index.
484226645Sattilio */
485246726Sattiliovm_page_t
486246726Sattiliovm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
487226645Sattilio{
488246726Sattilio	vm_pindex_t inc;
489246726Sattilio	vm_page_t m;
490226645Sattilio	struct vm_radix_node *rnode;
491226645Sattilio	int slot;
492246726Sattilio	uint16_t difflev;
493246726Sattilio	boolean_t maplevels[VM_RADIX_LIMIT + 1];
494246726Sattilio#ifdef INVARIANTS
495246726Sattilio	int loops = 0;
496246726Sattilio#endif
497226645Sattilio
498226876Sjeffrestart:
499246726Sattilio	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
500246726Sattilio	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
501246726Sattilio		maplevels[difflev] = FALSE;
502246726Sattilio	rnode = vm_radix_getroot(rtree);
503246795Sattilio	while (rnode != NULL) {
504246726Sattilio		maplevels[rnode->rn_clev] = TRUE;
505246726Sattilio
506246726Sattilio		/*
507246726Sattilio		 * If the keys differ before the current bisection node
508246726Sattilio		 * the search key might rollback to the earlierst
509246726Sattilio		 * available bisection node, or to the smaller value
510246726Sattilio		 * in the current domain (if the owner is bigger than the
511246726Sattilio		 * search key).
512246726Sattilio		 * The search for a valid bisection node is helped through
513246726Sattilio		 * the use of maplevels array which should bring immediately
514246726Sattilio		 * a lower useful level, skipping holes.
515246726Sattilio		 */
516246726Sattilio		if (vm_radix_keybarr(rnode, index) == TRUE) {
517246726Sattilio			difflev = vm_radix_keydiff(index, rnode->rn_owner);
518246726Sattilio			if (index > rnode->rn_owner) {
519246726Sattilio				if (vm_radix_addlev(&index, maplevels,
520246726Sattilio				    difflev) > 0)
521246726Sattilio					break;
522246726Sattilio			} else
523246726Sattilio				index = vm_radix_trimkey(rnode->rn_owner,
524246726Sattilio				    difflev);
525246726Sattilio			goto restart;
526246726Sattilio		}
527246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
528246726Sattilio		m = vm_radix_node_page(rnode->rn_child[slot]);
529246726Sattilio		if (m != NULL && m->pindex >= index)
530246726Sattilio			return (m);
531246726Sattilio		if (rnode->rn_child[slot] != NULL && m == NULL) {
532226930Sjeff			rnode = rnode->rn_child[slot];
533226930Sjeff			continue;
534226930Sjeff		}
535246726Sattilio
536226645Sattilio		/*
537246726Sattilio		 * Look for an available edge or page within the current
538246726Sattilio		 * bisection node.
539246726Sattilio		 */
540246726Sattilio                if (slot < (VM_RADIX_COUNT - 1)) {
541246726Sattilio			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
542246726Sattilio			index = vm_radix_trimkey(index, rnode->rn_clev);
543246726Sattilio			index += inc;
544246726Sattilio			slot++;
545246726Sattilio			for (;; index += inc, slot++) {
546246726Sattilio				m = vm_radix_node_page(rnode->rn_child[slot]);
547246726Sattilio				if (m != NULL && m->pindex >= index)
548246726Sattilio					return (m);
549246726Sattilio				if ((rnode->rn_child[slot] != NULL &&
550246726Sattilio				    m == NULL) || slot == (VM_RADIX_COUNT - 1))
551246726Sattilio					break;
552246726Sattilio			}
553246726Sattilio		}
554230750Sattilio
555246726Sattilio		/*
556246726Sattilio		 * If a valid page or edge, bigger than the search slot, is
557246726Sattilio		 * found in the traversal, skip to the next higher-level key.
558246726Sattilio		 */
559246726Sattilio		if (slot == (VM_RADIX_COUNT - 1) &&
560246726Sattilio		    (rnode->rn_child[slot] == NULL || m != NULL)) {
561246726Sattilio			if (rnode->rn_clev == 0  || vm_radix_addlev(&index,
562246726Sattilio			    maplevels, rnode->rn_clev - 1) > 0)
563226930Sjeff				break;
564246726Sattilio			goto restart;
565226645Sattilio		}
566246726Sattilio		rnode = rnode->rn_child[slot];
567226645Sattilio	}
568245254Sattilio	return (NULL);
569226645Sattilio}
570226645Sattilio
571226645Sattilio/*
572226646Sjeff * Look up any entry at a position less than or equal to index.
573226645Sattilio */
574246430Sattiliovm_page_t
575238245Sattiliovm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
576226645Sattilio{
577246726Sattilio	vm_pindex_t inc;
578246726Sattilio	vm_page_t m;
579226646Sjeff	struct vm_radix_node *rnode;
580226646Sjeff	int slot;
581246726Sattilio	uint16_t difflev;
582246726Sattilio	boolean_t maplevels[VM_RADIX_LIMIT + 1];
583246726Sattilio#ifdef INVARIANTS
584246726Sattilio	int loops = 0;
585246726Sattilio#endif
586226645Sattilio
587226876Sjeffrestart:
588246726Sattilio	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
589246726Sattilio	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
590246726Sattilio		maplevels[difflev] = FALSE;
591246726Sattilio	rnode = vm_radix_getroot(rtree);
592246795Sattilio	while (rnode != NULL) {
593246726Sattilio		maplevels[rnode->rn_clev] = TRUE;
594246726Sattilio
595226646Sjeff		/*
596246726Sattilio		 * If the keys differ before the current bisection node
597246726Sattilio		 * the search key might rollback to the earlierst
598246726Sattilio		 * available bisection node, or to the higher value
599246726Sattilio		 * in the current domain (if the owner is smaller than the
600246726Sattilio		 * search key).
601246726Sattilio		 * The search for a valid bisection node is helped through
602246726Sattilio		 * the use of maplevels array which should bring immediately
603246726Sattilio		 * a lower useful level, skipping holes.
604226646Sjeff		 */
605246726Sattilio		if (vm_radix_keybarr(rnode, index) == TRUE) {
606246726Sattilio			difflev = vm_radix_keydiff(index, rnode->rn_owner);
607246726Sattilio			if (index > rnode->rn_owner) {
608246726Sattilio				index = vm_radix_trimkey(rnode->rn_owner,
609246726Sattilio				    difflev);
610246726Sattilio				index |= VM_RADIX_UNITLEVEL(difflev) - 1;
611246726Sattilio			} else if (vm_radix_declev(&index, maplevels,
612246726Sattilio			    difflev) > 0)
613246726Sattilio				break;
614246726Sattilio			goto restart;
615246726Sattilio		}
616246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
617246726Sattilio		m = vm_radix_node_page(rnode->rn_child[slot]);
618246726Sattilio		if (m != NULL && m->pindex <= index)
619246726Sattilio			return (m);
620246726Sattilio		if (rnode->rn_child[slot] != NULL && m == NULL) {
621246726Sattilio			rnode = rnode->rn_child[slot];
622246726Sattilio			continue;
623246726Sattilio		}
624246726Sattilio
625246726Sattilio		/*
626246726Sattilio		 * Look for an available edge or page within the current
627246726Sattilio		 * bisection node.
628246726Sattilio		 */
629246726Sattilio		if (slot > 0) {
630246726Sattilio			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
631246726Sattilio			index = vm_radix_trimkey(index, rnode->rn_clev);
632246726Sattilio			index |= inc - 1;
633226646Sjeff			index -= inc;
634226646Sjeff			slot--;
635246726Sattilio			for (;; index -= inc, slot--) {
636246726Sattilio				m = vm_radix_node_page(rnode->rn_child[slot]);
637246726Sattilio				if (m != NULL && m->pindex <= index)
638246726Sattilio					return (m);
639246726Sattilio				if ((rnode->rn_child[slot] != NULL &&
640246726Sattilio				    m == NULL) || slot == 0)
641226646Sjeff					break;
642226646Sjeff			}
643226646Sjeff		}
644246726Sattilio
645246726Sattilio		/*
646246730Sattilio		 * If a valid page or edge, smaller than the search slot, is
647246726Sattilio		 * found in the traversal, skip to the next higher-level key.
648246726Sattilio		 */
649246726Sattilio		if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
650246726Sattilio			if (rnode->rn_clev == 0 || vm_radix_declev(&index,
651246726Sattilio			    maplevels, rnode->rn_clev - 1) > 0)
652246726Sattilio				break;
653246726Sattilio			goto restart;
654226646Sjeff		}
655246726Sattilio		rnode = rnode->rn_child[slot];
656226646Sjeff	}
657226645Sattilio	return (NULL);
658226645Sattilio}
659226645Sattilio
660226645Sattilio/*
661246726Sattilio * Remove the specified index from the tree.
662246726Sattilio * Panics if the key is not present.
663226645Sattilio */
664236763Sattiliovoid
665238245Sattiliovm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
666226645Sattilio{
667246726Sattilio	struct vm_radix_node *rnode, *parent;
668246726Sattilio	vm_page_t m;
669246726Sattilio	int i, slot;
670226645Sattilio
671246726Sattilio	parent = NULL;
672246726Sattilio	rnode = vm_radix_getroot(rtree);
673236760Sattilio	for (;;) {
674246726Sattilio		if (rnode == NULL)
675246726Sattilio			panic("vm_radix_remove: impossible to locate the key");
676246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
677246726Sattilio		m = vm_radix_node_page(rnode->rn_child[slot]);
678246726Sattilio		if (m != NULL && m->pindex == index) {
679246726Sattilio			rnode->rn_child[slot] = NULL;
680246726Sattilio			rnode->rn_count--;
681246726Sattilio			if (rnode->rn_count > 1)
682246726Sattilio				break;
683246726Sattilio			if (parent == NULL) {
684246726Sattilio				if (rnode->rn_count == 0) {
685246726Sattilio					vm_radix_node_put(rnode);
686246726Sattilio					vm_radix_setroot(rtree, NULL);
687246726Sattilio				}
688246726Sattilio				break;
689246726Sattilio			}
690246726Sattilio			for (i = 0; i < VM_RADIX_COUNT; i++)
691246726Sattilio				if (rnode->rn_child[i] != NULL)
692246726Sattilio					break;
693246726Sattilio			KASSERT(i != VM_RADIX_COUNT,
694246726Sattilio			    ("%s: invalid node configuration", __func__));
695246726Sattilio			slot = vm_radix_slot(index, parent->rn_clev);
696246726Sattilio			KASSERT(parent->rn_child[slot] == rnode,
697246726Sattilio			    ("%s: invalid child value", __func__));
698246726Sattilio			parent->rn_child[slot] = rnode->rn_child[i];
699246726Sattilio			rnode->rn_count--;
700246726Sattilio			rnode->rn_child[i] = NULL;
701246726Sattilio			vm_radix_node_put(rnode);
702236760Sattilio			break;
703236760Sattilio		}
704246726Sattilio		if (m != NULL && m->pindex != index)
705246726Sattilio			panic("%s: invalid key found", __func__);
706246726Sattilio		parent = rnode;
707246726Sattilio		rnode = rnode->rn_child[slot];
708236760Sattilio	}
709226645Sattilio}
710226645Sattilio
711226645Sattilio/*
712226980Sattilio * Remove and free all the nodes from the radix tree.
713226980Sattilio * This function is recrusive but there is a tight control on it as the
714226980Sattilio * maximum depth of the tree is fixed.
715226980Sattilio */
716226980Sattiliovoid
717226980Sattiliovm_radix_reclaim_allnodes(struct vm_radix *rtree)
718226980Sattilio{
719226980Sattilio	struct vm_radix_node *root;
720226980Sattilio
721246726Sattilio	root = vm_radix_getroot(rtree);
722246726Sattilio	if (root == NULL)
723226980Sattilio		return;
724246726Sattilio	vm_radix_reclaim_allnodes_int(root);
725246726Sattilio	vm_radix_setroot(rtree, NULL);
726226980Sattilio}
727246726Sattilio
728246726Sattilio#ifdef DDB
729246726Sattilio/*
730246837Sattilio * Show details about the given radix node.
731246726Sattilio */
732246726SattilioDB_SHOW_COMMAND(radixnode, db_show_radixnode)
733246726Sattilio{
734246726Sattilio	struct vm_radix_node *rnode;
735246726Sattilio	int i;
736246726Sattilio
737246726Sattilio        if (!have_addr)
738246726Sattilio                return;
739246726Sattilio	rnode = (struct vm_radix_node *)addr;
740246726Sattilio	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
741246726Sattilio	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
742246726Sattilio	    rnode->rn_clev);
743246726Sattilio	for (i = 0; i < VM_RADIX_COUNT; i++)
744246726Sattilio		if (rnode->rn_child[i] != NULL)
745246726Sattilio			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
746246726Sattilio			    i, (void *)rnode->rn_child[i],
747246726Sattilio			    (void *)vm_radix_node_page(rnode->rn_child[i]),
748246726Sattilio			    rnode->rn_clev);
749246726Sattilio}
750246726Sattilio#endif /* DDB */
751