vm_radix.c revision 249502
1226645Sattilio/*
2246726Sattilio * Copyright (c) 2013 EMC Corp.
3226930Sjeff * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4226645Sattilio * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5226645Sattilio * All rights reserved.
6226645Sattilio *
7226645Sattilio * Redistribution and use in source and binary forms, with or without
8226645Sattilio * modification, are permitted provided that the following conditions
9226645Sattilio * are met:
10226645Sattilio * 1. Redistributions of source code must retain the above copyright
11226645Sattilio *    notice, this list of conditions and the following disclaimer.
12226645Sattilio * 2. Redistributions in binary form must reproduce the above copyright
13226645Sattilio *    notice, this list of conditions and the following disclaimer in the
14226645Sattilio *    documentation and/or other materials provided with the distribution.
15226645Sattilio *
16226645Sattilio * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17226645Sattilio * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18226645Sattilio * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19226645Sattilio * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20226645Sattilio * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21226645Sattilio * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22226645Sattilio * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23226645Sattilio * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24226645Sattilio * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25226645Sattilio * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26226645Sattilio * SUCH DAMAGE.
27226645Sattilio *
28226645Sattilio */
29226645Sattilio
30226645Sattilio/*
31246726Sattilio * Path-compressed radix trie implementation.
32246726Sattilio * The following code is not generalized into a general purpose library
33246726Sattilio * because there are way too many parameters embedded that should really
34246726Sattilio * be decided by the library consumers.  At the same time, consumers
35246726Sattilio * of this code must achieve highest possible performance.
36246726Sattilio *
37246726Sattilio * The implementation takes into account the following rationale:
38248424Sattilio * - Size of the nodes should be as small as possible but still big enough
39248424Sattilio *   to avoid a large maximum depth for the trie.  This is a balance
40248424Sattilio *   between the necessity to not wire too much physical memory for the nodes
41248424Sattilio *   and the necessity to avoid too much cache pollution during the trie
42248424Sattilio *   operations.
43248424Sattilio * - There is not a huge bias toward the number of lookup operations over
44248424Sattilio *   the number of insert and remove operations.  This basically implies
45248424Sattilio *   that optimizations supposedly helping one operation but hurting the
46248424Sattilio *   other might be carefully evaluated.
47248424Sattilio * - On average not many nodes are expected to be fully populated, hence
48248424Sattilio *   level compression may just complicate things.
49226645Sattilio */
50226645Sattilio
51226645Sattilio#include <sys/cdefs.h>
52248448Sattilio__FBSDID("$FreeBSD: head/sys/vm/vm_radix.c 249502 2013-04-15 06:12:00Z alc $");
53226645Sattilio
54246726Sattilio#include "opt_ddb.h"
55246726Sattilio
56226645Sattilio#include <sys/param.h>
57226645Sattilio#include <sys/systm.h>
58226645Sattilio#include <sys/kernel.h>
59246840Sattilio#include <sys/vmmeter.h>
60246726Sattilio
61226873Sattilio#include <vm/uma.h>
62226645Sattilio#include <vm/vm.h>
63226876Sjeff#include <vm/vm_param.h>
64226873Sattilio#include <vm/vm_page.h>
65226645Sattilio#include <vm/vm_radix.h>
66226645Sattilio
67246726Sattilio#ifdef DDB
68246726Sattilio#include <ddb/ddb.h>
69246726Sattilio#endif
70226645Sattilio
71233034Sattilio/*
72247771Salc * These widths should allow the pointers to a node's children to fit within
73247771Salc * a single cache line.  The extra levels from a narrow width should not be
74247771Salc * a problem thanks to path compression.
75233034Sattilio */
76246726Sattilio#ifdef __LP64__
77246726Sattilio#define	VM_RADIX_WIDTH	4
78233034Sattilio#else
79246726Sattilio#define	VM_RADIX_WIDTH	3
80233034Sattilio#endif
81233034Sattilio
82232326Sattilio#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
83232326Sattilio#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
84246726Sattilio#define	VM_RADIX_LIMIT							\
85246726Sattilio	(howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
86232326Sattilio
87232326Sattilio/* Flag bits stored in node pointers. */
88246726Sattilio#define	VM_RADIX_ISLEAF	0x1
89246726Sattilio#define	VM_RADIX_FLAGS	0x1
90246726Sattilio#define	VM_RADIX_PAD	VM_RADIX_FLAGS
91232326Sattilio
92246726Sattilio/* Returns one unit associated with specified level. */
93246726Sattilio#define	VM_RADIX_UNITLEVEL(lev)						\
94246726Sattilio	((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
95232326Sattilio
96232326Sattiliostruct vm_radix_node {
97246726Sattilio	vm_pindex_t	 rn_owner;			/* Owner of record. */
98246726Sattilio	uint16_t	 rn_count;			/* Valid children. */
99246726Sattilio	uint16_t	 rn_clev;			/* Current level. */
100249221Salc	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
101232326Sattilio};
102232326Sattilio
103226873Sattiliostatic uma_zone_t vm_radix_node_zone;
104226645Sattilio
105246794Sattilio/*
106247771Salc * Allocate a radix node.  Pre-allocation should ensure that the request
107247771Salc * will always be satisfied.
108246726Sattilio */
109246726Sattiliostatic __inline struct vm_radix_node *
110246726Sattiliovm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
111226873Sattilio{
112246726Sattilio	struct vm_radix_node *rnode;
113226645Sattilio
114248431Salc	rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);
115226873Sattilio
116247742Sattilio	/*
117247771Salc	 * The required number of nodes should already be pre-allocated
118247771Salc	 * by vm_radix_prealloc().  However, UMA can hold a few nodes
119247771Salc	 * in per-CPU buckets, which will not be accessible by the
120247771Salc	 * current CPU.  Thus, the allocation could return NULL when
121247771Salc	 * the pre-allocated pool is close to exhaustion.  Anyway,
122247771Salc	 * in practice this should never occur because a new node
123247771Salc	 * is not always required for insert.  Thus, the pre-allocated
124247771Salc	 * pool should have some extra pages that prevent this from
125247771Salc	 * becoming a problem.
126247742Sattilio	 */
127247742Sattilio	if (rnode == NULL)
128247742Sattilio		panic("%s: uma_zalloc() returned NULL for a new node",
129247742Sattilio		    __func__);
130246726Sattilio	rnode->rn_owner = owner;
131246726Sattilio	rnode->rn_count = count;
132246726Sattilio	rnode->rn_clev = clevel;
133246726Sattilio	return (rnode);
134246726Sattilio}
135226873Sattilio
136246726Sattilio/*
137246726Sattilio * Free radix node.
138246726Sattilio */
139246726Sattiliostatic __inline void
140246726Sattiliovm_radix_node_put(struct vm_radix_node *rnode)
141246726Sattilio{
142246726Sattilio
143246726Sattilio	uma_zfree(vm_radix_node_zone, rnode);
144226873Sattilio}
145226873Sattilio
146246726Sattilio/*
147246726Sattilio * Return the position in the array for a given level.
148246726Sattilio */
149246726Sattiliostatic __inline int
150246726Sattiliovm_radix_slot(vm_pindex_t index, uint16_t level)
151226873Sattilio{
152226873Sattilio
153246726Sattilio	return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
154246726Sattilio	    VM_RADIX_MASK);
155226873Sattilio}
156226873Sattilio
157246726Sattilio/* Trims the key after the specified level. */
158246726Sattiliostatic __inline vm_pindex_t
159246726Sattiliovm_radix_trimkey(vm_pindex_t index, uint16_t level)
160226873Sattilio{
161246726Sattilio	vm_pindex_t ret;
162226873Sattilio
163246726Sattilio	ret = index;
164246726Sattilio	if (level < VM_RADIX_LIMIT) {
165246726Sattilio		ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
166246726Sattilio		ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
167246726Sattilio	}
168246726Sattilio	return (ret);
169226873Sattilio}
170226873Sattilio
171226645Sattilio/*
172246726Sattilio * Get the root node for a radix tree.
173226873Sattilio */
174246726Sattiliostatic __inline struct vm_radix_node *
175246726Sattiliovm_radix_getroot(struct vm_radix *rtree)
176226873Sattilio{
177226873Sattilio
178249221Salc	return ((struct vm_radix_node *)rtree->rt_root);
179226873Sattilio}
180226873Sattilio
181226873Sattilio/*
182246726Sattilio * Set the root node for a radix tree.
183226645Sattilio */
184246726Sattiliostatic __inline void
185246726Sattiliovm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
186226645Sattilio{
187226645Sattilio
188246726Sattilio	rtree->rt_root = (uintptr_t)rnode;
189226645Sattilio}
190226645Sattilio
191226645Sattilio/*
192248728Salc * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
193248728Salc */
194248728Salcstatic __inline boolean_t
195248728Salcvm_radix_isleaf(struct vm_radix_node *rnode)
196248728Salc{
197248728Salc
198248728Salc	return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
199248728Salc}
200248728Salc
201248728Salc/*
202249038Salc * Returns the associated page extracted from rnode.
203226645Sattilio */
204246726Sattiliostatic __inline vm_page_t
205249038Salcvm_radix_topage(struct vm_radix_node *rnode)
206226645Sattilio{
207226645Sattilio
208249038Salc	return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
209226645Sattilio}
210226645Sattilio
211226645Sattilio/*
212247771Salc * Adds the page as a child of the provided node.
213226645Sattilio */
214246726Sattiliostatic __inline void
215246726Sattiliovm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
216246726Sattilio    vm_page_t page)
217226645Sattilio{
218246726Sattilio	int slot;
219226645Sattilio
220246726Sattilio	slot = vm_radix_slot(index, clev);
221246726Sattilio	rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
222226645Sattilio}
223226645Sattilio
224233034Sattilio/*
225246726Sattilio * Returns the slot where two keys differ.
226246726Sattilio * It cannot accept 2 equal keys.
227233034Sattilio */
228246726Sattiliostatic __inline uint16_t
229246726Sattiliovm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
230226873Sattilio{
231246726Sattilio	uint16_t clev;
232226873Sattilio
233246726Sattilio	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
234246726Sattilio	    __func__, (uintmax_t)index1));
235233034Sattilio
236246726Sattilio	index1 ^= index2;
237246726Sattilio	for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++)
238246726Sattilio		if (vm_radix_slot(index1, clev))
239246726Sattilio			return (clev);
240247969Sattilio	panic("%s: cannot reach this point", __func__);
241246726Sattilio	return (0);
242226873Sattilio}
243226873Sattilio
244226645Sattilio/*
245246726Sattilio * Returns TRUE if it can be determined that key does not belong to the
246247771Salc * specified rnode.  Otherwise, returns FALSE.
247226876Sjeff */
248246726Sattiliostatic __inline boolean_t
249246726Sattiliovm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
250226876Sjeff{
251226876Sjeff
252246726Sattilio	if (rnode->rn_clev > 0) {
253246726Sattilio		idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
254249211Salc		return (idx != rnode->rn_owner);
255246726Sattilio	}
256246726Sattilio	return (FALSE);
257226876Sjeff}
258226876Sjeff
259226876Sjeff/*
260246726Sattilio * Adjusts the idx key to the first upper level available, based on a valid
261246726Sattilio * initial level and map of available levels.
262246726Sattilio * Returns a value bigger than 0 to signal that there are not valid levels
263246726Sattilio * available.
264226876Sjeff */
265246726Sattiliostatic __inline int
266246726Sattiliovm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
267226876Sjeff{
268246726Sattilio	vm_pindex_t wrapidx;
269226876Sjeff
270246726Sattilio	for (; levels[ilev] == FALSE ||
271246726Sattilio	    vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
272246726Sattilio		if (ilev == 0)
273249427Salc			return (1);
274246726Sattilio	wrapidx = *idx;
275246726Sattilio	*idx = vm_radix_trimkey(*idx, ilev);
276246726Sattilio	*idx += VM_RADIX_UNITLEVEL(ilev);
277247232Sattilio	return (*idx < wrapidx);
278226876Sjeff}
279226876Sjeff
280246726Sattilio/*
281246726Sattilio * Adjusts the idx key to the first lower level available, based on a valid
282246726Sattilio * initial level and map of available levels.
283246726Sattilio * Returns a value bigger than 0 to signal that there are not valid levels
284246726Sattilio * available.
285246726Sattilio */
286246726Sattiliostatic __inline int
287246726Sattiliovm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
288226930Sjeff{
289246726Sattilio	vm_pindex_t wrapidx;
290226930Sjeff
291246726Sattilio	for (; levels[ilev] == FALSE ||
292246726Sattilio	    vm_radix_slot(*idx, ilev) == 0; ilev--)
293246726Sattilio		if (ilev == 0)
294249427Salc			return (1);
295246726Sattilio	wrapidx = *idx;
296246726Sattilio	*idx = vm_radix_trimkey(*idx, ilev);
297246726Sattilio	*idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
298246726Sattilio	*idx -= VM_RADIX_UNITLEVEL(ilev);
299247233Sattilio	return (*idx > wrapidx);
300226930Sjeff}
301226930Sjeff
302246726Sattilio/*
303247773Salc * Internal helper for vm_radix_reclaim_allnodes().
304247769Salc * This function is recursive.
305246726Sattilio */
306226980Sattiliostatic void
307246726Sattiliovm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
308226980Sattilio{
309226980Sattilio	int slot;
310226980Sattilio
311248684Salc	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
312248684Salc	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
313248684Salc	for (slot = 0; rnode->rn_count != 0; slot++) {
314226980Sattilio		if (rnode->rn_child[slot] == NULL)
315226980Sattilio			continue;
316248728Salc		if (!vm_radix_isleaf(rnode->rn_child[slot]))
317246726Sattilio			vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
318248431Salc		rnode->rn_child[slot] = NULL;
319226980Sattilio		rnode->rn_count--;
320226980Sattilio	}
321226980Sattilio	vm_radix_node_put(rnode);
322226980Sattilio}
323226980Sattilio
324246836Sattilio#ifdef INVARIANTS
325226876Sjeff/*
326246836Sattilio * Radix node zone destructor.
327246836Sattilio */
328246836Sattiliostatic void
329246836Sattiliovm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
330246836Sattilio{
331246836Sattilio	struct vm_radix_node *rnode;
332248431Salc	int slot;
333246836Sattilio
334246836Sattilio	rnode = mem;
335246836Sattilio	KASSERT(rnode->rn_count == 0,
336248431Salc	    ("vm_radix_node_put: rnode %p has %d children", rnode,
337246836Sattilio	    rnode->rn_count));
338248431Salc	for (slot = 0; slot < VM_RADIX_COUNT; slot++)
339248431Salc		KASSERT(rnode->rn_child[slot] == NULL,
340248431Salc		    ("vm_radix_node_put: rnode %p has a child", rnode));
341246836Sattilio}
342246836Sattilio#endif
343246836Sattilio
344246836Sattilio/*
345248431Salc * Radix node zone initializer.
346248431Salc */
347248431Salcstatic int
348248431Salcvm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
349248431Salc{
350248431Salc	struct vm_radix_node *rnode;
351248431Salc
352248431Salc	rnode = mem;
353248431Salc	memset(rnode->rn_child, 0, sizeof(rnode->rn_child));
354248431Salc	return (0);
355248431Salc}
356248431Salc
357248431Salc/*
358246726Sattilio * Pre-allocate intermediate nodes from the UMA slab zone.
359246726Sattilio */
360246794Sattiliostatic void
361247742Sattiliovm_radix_prealloc(void *arg __unused)
362246726Sattilio{
363246726Sattilio
364247742Sattilio	if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
365247742Sattilio		panic("%s: unable to create new zone", __func__);
366247742Sattilio	uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
367247742Sattilio}
368247742SattilioSYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
369247742Sattilio    NULL);
370247742Sattilio
371247742Sattilio/*
372247742Sattilio * Initialize the UMA slab zone.
373247742Sattilio * Until vm_radix_prealloc() is called, the zone will be served by the
374247742Sattilio * UMA boot-time pre-allocated pool of pages.
375247742Sattilio */
376247742Sattiliovoid
377247742Sattiliovm_radix_init(void)
378247742Sattilio{
379247742Sattilio
380246726Sattilio	vm_radix_node_zone = uma_zcreate("RADIX NODE",
381246726Sattilio	    sizeof(struct vm_radix_node), NULL,
382246726Sattilio#ifdef INVARIANTS
383246726Sattilio	    vm_radix_node_zone_dtor,
384246726Sattilio#else
385246726Sattilio	    NULL,
386246726Sattilio#endif
387248431Salc	    vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM |
388248431Salc	    UMA_ZONE_NOFREE);
389246726Sattilio}
390246726Sattilio
391246726Sattilio/*
392247771Salc * Inserts the key-value pair into the trie.
393226645Sattilio * Panics if the key already exists.
394226645Sattilio */
395246430Sattiliovoid
396248428Salcvm_radix_insert(struct vm_radix *rtree, vm_page_t page)
397226645Sattilio{
398248428Salc	vm_pindex_t index, newind;
399249502Salc	void **parentp;
400249502Salc	struct vm_radix_node *rnode, *tmp;
401246726Sattilio	vm_page_t m;
402236763Sattilio	int slot;
403246726Sattilio	uint16_t clev;
404226645Sattilio
405248428Salc	index = page->pindex;
406248222Sattilio
407226645Sattilio	/*
408246726Sattilio	 * The owner of record for root is not really important because it
409246726Sattilio	 * will never be used.
410226645Sattilio	 */
411246726Sattilio	rnode = vm_radix_getroot(rtree);
412246726Sattilio	if (rnode == NULL) {
413249502Salc		rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
414246726Sattilio		return;
415246726Sattilio	}
416249502Salc	parentp = (void **)&rtree->rt_root;
417249502Salc	for (;;) {
418249502Salc		if (vm_radix_isleaf(rnode)) {
419249502Salc			m = vm_radix_topage(rnode);
420246726Sattilio			if (m->pindex == index)
421246726Sattilio				panic("%s: key %jx is already present",
422246726Sattilio				    __func__, (uintmax_t)index);
423246726Sattilio			clev = vm_radix_keydiff(m->pindex, index);
424246726Sattilio			tmp = vm_radix_node_get(vm_radix_trimkey(index,
425246726Sattilio			    clev - 1), 2, clev);
426249502Salc			*parentp = tmp;
427246726Sattilio			vm_radix_addpage(tmp, index, clev, page);
428246726Sattilio			vm_radix_addpage(tmp, m->pindex, clev, m);
429246726Sattilio			return;
430249502Salc		} else if (vm_radix_keybarr(rnode, index))
431249502Salc			break;
432249502Salc		slot = vm_radix_slot(index, rnode->rn_clev);
433226645Sattilio		if (rnode->rn_child[slot] == NULL) {
434236760Sattilio			rnode->rn_count++;
435246726Sattilio			vm_radix_addpage(rnode, index, rnode->rn_clev, page);
436246726Sattilio			return;
437246726Sattilio		}
438249502Salc		parentp = &rnode->rn_child[slot];
439226645Sattilio		rnode = rnode->rn_child[slot];
440249502Salc	}
441226645Sattilio
442246726Sattilio	/*
443246726Sattilio	 * A new node is needed because the right insertion level is reached.
444246726Sattilio	 * Setup the new intermediate node and add the 2 children: the
445246726Sattilio	 * new object and the older edge.
446246726Sattilio	 */
447249182Salc	newind = rnode->rn_owner;
448249182Salc	clev = vm_radix_keydiff(newind, index);
449249182Salc	tmp = vm_radix_node_get(vm_radix_trimkey(index, clev - 1), 2,
450246726Sattilio	    clev);
451249502Salc	*parentp = tmp;
452249182Salc	vm_radix_addpage(tmp, index, clev, page);
453246726Sattilio	slot = vm_radix_slot(newind, clev);
454249182Salc	tmp->rn_child[slot] = rnode;
455226645Sattilio}
456226645Sattilio
457226645Sattilio/*
458247771Salc * Returns the value stored at the index.  If the index is not present,
459226645Sattilio * NULL is returned.
460226645Sattilio */
461246430Sattiliovm_page_t
462238245Sattiliovm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
463226645Sattilio{
464226645Sattilio	struct vm_radix_node *rnode;
465246726Sattilio	vm_page_t m;
466226645Sattilio	int slot;
467226645Sattilio
468246726Sattilio	rnode = vm_radix_getroot(rtree);
469246795Sattilio	while (rnode != NULL) {
470249038Salc		if (vm_radix_isleaf(rnode)) {
471249038Salc			m = vm_radix_topage(rnode);
472246726Sattilio			if (m->pindex == index)
473246726Sattilio				return (m);
474246726Sattilio			else
475249427Salc				break;
476249427Salc		} else if (vm_radix_keybarr(rnode, index))
477249427Salc			break;
478249427Salc		slot = vm_radix_slot(index, rnode->rn_clev);
479249427Salc		rnode = rnode->rn_child[slot];
480226645Sattilio	}
481246726Sattilio	return (NULL);
482226645Sattilio}
483226645Sattilio
484226645Sattilio/*
485247771Salc * Look up the nearest entry at a position bigger than or equal to index.
486226645Sattilio */
487246726Sattiliovm_page_t
488246726Sattiliovm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
489226645Sattilio{
490246726Sattilio	vm_pindex_t inc;
491246726Sattilio	vm_page_t m;
492249038Salc	struct vm_radix_node *child, *rnode;
493226645Sattilio	int slot;
494246726Sattilio	uint16_t difflev;
495246726Sattilio	boolean_t maplevels[VM_RADIX_LIMIT + 1];
496246726Sattilio#ifdef INVARIANTS
497246726Sattilio	int loops = 0;
498246726Sattilio#endif
499226645Sattilio
500249427Salc	rnode = vm_radix_getroot(rtree);
501249427Salc	if (rnode == NULL)
502249427Salc		return (NULL);
503249427Salc	else if (vm_radix_isleaf(rnode)) {
504249427Salc		m = vm_radix_topage(rnode);
505249427Salc		if (m->pindex >= index)
506249427Salc			return (m);
507249427Salc		else
508249427Salc			return (NULL);
509249427Salc	}
510226876Sjeffrestart:
511246726Sattilio	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
512246726Sattilio	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
513246726Sattilio		maplevels[difflev] = FALSE;
514249427Salc	for (;;) {
515246726Sattilio		maplevels[rnode->rn_clev] = TRUE;
516246726Sattilio
517246726Sattilio		/*
518246726Sattilio		 * If the keys differ before the current bisection node
519248444Salc		 * the search key might rollback to the earliest
520246726Sattilio		 * available bisection node, or to the smaller value
521246726Sattilio		 * in the current domain (if the owner is bigger than the
522246726Sattilio		 * search key).
523247969Sattilio		 * The maplevels array records any node has been seen
524247969Sattilio		 * at a given level.  This aids the search for a valid
525247969Sattilio		 * bisection node.
526246726Sattilio		 */
527247770Salc		if (vm_radix_keybarr(rnode, index)) {
528246726Sattilio			difflev = vm_radix_keydiff(index, rnode->rn_owner);
529246726Sattilio			if (index > rnode->rn_owner) {
530246726Sattilio				if (vm_radix_addlev(&index, maplevels,
531246726Sattilio				    difflev) > 0)
532246726Sattilio					break;
533246726Sattilio			} else
534246726Sattilio				index = vm_radix_trimkey(rnode->rn_owner,
535246726Sattilio				    difflev);
536249427Salc			rnode = vm_radix_getroot(rtree);
537246726Sattilio			goto restart;
538246726Sattilio		}
539246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
540249038Salc		child = rnode->rn_child[slot];
541249038Salc		if (vm_radix_isleaf(child)) {
542249038Salc			m = vm_radix_topage(child);
543249038Salc			if (m->pindex >= index)
544249038Salc				return (m);
545249038Salc		} else if (child != NULL)
546249038Salc			goto descend;
547246726Sattilio
548226645Sattilio		/*
549246726Sattilio		 * Look for an available edge or page within the current
550246726Sattilio		 * bisection node.
551246726Sattilio		 */
552246726Sattilio                if (slot < (VM_RADIX_COUNT - 1)) {
553246726Sattilio			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
554246726Sattilio			index = vm_radix_trimkey(index, rnode->rn_clev);
555249038Salc			do {
556249038Salc				index += inc;
557249038Salc				slot++;
558249038Salc				child = rnode->rn_child[slot];
559249038Salc				if (vm_radix_isleaf(child)) {
560249038Salc					m = vm_radix_topage(child);
561249038Salc					if (m->pindex >= index)
562249038Salc						return (m);
563249038Salc				} else if (child != NULL)
564249038Salc					goto descend;
565249038Salc			} while (slot < (VM_RADIX_COUNT - 1));
566246726Sattilio		}
567249038Salc		KASSERT(child == NULL || vm_radix_isleaf(child),
568249038Salc		    ("vm_radix_lookup_ge: child is radix node"));
569230750Sattilio
570246726Sattilio		/*
571247771Salc		 * If a valid page or edge bigger than the search slot is
572246726Sattilio		 * found in the traversal, skip to the next higher-level key.
573246726Sattilio		 */
574249038Salc		if (rnode->rn_clev == 0 || vm_radix_addlev(&index, maplevels,
575249038Salc		    rnode->rn_clev - 1) > 0)
576249038Salc			break;
577249427Salc		rnode = vm_radix_getroot(rtree);
578249038Salc		goto restart;
579249038Salcdescend:
580249038Salc		rnode = child;
581226645Sattilio	}
582245254Sattilio	return (NULL);
583226645Sattilio}
584226645Sattilio
585226645Sattilio/*
586247771Salc * Look up the nearest entry at a position less than or equal to index.
587226645Sattilio */
588246430Sattiliovm_page_t
589238245Sattiliovm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
590226645Sattilio{
591246726Sattilio	vm_pindex_t inc;
592246726Sattilio	vm_page_t m;
593249038Salc	struct vm_radix_node *child, *rnode;
594226646Sjeff	int slot;
595246726Sattilio	uint16_t difflev;
596246726Sattilio	boolean_t maplevels[VM_RADIX_LIMIT + 1];
597246726Sattilio#ifdef INVARIANTS
598246726Sattilio	int loops = 0;
599246726Sattilio#endif
600226645Sattilio
601249427Salc	rnode = vm_radix_getroot(rtree);
602249427Salc	if (rnode == NULL)
603249427Salc		return (NULL);
604249427Salc	else if (vm_radix_isleaf(rnode)) {
605249427Salc		m = vm_radix_topage(rnode);
606249427Salc		if (m->pindex <= index)
607249427Salc			return (m);
608249427Salc		else
609249427Salc			return (NULL);
610249427Salc	}
611226876Sjeffrestart:
612246726Sattilio	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
613246726Sattilio	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
614246726Sattilio		maplevels[difflev] = FALSE;
615249427Salc	for (;;) {
616246726Sattilio		maplevels[rnode->rn_clev] = TRUE;
617246726Sattilio
618226646Sjeff		/*
619246726Sattilio		 * If the keys differ before the current bisection node
620248444Salc		 * the search key might rollback to the earliest
621246726Sattilio		 * available bisection node, or to the higher value
622246726Sattilio		 * in the current domain (if the owner is smaller than the
623246726Sattilio		 * search key).
624247969Sattilio		 * The maplevels array records any node has been seen
625247969Sattilio		 * at a given level.  This aids the search for a valid
626247969Sattilio		 * bisection node.
627226646Sjeff		 */
628247770Salc		if (vm_radix_keybarr(rnode, index)) {
629246726Sattilio			difflev = vm_radix_keydiff(index, rnode->rn_owner);
630246726Sattilio			if (index > rnode->rn_owner) {
631246726Sattilio				index = vm_radix_trimkey(rnode->rn_owner,
632246726Sattilio				    difflev);
633246726Sattilio				index |= VM_RADIX_UNITLEVEL(difflev) - 1;
634246726Sattilio			} else if (vm_radix_declev(&index, maplevels,
635246726Sattilio			    difflev) > 0)
636246726Sattilio				break;
637249427Salc			rnode = vm_radix_getroot(rtree);
638246726Sattilio			goto restart;
639246726Sattilio		}
640246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
641249038Salc		child = rnode->rn_child[slot];
642249038Salc		if (vm_radix_isleaf(child)) {
643249038Salc			m = vm_radix_topage(child);
644249038Salc			if (m->pindex <= index)
645249038Salc				return (m);
646249038Salc		} else if (child != NULL)
647249038Salc			goto descend;
648246726Sattilio
649246726Sattilio		/*
650246726Sattilio		 * Look for an available edge or page within the current
651246726Sattilio		 * bisection node.
652246726Sattilio		 */
653246726Sattilio		if (slot > 0) {
654246726Sattilio			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
655246726Sattilio			index = vm_radix_trimkey(index, rnode->rn_clev);
656246726Sattilio			index |= inc - 1;
657249038Salc			do {
658249038Salc				index -= inc;
659249038Salc				slot--;
660249038Salc				child = rnode->rn_child[slot];
661249038Salc				if (vm_radix_isleaf(child)) {
662249038Salc					m = vm_radix_topage(child);
663249038Salc					if (m->pindex <= index)
664249038Salc						return (m);
665249038Salc				} else if (child != NULL)
666249038Salc					goto descend;
667249038Salc			} while (slot > 0);
668226646Sjeff		}
669249038Salc		KASSERT(child == NULL || vm_radix_isleaf(child),
670249038Salc		    ("vm_radix_lookup_le: child is radix node"));
671246726Sattilio
672246726Sattilio		/*
673247771Salc		 * If a valid page or edge smaller than the search slot is
674246726Sattilio		 * found in the traversal, skip to the next higher-level key.
675246726Sattilio		 */
676249038Salc		if (rnode->rn_clev == 0 || vm_radix_declev(&index, maplevels,
677249038Salc		    rnode->rn_clev - 1) > 0)
678249038Salc			break;
679249427Salc		rnode = vm_radix_getroot(rtree);
680249038Salc		goto restart;
681249038Salcdescend:
682249038Salc		rnode = child;
683226646Sjeff	}
684226645Sattilio	return (NULL);
685226645Sattilio}
686226645Sattilio
687226645Sattilio/*
688246726Sattilio * Remove the specified index from the tree.
689246726Sattilio * Panics if the key is not present.
690226645Sattilio */
691236763Sattiliovoid
692238245Sattiliovm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
693226645Sattilio{
694246726Sattilio	struct vm_radix_node *rnode, *parent;
695246726Sattilio	vm_page_t m;
696246726Sattilio	int i, slot;
697226645Sattilio
698249502Salc	rnode = vm_radix_getroot(rtree);
699249502Salc	if (vm_radix_isleaf(rnode)) {
700249502Salc		m = vm_radix_topage(rnode);
701249502Salc		if (m->pindex != index)
702249502Salc			panic("%s: invalid key found", __func__);
703249502Salc		vm_radix_setroot(rtree, NULL);
704249502Salc		return;
705249502Salc	}
706246726Sattilio	parent = NULL;
707236760Sattilio	for (;;) {
708246726Sattilio		if (rnode == NULL)
709246726Sattilio			panic("vm_radix_remove: impossible to locate the key");
710246726Sattilio		slot = vm_radix_slot(index, rnode->rn_clev);
711249038Salc		if (vm_radix_isleaf(rnode->rn_child[slot])) {
712249038Salc			m = vm_radix_topage(rnode->rn_child[slot]);
713249038Salc			if (m->pindex != index)
714249038Salc				panic("%s: invalid key found", __func__);
715246726Sattilio			rnode->rn_child[slot] = NULL;
716246726Sattilio			rnode->rn_count--;
717246726Sattilio			if (rnode->rn_count > 1)
718246726Sattilio				break;
719246726Sattilio			for (i = 0; i < VM_RADIX_COUNT; i++)
720246726Sattilio				if (rnode->rn_child[i] != NULL)
721246726Sattilio					break;
722246726Sattilio			KASSERT(i != VM_RADIX_COUNT,
723246726Sattilio			    ("%s: invalid node configuration", __func__));
724249502Salc			if (parent == NULL)
725249502Salc				vm_radix_setroot(rtree, rnode->rn_child[i]);
726249502Salc			else {
727249502Salc				slot = vm_radix_slot(index, parent->rn_clev);
728249502Salc				KASSERT(parent->rn_child[slot] == rnode,
729249502Salc				    ("%s: invalid child value", __func__));
730249502Salc				parent->rn_child[slot] = rnode->rn_child[i];
731249502Salc			}
732246726Sattilio			rnode->rn_count--;
733246726Sattilio			rnode->rn_child[i] = NULL;
734246726Sattilio			vm_radix_node_put(rnode);
735236760Sattilio			break;
736236760Sattilio		}
737246726Sattilio		parent = rnode;
738246726Sattilio		rnode = rnode->rn_child[slot];
739236760Sattilio	}
740226645Sattilio}
741226645Sattilio
742226645Sattilio/*
743226980Sattilio * Remove and free all the nodes from the radix tree.
744247771Salc * This function is recursive but there is a tight control on it as the
745226980Sattilio * maximum depth of the tree is fixed.
746226980Sattilio */
747226980Sattiliovoid
748226980Sattiliovm_radix_reclaim_allnodes(struct vm_radix *rtree)
749226980Sattilio{
750226980Sattilio	struct vm_radix_node *root;
751226980Sattilio
752246726Sattilio	root = vm_radix_getroot(rtree);
753246726Sattilio	if (root == NULL)
754226980Sattilio		return;
755248684Salc	vm_radix_setroot(rtree, NULL);
756249502Salc	if (!vm_radix_isleaf(root))
757249502Salc		vm_radix_reclaim_allnodes_int(root);
758226980Sattilio}
759246726Sattilio
760246726Sattilio#ifdef DDB
761246726Sattilio/*
762246837Sattilio * Show details about the given radix node.
763246726Sattilio */
764246726SattilioDB_SHOW_COMMAND(radixnode, db_show_radixnode)
765246726Sattilio{
766246726Sattilio	struct vm_radix_node *rnode;
767246726Sattilio	int i;
768246726Sattilio
769246726Sattilio        if (!have_addr)
770246726Sattilio                return;
771246726Sattilio	rnode = (struct vm_radix_node *)addr;
772246726Sattilio	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
773246726Sattilio	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
774246726Sattilio	    rnode->rn_clev);
775246726Sattilio	for (i = 0; i < VM_RADIX_COUNT; i++)
776246726Sattilio		if (rnode->rn_child[i] != NULL)
777246726Sattilio			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
778246726Sattilio			    i, (void *)rnode->rn_child[i],
779249038Salc			    vm_radix_isleaf(rnode->rn_child[i]) ?
780249038Salc			    vm_radix_topage(rnode->rn_child[i]) : NULL,
781246726Sattilio			    rnode->rn_clev);
782246726Sattilio}
783246726Sattilio#endif /* DDB */
784