vm_radix.c (247742) vm_radix.c (247769)
1/*
2 * Copyright (c) 2013 EMC Corp.
3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30/*
31 * Path-compressed radix trie implementation.
32 * The following code is not generalized into a general purpose library
33 * because there are way too many parameters embedded that should really
34 * be decided by the library consumers. At the same time, consumers
35 * of this code must achieve highest possible performance.
36 *
37 * The implementation takes into account the following rationale:
 38 * - The size of the nodes should be kept as small as possible.
 39 * - There is no bias toward lookup operations over inserts or removes,
 40 * and vice-versa.
 41 * - On average there are not many complete levels, so level
 42 * compression may just complicate things.
43 */
44
45#include <sys/cdefs.h>
46
47#include "opt_ddb.h"
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/kernel.h>
52#include <sys/vmmeter.h>
53
54#include <vm/uma.h>
55#include <vm/vm.h>
56#include <vm/vm_param.h>
57#include <vm/vm_page.h>
58#include <vm/vm_radix.h>
59
60#ifdef DDB
61#include <ddb/ddb.h>
62#endif
63
64/*
 65 * These widths should keep the children of a node within a single
 66 * cache line, or at least not span too many of them.
 67 * In particular, sparse tries are compressed properly, which
 68 * makes a few extra levels not a big deal.
69 */
70#ifdef __LP64__
71#define VM_RADIX_WIDTH 4
72#else
73#define VM_RADIX_WIDTH 3
74#endif
75
76#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
77#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
78#define VM_RADIX_LIMIT \
79 (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
80
81/* Flag bits stored in node pointers. */
82#define VM_RADIX_ISLEAF 0x1
83#define VM_RADIX_FLAGS 0x1
84#define VM_RADIX_PAD VM_RADIX_FLAGS
85
86/* Returns one unit associated with specified level. */
87#define VM_RADIX_UNITLEVEL(lev) \
88 ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
89
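/*
 * Editor's note (illustrative, not part of the revision being diffed): a
 * worked example of the geometry above, assuming a 64-bit vm_pindex_t
 * (__LP64__).  VM_RADIX_WIDTH is 4, so VM_RADIX_COUNT is 16 children per
 * node and VM_RADIX_LIMIT is howmany(64, 4) - 1 == 15; levels therefore run
 * from 0 (most significant nibble) to 15 (least significant nibble), and
 * VM_RADIX_UNITLEVEL(15) == 1 while VM_RADIX_UNITLEVEL(0) == 1UL << 60.
 */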
90struct vm_radix_node {
91 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
92 vm_pindex_t rn_owner; /* Owner of record. */
93 uint16_t rn_count; /* Valid children. */
94 uint16_t rn_clev; /* Current level. */
95};
96
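/*
 * Editor's arithmetic note (assuming __LP64__ with 8-byte pointers, which is
 * not stated in the source): the node above is 16 * 8 + 8 + 2 + 2 = 140
 * bytes, padded to 144, so the rn_child array alone occupies two 64-byte
 * cache lines; this is the trade-off the "cache line" comment above refers
 * to.
 */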
97static uma_zone_t vm_radix_node_zone;
98
99/*
 100 * Allocate a radix node.  Pre-allocation ensures that the request will
 101 * always be satisfied successfully.
102 */
103static __inline struct vm_radix_node *
104vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
105{
106 struct vm_radix_node *rnode;
107
108 rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);
109
110 /*
 111 * The required number of nodes should already be correctly
 112 * pre-allocated in vm_radix_init().  However, UMA can reserve
 113 * a few nodes in per-cpu buckets, which are not accessible
 114 * from the current cpu.  The allocation could then return
 115 * NULL when the pre-allocation pool is close to exhaustion.
 116 * In practice this should never be a problem because a new
 117 * node is not always required for an insert, so the
 118 * pre-allocation pool should already have some spare pages
 119 * that indirectly cover this situation.
120 */
121 if (rnode == NULL)
122 panic("%s: uma_zalloc() returned NULL for a new node",
123 __func__);
124 rnode->rn_owner = owner;
125 rnode->rn_count = count;
126 rnode->rn_clev = clevel;
127 return (rnode);
128}
129
130/*
131 * Free radix node.
132 */
133static __inline void
134vm_radix_node_put(struct vm_radix_node *rnode)
135{
136
137 uma_zfree(vm_radix_node_zone, rnode);
138}
139
140/*
141 * Return the position in the array for a given level.
142 */
143static __inline int
144vm_radix_slot(vm_pindex_t index, uint16_t level)
145{
146
147 return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
148 VM_RADIX_MASK);
149}
150
151/* Trims the key after the specified level. */
152static __inline vm_pindex_t
153vm_radix_trimkey(vm_pindex_t index, uint16_t level)
154{
155 vm_pindex_t ret;
156
157 ret = index;
158 if (level < VM_RADIX_LIMIT) {
159 ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
160 ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
161 }
162 return (ret);
163}
164
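/*
 * Editor's sketch (assuming __LP64__, so VM_RADIX_WIDTH == 4 and
 * VM_RADIX_LIMIT == 15): for the key 0x123456, vm_radix_slot() yields
 * 0x123456 & 0xf == 0x6 at level 15 and (0x123456 >> 4) & 0xf == 0x5 at
 * level 14, while vm_radix_trimkey(0x123456, 14) clears everything below
 * level 14 and returns 0x123450.
 */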
165/*
166 * Get the root node for a radix tree.
167 */
168static __inline struct vm_radix_node *
169vm_radix_getroot(struct vm_radix *rtree)
170{
171
172 return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
173}
174
175/*
176 * Set the root node for a radix tree.
177 */
178static __inline void
179vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
180{
181
182 rtree->rt_root = (uintptr_t)rnode;
183}
184
185/*
186 * Returns the associated page extracted from rnode if available,
187 * NULL otherwise.
188 */
189static __inline vm_page_t
190vm_radix_node_page(struct vm_radix_node *rnode)
191{
192
193 return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
194 (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
195}
196
197/*
198 * Adds the page as a child of provided node.
199 */
200static __inline void
201vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
202 vm_page_t page)
203{
204 int slot;
205
206 slot = vm_radix_slot(index, clev);
207 rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
208}
209
210/*
 211 * Returns the level where two keys differ.
 212 * It cannot accept two equal keys.
213 */
214static __inline uint16_t
215vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
216{
217 uint16_t clev;
218
219 KASSERT(index1 != index2, ("%s: passing the same key value %jx",
220 __func__, (uintmax_t)index1));
221
222 index1 ^= index2;
223 for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++)
224 if (vm_radix_slot(index1, clev))
225 return (clev);
 226	panic("%s: cannot reach this point", __func__);
227 return (0);
228}
229
230/*
231 * Returns TRUE if it can be determined that key does not belong to the
232 * specified rnode. FALSE otherwise.
233 */
234static __inline boolean_t
235vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
236{
237
238 if (rnode->rn_clev > 0) {
239 idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
240 idx -= rnode->rn_owner;
241 if (idx != 0)
242 return (TRUE);
243 }
244 return (FALSE);
245}
246
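/*
 * Editor's sketch (hypothetical values): a node with rn_clev == 3 and
 * rn_owner == 0x1230000000000000 covers every key whose three leading
 * nibbles are 0x123; vm_radix_keybarr() trims the search key at level 2
 * and reports TRUE only when that trimmed prefix differs from rn_owner.
 */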
247/*
248 * Adjusts the idx key to the first upper level available, based on a valid
249 * initial level and map of available levels.
 250 * Returns a value bigger than 0 to signal that there are no valid levels
 251 * available.
252 */
253static __inline int
254vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
255{
256 vm_pindex_t wrapidx;
257
258 for (; levels[ilev] == FALSE ||
259 vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
260 if (ilev == 0)
261 break;
262 KASSERT(ilev > 0 || levels[0] == TRUE,
263 ("%s: levels back-scanning problem", __func__));
264 if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
265 return (1);
266 wrapidx = *idx;
267 *idx = vm_radix_trimkey(*idx, ilev);
268 *idx += VM_RADIX_UNITLEVEL(ilev);
269 return (*idx < wrapidx);
270}
271
272/*
273 * Adjusts the idx key to the first lower level available, based on a valid
274 * initial level and map of available levels.
 275 * Returns a value bigger than 0 to signal that there are no valid levels
 276 * available.
277 */
278static __inline int
279vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
280{
281 vm_pindex_t wrapidx;
282
283 for (; levels[ilev] == FALSE ||
284 vm_radix_slot(*idx, ilev) == 0; ilev--)
285 if (ilev == 0)
286 break;
287 KASSERT(ilev > 0 || levels[0] == TRUE,
288 ("%s: levels back-scanning problem", __func__));
289 if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
290 return (1);
291 wrapidx = *idx;
292 *idx = vm_radix_trimkey(*idx, ilev);
293 *idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
294 *idx -= VM_RADIX_UNITLEVEL(ilev);
295 return (*idx > wrapidx);
296}
297
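/*
 * Editor's sketch (assuming __LP64__, hypothetical maplevels contents): if
 * only level 14 was visited, vm_radix_addlev(&idx, maplevels, 15) backs up
 * to level 14 and, provided the level-14 slot of idx is not already the
 * last one, trims idx at that level and advances it by
 * VM_RADIX_UNITLEVEL(14) == 0x10, so the caller resumes scanning at the
 * next level-14 boundary; vm_radix_declev() is the mirror image going down.
 */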
298/*
 299 * Internal helper for the vm_radix_reclaim_allnodes() primitive.
300 * This function is recrusive.
300 * This function is recursive.
301 */
302static void
303vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
304{
305 int slot;
306
307 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
308 if (rnode->rn_child[slot] == NULL)
309 continue;
310 if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
311 vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
312 rnode->rn_count--;
313 }
314 vm_radix_node_put(rnode);
315}
316
317#ifdef INVARIANTS
318/*
319 * Radix node zone destructor.
320 */
321static void
322vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
323{
324 struct vm_radix_node *rnode;
325
326 rnode = mem;
327 KASSERT(rnode->rn_count == 0,
328 ("vm_radix_node_put: Freeing node %p with %d children\n", mem,
329 rnode->rn_count));
330}
331#endif
332
333/*
334 * Pre-allocate intermediate nodes from the UMA slab zone.
335 */
336static void
337vm_radix_prealloc(void *arg __unused)
338{
339
340 if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
341 panic("%s: unable to create new zone", __func__);
342 uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
343}
344SYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
345 NULL);
346
347/*
348 * Initialize the UMA slab zone.
349 * Until vm_radix_prealloc() is called, the zone will be served by the
350 * UMA boot-time pre-allocated pool of pages.
351 */
352void
353vm_radix_init(void)
354{
355
356 vm_radix_node_zone = uma_zcreate("RADIX NODE",
357 sizeof(struct vm_radix_node), NULL,
358#ifdef INVARIANTS
359 vm_radix_node_zone_dtor,
360#else
361 NULL,
362#endif
363 NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE);
364}
365
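/*
 * Editor's note: VM_RADIX_PAD (0x1) is passed to uma_zcreate() above as the
 * alignment mask, guaranteeing at least 2-byte aligned node addresses; bit 0
 * of a child pointer is therefore always free to hold the VM_RADIX_ISLEAF
 * tag that distinguishes vm_page_t leaves from internal nodes.
 */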
366/*
 367 * Inserts the key-value pair into the trie.
368 * Panics if the key already exists.
369 */
370void
371vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
372{
373 vm_pindex_t newind;
374 struct vm_radix_node *rnode, *tmp, *tmp2;
375 vm_page_t m;
376 int slot;
377 uint16_t clev;
378
379 /*
380 * The owner of record for root is not really important because it
381 * will never be used.
382 */
383 rnode = vm_radix_getroot(rtree);
384 if (rnode == NULL) {
385 rnode = vm_radix_node_get(0, 1, 0);
386 vm_radix_setroot(rtree, rnode);
387 vm_radix_addpage(rnode, index, 0, page);
388 return;
389 }
390 while (rnode != NULL) {
391 if (vm_radix_keybarr(rnode, index) == TRUE)
392 break;
393 slot = vm_radix_slot(index, rnode->rn_clev);
394 m = vm_radix_node_page(rnode->rn_child[slot]);
395 if (m != NULL) {
396 if (m->pindex == index)
397 panic("%s: key %jx is already present",
398 __func__, (uintmax_t)index);
399 clev = vm_radix_keydiff(m->pindex, index);
400 tmp = vm_radix_node_get(vm_radix_trimkey(index,
401 clev - 1), 2, clev);
402 rnode->rn_child[slot] = tmp;
403 vm_radix_addpage(tmp, index, clev, page);
404 vm_radix_addpage(tmp, m->pindex, clev, m);
405 return;
406 }
407 if (rnode->rn_child[slot] == NULL) {
408 rnode->rn_count++;
409 vm_radix_addpage(rnode, index, rnode->rn_clev, page);
410 return;
411 }
412 rnode = rnode->rn_child[slot];
413 }
414 if (rnode == NULL)
415 panic("%s: path traversal ended unexpectedly", __func__);
416
417 /*
418 * Scan the trie from the top and find the parent to insert
419 * the new object.
420 */
421 newind = rnode->rn_owner;
422 clev = vm_radix_keydiff(newind, index);
423 slot = VM_RADIX_COUNT;
424 for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
425 KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
426 __func__));
427 KASSERT(clev >= rnode->rn_clev,
428 ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
429 __func__, clev, rnode->rn_clev));
430 slot = vm_radix_slot(index, rnode->rn_clev);
431 tmp = rnode->rn_child[slot];
432 KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
433 ("%s: unexpected lookup interruption", __func__));
434 if (tmp->rn_clev > clev)
435 break;
436 }
437 KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
438 ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
439 __func__, (void *)rnode, (void *)tmp, slot));
440
441 /*
442 * A new node is needed because the right insertion level is reached.
 443 * Set up the new intermediate node and add the 2 children: the
444 * new object and the older edge.
445 */
446 tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2,
447 clev);
448 rnode->rn_child[slot] = tmp2;
449 vm_radix_addpage(tmp2, index, clev, page);
450 slot = vm_radix_slot(newind, clev);
451 tmp2->rn_child[slot] = tmp;
452}
453
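/*
 * Editor's note: a minimal usage sketch of the public interface implemented
 * in this file, assuming a hypothetical consumer that embeds a struct
 * vm_radix as "rtree" and already holds whatever lock serializes access
 * ("obj" and "m" are illustrative names, not part of this file):
 *
 *	vm_radix_insert(&obj->rtree, m->pindex, m);
 *	KASSERT(vm_radix_lookup(&obj->rtree, m->pindex) == m,
 *	    ("inserted page not found"));
 *	vm_radix_remove(&obj->rtree, m->pindex);
 */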
454/*
455 * Returns the value stored at the index. If the index is not present
456 * NULL is returned.
457 */
458vm_page_t
459vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
460{
461 struct vm_radix_node *rnode;
462 vm_page_t m;
463 int slot;
464
465 rnode = vm_radix_getroot(rtree);
466 while (rnode != NULL) {
467 if (vm_radix_keybarr(rnode, index) == TRUE)
468 return (NULL);
469 slot = vm_radix_slot(index, rnode->rn_clev);
470 rnode = rnode->rn_child[slot];
471 m = vm_radix_node_page(rnode);
472 if (m != NULL) {
473 if (m->pindex == index)
474 return (m);
475 else
476 return (NULL);
477 }
478 }
479 return (NULL);
480}
481
482/*
 483 * Look up any entry at a position greater than or equal to index.
484 */
485vm_page_t
486vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
487{
488 vm_pindex_t inc;
489 vm_page_t m;
490 struct vm_radix_node *rnode;
491 int slot;
492 uint16_t difflev;
493 boolean_t maplevels[VM_RADIX_LIMIT + 1];
494#ifdef INVARIANTS
495 int loops = 0;
496#endif
497
498restart:
499 KASSERT(++loops < 1000, ("%s: too many loops", __func__));
500 for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
501 maplevels[difflev] = FALSE;
502 rnode = vm_radix_getroot(rtree);
503 while (rnode != NULL) {
504 maplevels[rnode->rn_clev] = TRUE;
505
506 /*
 507 * If the keys differ before the current bisection node,
 508 * the search key might roll back to the earliest
 509 * available bisection node, or to the smallest value
 510 * in the current domain (if the owner is bigger than the
 511 * search key).
 512 * The search for a valid bisection node is helped by the
 513 * maplevels array, which immediately yields a lower
 514 * useful level, skipping holes.
515 */
516 if (vm_radix_keybarr(rnode, index) == TRUE) {
517 difflev = vm_radix_keydiff(index, rnode->rn_owner);
518 if (index > rnode->rn_owner) {
519 if (vm_radix_addlev(&index, maplevels,
520 difflev) > 0)
521 break;
522 } else
523 index = vm_radix_trimkey(rnode->rn_owner,
524 difflev);
525 goto restart;
526 }
527 slot = vm_radix_slot(index, rnode->rn_clev);
528 m = vm_radix_node_page(rnode->rn_child[slot]);
529 if (m != NULL && m->pindex >= index)
530 return (m);
531 if (rnode->rn_child[slot] != NULL && m == NULL) {
532 rnode = rnode->rn_child[slot];
533 continue;
534 }
535
536 /*
537 * Look for an available edge or page within the current
538 * bisection node.
539 */
540 if (slot < (VM_RADIX_COUNT - 1)) {
541 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
542 index = vm_radix_trimkey(index, rnode->rn_clev);
543 index += inc;
544 slot++;
545 for (;; index += inc, slot++) {
546 m = vm_radix_node_page(rnode->rn_child[slot]);
547 if (m != NULL && m->pindex >= index)
548 return (m);
549 if ((rnode->rn_child[slot] != NULL &&
550 m == NULL) || slot == (VM_RADIX_COUNT - 1))
551 break;
552 }
553 }
554
555 /*
 556 * If no valid page or edge greater than the search slot was
 557 * found in the traversal, skip to the next higher-level key.
558 */
559 if (slot == (VM_RADIX_COUNT - 1) &&
560 (rnode->rn_child[slot] == NULL || m != NULL)) {
561 if (rnode->rn_clev == 0 || vm_radix_addlev(&index,
562 maplevels, rnode->rn_clev - 1) > 0)
563 break;
564 goto restart;
565 }
566 rnode = rnode->rn_child[slot];
567 }
568 return (NULL);
569}
570
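/*
 * Editor's note: a small worked case for the routine above, assuming
 * __LP64__ and a trie holding only the pindexes 0x10 and 0x200 (which share
 * one level-13 node): vm_radix_lookup_ge(rtree, 0x11) descends into that
 * node, sees that the page at slot 0 (pindex 0x10) is below the search key,
 * scans the following slots of the same node, and returns the page at
 * pindex 0x200.
 */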
571/*
572 * Look up any entry at a position less than or equal to index.
573 */
574vm_page_t
575vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
576{
577 vm_pindex_t inc;
578 vm_page_t m;
579 struct vm_radix_node *rnode;
580 int slot;
581 uint16_t difflev;
582 boolean_t maplevels[VM_RADIX_LIMIT + 1];
583#ifdef INVARIANTS
584 int loops = 0;
585#endif
586
587restart:
588 KASSERT(++loops < 1000, ("%s: too many loops", __func__));
589 for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
590 maplevels[difflev] = FALSE;
591 rnode = vm_radix_getroot(rtree);
592 while (rnode != NULL) {
593 maplevels[rnode->rn_clev] = TRUE;
594
595 /*
 596 * If the keys differ before the current bisection node,
 597 * the search key might roll back to the earliest
 598 * available bisection node, or to the highest value
 599 * in the current domain (if the owner is smaller than the
 600 * search key).
 601 * The search for a valid bisection node is helped by the
 602 * maplevels array, which immediately yields a lower
 603 * useful level, skipping holes.
604 */
605 if (vm_radix_keybarr(rnode, index) == TRUE) {
606 difflev = vm_radix_keydiff(index, rnode->rn_owner);
607 if (index > rnode->rn_owner) {
608 index = vm_radix_trimkey(rnode->rn_owner,
609 difflev);
610 index |= VM_RADIX_UNITLEVEL(difflev) - 1;
611 } else if (vm_radix_declev(&index, maplevels,
612 difflev) > 0)
613 break;
614 goto restart;
615 }
616 slot = vm_radix_slot(index, rnode->rn_clev);
617 m = vm_radix_node_page(rnode->rn_child[slot]);
618 if (m != NULL && m->pindex <= index)
619 return (m);
620 if (rnode->rn_child[slot] != NULL && m == NULL) {
621 rnode = rnode->rn_child[slot];
622 continue;
623 }
624
625 /*
626 * Look for an available edge or page within the current
627 * bisection node.
628 */
629 if (slot > 0) {
630 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
631 index = vm_radix_trimkey(index, rnode->rn_clev);
632 index |= inc - 1;
633 index -= inc;
634 slot--;
635 for (;; index -= inc, slot--) {
636 m = vm_radix_node_page(rnode->rn_child[slot]);
637 if (m != NULL && m->pindex <= index)
638 return (m);
639 if ((rnode->rn_child[slot] != NULL &&
640 m == NULL) || slot == 0)
641 break;
642 }
643 }
644
645 /*
 646 * If no valid page or edge smaller than the search slot was
 647 * found in the traversal, skip to the next higher-level key.
648 */
649 if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
650 if (rnode->rn_clev == 0 || vm_radix_declev(&index,
651 maplevels, rnode->rn_clev - 1) > 0)
652 break;
653 goto restart;
654 }
655 rnode = rnode->rn_child[slot];
656 }
657 return (NULL);
658}
659
660/*
661 * Remove the specified index from the tree.
662 * Panics if the key is not present.
663 */
664void
665vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
666{
667 struct vm_radix_node *rnode, *parent;
668 vm_page_t m;
669 int i, slot;
670
671 parent = NULL;
672 rnode = vm_radix_getroot(rtree);
673 for (;;) {
674 if (rnode == NULL)
675 panic("vm_radix_remove: impossible to locate the key");
676 slot = vm_radix_slot(index, rnode->rn_clev);
677 m = vm_radix_node_page(rnode->rn_child[slot]);
678 if (m != NULL && m->pindex == index) {
679 rnode->rn_child[slot] = NULL;
680 rnode->rn_count--;
681 if (rnode->rn_count > 1)
682 break;
683 if (parent == NULL) {
684 if (rnode->rn_count == 0) {
685 vm_radix_node_put(rnode);
686 vm_radix_setroot(rtree, NULL);
687 }
688 break;
689 }
690 for (i = 0; i < VM_RADIX_COUNT; i++)
691 if (rnode->rn_child[i] != NULL)
692 break;
693 KASSERT(i != VM_RADIX_COUNT,
694 ("%s: invalid node configuration", __func__));
695 slot = vm_radix_slot(index, parent->rn_clev);
696 KASSERT(parent->rn_child[slot] == rnode,
697 ("%s: invalid child value", __func__));
698 parent->rn_child[slot] = rnode->rn_child[i];
699 rnode->rn_count--;
700 rnode->rn_child[i] = NULL;
701 vm_radix_node_put(rnode);
702 break;
703 }
704 if (m != NULL && m->pindex != index)
705 panic("%s: invalid key found", __func__);
706 parent = rnode;
707 rnode = rnode->rn_child[slot];
708 }
709}
710
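/*
 * Editor's note on the collapse step above: when removing a page leaves a
 * node with a single remaining child and that node has a parent, the child
 * is hooked directly into the parent's slot and the node is freed, so
 * chains of one-child internal nodes never persist after a removal.
 */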
711/*
712 * Remove and free all the nodes from the radix tree.
 713 * This function is recursive, but the recursion is tightly bounded because
 714 * the maximum depth of the tree is fixed.
715 */
716void
717vm_radix_reclaim_allnodes(struct vm_radix *rtree)
718{
719 struct vm_radix_node *root;
720
721 root = vm_radix_getroot(rtree);
722 if (root == NULL)
723 return;
724 vm_radix_reclaim_allnodes_int(root);
725 vm_radix_setroot(rtree, NULL);
726}
727
728#ifdef DDB
729/*
730 * Show details about the given radix node.
731 */
732DB_SHOW_COMMAND(radixnode, db_show_radixnode)
733{
734 struct vm_radix_node *rnode;
735 int i;
736
737 if (!have_addr)
738 return;
739 rnode = (struct vm_radix_node *)addr;
740 db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
741 (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
742 rnode->rn_clev);
743 for (i = 0; i < VM_RADIX_COUNT; i++)
744 if (rnode->rn_child[i] != NULL)
745 db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
746 i, (void *)rnode->rn_child[i],
747 (void *)vm_radix_node_page(rnode->rn_child[i]),
748 rnode->rn_clev);
749}
750#endif /* DDB */