vm_radix.c (246430) vs. vm_radix.c (246726)
1/*
1/*
2 * Copyright (c) 2013 EMC Corp.
2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright

--- 11 unchanged lines hidden ---

21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright

--- 11 unchanged lines hidden ---

22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
29
30/*
30/*
31 * Radix tree implementation.
31 * Path-compressed radix trie implementation.
32 * The following code is not generalized into a general purpose library
33 * because there are way too many parameters embedded that should really
34 * be decided by the library consumers. At the same time, consumers
35 * of this code must achieve highest possible performance.
36 *
37 * The implementation takes into account the following rationale:
38 * - The size of the nodes should be as small as possible.
39 * - There is no bias toward lookup operations over inserts or removes,
40 * and vice-versa.
41 * - On average there are not many complete levels, so level
42 * compression may just complicate things.
32 */
33
34#include <sys/cdefs.h>
35
43 */
44
45#include <sys/cdefs.h>
46
47#include "opt_ddb.h"
48
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
49#include <sys/param.h>
50#include <sys/conf.h>
51#include <sys/systm.h>
52#include <sys/kernel.h>
53#include <sys/malloc.h>
54#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
45#include <sys/ktr.h>
57
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/vm_extern.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_page.h>
58#include <vm/uma.h>
59#include <vm/vm.h>
60#include <vm/vm_param.h>
61#include <vm/vm_extern.h>
62#include <vm/vm_kern.h>
63#include <vm/vm_page.h>
52#ifndef UMA_MD_SMALL_ALLOC
53#include <vm/vm_map.h>
54#endif
55#include <vm/vm_radix.h>
64#include <vm/vm_radix.h>
56#include <vm/vm_object.h>
57
65
58#include <sys/kdb.h>
66#ifdef DDB
67#include <ddb/ddb.h>
68#endif
59
69
60#ifndef UMA_MD_SMALL_ALLOC
61#define VM_RADIX_RNODE_MAP_SCALE (1024 * 1024 / 2)
62#define VM_RADIX_WIDTH 4
63
64/*
70/*
65 * Bits of height in root.
66 * The mask of smaller power of 2 containing VM_RADIX_LIMIT.
71 * Such sizes should permit keeping the node children contained in a single
72 * cache line, or at least not spanning many of them.
73 * In particular, sparse tries should be compressed properly, and
74 * then some extra levels are not a big deal.
67 */
75 */
68#define VM_RADIX_HEIGHT 0x1f
76#ifdef __LP64__
77#define VM_RADIX_WIDTH 4
69#else
78#else
70#define VM_RADIX_WIDTH 5
71
72/* See the comment above. */
73#define VM_RADIX_HEIGHT 0xf
79#define VM_RADIX_WIDTH 3
74#endif
75
76#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
77#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
80#endif
81
82#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
83#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
78#define VM_RADIX_MAXVAL ((vm_pindex_t)-1)
79#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH)
84#define VM_RADIX_LIMIT \
85 (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
80
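As a quick, illustrative aside (not part of the diff itself): a standalone userland sketch, assuming a 64-bit vm_pindex_t and the LP64 value of VM_RADIX_WIDTH, that prints the constants derived above.

#include <stdint.h>
#include <stdio.h>

/* Userland stand-ins for the kernel definitions quoted above (assumed values). */
typedef uint64_t vm_pindex_t;
#define NBBY            8
#define howmany(x, y)   (((x) + ((y) - 1)) / (y))
#define VM_RADIX_WIDTH  4
#define VM_RADIX_COUNT  (1 << VM_RADIX_WIDTH)
#define VM_RADIX_MASK   (VM_RADIX_COUNT - 1)
#define VM_RADIX_LIMIT  \
        (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)

int
main(void)
{

        /* 16 children per node, 4 index bits consumed per level,
         * deepest level 15 (16 levels resolve a full 64-bit index). */
        printf("COUNT %d MASK 0x%x LIMIT %zu\n", VM_RADIX_COUNT,
            (unsigned)VM_RADIX_MASK, (size_t)VM_RADIX_LIMIT);
        return (0);
}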
81/* Flag bits stored in node pointers. */
86
87/* Flag bits stored in node pointers. */
82#define VM_RADIX_FLAGS 0x3
88#define VM_RADIX_ISLEAF 0x1
89#define VM_RADIX_FLAGS 0x1
90#define VM_RADIX_PAD VM_RADIX_FLAGS
83
91
84/* Calculates maximum value for a tree of height h. */
85#define VM_RADIX_MAX(h) \
86 ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \
87 (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1))
92/* Returns one unit associated with specified level. */
93#define VM_RADIX_UNITLEVEL(lev) \
94 ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
88
95
89/*
90 * On 32-bits architectures KTR cannot handle 64-bits values.
91 * Add macros for splitting in 32-bits quantity and provide format strings.
92 * Note that braces are not used because they would break compilation.
93 * Also, note that arguments are cast to u_long in order to follow KTR
94 * convention.
95 */
96#ifdef KTR
97#define KFRMT64(x) __STRING(x)"l 0x%08lx, "__STRING(x)"h 0x%08lx"
98#define KSPLT64L(x) ((u_long)((x) & 0xFFFFFFFF))
99#define KSPLT64H(x) ((u_long)(((x) >> 32) & 0xFFFFFFFF))
100#endif
101
102CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT);
103
104struct vm_radix_node {
105 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
96struct vm_radix_node {
97 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
106 volatile uint32_t rn_count; /* Valid children. */
98 vm_pindex_t rn_owner; /* Owner of record. */
99 uint16_t rn_count; /* Valid children. */
100 uint16_t rn_clev; /* Current level. */
107};
108
101};
102
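For a rough feel of the node footprint, here is a hedged userland mirror of the new layout (demo_radix_node is a hypothetical stand-in with plain C types; the exact padding depends on the ABI), assuming LP64 and VM_RADIX_WIDTH 4:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the new struct vm_radix_node field order with fixed-width types. */
struct demo_radix_node {
        void            *rn_child[16];  /* 16 * 8 = 128 bytes of child slots. */
        uint64_t         rn_owner;      /* vm_pindex_t stand-in. */
        uint16_t         rn_count;
        uint16_t         rn_clev;
};

int
main(void)
{

        /* Typically 144 bytes after padding: well below PAGE_SIZE, with the
         * child array spanning two 64-byte cache lines. */
        printf("node size: %zu bytes\n", sizeof(struct demo_radix_node));
        return (0);
}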
109CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
110
111static uma_zone_t vm_radix_node_zone;
112
103static uma_zone_t vm_radix_node_zone;
104
113#ifndef UMA_MD_SMALL_ALLOC
114static vm_map_t rnode_map;
115static u_long rnode_map_scale;
116
117static void *
118vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
105#ifdef INVARIANTS
106/*
107 * Radix node zone destructor.
108 */
109static void
110vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
119{
111{
120 vm_offset_t addr;
121 vm_page_t m;
122 int pflags;
112 struct vm_radix_node *rnode;
123
113
124 /* Inform UMA that this allocator uses rnode_map. */
125 *flags = UMA_SLAB_KERNEL;
114 rnode = mem;
115 KASSERT(rnode->rn_count == 0,
116 ("vm_radix_node_put: Freeing node %p with %d children\n", mem,
117 rnode->rn_count));
118}
119#endif
126
120
127 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
121/*
122 * Allocate a radix node. Pre-allocation ensures that the request will be
123 * always successfully satisfied.
124 */
125static __inline struct vm_radix_node *
126vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
127{
128 struct vm_radix_node *rnode;
128
129
130 rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);
131
129 /*
132 /*
130 * As kmem_alloc_nofault() can however fail, let just assume that
131 * M_NOWAIT is on and act accordingly.
133 * The required number of nodes might already be correctly
134 * pre-allocated in vm_radix_init(). However, UMA can reserve a few
135 * nodes in per-CPU buckets, which will not be accessible
136 * from the curcpu. The allocation could then return NULL when the
137 * pre-allocation pool is close to being exhausted.
138 * Anyway, in practice this should never be a problem because a new
139 * node is not always required for an insert; thus the pre-allocation
140 * pool should already have some extra pages that indirectly deal with
141 * this situation.
132 */
142 */
133 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
134 VM_ALLOC_SYSTEM;
135 if ((wait & M_ZERO) != 0)
136 pflags |= VM_ALLOC_ZERO;
137 addr = kmem_alloc_nofault(rnode_map, size);
138 if (addr == 0)
139 return (NULL);
140
141 /* Just one page allocation is assumed here. */
142 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
143 pflags);
144 if (m == NULL) {
145 kmem_free(rnode_map, addr, size);
146 return (NULL);
147 }
148 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
149 pmap_zero_page(m);
150 pmap_qenter(addr, &m, 1);
151 return ((void *)addr);
143 if (rnode == NULL)
144 panic("%s: uma_zalloc() returned NULL for a new node",
145 __func__);
146 rnode->rn_owner = owner;
147 rnode->rn_count = count;
148 rnode->rn_clev = clevel;
149 return (rnode);
152}
153
150}
151
154static void
155vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
152/*
153 * Free radix node.
154 */
155static __inline void
156vm_radix_node_put(struct vm_radix_node *rnode)
156{
157{
157 vm_page_t m;
158 vm_offset_t voitem;
159
158
160 MPASS((flags & UMA_SLAB_KERNEL) != 0);
159 uma_zfree(vm_radix_node_zone, rnode);
160}
161
161
162 /* Just one page allocation is assumed here. */
163 voitem = (vm_offset_t)item;
164 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
165 pmap_qremove(voitem, 1);
166 vm_page_lock(m);
167 vm_page_unwire(m, 0);
168 vm_page_free(m);
169 vm_page_unlock(m);
170 kmem_free(rnode_map, voitem, size);
162/*
163 * Return the position in the array for a given level.
164 */
165static __inline int
166vm_radix_slot(vm_pindex_t index, uint16_t level)
167{
168
169 return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
170 VM_RADIX_MASK);
171}
172
171}
172
173static void
174init_vm_radix_alloc(void *dummy __unused)
173/* Trims the key after the specified level. */
174static __inline vm_pindex_t
175vm_radix_trimkey(vm_pindex_t index, uint16_t level)
175{
176{
177 vm_pindex_t ret;
176
178
177 uma_zone_set_max(vm_radix_node_zone, rnode_map_scale);
178 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
179 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
179 ret = index;
180 if (level < VM_RADIX_LIMIT) {
181 ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
182 ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
183 }
184 return (ret);
180}
185}
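A small standalone sketch (the same bit arithmetic as the two helpers above; WIDTH 4 and LIMIT 15 are assumed for the 64-bit case) showing how a key maps to per-level slots and what trimming removes:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_pindex_t;           /* stand-in for the kernel type */
#define WIDTH   4
#define LIMIT   15                      /* deepest level for a 64-bit key */
#define MASK    ((1 << WIDTH) - 1)

/* Same arithmetic as vm_radix_slot() above. */
static int
slot(vm_pindex_t index, uint16_t level)
{

        return ((index >> ((LIMIT - level) * WIDTH)) & MASK);
}

/* Same arithmetic as vm_radix_trimkey() above. */
static vm_pindex_t
trimkey(vm_pindex_t index, uint16_t level)
{

        if (level < LIMIT) {
                index >>= (LIMIT - level) * WIDTH;
                index <<= (LIMIT - level) * WIDTH;
        }
        return (index);
}

int
main(void)
{
        vm_pindex_t idx = 0x1234;

        /* Level LIMIT is the least significant nibble, level 0 the most. */
        printf("slot(idx, 15) = %d\n", slot(idx, 15));          /* 4 */
        printf("slot(idx, 14) = %d\n", slot(idx, 14));          /* 3 */
        /* Trimming at level 13 clears every nibble below that level. */
        printf("trimkey(idx, 13) = 0x%jx\n", (uintmax_t)trimkey(idx, 13)); /* 0x1200 */
        return (0);
}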
181SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
182#endif
183
184/*
186
187/*
185 * Radix node zone destructor.
188 * Get the root node for a radix tree.
186 */
189 */
187#ifdef INVARIANTS
188static void
189vm_radix_node_zone_dtor(void *mem, int size, void *arg)
190static __inline struct vm_radix_node *
191vm_radix_getroot(struct vm_radix *rtree)
190{
192{
191 struct vm_radix_node *rnode;
192
193
193 rnode = mem;
194 KASSERT(rnode->rn_count == 0,
195 ("vm_radix_node_put: Freeing a node with %d children\n",
196 rnode->rn_count));
194 return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
197}
195}
198#endif
199
200/*
196
197/*
201 * Allocate a radix node. Initializes all elements to 0.
198 * Set the root node for a radix tree.
202 */
199 */
203static __inline struct vm_radix_node *
204vm_radix_node_get(void)
200static __inline void
201vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
205{
206
202{
203
207 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
204 rtree->rt_root = (uintptr_t)rnode;
208}
209
210/*
205}
206
207/*
211 * Free radix node.
208 * Returns the associated page extracted from rnode if available,
209 * NULL otherwise.
212 */
210 */
213static __inline void
214vm_radix_node_put(struct vm_radix_node *rnode)
211static __inline vm_page_t
212vm_radix_node_page(struct vm_radix_node *rnode)
215{
216
213{
214
217 uma_zfree(vm_radix_node_zone, rnode);
215 return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
216 (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
218}
219
220/*
217}
218
219/*
221 * Return the position in the array for a given level.
220 * Adds the page as a child of provided node.
222 */
221 */
223static __inline int
224vm_radix_slot(vm_pindex_t index, int level)
222static __inline void
223vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
224 vm_page_t page)
225{
225{
226 int slot;
226
227
227 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
228 slot = vm_radix_slot(index, clev);
229 rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
228}
229
230/*
230}
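The leaf-tagging scheme used by vm_radix_addpage() and vm_radix_node_page() can be shown in isolation; in the sketch below, leaf_tag() and leaf_untag() are hypothetical userland names, not kernel functions:

#include <stdint.h>
#include <stdio.h>

#define ISLEAF  0x1
#define FLAGS   0x1

struct page { int dummy; };             /* stand-in for a vm_page_t target */

/* Mark a child slot as a leaf, as vm_radix_addpage() does. */
static void *
leaf_tag(struct page *p)
{

        return ((void *)((uintptr_t)p | ISLEAF));
}

/* Recover the page pointer, as vm_radix_node_page() does; NULL for nodes. */
static struct page *
leaf_untag(void *child)
{

        if (((uintptr_t)child & ISLEAF) == 0)
                return (NULL);
        return ((struct page *)((uintptr_t)child & ~(uintptr_t)FLAGS));
}

int
main(void)
{
        struct page pg;
        void *child;

        /* The low pointer bit is free because the tagged objects are
         * word-aligned, so it can distinguish leaves from internal nodes. */
        child = leaf_tag(&pg);
        printf("page %p tagged %p untagged %p\n", (void *)&pg, child,
            (void *)leaf_untag(child));
        return (0);
}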
231
232/*
231 * Initialize the radix node submap (for architectures not supporting
232 * direct-mapping) and the radix node zone.
233 *
234 * WITNESS reports a lock order reversal, for architectures not
235 * supporting direct-mapping, between the "system map" lock
236 * and the "vm object" lock. This is because the well established ordering
237 * "system map" -> "vm object" is not honoured in this case as allocating
238 * from the radix node submap ends up adding a mapping entry to it, meaning
239 * it is necessary to lock the submap. However, the radix node submap is
240 * a leaf and self-contained, thus a deadlock cannot happen here and
241 * adding MTX_NOWITNESS to all map locks would be largely sub-optimal.
233 * Returns the level where the two keys differ.
234 * It cannot accept two equal keys.
242 */
235 */
243void
244vm_radix_init(void)
236static __inline uint16_t
237vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
245{
238{
246#ifndef UMA_MD_SMALL_ALLOC
247 vm_offset_t maxaddr, minaddr;
239 uint16_t clev;
248
240
249 rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE;
250 TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale);
251 rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
252 rnode_map_scale * sizeof(struct vm_radix_node), FALSE);
253 rnode_map->system_map = 1;
254#endif
241 KASSERT(index1 != index2, ("%s: passing the same key value %jx",
242 __func__, (uintmax_t)index1));
255
243
256 vm_radix_node_zone = uma_zcreate("RADIX NODE",
257 sizeof(struct vm_radix_node), NULL,
258#ifdef INVARIANTS
259 vm_radix_node_zone_dtor,
260#else
261 NULL,
262#endif
263 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM);
244 index1 ^= index2;
245 for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++)
246 if (vm_radix_slot(index1, clev))
247 return (clev);
248 panic("%s: it might have not reached this point", __func__);
249 return (0);
264}
265
266/*
250}
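A worked example of the key-difference computation, as a standalone sketch with assumed 64-bit keys: 0x1234 and 0x1264 first select different slots at level 14, so a node separating them would be created with rn_clev equal to 14.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_pindex_t;
#define WIDTH   4
#define LIMIT   15
#define MASK    ((1 << WIDTH) - 1)

static int
slot(vm_pindex_t index, uint16_t level)
{

        return ((index >> ((LIMIT - level) * WIDTH)) & MASK);
}

/* Shallowest level at which two distinct keys pick different slots,
 * mirroring vm_radix_keydiff() above. */
static uint16_t
keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
        uint16_t clev;

        index1 ^= index2;
        for (clev = 0; clev <= LIMIT; clev++)
                if (slot(index1, clev) != 0)
                        return (clev);
        return (0);             /* not reached for distinct keys */
}

int
main(void)
{

        printf("keydiff(0x1234, 0x1264) = %u\n", keydiff(0x1234, 0x1264));
        return (0);
}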
251
252/*
267 * Extract the root node and height from a radix tree with a single load.
253 * Returns TRUE if it can be determined that key does not belong to the
254 * specified rnode. FALSE otherwise.
268 */
255 */
269static __inline int
270vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode)
256static __inline boolean_t
257vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
271{
258{
272 uintptr_t root;
273 int height;
274
259
275 root = rtree->rt_root;
276 height = root & VM_RADIX_HEIGHT;
277 *rnode = (struct vm_radix_node *)(root - height);
278 return (height);
260 if (rnode->rn_clev > 0) {
261 idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
262 idx -= rnode->rn_owner;
263 if (idx != 0)
264 return (TRUE);
265 }
266 return (FALSE);
279}
280
267}
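The same numbers make the key-barrier test concrete. In this hedged sketch, keybarr() is a simplified stand-in for vm_radix_keybarr(): a node owning the 0x12xx prefix at level 14 accepts 0x1299 but rejects 0x1334.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_pindex_t;
#define WIDTH   4
#define LIMIT   15

static vm_pindex_t
trimkey(vm_pindex_t index, uint16_t level)
{

        if (level < LIMIT) {
                index >>= (LIMIT - level) * WIDTH;
                index <<= (LIMIT - level) * WIDTH;
        }
        return (index);
}

/* Returns 1 if idx cannot live below a node with this owner/clev prefix,
 * following the same test as vm_radix_keybarr() above. */
static int
keybarr(vm_pindex_t owner, uint16_t clev, vm_pindex_t idx)
{

        if (clev > 0)
                return (trimkey(idx, clev - 1) != owner);
        return (0);
}

int
main(void)
{
        /* A node covering keys that share the 0x12xx prefix: level 14,
         * owner trimmed after level 13. */
        vm_pindex_t owner = trimkey(0x1234, 13);        /* 0x1200 */

        printf("0x1299 outside? %d\n", keybarr(owner, 14, 0x1299));     /* 0 */
        printf("0x1334 outside? %d\n", keybarr(owner, 14, 0x1334));     /* 1 */
        return (0);
}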
268
281
282/*
269/*
283 * Set the root node and height for a radix tree.
270 * Adjusts the idx key to the first upper level available, based on a valid
271 * initial level and map of available levels.
272 * Returns a value bigger than 0 to signal that there are no valid levels
273 * available.
284 */
274 */
285static inline void
286vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
287 int height)
275static __inline int
276vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
288{
277{
289 uintptr_t root;
278 vm_pindex_t wrapidx;
290
279
291 root = (uintptr_t)rnode | height;
292 rtree->rt_root = root;
280 for (; levels[ilev] == FALSE ||
281 vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
282 if (ilev == 0)
283 break;
284 KASSERT(ilev > 0 || levels[0] == TRUE,
285 ("%s: levels back-scanning problem", __func__));
286 if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
287 return (1);
288 wrapidx = *idx;
289 *idx = vm_radix_trimkey(*idx, ilev);
290 *idx += VM_RADIX_UNITLEVEL(ilev);
291 if (*idx < wrapidx)
292 return (1);
293 return (0);
293}
294
294}
295
295static inline vm_page_t
296vm_radix_match(void *child)
296/*
297 * Adjusts the idx key to the first lower level available, based on a valid
298 * initial level and map of available levels.
299 * Returns a value bigger than 0 to signal that there are no valid levels
300 * available.
301 */
302static __inline int
303vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
297{
304{
298 uintptr_t c;
305 vm_pindex_t wrapidx;
299
306
300 c = (uintptr_t)child;
301
302 return ((vm_page_t)(c & ~VM_RADIX_FLAGS));
307 for (; levels[ilev] == FALSE ||
308 vm_radix_slot(*idx, ilev) == 0; ilev--)
309 if (ilev == 0)
310 break;
311 KASSERT(ilev > 0 || levels[0] == TRUE,
312 ("%s: levels back-scanning problem", __func__));
313 if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
314 return (1);
315 wrapidx = *idx;
316 *idx = vm_radix_trimkey(*idx, ilev);
317 *idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
318 *idx -= VM_RADIX_UNITLEVEL(ilev);
319 if (*idx < wrapidx)
320 return (1);
321 return (0);
303}
304
322}
323
324/*
325 * Internal helper for the vm_radix_reclaim_allnodes() primitive.
326 * This function is recursive.
327 */
305static void
328static void
306vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
329vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
307{
308 int slot;
309
330{
331 int slot;
332
310 MPASS(rnode != NULL && level >= 0);
311
312 /*
313 * Level 0 just contains pages as children, thus make it a special
314 * case, free the node and return.
315 */
316 if (level == 0) {
317 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
318 rnode->rn_count = 0;
319 vm_radix_node_put(rnode);
320 return;
321 }
322 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
323 if (rnode->rn_child[slot] == NULL)
324 continue;
333 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
334 if (rnode->rn_child[slot] == NULL)
335 continue;
325 CTR3(KTR_VM,
326 "reclaiming: node %p, level %d recursing in slot %d",
327 rnode, level, slot);
328 vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot],
329 level - 1);
336 if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
337 vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
330 rnode->rn_count--;
331 }
338 rnode->rn_count--;
339 }
332 MPASS(rnode->rn_count == 0);
333 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
334 vm_radix_node_put(rnode);
335}
336
337/*
340 vm_radix_node_put(rnode);
341}
342
343/*
338 * Inserts the key-value pair into the radix tree. Returns errno.
344 * Returns the amount of memory needed to satisfy the requested node pre-allocation.
345 */
346size_t
347vm_radix_allocphys_size(size_t nitems)
348{
349
350 return (nitems * sizeof(struct vm_radix_node));
351}
352
353/*
354 * Pre-allocate intermediate nodes from the UMA slab zone.
355 */
356void
357vm_radix_init(void)
358{
359
360 vm_radix_node_zone = uma_zcreate("RADIX NODE",
361 sizeof(struct vm_radix_node), NULL,
362#ifdef INVARIANTS
363 vm_radix_node_zone_dtor,
364#else
365 NULL,
366#endif
367 NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE);
368 uma_prealloc(vm_radix_node_zone, vm_page_array_size);
369}
370
371/*
372 * Inserts the key-value pair into the trie.
339 * Panics if the key already exists.
340 */
341void
342vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
343{
373 * Panics if the key already exists.
374 */
375void
376vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
377{
344 struct vm_radix_node *rnode;
345 struct vm_radix_node *root;
346 int level;
378 vm_pindex_t newind;
379 struct vm_radix_node *rnode, *tmp, *tmp2;
380 vm_page_t m;
347 int slot;
381 int slot;
382 uint16_t clev;
348
383
349 CTR4(KTR_VM,
350 "insert: tree %p, " KFRMT64(index) ", page %p", rtree,
351 KSPLT64L(index), KSPLT64H(index), page);
352 if (index == -1)
353 panic("vm_radix_insert: -1 is not a valid index.\n");
354 level = vm_radix_height(rtree, &root);
355 /*
384 /*
356 * Increase the height by adding nodes at the root until
357 * there is sufficient space.
385 * The owner of record for root is not really important because it
386 * will never be used.
358 */
387 */
359 while (level == 0 || index > VM_RADIX_MAX(level)) {
360 CTR5(KTR_VM,
361 "insert: expanding " KFRMT64(index) ">" KFRMT64(mxl) ", height %d",
362 KSPLT64L(index), KSPLT64H(index),
363 KSPLT64L(VM_RADIX_MAX(level)),
364 KSPLT64H(VM_RADIX_MAX(level)), level);
365 level++;
366 KASSERT(level <= VM_RADIX_LIMIT,
367 ("vm_radix_insert: Tree %p height %d too tall",
368 rtree, level));
369 /*
370 * Only allocate tree nodes if they are needed.
371 */
372 if (root == NULL || root->rn_count != 0) {
373 rnode = vm_radix_node_get();
374 if (rnode == NULL) {
375 CTR5(KTR_VM,
376 "insert: tree %p, root %p, " KFRMT64(index) ", level %d ENOMEM",
377 rtree, root, KSPLT64L(index),
378 KSPLT64H(index), level);
379 panic("vm_radix_insert: failed allocation");
380 }
381 /*
382 * Store the new pointer with a memory barrier so
383 * that it is visible before the new root.
384 */
385 if (root) {
386 atomic_store_rel_ptr((volatile uintptr_t *)
387 &rnode->rn_child[0], (uintptr_t)root);
388 rnode->rn_count = 1;
389 }
390 root = rnode;
391 }
392 vm_radix_setroot(rtree, root, level);
388 rnode = vm_radix_getroot(rtree);
389 if (rnode == NULL) {
390 rnode = vm_radix_node_get(0, 1, 0);
391 vm_radix_setroot(rtree, rnode);
392 vm_radix_addpage(rnode, index, 0, page);
393 return;
393 }
394 }
394
395 /* Now that the tree is tall enough, fill in the path to the index. */
396 rnode = root;
397 for (level = level - 1; level > 0; level--) {
398 slot = vm_radix_slot(index, level);
399 /* Add the required intermediate nodes. */
395 while (rnode) {
396 if (vm_radix_keybarr(rnode, index) == TRUE)
397 break;
398 slot = vm_radix_slot(index, rnode->rn_clev);
399 m = vm_radix_node_page(rnode->rn_child[slot]);
400 if (m != NULL) {
401 if (m->pindex == index)
402 panic("%s: key %jx is already present",
403 __func__, (uintmax_t)index);
404 clev = vm_radix_keydiff(m->pindex, index);
405 tmp = vm_radix_node_get(vm_radix_trimkey(index,
406 clev - 1), 2, clev);
407 rnode->rn_child[slot] = tmp;
408 vm_radix_addpage(tmp, index, clev, page);
409 vm_radix_addpage(tmp, m->pindex, clev, m);
410 return;
411 }
400 if (rnode->rn_child[slot] == NULL) {
412 if (rnode->rn_child[slot] == NULL) {
401 rnode->rn_child[slot] = vm_radix_node_get();
402 if (rnode->rn_child[slot] == NULL) {
403 CTR6(KTR_VM,
404 "insert: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p ENOMEM",
405 rtree, KSPLT64L(index), KSPLT64H(index),
406 level, slot, rnode);
407 CTR4(KTR_VM,
408 "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
409 rtree, rnode, rnode->rn_child[slot],
410 rnode->rn_count);
411 panic("vm_radix_insert: failed allocation");
412 }
413 rnode->rn_count++;
413 rnode->rn_count++;
414 }
415 CTR6(KTR_VM,
416 "insert: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
417 rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
418 rnode);
419 CTR4(KTR_VM,
420 "insert: tree %p, rnode %p, child %p, count %u",
421 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
414 vm_radix_addpage(rnode, index, rnode->rn_clev, page);
415 return;
416 }
422 rnode = rnode->rn_child[slot];
423 }
417 rnode = rnode->rn_child[slot];
418 }
419 if (rnode == NULL)
420 panic("%s: path traversal ended unexpectedly", __func__);
424
421
425 slot = vm_radix_slot(index, 0);
426 MPASS(rnode != NULL);
427 KASSERT(rnode->rn_child[slot] == NULL,
428 ("vm_radix_insert: Duplicate value %p at index: %lu\n",
429 rnode->rn_child[slot], (u_long)index));
430 rnode->rn_child[slot] = page;
431 rnode->rn_count++;
432 CTR5(KTR_VM,
433 "insert: tree %p, " KFRMT64(index) ", level %d, slot %d",
434 rtree, KSPLT64L(index), KSPLT64H(index), level, slot);
435 CTR3(KTR_VM, "insert: slot %d, rnode %p, count %u", slot, rnode,
436 rnode->rn_count);
422 /*
423 * Scan the trie from the top and find the parent to insert
424 * the new object.
425 */
426 newind = rnode->rn_owner;
427 clev = vm_radix_keydiff(newind, index);
428 slot = VM_RADIX_COUNT;
429 for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
430 KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
431 __func__));
432 KASSERT(clev >= rnode->rn_clev,
433 ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
434 __func__, clev, rnode->rn_clev));
435 slot = vm_radix_slot(index, rnode->rn_clev);
436 tmp = rnode->rn_child[slot];
437 KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
438 ("%s: unexpected lookup interruption", __func__));
439 if (tmp->rn_clev > clev)
440 break;
441 }
442 KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
443 ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
444 __func__, (void *)rnode, (void *)tmp, slot));
445
446 /*
447 * A new node is needed because the right insertion level is reached.
448 * Set up the new intermediate node and add the two children: the
449 * new object and the older edge.
450 */
451 tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2,
452 clev);
453 rnode->rn_child[slot] = tmp2;
454 vm_radix_addpage(tmp2, index, clev, page);
455 slot = vm_radix_slot(newind, clev);
456 tmp2->rn_child[slot] = tmp;
437}
438
439/*
440 * Returns the value stored at the index. If the index is not present
441 * NULL is returned.
442 */
443vm_page_t
444vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
445{
446 struct vm_radix_node *rnode;
457}
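To make the interposition step concrete, the standalone sketch below (illustrative values only, not output from a run of the kernel code) computes what the new intermediate node looks like when a page at index 0x1264 is inserted while a leaf for 0x1234 already occupies the path: the node sits at the first level where the keys differ, owns their common prefix, and the two pages land in distinct child slots.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t k1 = 0x1234, k2 = 0x1264;      /* existing and new pindex */
        uint64_t diff = k1 ^ k2;
        unsigned clev, shift;

        /* First (shallowest) level whose nibble differs: keydiff(). */
        for (clev = 0; ((diff >> ((15 - clev) * 4)) & 0xf) == 0; clev++)
                ;
        shift = (15 - clev) * 4;
        /* clev is 14 here, so shift + 4 stays well below 64.  The node owns
         * the prefix trimmed at clev - 1 and each key selects its own slot. */
        printf("clev %u owner 0x%jx slots %ju/%ju\n", clev,
            (uintmax_t)(k1 >> (shift + 4) << (shift + 4)),
            (uintmax_t)((k1 >> shift) & 0xf), (uintmax_t)((k2 >> shift) & 0xf));
        return (0);
}

This prints clev 14, owner 0x1200 and slots 3/6, matching the vm_radix_node_get(vm_radix_trimkey(index, clev - 1), 2, clev) call in the code above.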
458
459/*
460 * Returns the value stored at the index. If the index is not present
461 * NULL is returned.
462 */
463vm_page_t
464vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
465{
466 struct vm_radix_node *rnode;
467 vm_page_t m;
447 int slot;
468 int slot;
448 int level;
449
469
450 level = vm_radix_height(rtree, &rnode);
451 if (index > VM_RADIX_MAX(level))
452 return NULL;
453 level--;
470 rnode = vm_radix_getroot(rtree);
454 while (rnode) {
471 while (rnode) {
455 slot = vm_radix_slot(index, level);
456 CTR6(KTR_VM,
457 "lookup: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
458 rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
459 rnode);
460 CTR2(KTR_VM, "lookup: rnode %p, child %p", rnode,
461 rnode->rn_child[slot]);
462 if (level == 0)
463 return vm_radix_match(rnode->rn_child[slot]);
472 if (vm_radix_keybarr(rnode, index) == TRUE)
473 return (NULL);
474 slot = vm_radix_slot(index, rnode->rn_clev);
464 rnode = rnode->rn_child[slot];
475 rnode = rnode->rn_child[slot];
465 level--;
476 m = vm_radix_node_page(rnode);
477 if (m != NULL) {
478 if (m->pindex == index)
479 return (m);
480 else
481 return (NULL);
482 }
466 }
483 }
467 CTR3(KTR_VM, "lookup: tree %p, " KFRMT64(index) " failed", rtree,
468 KSPLT64L(index), KSPLT64H(index));
469
470 return NULL;
484 return (NULL);
471}
472
473/*
485}
486
487/*
474 * Find the first leaf with a valid node between *startp and end. Return
475 * the index of the first valid item in the leaf in *startp.
488 * Look up any entry at a position bigger than or equal to index.
476 */
489 */
477static struct vm_radix_node *
478vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp)
490vm_page_t
491vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
479{
492{
480 struct vm_radix_node *rnode;
481 vm_pindex_t start;
482 vm_pindex_t inc;
493 vm_pindex_t inc;
494 vm_page_t m;
495 struct vm_radix_node *rnode;
483 int slot;
496 int slot;
484 int level;
497 uint16_t difflev;
498 boolean_t maplevels[VM_RADIX_LIMIT + 1];
499#ifdef INVARIANTS
500 int loops = 0;
501#endif
485
502
486 start = *startp;
487restart:
503restart:
488 level = vm_radix_height(rtree, &rnode);
489 if (start > VM_RADIX_MAX(level)) {
490 rnode = NULL;
491 goto out;
492 }
493 /*
494 * Search the tree from the top for any leaf node holding an index
495 * between start and maxval.
496 */
497 for (level--; level; level--) {
498 slot = vm_radix_slot(start, level);
499 CTR6(KTR_VM,
500 "leaf: tree %p, " KFRMT64(start) ", level %d, slot %d, rnode %p",
501 rtree, KSPLT64L(start), KSPLT64H(start), level, slot,
502 rnode);
503 CTR2(KTR_VM, "leaf: rnode %p, child %p", rnode,
504 rnode->rn_child[slot]);
505 if (rnode->rn_child[slot] != NULL) {
504 KASSERT(++loops < 1000, ("%s: too many loops", __func__));
505 for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
506 maplevels[difflev] = FALSE;
507 rnode = vm_radix_getroot(rtree);
508 while (rnode) {
509 maplevels[rnode->rn_clev] = TRUE;
510
511 /*
512 * If the keys differ before the current bisection node,
513 * the search key might roll back to the earliest
514 * available bisection node, or to the smaller value
515 * in the current domain (if the owner is bigger than the
516 * search key).
517 * The search for a valid bisection node is helped by
518 * the maplevels array, which should immediately yield
519 * a lower useful level, skipping holes.
520 */
521 if (vm_radix_keybarr(rnode, index) == TRUE) {
522 difflev = vm_radix_keydiff(index, rnode->rn_owner);
523 if (index > rnode->rn_owner) {
524 if (vm_radix_addlev(&index, maplevels,
525 difflev) > 0)
526 break;
527 } else
528 index = vm_radix_trimkey(rnode->rn_owner,
529 difflev);
530 goto restart;
531 }
532 slot = vm_radix_slot(index, rnode->rn_clev);
533 m = vm_radix_node_page(rnode->rn_child[slot]);
534 if (m != NULL && m->pindex >= index)
535 return (m);
536 if (rnode->rn_child[slot] != NULL && m == NULL) {
506 rnode = rnode->rn_child[slot];
507 continue;
508 }
537 rnode = rnode->rn_child[slot];
538 continue;
539 }
509 /*
510 * Calculate how much to increment our index by
511 * based on the tree level. We must truncate the
512 * lower bits to start from the beginning of the
513 * next leaf.
514 */
515 inc = 1LL << (level * VM_RADIX_WIDTH);
516 start &= ~VM_RADIX_MAX(level);
517
540
518 /* Avoid start address wrapping up. */
519 if ((VM_RADIX_MAXVAL - start) < inc) {
520 rnode = NULL;
521 goto out;
522 }
523 start += inc;
524 slot++;
525 CTR4(KTR_VM,
526 "leaf: " KFRMT64(start) ", " KFRMT64(inc),
527 KSPLT64L(start), KSPLT64H(start), KSPLT64L(inc),
528 KSPLT64H(inc));
529 CTR2(KTR_VM, "leaf: level %d, slot %d", level, slot);
530 for (; slot < VM_RADIX_COUNT; slot++, start += inc) {
531 if (rnode->rn_child[slot]) {
532 rnode = rnode->rn_child[slot];
533 break;
541 /*
542 * Look for an available edge or page within the current
543 * bisection node.
544 */
545 if (slot < (VM_RADIX_COUNT - 1)) {
546 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
547 index = vm_radix_trimkey(index, rnode->rn_clev);
548 index += inc;
549 slot++;
550 for (;; index += inc, slot++) {
551 m = vm_radix_node_page(rnode->rn_child[slot]);
552 if (m != NULL && m->pindex >= index)
553 return (m);
554 if ((rnode->rn_child[slot] != NULL &&
555 m == NULL) || slot == (VM_RADIX_COUNT - 1))
556 break;
534 }
557 }
535 if ((VM_RADIX_MAXVAL - start) < inc) {
536 rnode = NULL;
537 goto out;
538 }
539 }
558 }
540 if (slot == VM_RADIX_COUNT)
559
560 /*
561 * If a valid page or edge, bigger than the search slot, is
562 * found in the traversal, skip to the next higher-level key.
563 */
564 if (slot == (VM_RADIX_COUNT - 1) &&
565 (rnode->rn_child[slot] == NULL || m != NULL)) {
566 if (rnode->rn_clev == 0 || vm_radix_addlev(&index,
567 maplevels, rnode->rn_clev - 1) > 0)
568 break;
541 goto restart;
569 goto restart;
570 }
571 rnode = rnode->rn_child[slot];
542 }
572 }
543
544out:
545 *startp = start;
546 return (rnode);
547}
548
549/*
550 * Look up any entry at a position bigger than or equal to index.
551 */
552vm_page_t
553vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
554{
555 vm_page_t page;
556 struct vm_radix_node *rnode;
557 int slot;
558
559 CTR3(KTR_VM, "lookupn: tree %p, " KFRMT64(index), rtree,
560 KSPLT64L(index), KSPLT64H(index));
561 if (rtree->rt_root == 0)
562 return (NULL);
563 while ((rnode = vm_radix_leaf(rtree, &index)) != NULL) {
564 slot = vm_radix_slot(index, 0);
565 for (; slot < VM_RADIX_COUNT; slot++, index++) {
566 page = vm_radix_match(rnode->rn_child[slot]);
567 if (page == NULL) {
568
569 /*
570 * The index address can wrap at the
571 * VM_RADIX_MAXVAL value.
572 * We need to make sure that index address
573 * point to the next chunk (even if wrapping)
574 * to stay consistent with default scanning
575 * behaviour. Also, because of the nature
576 * of the wrapping, the wrap up checks must
577 * be done after all the necessary controls
578 * on index are completed.
579 */
580 if ((VM_RADIX_MAXVAL - index) == 0)
581 return (NULL);
582 continue;
583 }
584 CTR5(KTR_VM,
585 "lookupn: tree %p " KFRMT64(index) " slot %d found child %p",
586 rtree, KSPLT64L(index), KSPLT64H(index), slot,
587 page);
588 return (page);
589 }
590 MPASS((VM_RADIX_MAXVAL - index) != 0);
591 }
592 return (NULL);
593}
594
595/*
596 * Look up any entry at a position less than or equal to index.
597 */
598vm_page_t
599vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
600{
573 return (NULL);
574}
575
576/*
577 * Look up any entry at a position less than or equal to index.
578 */
579vm_page_t
580vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
581{
601 struct vm_radix_node *rnode;
602 struct vm_radix_node *child;
603 vm_pindex_t max;
604 vm_pindex_t inc;
582 vm_pindex_t inc;
605 vm_page_t page;
583 vm_page_t m;
584 struct vm_radix_node *rnode;
606 int slot;
585 int slot;
607 int level;
586 uint16_t difflev;
587 boolean_t maplevels[VM_RADIX_LIMIT + 1];
588#ifdef INVARIANTS
589 int loops = 0;
590#endif
608
591
609 CTR3(KTR_VM, "lookup_le: tree %p, " KFRMT64(index), rtree,
610 KSPLT64L(index), KSPLT64H(index));
611restart:
592restart:
612 level = vm_radix_height(rtree, &rnode);
613 if (rnode == NULL)
614 return (NULL);
615 max = VM_RADIX_MAX(level);
616 if (index > max || index == 0)
617 index = max;
618 /*
619 * Search the tree from the top for any leaf node holding an index
620 * lower than 'index'.
621 */
622 level--;
593 KASSERT(++loops < 1000, ("%s: too many loops", __func__));
594 for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
595 maplevels[difflev] = FALSE;
596 rnode = vm_radix_getroot(rtree);
623 while (rnode) {
597 while (rnode) {
624 slot = vm_radix_slot(index, level);
625 CTR6(KTR_VM,
626 "lookup_le: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
627 rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
628 rnode);
629 CTR2(KTR_VM, "lookup_le: rnode %p, child %p", rnode,
630 rnode->rn_child[slot]);
631 if (level == 0)
632 break;
598 maplevels[rnode->rn_clev] = TRUE;
599
633 /*
600 /*
634 * If we don't have an exact match we must start our search
635 * from the next leaf and adjust our index appropriately.
601 * If the keys differ before the current bisection node,
602 * the search key might roll back to the earliest
603 * available bisection node, or to the higher value
604 * in the current domain (if the owner is smaller than the
605 * search key).
606 * The search for a valid bisection node is helped by
607 * the maplevels array, which should immediately yield
608 * a lower useful level, skipping holes.
636 */
609 */
637 if ((child = rnode->rn_child[slot]) == NULL) {
638 /*
639 * Calculate how much to decrement our index by
640 * based on the tree level. We must set the
641 * lower bits to start from the end of the next
642 * leaf.
643 */
644 inc = 1LL << (level * VM_RADIX_WIDTH);
645 index |= VM_RADIX_MAX(level);
610 if (vm_radix_keybarr(rnode, index) == TRUE) {
611 difflev = vm_radix_keydiff(index, rnode->rn_owner);
612 if (index > rnode->rn_owner) {
613 index = vm_radix_trimkey(rnode->rn_owner,
614 difflev);
615 index |= VM_RADIX_UNITLEVEL(difflev) - 1;
616 } else if (vm_radix_declev(&index, maplevels,
617 difflev) > 0)
618 break;
619 goto restart;
620 }
621 slot = vm_radix_slot(index, rnode->rn_clev);
622 m = vm_radix_node_page(rnode->rn_child[slot]);
623 if (m != NULL && m->pindex <= index)
624 return (m);
625 if (rnode->rn_child[slot] != NULL && m == NULL) {
626 rnode = rnode->rn_child[slot];
627 continue;
628 }
629
630 /*
631 * Look for an available edge or page within the current
632 * bisection node.
633 */
634 if (slot > 0) {
635 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
636 index = vm_radix_trimkey(index, rnode->rn_clev);
637 index |= inc - 1;
646 index -= inc;
647 slot--;
638 index -= inc;
639 slot--;
648 CTR6(KTR_VM,
649 "lookup_le: " KFRMT64(start) ", " KFRMT64(inc) ", level %d slot %d",
650 KSPLT64L(index), KSPLT64H(index), KSPLT64L(inc),
651 KSPLT64H(inc), level, slot);
652 for (; slot >= 0; slot--, index -= inc) {
653 child = rnode->rn_child[slot];
654 if (child)
640 for (;; index -= inc, slot--) {
641 m = vm_radix_node_page(rnode->rn_child[slot]);
642 if (m != NULL && m->pindex <= index)
643 return (m);
644 if ((rnode->rn_child[slot] != NULL &&
645 m == NULL) || slot == 0)
655 break;
656 }
657 }
646 break;
647 }
648 }
658 rnode = child;
659 level--;
660 }
661 if (rnode) {
662 for (; slot >= 0; slot--, index--) {
663 page = vm_radix_match(rnode->rn_child[slot]);
664 if (page)
665 return (page);
649
650 /*
651 * If a valid page or edge, smaller than the search slot, is
652 * found in the traversal, skip to the next higher-level key.
653 */
654 if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
655 if (rnode->rn_clev == 0 || vm_radix_declev(&index,
656 maplevels, rnode->rn_clev - 1) > 0)
657 break;
658 goto restart;
666 }
659 }
660 rnode = rnode->rn_child[slot];
667 }
661 }
668 if (index != -1)
669 goto restart;
670 return (NULL);
671}
672
673/*
662 return (NULL);
663}
664
665/*
674 * Remove the specified index from the tree. If possible the height of the
675 * tree is adjusted after deletion. Panics if the key is not
676 * present.
666 * Remove the specified index from the tree.
667 * Panics if the key is not present.
677 */
678void
679vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
680{
668 */
669void
670vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
671{
681 struct vm_radix_node *stack[VM_RADIX_LIMIT];
682 struct vm_radix_node *rnode, *root;
683 int level;
684 int slot;
672 struct vm_radix_node *rnode, *parent;
673 vm_page_t m;
674 int i, slot;
685
675
686 level = vm_radix_height(rtree, &root);
687 KASSERT(index <= VM_RADIX_MAX(level),
688 ("vm_radix_remove: %p index out of range %jd.", rtree,
689 VM_RADIX_MAX(level)));
690 rnode = root;
691 level--;
692 /*
693 * Find the node and record the path in stack.
694 */
695 while (level && rnode) {
696 stack[level] = rnode;
697 slot = vm_radix_slot(index, level);
698 CTR6(KTR_VM,
699 "remove: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
700 rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
701 rnode);
702 CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u",
703 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
704 rnode = rnode->rn_child[slot];
705 level--;
706 }
707 KASSERT(rnode != NULL,
708 ("vm_radix_remove: index not present in the tree.\n"));
709 slot = vm_radix_slot(index, 0);
710 KASSERT(vm_radix_match(rnode->rn_child[slot]) != NULL,
711 ("vm_radix_remove: index not present in the tree.\n"));
712
676 parent = NULL;
677 rnode = vm_radix_getroot(rtree);
713 for (;;) {
678 for (;;) {
714 CTR6(KTR_VM,
715"remove: resetting tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
716 rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
717 rnode);
718 CTR4(KTR_VM,
719 "remove: resetting tree %p, rnode %p, child %p, count %u",
720 rtree, rnode,
721 (rnode != NULL) ? rnode->rn_child[slot] : NULL,
722 (rnode != NULL) ? rnode->rn_count : 0);
723 rnode->rn_child[slot] = NULL;
724 /*
725 * Use a write memory barrier here in order to avoid
726 * rn_count reaching 0 before the actual pointer is fetched.
727 * Concurrent node removal, in fact, may want to reclaim
728 * the radix node itself before reading it.
729 */
730 rnode->rn_count--;
731 wmb();
732 if (rnode->rn_count > 0)
679 if (rnode == NULL)
680 panic("vm_radix_remove: impossible to locate the key");
681 slot = vm_radix_slot(index, rnode->rn_clev);
682 m = vm_radix_node_page(rnode->rn_child[slot]);
683 if (m != NULL && m->pindex == index) {
684 rnode->rn_child[slot] = NULL;
685 rnode->rn_count--;
686 if (rnode->rn_count > 1)
687 break;
688 if (parent == NULL) {
689 if (rnode->rn_count == 0) {
690 vm_radix_node_put(rnode);
691 vm_radix_setroot(rtree, NULL);
692 }
693 break;
694 }
695 for (i = 0; i < VM_RADIX_COUNT; i++)
696 if (rnode->rn_child[i] != NULL)
697 break;
698 KASSERT(i != VM_RADIX_COUNT,
699 ("%s: invalid node configuration", __func__));
700 slot = vm_radix_slot(index, parent->rn_clev);
701 KASSERT(parent->rn_child[slot] == rnode,
702 ("%s: invalid child value", __func__));
703 parent->rn_child[slot] = rnode->rn_child[i];
704 rnode->rn_count--;
705 rnode->rn_child[i] = NULL;
706 vm_radix_node_put(rnode);
733 break;
707 break;
734 vm_radix_node_put(rnode);
735 if (rnode == root) {
736 vm_radix_setroot(rtree, NULL, 0);
737 break;
738 }
708 }
739 rnode = stack[++level];
740 slot = vm_radix_slot(index, level);
741
709 if (m != NULL && m->pindex != index)
710 panic("%s: invalid key found", __func__);
711 parent = rnode;
712 rnode = rnode->rn_child[slot];
742 }
743}
744
745/*
746 * Remove and free all the nodes from the radix tree.
747 * This function is recursive but there is a tight control on it as the
748 * maximum depth of the tree is fixed.
749 */
750void
751vm_radix_reclaim_allnodes(struct vm_radix *rtree)
752{
753 struct vm_radix_node *root;
713 }
714}
715
716/*
717 * Remove and free all the nodes from the radix tree.
718 * This function is recursive but there is a tight control on it as the
719 * maximum depth of the tree is fixed.
720 */
721void
722vm_radix_reclaim_allnodes(struct vm_radix *rtree)
723{
724 struct vm_radix_node *root;
754 int level;
755
725
756 if (rtree->rt_root == 0)
726 root = vm_radix_getroot(rtree);
727 if (root == NULL)
757 return;
728 return;
758 level = vm_radix_height(rtree, &root);
759 vm_radix_reclaim_allnodes_internal(root, level - 1);
760 rtree->rt_root = 0;
729 vm_radix_reclaim_allnodes_int(root);
730 vm_radix_setroot(rtree, NULL);
761}
731}
732
733#ifdef DDB
734/*
735 * Show details about the given radix node.
736 */
737DB_SHOW_COMMAND(radixnode, db_show_radixnode)
738{
739 struct vm_radix_node *rnode;
740 int i;
741
742 if (!have_addr)
743 return;
744 rnode = (struct vm_radix_node *)addr;
745 db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
746 (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
747 rnode->rn_clev);
748 for (i = 0; i < VM_RADIX_COUNT; i++)
749 if (rnode->rn_child[i] != NULL)
750 db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
751 i, (void *)rnode->rn_child[i],
752 (void *)vm_radix_node_page(rnode->rn_child[i]),
753 rnode->rn_clev);
754}
755#endif /* DDB */