/*
 * NOTE(review): diff-viewer residue — this text is a side-by-side diff of
 * vm_radix.c between revisions 232631 (old) and 233034 (new), not a clean
 * source file.  Each line below carries a fused diff line number.
 */
1/*
2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29
30/*
31 * Radix tree implementation.
32 */
33
34#include <sys/cdefs.h>
35
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/ktr.h>
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/vm_extern.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_page.h>
1/*
2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29
30/*
31 * Radix tree implementation.
32 */
33
34#include <sys/cdefs.h>
35
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/ktr.h>
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/vm_extern.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_page.h>
52#ifndef UMA_MD_SMALL_ALLOC
53#include <vm/vm_map.h>
54#endif
52#include <vm/vm_radix.h>
53#include <vm/vm_object.h>
54
55#include <sys/kdb.h>
56
55#include <vm/vm_radix.h>
56#include <vm/vm_object.h>
57
58#include <sys/kdb.h>
59
60#ifndef UMA_MD_SMALL_ALLOC
61#define VM_RADIX_RNODE_MAP_SCALE (1024 * 1024 / 2)
62#define VM_RADIX_WIDTH 4
63
64/*
65 * Bits of height in root.
66 * The mask of smaller power of 2 containing VM_RADIX_LIMIT.
67 */
68#define VM_RADIX_HEIGHT 0x1f
69#else
57#define VM_RADIX_WIDTH 5
70#define VM_RADIX_WIDTH 5
71
72/* See the comment above. */
73#define VM_RADIX_HEIGHT 0xf
74#endif
75
58#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
59#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
60#define VM_RADIX_MAXVAL ((vm_pindex_t)-1)
61#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH)
62
63/* Flag bits stored in node pointers. */
64#define VM_RADIX_FLAGS 0x3
65
76#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
77#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
78#define VM_RADIX_MAXVAL ((vm_pindex_t)-1)
79#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH)
80
81/* Flag bits stored in node pointers. */
82#define VM_RADIX_FLAGS 0x3
83
66/* Bits of height in root. */
67#define VM_RADIX_HEIGHT 0xf
68
69/* Calculates maximum value for a tree of height h. */
70#define VM_RADIX_MAX(h) \
71 ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \
72 (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1))
73
74CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT);
75CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
76
77struct vm_radix_node {
78 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
79 volatile uint32_t rn_count; /* Valid children. */
80};
81
82CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
83
84static uma_zone_t vm_radix_node_zone;
85
86#ifndef UMA_MD_SMALL_ALLOC
84/* Calculates maximum value for a tree of height h. */
85#define VM_RADIX_MAX(h) \
86 ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \
87 (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1))
88
89CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT);
90CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
91
92struct vm_radix_node {
93 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
94 volatile uint32_t rn_count; /* Valid children. */
95};
96
97CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
98
99static uma_zone_t vm_radix_node_zone;
100
101#ifndef UMA_MD_SMALL_ALLOC
102static vm_map_t rnode_map;
103static u_long rnode_map_scale;
104
87static void *
88vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
89{
90 vm_offset_t addr;
91 vm_page_t m;
92 int pflags;
93
105static void *
106vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
107{
108 vm_offset_t addr;
109 vm_page_t m;
110 int pflags;
111
94 /* Inform UMA that this allocator uses kernel_map. */
112 /* Inform UMA that this allocator uses rnode_map. */
95 *flags = UMA_SLAB_KERNEL;
96
97 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
98
99 /*
100 * As kmem_alloc_nofault() can however fail, let just assume that
101 * M_NOWAIT is on and act accordingly.
102 */
103 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
104 VM_ALLOC_SYSTEM;
105 if ((wait & M_ZERO) != 0)
106 pflags |= VM_ALLOC_ZERO;
113 *flags = UMA_SLAB_KERNEL;
114
115 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
116
117 /*
118 * As kmem_alloc_nofault() can however fail, let just assume that
119 * M_NOWAIT is on and act accordingly.
120 */
121 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
122 VM_ALLOC_SYSTEM;
123 if ((wait & M_ZERO) != 0)
124 pflags |= VM_ALLOC_ZERO;
107 addr = kmem_alloc_nofault(kernel_map, size);
125 addr = kmem_alloc_nofault(rnode_map, size);
108 if (addr == 0)
109 return (NULL);
110
111 /* Just one page allocation is assumed here. */
112 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
113 pflags);
114 if (m == NULL) {
126 if (addr == 0)
127 return (NULL);
128
129 /* Just one page allocation is assumed here. */
130 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
131 pflags);
132 if (m == NULL) {
115 kmem_free(kernel_map, addr, size);
133 kmem_free(rnode_map, addr, size);
116 return (NULL);
117 }
118 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
119 pmap_zero_page(m);
120 pmap_qenter(addr, &m, 1);
121 return ((void *)addr);
122}
123
124static void
125vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
126{
127 vm_page_t m;
128 vm_offset_t voitem;
129
130 MPASS((flags & UMA_SLAB_KERNEL) != 0);
131
132 /* Just one page allocation is assumed here. */
133 voitem = (vm_offset_t)item;
134 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
135 pmap_qremove(voitem, 1);
134 return (NULL);
135 }
136 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
137 pmap_zero_page(m);
138 pmap_qenter(addr, &m, 1);
139 return ((void *)addr);
140}
141
142static void
143vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
144{
145 vm_page_t m;
146 vm_offset_t voitem;
147
148 MPASS((flags & UMA_SLAB_KERNEL) != 0);
149
150 /* Just one page allocation is assumed here. */
151 voitem = (vm_offset_t)item;
152 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
153 pmap_qremove(voitem, 1);
154 vm_page_lock(m);
155 vm_page_unwire(m, 0);
136 vm_page_free(m);
156 vm_page_free(m);
137 kmem_free(kernel_map, voitem, size);
157 vm_page_unlock(m);
158 kmem_free(rnode_map, voitem, size);
138}
139
140static void
141init_vm_radix_alloc(void *dummy __unused)
142{
143
159}
160
161static void
162init_vm_radix_alloc(void *dummy __unused)
163{
164
165 uma_zone_set_max(vm_radix_node_zone, rnode_map_scale);
144 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
145 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
146}
147SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
148#endif
149
/*
 * Radix node zone destructor.  Under INVARIANTS, assert that a node being
 * returned to the zone has no remaining children.
 */
#ifdef INVARIANTS
static void
vm_radix_node_zone_dtor(void *mem, int size, void *arg)
{
	struct vm_radix_node *rnode;

	rnode = mem;
	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: Freeing a node with %d children\n",
	    rnode->rn_count));
}
#endif
165
166/*
167 * Allocate a radix node. Initializes all elements to 0.
168 */
169static __inline struct vm_radix_node *
170vm_radix_node_get(void)
171{
172
173 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
174}
175
176/*
177 * Free radix node.
178 */
179static __inline void
180vm_radix_node_put(struct vm_radix_node *rnode)
181{
182
183 uma_zfree(vm_radix_node_zone, rnode);
184}
185
186/*
187 * Return the position in the array for a given level.
188 */
189static __inline int
190vm_radix_slot(vm_pindex_t index, int level)
191{
192
193 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
194}
195
166 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
167 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
168}
169SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
170#endif
171
172/*
173 * Radix node zone destructor.
174 */
175#ifdef INVARIANTS
176static void
177vm_radix_node_zone_dtor(void *mem, int size, void *arg)
178{
179 struct vm_radix_node *rnode;
180
181 rnode = mem;
182 KASSERT(rnode->rn_count == 0,
183 ("vm_radix_node_put: Freeing a node with %d children\n",
184 rnode->rn_count));
185}
186#endif
187
188/*
189 * Allocate a radix node. Initializes all elements to 0.
190 */
191static __inline struct vm_radix_node *
192vm_radix_node_get(void)
193{
194
195 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
196}
197
198/*
199 * Free radix node.
200 */
201static __inline void
202vm_radix_node_put(struct vm_radix_node *rnode)
203{
204
205 uma_zfree(vm_radix_node_zone, rnode);
206}
207
208/*
209 * Return the position in the array for a given level.
210 */
211static __inline int
212vm_radix_slot(vm_pindex_t index, int level)
213{
214
215 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
216}
217
218/*
219 * Initialize the radix node submap (for architectures not supporting
220 * direct-mapping) and the radix node zone.
221 *
222 * WITNESS reports a lock order reversal, for architectures not
223 * supporting direct-mapping, between the "system map" lock
224 * and the "vm object" lock. This is because the well established ordering
225 * "system map" -> "vm object" is not honoured in this case as allocating
226 * from the radix node submap ends up adding a mapping entry to it, meaning
227 * it is necessary to lock the submap. However, the radix node submap is
228 * a leaf and self-contained, thus a deadlock cannot happen here and
229 * adding MTX_NOWITNESS to all map locks would be largerly sub-optimal.
230 */
196void
197vm_radix_init(void)
198{
231void
232vm_radix_init(void)
233{
234#ifndef UMA_MD_SMALL_ALLOC
235 vm_offset_t maxaddr, minaddr;
199
236
237 rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE;
238 TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale);
239 rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
240 rnode_map_scale * sizeof(struct vm_radix_node), FALSE);
241 rnode_map->system_map = 1;
242#endif
243
200 vm_radix_node_zone = uma_zcreate("RADIX NODE",
201 sizeof(struct vm_radix_node), NULL,
202#ifdef INVARIANTS
203 vm_radix_node_zone_dtor,
204#else
205 NULL,
206#endif
207 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM);
208}
209
210/*
211 * Extract the root node and height from a radix tree with a single load.
212 */
213static __inline int
214vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode)
215{
216 uintptr_t root;
217 int height;
218
219 root = rtree->rt_root;
220 height = root & VM_RADIX_HEIGHT;
221 *rnode = (struct vm_radix_node *)(root - height);
222 return (height);
223}
224
225
226/*
227 * Set the root node and height for a radix tree.
228 */
229static inline void
230vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
231 int height)
232{
233 uintptr_t root;
234
235 root = (uintptr_t)rnode | height;
236 rtree->rt_root = root;
237}
238
239static inline void
240vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
241 struct vm_radix_node *iroot, int ilevel)
242{
243 struct vm_radix_node *rnode;
244
245 CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
246 rtree, root, iroot, ilevel);
247 while (iroot != root && root != NULL) {
248 rnode = root;
249 MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
250 rnode->rn_count = 0;
251 root = rnode->rn_child[0];
252 vm_radix_node_put(rnode);
253 }
254 vm_radix_setroot(rtree, iroot, ilevel);
255}
256
257static inline void *
258vm_radix_match(void *child, int color)
259{
260 uintptr_t c;
261
262 c = (uintptr_t)child;
263
264 if ((c & color) == 0)
265 return (NULL);
266 return ((void *)(c & ~VM_RADIX_FLAGS));
267}
268
269static void
270vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
271{
272 int slot;
273
274 MPASS(rnode != NULL && level >= 0);
275
276 /*
277 * Level 0 just contains pages as children, thus make it a special
278 * case, free the node and return.
279 */
280 if (level == 0) {
281 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
282 rnode->rn_count = 0;
283 vm_radix_node_put(rnode);
284 return;
285 }
286 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
287 if (rnode->rn_child[slot] == NULL)
288 continue;
289 CTR3(KTR_VM,
290 "reclaiming: node %p, level %d recursing in slot %d",
291 rnode, level, slot);
292 vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot],
293 level - 1);
294 rnode->rn_count--;
295 }
296 MPASS(rnode->rn_count == 0);
297 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
298 vm_radix_node_put(rnode);
299}
300
301/*
302 * Inserts the key-value pair in to the radix tree. Returns errno.
303 * Panics if the key already exists.
304 */
305int
306vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
307{
308 struct vm_radix_node *iroot, *rnode, *root;
309 u_int allocmsk;
310 int clev, ilevel, level, slot;
311
312 CTR3(KTR_VM,
313 "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
314 if (index == -1)
315 panic("vm_radix_insert: -1 is not a valid index.\n");
316 level = vm_radix_height(rtree, &root);
317 /*
318 * Increase the height by adding nodes at the root until
319 * there is sufficient space.
320 */
321 ilevel = level;
322 iroot = root;
323 while (level == 0 || index > VM_RADIX_MAX(level)) {
324 CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
325 (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
326 level++;
327 KASSERT(level <= VM_RADIX_LIMIT,
328 ("vm_radix_insert: Tree %p height %d too tall",
329 rtree, level));
330 /*
331 * Only allocate tree nodes if they are needed.
332 */
333 if (root == NULL || root->rn_count != 0) {
334 rnode = vm_radix_node_get();
335 if (rnode == NULL) {
336 CTR4(KTR_VM,
337 "insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
338 rtree, root, (uintmax_t)index, level);
339 vm_radix_unwind_heightup(rtree, root, iroot,
340 ilevel);
341 return (ENOMEM);
342 }
343 /*
344 * Store the new pointer with a memory barrier so
345 * that it is visible before the new root.
346 */
347 if (root) {
348 atomic_store_rel_ptr((volatile uintptr_t *)
349 &rnode->rn_child[0], (uintptr_t)root);
350 rnode->rn_count = 1;
351 }
352 root = rnode;
353 }
354 vm_radix_setroot(rtree, root, level);
355 }
356
357 /* Now that the tree is tall enough, fill in the path to the index. */
358 allocmsk = 0;
359 clev = level;
360 rnode = root;
361 for (level = level - 1; level > 0; level--) {
362 slot = vm_radix_slot(index, level);
363 /* Add the required intermidiate nodes. */
364 if (rnode->rn_child[slot] == NULL) {
365 rnode->rn_child[slot] = vm_radix_node_get();
366 if (rnode->rn_child[slot] == NULL) {
367 CTR5(KTR_VM,
368 "insert: tree %p, index %ju, level %d, slot %d, rnode %p ENOMEM",
369 rtree, (uintmax_t)index, level, slot,
370 rnode);
371 CTR4(KTR_VM,
372 "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
373 rtree, rnode, rnode->rn_child[slot],
374 rnode->rn_count);
375 MPASS(level != clev || allocmsk == 0);
376 while (allocmsk != 0) {
377 rnode = root;
378 level = clev;
379 level--;
380 slot = vm_radix_slot(index, level);
381 CTR4(KTR_VM,
382 "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
383 root, level, slot, allocmsk);
384 MPASS(level >= (ffs(allocmsk) - 1));
385 while (level > (ffs(allocmsk) - 1)) {
386 MPASS(level > 0);
387 slot = vm_radix_slot(index,
388 level);
389 rnode = rnode->rn_child[slot];
390 level--;
391 }
392 MPASS((allocmsk & (1 << level)) != 0);
393 allocmsk &= ~(1 << level);
394 rnode->rn_count--;
395 vm_radix_node_put(rnode->rn_child[slot]);
396 rnode->rn_child[slot] = NULL;
397 }
398 vm_radix_unwind_heightup(rtree, root, iroot,
399 ilevel);
400 return (ENOMEM);
401 }
402 rnode->rn_count++;
403 allocmsk |= (1 << level);
404 }
405 CTR5(KTR_VM,
406 "insert: tree %p, index %ju, level %d, slot %d, rnode %p",
407 rtree, (uintmax_t)index, level, slot, rnode);
408 CTR4(KTR_VM,
409 "insert: tree %p, rnode %p, child %p, count %u",
410 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
411 rnode = rnode->rn_child[slot];
412 }
413
414 slot = vm_radix_slot(index, 0);
415 MPASS(rnode != NULL);
416 KASSERT(rnode->rn_child[slot] == NULL,
417 ("vm_radix_insert: Duplicate value %p at index: %lu\n",
418 rnode->rn_child[slot], (u_long)index));
419 val = (void *)((uintptr_t)val | VM_RADIX_BLACK);
420 rnode->rn_child[slot] = val;
421 atomic_add_32(&rnode->rn_count, 1);
422 CTR6(KTR_VM,
423 "insert: tree %p, index %ju, level %d, slot %d, rnode %p, count %u",
424 rtree, (uintmax_t)index, level, slot, rnode, rnode->rn_count);
425
426 return 0;
427}
428
429/*
430 * Returns the value stored at the index. If the index is not present
431 * NULL is returned.
432 */
433void *
434vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color)
435{
436 struct vm_radix_node *rnode;
437 int slot;
438 int level;
439
440 level = vm_radix_height(rtree, &rnode);
441 if (index > VM_RADIX_MAX(level))
442 return NULL;
443 level--;
444 while (rnode) {
445 slot = vm_radix_slot(index, level);
446 CTR6(KTR_VM,
447 "lookup: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
448 rtree, (uintmax_t)index, level, slot, rnode,
449 rnode->rn_child[slot]);
450 if (level == 0)
451 return vm_radix_match(rnode->rn_child[slot], color);
452 rnode = rnode->rn_child[slot];
453 level--;
454 }
455 CTR2(KTR_VM, "lookup: tree %p, index %ju failed", rtree,
456 (uintmax_t)index);
457
458 return NULL;
459}
460
461void *
462vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color)
463{
464 struct vm_radix_node *rnode;
465 uintptr_t child;
466 int slot;
467 int level;
468
469 level = vm_radix_height(rtree, &rnode);
470 if (index > VM_RADIX_MAX(level))
471 return NULL;
472 level--;
473 while (rnode) {
474 slot = vm_radix_slot(index, level);
475 CTR6(KTR_VM,
476 "color: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
477 rtree, (uintmax_t)index, level, slot, rnode,
478 rnode->rn_child[slot]);
479 if (level == 0)
480 break;
481 rnode = rnode->rn_child[slot];
482 level--;
483 }
484 if (rnode == NULL || rnode->rn_child[slot] == NULL)
485 return (NULL);
486 child = (uintptr_t)rnode->rn_child[slot];
487 child &= ~VM_RADIX_FLAGS;
488 rnode->rn_child[slot] = (void *)(child | color);
489
490 return (void *)child;
491}
492
493/*
494 * Find the first leaf with a valid node between *startp and end. Return
495 * the index of the first valid item in the leaf in *startp.
496 */
497static struct vm_radix_node *
498vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end)
499{
500 struct vm_radix_node *rnode;
501 vm_pindex_t start;
502 vm_pindex_t inc;
503 int slot;
504 int level;
505
506 start = *startp;
507restart:
508 level = vm_radix_height(rtree, &rnode);
509 if (start > VM_RADIX_MAX(level) || (end && start >= end)) {
510 rnode = NULL;
511 goto out;
512 }
513 /*
514 * Search the tree from the top for any leaf node holding an index
515 * between start and end.
516 */
517 for (level--; level; level--) {
518 slot = vm_radix_slot(start, level);
519 CTR6(KTR_VM,
520 "leaf: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
521 rtree, (uintmax_t)start, level, slot, rnode,
522 (rnode != NULL) ? rnode->rn_child[slot] : NULL);
523 if (rnode->rn_child[slot] != NULL) {
524 rnode = rnode->rn_child[slot];
525 continue;
526 }
527 /*
528 * Calculate how much to increment our index by
529 * based on the tree level. We must truncate the
530 * lower bits to start from the begnning of the
531 * next leaf.
532 */
533 inc = 1LL << (level * VM_RADIX_WIDTH);
534 start &= ~VM_RADIX_MAX(level);
535
536 /* Avoid start address wrapping up. */
537 if ((VM_RADIX_MAXVAL - start) < inc) {
538 rnode = NULL;
539 goto out;
540 }
541 start += inc;
542 slot++;
543 CTR5(KTR_VM,
544 "leaf: start %ju end %ju inc %ju mask 0x%jX slot %d",
545 (uintmax_t)start, (uintmax_t)end, (uintmax_t)inc,
546 (uintmax_t)~VM_RADIX_MAX(level), slot);
547 for (; slot < VM_RADIX_COUNT; slot++, start += inc) {
548 if (end != 0 && start >= end) {
549 rnode = NULL;
550 goto out;
551 }
552 if (rnode->rn_child[slot]) {
553 rnode = rnode->rn_child[slot];
554 break;
555 }
556 if ((VM_RADIX_MAXVAL - start) < inc) {
557 rnode = NULL;
558 goto out;
559 }
560 }
561 if (slot == VM_RADIX_COUNT)
562 goto restart;
563 }
564
565out:
566 *startp = start;
567 return (rnode);
568}
569
570
571
572/*
573 * Looks up as many as cnt values between start and end, and stores
574 * them in the caller allocated array out. The next index can be used
575 * to restart the scan. This optimizes forward scans in the tree.
576 */
577int
578vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
579 vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next)
580{
581 struct vm_radix_node *rnode;
582 void *val;
583 int slot;
584 int outidx;
585
586 CTR3(KTR_VM, "lookupn: tree %p, start %ju, end %ju",
587 rtree, (uintmax_t)start, (uintmax_t)end);
588 if (rtree->rt_root == 0)
589 return (0);
590 outidx = 0;
591 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
592 slot = vm_radix_slot(start, 0);
593 for (; slot < VM_RADIX_COUNT; slot++, start++) {
594 if (end != 0 && start >= end)
595 goto out;
596 val = vm_radix_match(rnode->rn_child[slot], color);
597 if (val == NULL)
598 continue;
599 CTR4(KTR_VM,
600 "lookupn: tree %p index %ju slot %d found child %p",
601 rtree, (uintmax_t)start, slot, val);
602 out[outidx] = val;
603 if (++outidx == cnt)
604 goto out;
605 }
606 if (end != 0 && start >= end)
607 break;
608 }
609out:
610 *next = start;
611 return (outidx);
612}
613
614void
615vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end,
616 int color, void (*iter)(void *))
617{
618 struct vm_radix_node *rnode;
619 void *val;
620 int slot;
621
622 if (rtree->rt_root == 0)
623 return;
624 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
625 slot = vm_radix_slot(start, 0);
626 for (; slot < VM_RADIX_COUNT; slot++, start++) {
627 if (end != 0 && start >= end)
628 return;
629 val = vm_radix_match(rnode->rn_child[slot], color);
630 if (val)
631 iter(val);
632 }
633 if (end != 0 && start >= end)
634 return;
635 }
636}
637
638
639/*
640 * Look up any entry at a position less than or equal to index.
641 */
642void *
643vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color)
644{
645 struct vm_radix_node *rnode;
646 struct vm_radix_node *child;
647 vm_pindex_t max;
648 vm_pindex_t inc;
649 void *val;
650 int slot;
651 int level;
652
653 CTR2(KTR_VM,
654 "lookup_le: tree %p, index %ju", rtree, (uintmax_t)index);
655restart:
656 level = vm_radix_height(rtree, &rnode);
657 if (rnode == NULL)
658 return (NULL);
659 max = VM_RADIX_MAX(level);
660 if (index > max || index == 0)
661 index = max;
662 /*
663 * Search the tree from the top for any leaf node holding an index
664 * lower than 'index'.
665 */
666 level--;
667 while (rnode) {
668 slot = vm_radix_slot(index, level);
669 CTR6(KTR_VM,
670 "lookup_le: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
671 rtree, (uintmax_t)index, level, slot, rnode,
672 rnode->rn_child[slot]);
673 if (level == 0)
674 break;
675 /*
676 * If we don't have an exact match we must start our search
677 * from the next leaf and adjust our index appropriately.
678 */
679 if ((child = rnode->rn_child[slot]) == NULL) {
680 /*
681 * Calculate how much to decrement our index by
682 * based on the tree level. We must set the
683 * lower bits to start from the end of the next
684 * leaf.
685 */
686 inc = 1LL << (level * VM_RADIX_WIDTH);
687 index |= VM_RADIX_MAX(level);
688 index -= inc;
689 slot--;
690 CTR4(KTR_VM,
691 "lookup_le: start %ju inc %ju mask 0x%jX slot %d",
692 (uintmax_t)index, (uintmax_t)inc,
693 (uintmax_t)VM_RADIX_MAX(level), slot);
694 for (; slot >= 0; slot--, index -= inc) {
695 child = rnode->rn_child[slot];
696 if (child)
697 break;
698 }
699 }
700 rnode = child;
701 level--;
702 }
703 if (rnode) {
704 for (; slot >= 0; slot--, index--) {
705 val = vm_radix_match(rnode->rn_child[slot], color);
706 if (val)
707 return (val);
708 }
709 }
710 if (index != -1)
711 goto restart;
712 return (NULL);
713}
714
715/*
716 * Remove the specified index from the tree. If possible the height of the
717 * tree is adjusted after deletion. The value stored at index is returned
718 * panics if the key is not present.
719 */
720void *
721vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color)
722{
723 struct vm_radix_node *stack[VM_RADIX_LIMIT];
724 struct vm_radix_node *rnode, *root;
725 void *val;
726 int level;
727 int slot;
728
729 level = vm_radix_height(rtree, &root);
730 KASSERT(index <= VM_RADIX_MAX(level),
731 ("vm_radix_remove: %p index %ju out of range %jd.",
732 rtree, (uintmax_t)index, VM_RADIX_MAX(level)));
733 rnode = root;
734 val = NULL;
735 level--;
736 /*
737 * Find the node and record the path in stack.
738 */
739 while (level && rnode) {
740 stack[level] = rnode;
741 slot = vm_radix_slot(index, level);
742 CTR5(KTR_VM,
743 "remove: tree %p, index %ju, level %d, slot %d, rnode %p",
744 rtree, (uintmax_t)index, level, slot, rnode);
745 CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u",
746 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
747 rnode = rnode->rn_child[slot];
748 level--;
749 }
750 KASSERT(rnode != NULL,
751 ("vm_radix_remove: index %ju not present in the tree.\n",
752 (uintmax_t)index));
753 slot = vm_radix_slot(index, 0);
754 val = vm_radix_match(rnode->rn_child[slot], color);
755 KASSERT(val != NULL,
756 ("vm_radix_remove: index %ju not present in the tree.\n",
757 (uintmax_t)index));
758
759 for (;;) {
760 CTR5(KTR_VM,
761 "remove: resetting tree %p, index %ju, level %d, slot %d, rnode %p",
762 rtree, (uintmax_t)index, level, slot, rnode);
763 CTR4(KTR_VM,
764 "remove: resetting tree %p, rnode %p, child %p, count %u",
765 rtree, rnode,
766 (rnode != NULL) ? rnode->rn_child[slot] : NULL,
767 (rnode != NULL) ? rnode->rn_count : 0);
768 rnode->rn_child[slot] = NULL;
769 /*
770 * Use atomics for the last level since red and black
771 * will both adjust it.
772 * Use a write memory barrier here in order to avoid
773 * rn_count reaching 0 before to fetch the actual pointer.
774 * Concurrent black removal, infact, may want to reclaim
775 * the radix node itself before to read it.
776 */
777 if (level == 0)
778 atomic_add_rel_32(&rnode->rn_count, -1);
779 else
780 rnode->rn_count--;
781 /*
782 * Only allow black removes to prune the tree.
783 */
784 if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0)
785 break;
786 vm_radix_node_put(rnode);
787 if (rnode == root) {
788 vm_radix_setroot(rtree, NULL, 0);
789 break;
790 }
791 rnode = stack[++level];
792 slot = vm_radix_slot(index, level);
793
794 }
795 return (val);
796}
797
798/*
799 * Remove and free all the nodes from the radix tree.
800 * This function is recrusive but there is a tight control on it as the
801 * maximum depth of the tree is fixed.
802 */
803void
804vm_radix_reclaim_allnodes(struct vm_radix *rtree)
805{
806 struct vm_radix_node *root;
807 int level;
808
809 if (rtree->rt_root == 0)
810 return;
811 level = vm_radix_height(rtree, &root);
812 vm_radix_reclaim_allnodes_internal(root, level - 1);
813 rtree->rt_root = 0;
814}
815
#ifdef notyet
/*
 * Attempts to reduce the height of the tree by collapsing root nodes that
 * have only their slot-0 child populated.  Currently compiled out.
 */
void
vm_radix_shrink(struct vm_radix *rtree)
{
	struct vm_radix_node *tmp, *root;
	int level;

	if (rtree->rt_root == 0)
		return;
	level = vm_radix_height(rtree, &root);

	/* Adjust the height of the tree. */
	while (root->rn_count == 1 && root->rn_child[0] != NULL) {
		tmp = root;
		root->rn_count--;
		root = root->rn_child[0];
		level--;
		vm_radix_node_put(tmp);
	}
	/* Finally see if we have an empty tree. */
	if (root->rn_count == 0) {
		vm_radix_node_put(root);
		root = NULL;
		level--;
	}
	vm_radix_setroot(rtree, root, level);
}
#endif
244 vm_radix_node_zone = uma_zcreate("RADIX NODE",
245 sizeof(struct vm_radix_node), NULL,
246#ifdef INVARIANTS
247 vm_radix_node_zone_dtor,
248#else
249 NULL,
250#endif
251 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM);
252}
253
254/*
255 * Extract the root node and height from a radix tree with a single load.
256 */
257static __inline int
258vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode)
259{
260 uintptr_t root;
261 int height;
262
263 root = rtree->rt_root;
264 height = root & VM_RADIX_HEIGHT;
265 *rnode = (struct vm_radix_node *)(root - height);
266 return (height);
267}
268
269
270/*
271 * Set the root node and height for a radix tree.
272 */
273static inline void
274vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
275 int height)
276{
277 uintptr_t root;
278
279 root = (uintptr_t)rnode | height;
280 rtree->rt_root = root;
281}
282
/*
 * Undo a partial height increase performed by vm_radix_insert() after a
 * failed node allocation: free the chain of nodes stacked on top of the
 * original root 'iroot' and restore the tree to root 'iroot' at height
 * 'ilevel'.  Each node in the chain links at most the next node below it
 * in slot 0.
 */
static inline void
vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
    struct vm_radix_node *iroot, int ilevel)
{
	struct vm_radix_node *rnode;

	CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
	    rtree, root, iroot, ilevel);
	while (iroot != root && root != NULL) {
		rnode = root;
		/* Height-expansion nodes hold at most one child (slot 0). */
		MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
		rnode->rn_count = 0;
		root = rnode->rn_child[0];
		vm_radix_node_put(rnode);
	}
	/* Reinstall the original root and height. */
	vm_radix_setroot(rtree, iroot, ilevel);
}
300
301static inline void *
302vm_radix_match(void *child, int color)
303{
304 uintptr_t c;
305
306 c = (uintptr_t)child;
307
308 if ((c & color) == 0)
309 return (NULL);
310 return ((void *)(c & ~VM_RADIX_FLAGS));
311}
312
/*
 * Recursively free 'rnode' and every tree node reachable below it.
 * 'level' is the node's distance from the leaf level, so the recursion
 * depth is bounded by the maximum tree height.
 */
static void
vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
{
	int slot;

	MPASS(rnode != NULL && level >= 0);

	/*
	 * Level 0 just contains pages as children, thus make it a special
	 * case, free the node and return.
	 */
	if (level == 0) {
		CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
		rnode->rn_count = 0;
		vm_radix_node_put(rnode);
		return;
	}
	/* Stop scanning slots early once all live children are reclaimed. */
	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
		if (rnode->rn_child[slot] == NULL)
			continue;
		CTR3(KTR_VM,
		    "reclaiming: node %p, level %d recursing in slot %d",
		    rnode, level, slot);
		vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot],
		    level - 1);
		rnode->rn_count--;
	}
	MPASS(rnode->rn_count == 0);
	CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
	vm_radix_node_put(rnode);
}
344
345/*
346 * Inserts the key-value pair in to the radix tree. Returns errno.
347 * Panics if the key already exists.
348 */
349int
350vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
351{
352 struct vm_radix_node *iroot, *rnode, *root;
353 u_int allocmsk;
354 int clev, ilevel, level, slot;
355
356 CTR3(KTR_VM,
357 "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
358 if (index == -1)
359 panic("vm_radix_insert: -1 is not a valid index.\n");
360 level = vm_radix_height(rtree, &root);
361 /*
362 * Increase the height by adding nodes at the root until
363 * there is sufficient space.
364 */
365 ilevel = level;
366 iroot = root;
367 while (level == 0 || index > VM_RADIX_MAX(level)) {
368 CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
369 (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
370 level++;
371 KASSERT(level <= VM_RADIX_LIMIT,
372 ("vm_radix_insert: Tree %p height %d too tall",
373 rtree, level));
374 /*
375 * Only allocate tree nodes if they are needed.
376 */
377 if (root == NULL || root->rn_count != 0) {
378 rnode = vm_radix_node_get();
379 if (rnode == NULL) {
380 CTR4(KTR_VM,
381 "insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
382 rtree, root, (uintmax_t)index, level);
383 vm_radix_unwind_heightup(rtree, root, iroot,
384 ilevel);
385 return (ENOMEM);
386 }
387 /*
388 * Store the new pointer with a memory barrier so
389 * that it is visible before the new root.
390 */
391 if (root) {
392 atomic_store_rel_ptr((volatile uintptr_t *)
393 &rnode->rn_child[0], (uintptr_t)root);
394 rnode->rn_count = 1;
395 }
396 root = rnode;
397 }
398 vm_radix_setroot(rtree, root, level);
399 }
400
401 /* Now that the tree is tall enough, fill in the path to the index. */
402 allocmsk = 0;
403 clev = level;
404 rnode = root;
405 for (level = level - 1; level > 0; level--) {
406 slot = vm_radix_slot(index, level);
407 /* Add the required intermidiate nodes. */
408 if (rnode->rn_child[slot] == NULL) {
409 rnode->rn_child[slot] = vm_radix_node_get();
410 if (rnode->rn_child[slot] == NULL) {
411 CTR5(KTR_VM,
412 "insert: tree %p, index %ju, level %d, slot %d, rnode %p ENOMEM",
413 rtree, (uintmax_t)index, level, slot,
414 rnode);
415 CTR4(KTR_VM,
416 "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
417 rtree, rnode, rnode->rn_child[slot],
418 rnode->rn_count);
419 MPASS(level != clev || allocmsk == 0);
420 while (allocmsk != 0) {
421 rnode = root;
422 level = clev;
423 level--;
424 slot = vm_radix_slot(index, level);
425 CTR4(KTR_VM,
426 "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
427 root, level, slot, allocmsk);
428 MPASS(level >= (ffs(allocmsk) - 1));
429 while (level > (ffs(allocmsk) - 1)) {
430 MPASS(level > 0);
431 slot = vm_radix_slot(index,
432 level);
433 rnode = rnode->rn_child[slot];
434 level--;
435 }
436 MPASS((allocmsk & (1 << level)) != 0);
437 allocmsk &= ~(1 << level);
438 rnode->rn_count--;
439 vm_radix_node_put(rnode->rn_child[slot]);
440 rnode->rn_child[slot] = NULL;
441 }
442 vm_radix_unwind_heightup(rtree, root, iroot,
443 ilevel);
444 return (ENOMEM);
445 }
446 rnode->rn_count++;
447 allocmsk |= (1 << level);
448 }
449 CTR5(KTR_VM,
450 "insert: tree %p, index %ju, level %d, slot %d, rnode %p",
451 rtree, (uintmax_t)index, level, slot, rnode);
452 CTR4(KTR_VM,
453 "insert: tree %p, rnode %p, child %p, count %u",
454 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
455 rnode = rnode->rn_child[slot];
456 }
457
458 slot = vm_radix_slot(index, 0);
459 MPASS(rnode != NULL);
460 KASSERT(rnode->rn_child[slot] == NULL,
461 ("vm_radix_insert: Duplicate value %p at index: %lu\n",
462 rnode->rn_child[slot], (u_long)index));
463 val = (void *)((uintptr_t)val | VM_RADIX_BLACK);
464 rnode->rn_child[slot] = val;
465 atomic_add_32(&rnode->rn_count, 1);
466 CTR6(KTR_VM,
467 "insert: tree %p, index %ju, level %d, slot %d, rnode %p, count %u",
468 rtree, (uintmax_t)index, level, slot, rnode, rnode->rn_count);
469
470 return 0;
471}
472
473/*
474 * Returns the value stored at the index. If the index is not present
475 * NULL is returned.
476 */
477void *
478vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color)
479{
480 struct vm_radix_node *rnode;
481 int slot;
482 int level;
483
484 level = vm_radix_height(rtree, &rnode);
485 if (index > VM_RADIX_MAX(level))
486 return NULL;
487 level--;
488 while (rnode) {
489 slot = vm_radix_slot(index, level);
490 CTR6(KTR_VM,
491 "lookup: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
492 rtree, (uintmax_t)index, level, slot, rnode,
493 rnode->rn_child[slot]);
494 if (level == 0)
495 return vm_radix_match(rnode->rn_child[slot], color);
496 rnode = rnode->rn_child[slot];
497 level--;
498 }
499 CTR2(KTR_VM, "lookup: tree %p, index %ju failed", rtree,
500 (uintmax_t)index);
501
502 return NULL;
503}
504
/*
 * Replace the color bits of the entry stored at 'index' with 'color'
 * and return the stored pointer with its flag bits stripped, or NULL
 * when no entry exists at that index.
 */
void *
vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color)
{
	struct vm_radix_node *rnode;
	uintptr_t child;
	int slot;
	int level;

	level = vm_radix_height(rtree, &rnode);
	if (index > VM_RADIX_MAX(level))
		return NULL;
	/* Walk down to the leaf node covering 'index'. */
	level--;
	while (rnode) {
		slot = vm_radix_slot(index, level);
		CTR6(KTR_VM,
	    "color: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
		    rtree, (uintmax_t)index, level, slot, rnode,
		    rnode->rn_child[slot]);
		if (level == 0)
			break;
		rnode = rnode->rn_child[slot];
		level--;
	}
	/* 'slot' is only read when rnode != NULL, so it is always valid. */
	if (rnode == NULL || rnode->rn_child[slot] == NULL)
		return (NULL);
	child = (uintptr_t)rnode->rn_child[slot];
	/* Strip the old flags and install the requested color. */
	child &= ~VM_RADIX_FLAGS;
	rnode->rn_child[slot] = (void *)(child | color);

	return (void *)child;
}
536
/*
 * Find the first leaf with a valid node between *startp and end.  Return
 * the index of the first valid item in the leaf in *startp.  An 'end' of
 * zero means no upper bound.  Returns NULL when no such leaf exists or
 * the search would wrap past the maximum representable index.
 */
static struct vm_radix_node *
vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end)
{
	struct vm_radix_node *rnode;
	vm_pindex_t start;
	vm_pindex_t inc;
	int slot;
	int level;

	start = *startp;
restart:
	level = vm_radix_height(rtree, &rnode);
	if (start > VM_RADIX_MAX(level) || (end && start >= end)) {
		rnode = NULL;
		goto out;
	}
	/*
	 * Search the tree from the top for any leaf node holding an index
	 * between start and end.
	 */
	for (level--; level; level--) {
		slot = vm_radix_slot(start, level);
		CTR6(KTR_VM,
	    "leaf: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
		    rtree, (uintmax_t)start, level, slot, rnode,
		    (rnode != NULL) ? rnode->rn_child[slot] : NULL);
		if (rnode->rn_child[slot] != NULL) {
			/* Subtree present: descend toward the leaf level. */
			rnode = rnode->rn_child[slot];
			continue;
		}
		/*
		 * Calculate how much to increment our index by
		 * based on the tree level.  We must truncate the
		 * lower bits to start from the beginning of the
		 * next leaf.
		 */
		inc = 1LL << (level * VM_RADIX_WIDTH);
		start &= ~VM_RADIX_MAX(level);

		/* Avoid start address wrapping up. */
		if ((VM_RADIX_MAXVAL - start) < inc) {
			rnode = NULL;
			goto out;
		}
		start += inc;
		slot++;
		CTR5(KTR_VM,
		    "leaf: start %ju end %ju inc %ju mask 0x%jX slot %d",
		    (uintmax_t)start, (uintmax_t)end, (uintmax_t)inc,
		    (uintmax_t)~VM_RADIX_MAX(level), slot);
		/* Scan the remaining sibling slots at this level. */
		for (; slot < VM_RADIX_COUNT; slot++, start += inc) {
			if (end != 0 && start >= end) {
				rnode = NULL;
				goto out;
			}
			if (rnode->rn_child[slot]) {
				rnode = rnode->rn_child[slot];
				break;
			}
			/* Again, guard against index wrap-around. */
			if ((VM_RADIX_MAXVAL - start) < inc) {
				rnode = NULL;
				goto out;
			}
		}
		/*
		 * This node is exhausted; 'start' already points past it,
		 * so resume the search from the root.
		 */
		if (slot == VM_RADIX_COUNT)
			goto restart;
	}

out:
	*startp = start;
	return (rnode);
}
613
614
615
/*
 * Looks up as many as cnt values between start and end, and stores
 * them in the caller allocated array out.  The next index can be used
 * to restart the scan.  This optimizes forward scans in the tree.
 * Returns the number of values stored; 'end' of zero means unbounded.
 */
int
vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
    vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next)
{
	struct vm_radix_node *rnode;
	void *val;
	int slot;
	int outidx;

	CTR3(KTR_VM, "lookupn: tree %p, start %ju, end %ju",
	    rtree, (uintmax_t)start, (uintmax_t)end);
	if (rtree->rt_root == 0)
		return (0);
	outidx = 0;
	/* Advance one leaf node at a time, collecting matching entries. */
	while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
		slot = vm_radix_slot(start, 0);
		for (; slot < VM_RADIX_COUNT; slot++, start++) {
			if (end != 0 && start >= end)
				goto out;
			val = vm_radix_match(rnode->rn_child[slot], color);
			if (val == NULL)
				continue;
			CTR4(KTR_VM,
			    "lookupn: tree %p index %ju slot %d found child %p",
			    rtree, (uintmax_t)start, slot, val);
			out[outidx] = val;
			/*
			 * NOTE(review): when the array fills, *next is the
			 * index of the LAST stored entry, not one past it;
			 * confirm callers advance past it before rescanning.
			 */
			if (++outidx == cnt)
				goto out;
		}
		if (end != 0 && start >= end)
			break;
	}
out:
	*next = start;
	return (outidx);
}
657
658void
659vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end,
660 int color, void (*iter)(void *))
661{
662 struct vm_radix_node *rnode;
663 void *val;
664 int slot;
665
666 if (rtree->rt_root == 0)
667 return;
668 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
669 slot = vm_radix_slot(start, 0);
670 for (; slot < VM_RADIX_COUNT; slot++, start++) {
671 if (end != 0 && start >= end)
672 return;
673 val = vm_radix_match(rnode->rn_child[slot], color);
674 if (val)
675 iter(val);
676 }
677 if (end != 0 && start >= end)
678 return;
679 }
680}
681
682
/*
 * Look up any entry at a position less than or equal to index.
 * Returns NULL when no entry of matching color exists at or below it.
 */
void *
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color)
{
	struct vm_radix_node *rnode;
	struct vm_radix_node *child;
	vm_pindex_t max;
	vm_pindex_t inc;
	void *val;
	int slot;
	int level;

	CTR2(KTR_VM,
	    "lookup_le: tree %p, index %ju", rtree, (uintmax_t)index);
restart:
	level = vm_radix_height(rtree, &rnode);
	if (rnode == NULL)
		return (NULL);
	max = VM_RADIX_MAX(level);
	/*
	 * Clamp out-of-range indices to the largest representable one.
	 * NOTE(review): an index of 0 is also mapped to 'max', i.e. the
	 * search wraps to the top of the range -- confirm callers expect
	 * that behavior.
	 */
	if (index > max || index == 0)
		index = max;
	/*
	 * Search the tree from the top for any leaf node holding an index
	 * lower than 'index'.
	 */
	level--;
	while (rnode) {
		slot = vm_radix_slot(index, level);
		CTR6(KTR_VM,
	    "lookup_le: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
		    rtree, (uintmax_t)index, level, slot, rnode,
		    rnode->rn_child[slot]);
		if (level == 0)
			break;
		/*
		 * If we don't have an exact match we must start our search
		 * from the next leaf and adjust our index appropriately.
		 */
		if ((child = rnode->rn_child[slot]) == NULL) {
			/*
			 * Calculate how much to decrement our index by
			 * based on the tree level.  We must set the
			 * lower bits to start from the end of the next
			 * leaf.
			 */
			inc = 1LL << (level * VM_RADIX_WIDTH);
			index |= VM_RADIX_MAX(level);
			index -= inc;
			slot--;
			CTR4(KTR_VM,
			    "lookup_le: start %ju inc %ju mask 0x%jX slot %d",
			    (uintmax_t)index, (uintmax_t)inc,
			    (uintmax_t)VM_RADIX_MAX(level), slot);
			/* Scan lower sibling slots for a populated subtree. */
			for (; slot >= 0; slot--, index -= inc) {
				child = rnode->rn_child[slot];
				if (child)
					break;
			}
		}
		/* 'child' may still be NULL; the loop then terminates. */
		rnode = child;
		level--;
	}
	if (rnode) {
		/* Scan the leaf backwards from 'slot' for a match. */
		for (; slot >= 0; slot--, index--) {
			val = vm_radix_match(rnode->rn_child[slot], color);
			if (val)
				return (val);
		}
	}
	/*
	 * Nothing found at or below the adjusted position; retry from the
	 * lowered 'index' unless it has wrapped past zero.
	 */
	if (index != -1)
		goto restart;
	return (NULL);
}
758
/*
 * Remove the specified index from the tree.  The value stored at index
 * is returned; panics if the key is not present.  For black removals,
 * nodes left empty by the removal are pruned bottom-up along the
 * recorded path.
 */
void *
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	struct vm_radix_node *rnode, *root;
	void *val;
	int level;
	int slot;

	level = vm_radix_height(rtree, &root);
	KASSERT(index <= VM_RADIX_MAX(level),
	    ("vm_radix_remove: %p index %ju out of range %jd.",
	    rtree, (uintmax_t)index, VM_RADIX_MAX(level)));
	rnode = root;
	val = NULL;
	level--;
	/*
	 * Find the node and record the path in stack.
	 */
	while (level && rnode) {
		stack[level] = rnode;
		slot = vm_radix_slot(index, level);
		CTR5(KTR_VM,
		    "remove: tree %p, index %ju, level %d, slot %d, rnode %p",
		    rtree, (uintmax_t)index, level, slot, rnode);
		CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u",
		    rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
		rnode = rnode->rn_child[slot];
		level--;
	}
	KASSERT(rnode != NULL,
	    ("vm_radix_remove: index %ju not present in the tree.\n",
	    (uintmax_t)index));
	slot = vm_radix_slot(index, 0);
	val = vm_radix_match(rnode->rn_child[slot], color);
	KASSERT(val != NULL,
	    ("vm_radix_remove: index %ju not present in the tree.\n",
	    (uintmax_t)index));

	/* Clear the leaf slot, then prune empty ancestors bottom-up. */
	for (;;) {
		CTR5(KTR_VM,
	    "remove: resetting tree %p, index %ju, level %d, slot %d, rnode %p",
		    rtree, (uintmax_t)index, level, slot, rnode);
		CTR4(KTR_VM,
		    "remove: resetting tree %p, rnode %p, child %p, count %u",
		    rtree, rnode,
		    (rnode != NULL) ? rnode->rn_child[slot] : NULL,
		    (rnode != NULL) ? rnode->rn_count : 0);
		rnode->rn_child[slot] = NULL;
		/*
		 * Use atomics for the last level since red and black
		 * will both adjust it.
		 * Use a write memory barrier here in order to avoid
		 * rn_count reaching 0 before to fetch the actual pointer.
		 * Concurrent black removal, infact, may want to reclaim
		 * the radix node itself before to read it.
		 */
		if (level == 0)
			atomic_add_rel_32(&rnode->rn_count, -1);
		else
			rnode->rn_count--;
		/*
		 * Only allow black removes to prune the tree.
		 */
		if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0)
			break;
		vm_radix_node_put(rnode);
		if (rnode == root) {
			/* The whole tree is now empty. */
			vm_radix_setroot(rtree, NULL, 0);
			break;
		}
		/* Step up to the parent recorded on the path stack. */
		rnode = stack[++level];
		slot = vm_radix_slot(index, level);

	}
	return (val);
}
841
842/*
843 * Remove and free all the nodes from the radix tree.
844 * This function is recrusive but there is a tight control on it as the
845 * maximum depth of the tree is fixed.
846 */
847void
848vm_radix_reclaim_allnodes(struct vm_radix *rtree)
849{
850 struct vm_radix_node *root;
851 int level;
852
853 if (rtree->rt_root == 0)
854 return;
855 level = vm_radix_height(rtree, &root);
856 vm_radix_reclaim_allnodes_internal(root, level - 1);
857 rtree->rt_root = 0;
858}
859
#ifdef notyet
/*
 * Attempts to reduce the height of the tree.
 *
 * A root holding exactly one child in slot 0 only extends the path of
 * low-index keys, so it can be removed and the child promoted; a lone
 * child in any other slot is left alone (presumably because promoting
 * it would change key decoding -- this code is not yet enabled).
 */
void
vm_radix_shrink(struct vm_radix *rtree)
{
	struct vm_radix_node *tmp, *root;
	int level;

	if (rtree->rt_root == 0)
		return;
	level = vm_radix_height(rtree, &root);

	/* Adjust the height of the tree. */
	while (root->rn_count == 1 && root->rn_child[0] != NULL) {
		tmp = root;
		root->rn_count--;
		root = root->rn_child[0];
		level--;
		vm_radix_node_put(tmp);
	}
	/* Finally see if we have an empty tree. */
	if (root->rn_count == 0) {
		vm_radix_node_put(root);
		root = NULL;
		level--;
	}
	vm_radix_setroot(rtree, root, level);
}
#endif