1/*
2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29
30/*
31 * Radix tree implementation.
32 */
33
34#include <sys/cdefs.h>
35
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/ktr.h>
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/vm_extern.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_page.h>
52#ifndef UMA_MD_SMALL_ALLOC
53#include <vm/vm_map.h>
54#endif
55#include <vm/vm_radix.h>
56#include <vm/vm_object.h>
57
58#include <sys/kdb.h>
59
#ifndef UMA_MD_SMALL_ALLOC
/* Default item cap for the radix node submap (see vm_radix_init()). */
#define VM_RADIX_RNODE_MAP_SCALE	(1024 * 1024 / 2)
/* Bits of the index consumed at each tree level. */
#define VM_RADIX_WIDTH	4

/*
 * Bits of height in root.
 * The mask of smaller power of 2 containing VM_RADIX_LIMIT.
 */
#define VM_RADIX_HEIGHT	0x1f
#else
/* Bits of the index consumed at each tree level. */
#define VM_RADIX_WIDTH	5

/* See the comment above. */
#define VM_RADIX_HEIGHT	0xf
#endif

/* Number of children per node and the mask extracting a single slot. */
#define VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
/* Largest representable index. */
#define VM_RADIX_MAXVAL	((vm_pindex_t)-1)
/* Number of levels needed to cover the whole index space. */
#define VM_RADIX_LIMIT	howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH)

/* Flag bits stored in node pointers. */
#define VM_RADIX_FLAGS	0x3

/* Calculates maximum value for a tree of height h. */
#define VM_RADIX_MAX(h)							\
	((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL :			\
	    (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1))

/* The height field in the root pointer must hold any valid height. */
CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT);
/* The per-level allocation bitmask in vm_radix_insert() is a u_int. */
CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);

struct vm_radix_node {
	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
	volatile uint32_t rn_count;			/* Valid children. */
};

/* Nodes are backed one page at a time; each node must fit in a page. */
CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);

static uma_zone_t vm_radix_node_zone;
100
#ifndef UMA_MD_SMALL_ALLOC
/* Submap of the kernel map used to back radix node allocations. */
static vm_map_t rnode_map;
/* Cap on the number of radix nodes; tunable via hw.rnode_map_scale. */
static u_long rnode_map_scale;

/*
 * UMA backend allocator for radix nodes: reserve a virtual address
 * range from rnode_map and back it with a freshly allocated, wired
 * physical page.  Returns NULL on failure.
 */
static void *
vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
{
	vm_offset_t addr;
	vm_page_t m;
	int pflags;

	/* Inform UMA that this allocator uses rnode_map. */
	*flags = UMA_SLAB_KERNEL;

	pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;

	/*
	 * As kmem_alloc_nofault() can however fail, let just assume that
	 * M_NOWAIT is on and act accordingly.
	 */
	pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
	    VM_ALLOC_SYSTEM;
	if ((wait & M_ZERO) != 0)
		pflags |= VM_ALLOC_ZERO;
	addr = kmem_alloc_nofault(rnode_map, size);
	if (addr == 0)
		return (NULL);

	/* Just one page allocation is assumed here. */
	m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
	    pflags);
	if (m == NULL) {
		/* No physical page: release the virtual range as well. */
		kmem_free(rnode_map, addr, size);
		return (NULL);
	}
	/* Zero the page only if requested and not already pre-zeroed. */
	if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	/* Enter the page into the reserved virtual range. */
	pmap_qenter(addr, &m, 1);
	return ((void *)addr);
}
141
/*
 * UMA backend free routine matching vm_radix_node_zone_allocf():
 * unmap and free the backing page, then return the virtual range to
 * rnode_map.
 */
static void
vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
{
	vm_page_t m;
	vm_offset_t voitem;

	MPASS((flags & UMA_SLAB_KERNEL) != 0);

	/* Just one page allocation is assumed here. */
	voitem = (vm_offset_t)item;
	m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
	pmap_qremove(voitem, 1);
	vm_page_lock(m);
	vm_page_unwire(m, 0);
	vm_page_free(m);
	vm_page_unlock(m);
	kmem_free(rnode_map, voitem, size);
}
160
/*
 * Late (SYSINIT) setup for the radix node zone: cap the zone at
 * rnode_map_scale items and install the submap-backed allocator and
 * free routines defined above.
 */
static void
init_vm_radix_alloc(void *dummy __unused)
{

	uma_zone_set_max(vm_radix_node_zone, rnode_map_scale);
	uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
	uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
}
SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
#endif
171
172/*
173 * Radix node zone destructor.
174 */
#ifdef INVARIANTS
/*
 * Assert that a node being returned to the zone has no remaining
 * children; compiled only under INVARIANTS.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size, void *arg)
{
	struct vm_radix_node *rnode;

	rnode = mem;
	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: Freeing a node with %d children\n",
	    rnode->rn_count));
}
#endif
187
188/*
189 * Allocate a radix node. Initializes all elements to 0.
190 */
191static __inline struct vm_radix_node *
192vm_radix_node_get(void)
193{
194
195 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
196}
197
198/*
199 * Free radix node.
200 */
201static __inline void
202vm_radix_node_put(struct vm_radix_node *rnode)
203{
204
205 uma_zfree(vm_radix_node_zone, rnode);
206}
207
208/*
209 * Return the position in the array for a given level.
210 */
211static __inline int
212vm_radix_slot(vm_pindex_t index, int level)
213{
214
215 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
216}
217
218/*
219 * Initialize the radix node submap (for architectures not supporting
220 * direct-mapping) and the radix node zone.
221 *
222 * WITNESS reports a lock order reversal, for architectures not
223 * supporting direct-mapping, between the "system map" lock
224 * and the "vm object" lock. This is because the well established ordering
225 * "system map" -> "vm object" is not honoured in this case as allocating
226 * from the radix node submap ends up adding a mapping entry to it, meaning
227 * it is necessary to lock the submap. However, the radix node submap is
228 * a leaf and self-contained, thus a deadlock cannot happen here and
229 * adding MTX_NOWITNESS to all map locks would be largerly sub-optimal.
230 */
231void
232vm_radix_init(void)
233{
234#ifndef UMA_MD_SMALL_ALLOC
235 vm_offset_t maxaddr, minaddr;
236
237 rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE;
238 TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale);
239 rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
240 rnode_map_scale * sizeof(struct vm_radix_node), FALSE);
241 rnode_map->system_map = 1;
242#endif
243
244 vm_radix_node_zone = uma_zcreate("RADIX NODE",
245 sizeof(struct vm_radix_node), NULL,
246#ifdef INVARIANTS
247 vm_radix_node_zone_dtor,
248#else
249 NULL,
250#endif
251 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM);
252}
253
254/*
255 * Extract the root node and height from a radix tree with a single load.
256 */
257static __inline int
258vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode)
259{
260 uintptr_t root;
261 int height;
262
263 root = rtree->rt_root;
264 height = root & VM_RADIX_HEIGHT;
265 *rnode = (struct vm_radix_node *)(root - height);
266 return (height);
267}
268
269
270/*
271 * Set the root node and height for a radix tree.
272 */
273static inline void
274vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
275 int height)
276{
277 uintptr_t root;
278
279 root = (uintptr_t)rnode | height;
280 rtree->rt_root = root;
281}
282
/*
 * Undo a failed height expansion: free the chain of nodes added above
 * 'iroot' and restore the original root 'iroot' at its original level
 * 'ilevel'.
 */
static inline void
vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
    struct vm_radix_node *iroot, int ilevel)
{
	struct vm_radix_node *rnode;

	CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
	    rtree, root, iroot, ilevel);
	while (iroot != root && root != NULL) {
		rnode = root;
		/* Expansion nodes hold at most the previous root as child. */
		MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
		rnode->rn_count = 0;
		root = rnode->rn_child[0];
		vm_radix_node_put(rnode);
	}
	vm_radix_setroot(rtree, iroot, ilevel);
}
300
301static inline void *
302vm_radix_match(void *child, int color)
303{
304 uintptr_t c;
305
306 c = (uintptr_t)child;
307
308 if ((c & color) == 0)
309 return (NULL);
310 return ((void *)(c & ~VM_RADIX_FLAGS));
311}
312
/*
 * Recursively free every node in the subtree rooted at 'rnode', which
 * sits at 'level' (0 being the leaf level).  Only the radix nodes are
 * returned to the zone; the values stored in leaves are not touched.
 */
static void
vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
{
	int slot;

	MPASS(rnode != NULL && level >= 0);

	/*
	 * Level 0 just contains pages as children, thus make it a special
	 * case, free the node and return.
	 */
	if (level == 0) {
		CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
		rnode->rn_count = 0;
		vm_radix_node_put(rnode);
		return;
	}
	/* Stop scanning slots as soon as all the children are reclaimed. */
	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
		if (rnode->rn_child[slot] == NULL)
			continue;
		CTR3(KTR_VM,
		    "reclaiming: node %p, level %d recursing in slot %d",
		    rnode, level, slot);
		vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot],
		    level - 1);
		rnode->rn_count--;
	}
	MPASS(rnode->rn_count == 0);
	CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
	vm_radix_node_put(rnode);
}
344
345/*
346 * Inserts the key-value pair in to the radix tree. Returns errno.
347 * Panics if the key already exists.
348 */
349int
350vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
351{
352 struct vm_radix_node *iroot, *rnode, *root;
353 u_int allocmsk;
354 int clev, ilevel, level, slot;
355
356 CTR3(KTR_VM,
357 "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
358 if (index == -1)
359 panic("vm_radix_insert: -1 is not a valid index.\n");
360 level = vm_radix_height(rtree, &root);
361 /*
362 * Increase the height by adding nodes at the root until
363 * there is sufficient space.
364 */
365 ilevel = level;
366 iroot = root;
367 while (level == 0 || index > VM_RADIX_MAX(level)) {
368 CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
369 (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
370 level++;
371 KASSERT(level <= VM_RADIX_LIMIT,
372 ("vm_radix_insert: Tree %p height %d too tall",
373 rtree, level));
374 /*
375 * Only allocate tree nodes if they are needed.
376 */
377 if (root == NULL || root->rn_count != 0) {
378 rnode = vm_radix_node_get();
379 if (rnode == NULL) {
380 CTR4(KTR_VM,
381 "insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
382 rtree, root, (uintmax_t)index, level);
383 vm_radix_unwind_heightup(rtree, root, iroot,
384 ilevel);
385 return (ENOMEM);
386 }
387 /*
388 * Store the new pointer with a memory barrier so
389 * that it is visible before the new root.
390 */
391 if (root) {
392 atomic_store_rel_ptr((volatile uintptr_t *)
393 &rnode->rn_child[0], (uintptr_t)root);
394 rnode->rn_count = 1;
395 }
396 root = rnode;
397 }
398 vm_radix_setroot(rtree, root, level);
399 }
400
401 /* Now that the tree is tall enough, fill in the path to the index. */
402 allocmsk = 0;
403 clev = level;
404 rnode = root;
405 for (level = level - 1; level > 0; level--) {
406 slot = vm_radix_slot(index, level);
407 /* Add the required intermidiate nodes. */
408 if (rnode->rn_child[slot] == NULL) {
409 rnode->rn_child[slot] = vm_radix_node_get();
410 if (rnode->rn_child[slot] == NULL) {
411 CTR5(KTR_VM,
412 "insert: tree %p, index %ju, level %d, slot %d, rnode %p ENOMEM",
413 rtree, (uintmax_t)index, level, slot,
414 rnode);
415 CTR4(KTR_VM,
416 "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
417 rtree, rnode, rnode->rn_child[slot],
418 rnode->rn_count);
419 MPASS(level != clev || allocmsk == 0);
420 while (allocmsk != 0) {
421 rnode = root;
422 level = clev;
423 level--;
424 slot = vm_radix_slot(index, level);
425 CTR4(KTR_VM,
426 "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
427 root, level, slot, allocmsk);
428 MPASS(level >= (ffs(allocmsk) - 1));
429 while (level > (ffs(allocmsk) - 1)) {
430 MPASS(level > 0);
431 slot = vm_radix_slot(index,
432 level);
433 rnode = rnode->rn_child[slot];
434 level--;
435 }
436 MPASS((allocmsk & (1 << level)) != 0);
437 allocmsk &= ~(1 << level);
438 rnode->rn_count--;
439 vm_radix_node_put(rnode->rn_child[slot]);
440 rnode->rn_child[slot] = NULL;
441 }
442 vm_radix_unwind_heightup(rtree, root, iroot,
443 ilevel);
444 return (ENOMEM);
445 }
446 rnode->rn_count++;
447 allocmsk |= (1 << level);
448 }
449 CTR5(KTR_VM,
450 "insert: tree %p, index %ju, level %d, slot %d, rnode %p",
451 rtree, (uintmax_t)index, level, slot, rnode);
452 CTR4(KTR_VM,
453 "insert: tree %p, rnode %p, child %p, count %u",
454 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
455 rnode = rnode->rn_child[slot];
456 }
457
458 slot = vm_radix_slot(index, 0);
459 MPASS(rnode != NULL);
460 KASSERT(rnode->rn_child[slot] == NULL,
461 ("vm_radix_insert: Duplicate value %p at index: %lu\n",
462 rnode->rn_child[slot], (u_long)index));
463 val = (void *)((uintptr_t)val | VM_RADIX_BLACK);
464 rnode->rn_child[slot] = val;
465 atomic_add_32(&rnode->rn_count, 1);
466 CTR6(KTR_VM,
467 "insert: tree %p, index %ju, level %d, slot %d, rnode %p, count %u",
468 rtree, (uintmax_t)index, level, slot, rnode, rnode->rn_count);
469
470 return 0;
471}
472
473/*
474 * Returns the value stored at the index. If the index is not present
475 * NULL is returned.
476 */
477void *
478vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color)
479{
480 struct vm_radix_node *rnode;
481 int slot;
482 int level;
483
484 level = vm_radix_height(rtree, &rnode);
485 if (index > VM_RADIX_MAX(level))
486 return NULL;
487 level--;
488 while (rnode) {
489 slot = vm_radix_slot(index, level);
490 CTR6(KTR_VM,
491 "lookup: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
492 rtree, (uintmax_t)index, level, slot, rnode,
493 rnode->rn_child[slot]);
494 if (level == 0)
495 return vm_radix_match(rnode->rn_child[slot], color);
496 rnode = rnode->rn_child[slot];
497 level--;
498 }
499 CTR2(KTR_VM, "lookup: tree %p, index %ju failed", rtree,
500 (uintmax_t)index);
501
502 return NULL;
503}
504
/*
 * Change the flag (color) bits of the value stored at 'index' to
 * 'color' and return the value pointer stripped of its flag bits.
 * Returns NULL if the index is not present.
 */
void *
vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color)
{
	struct vm_radix_node *rnode;
	uintptr_t child;
	int slot;
	int level;

	level = vm_radix_height(rtree, &rnode);
	if (index > VM_RADIX_MAX(level))
		return NULL;
	level--;
	/* Walk down to the leaf node covering the index. */
	while (rnode) {
		slot = vm_radix_slot(index, level);
		CTR6(KTR_VM,
	    "color: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
		    rtree, (uintmax_t)index, level, slot, rnode,
		    rnode->rn_child[slot]);
		if (level == 0)
			break;
		rnode = rnode->rn_child[slot];
		level--;
	}
	/* Note: the NULL check must short-circuit before reading slot. */
	if (rnode == NULL || rnode->rn_child[slot] == NULL)
		return (NULL);
	/* Replace the old flag bits with the caller-supplied color. */
	child = (uintptr_t)rnode->rn_child[slot];
	child &= ~VM_RADIX_FLAGS;
	rnode->rn_child[slot] = (void *)(child | color);

	return (void *)child;
}
536
537/*
538 * Find the first leaf with a valid node between *startp and end. Return
539 * the index of the first valid item in the leaf in *startp.
540 */
541static struct vm_radix_node *
542vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end)
543{
544 struct vm_radix_node *rnode;
545 vm_pindex_t start;
546 vm_pindex_t inc;
547 int slot;
548 int level;
549
550 start = *startp;
551restart:
552 level = vm_radix_height(rtree, &rnode);
553 if (start > VM_RADIX_MAX(level) || (end && start >= end)) {
554 rnode = NULL;
555 goto out;
556 }
557 /*
558 * Search the tree from the top for any leaf node holding an index
559 * between start and end.
560 */
561 for (level--; level; level--) {
562 slot = vm_radix_slot(start, level);
563 CTR6(KTR_VM,
564 "leaf: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
565 rtree, (uintmax_t)start, level, slot, rnode,
566 (rnode != NULL) ? rnode->rn_child[slot] : NULL);
567 if (rnode->rn_child[slot] != NULL) {
568 rnode = rnode->rn_child[slot];
569 continue;
570 }
571 /*
572 * Calculate how much to increment our index by
573 * based on the tree level. We must truncate the
574 * lower bits to start from the begnning of the
575 * next leaf.
576 */
577 inc = 1LL << (level * VM_RADIX_WIDTH);
578 start &= ~VM_RADIX_MAX(level);
579
580 /* Avoid start address wrapping up. */
581 if ((VM_RADIX_MAXVAL - start) < inc) {
582 rnode = NULL;
583 goto out;
584 }
585 start += inc;
586 slot++;
587 CTR5(KTR_VM,
588 "leaf: start %ju end %ju inc %ju mask 0x%jX slot %d",
589 (uintmax_t)start, (uintmax_t)end, (uintmax_t)inc,
590 (uintmax_t)~VM_RADIX_MAX(level), slot);
591 for (; slot < VM_RADIX_COUNT; slot++, start += inc) {
592 if (end != 0 && start >= end) {
593 rnode = NULL;
594 goto out;
595 }
596 if (rnode->rn_child[slot]) {
597 rnode = rnode->rn_child[slot];
598 break;
599 }
600 if ((VM_RADIX_MAXVAL - start) < inc) {
601 rnode = NULL;
602 goto out;
603 }
604 }
605 if (slot == VM_RADIX_COUNT)
606 goto restart;
607 }
608
609out:
610 *startp = start;
611 return (rnode);
612}
613
614
615
616/*
617 * Looks up as many as cnt values between start and end, and stores
618 * them in the caller allocated array out. The next index can be used
619 * to restart the scan. This optimizes forward scans in the tree.
620 */
621int
622vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
623 vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next)
624{
625 struct vm_radix_node *rnode;
626 void *val;
627 int slot;
628 int outidx;
629
630 CTR3(KTR_VM, "lookupn: tree %p, start %ju, end %ju",
631 rtree, (uintmax_t)start, (uintmax_t)end);
632 if (rtree->rt_root == 0)
633 return (0);
634 outidx = 0;
635 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
636 slot = vm_radix_slot(start, 0);
637 for (; slot < VM_RADIX_COUNT; slot++, start++) {
638 if (end != 0 && start >= end)
639 goto out;
640 val = vm_radix_match(rnode->rn_child[slot], color);
641 if (val == NULL)
642 continue;
643 CTR4(KTR_VM,
644 "lookupn: tree %p index %ju slot %d found child %p",
645 rtree, (uintmax_t)start, slot, val);
646 out[outidx] = val;
647 if (++outidx == cnt)
648 goto out;
649 }
650 if (end != 0 && start >= end)
651 break;
652 }
653out:
654 *next = start;
655 return (outidx);
656}
657
658void
659vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end,
660 int color, void (*iter)(void *))
661{
662 struct vm_radix_node *rnode;
663 void *val;
664 int slot;
665
666 if (rtree->rt_root == 0)
667 return;
668 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
669 slot = vm_radix_slot(start, 0);
670 for (; slot < VM_RADIX_COUNT; slot++, start++) {
671 if (end != 0 && start >= end)
672 return;
673 val = vm_radix_match(rnode->rn_child[slot], color);
674 if (val)
675 iter(val);
676 }
677 if (end != 0 && start >= end)
678 return;
679 }
680}
681
682
683/*
684 * Look up any entry at a position less than or equal to index.
685 */
686void *
687vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color)
688{
689 struct vm_radix_node *rnode;
690 struct vm_radix_node *child;
691 vm_pindex_t max;
692 vm_pindex_t inc;
693 void *val;
694 int slot;
695 int level;
696
697 CTR2(KTR_VM,
698 "lookup_le: tree %p, index %ju", rtree, (uintmax_t)index);
699restart:
700 level = vm_radix_height(rtree, &rnode);
701 if (rnode == NULL)
702 return (NULL);
703 max = VM_RADIX_MAX(level);
704 if (index > max || index == 0)
705 index = max;
706 /*
707 * Search the tree from the top for any leaf node holding an index
708 * lower than 'index'.
709 */
710 level--;
711 while (rnode) {
712 slot = vm_radix_slot(index, level);
713 CTR6(KTR_VM,
714 "lookup_le: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
715 rtree, (uintmax_t)index, level, slot, rnode,
716 rnode->rn_child[slot]);
717 if (level == 0)
718 break;
719 /*
720 * If we don't have an exact match we must start our search
721 * from the next leaf and adjust our index appropriately.
722 */
723 if ((child = rnode->rn_child[slot]) == NULL) {
724 /*
725 * Calculate how much to decrement our index by
726 * based on the tree level. We must set the
727 * lower bits to start from the end of the next
728 * leaf.
729 */
730 inc = 1LL << (level * VM_RADIX_WIDTH);
731 index |= VM_RADIX_MAX(level);
732 index -= inc;
733 slot--;
734 CTR4(KTR_VM,
735 "lookup_le: start %ju inc %ju mask 0x%jX slot %d",
736 (uintmax_t)index, (uintmax_t)inc,
737 (uintmax_t)VM_RADIX_MAX(level), slot);
738 for (; slot >= 0; slot--, index -= inc) {
739 child = rnode->rn_child[slot];
740 if (child)
741 break;
742 }
743 }
744 rnode = child;
745 level--;
746 }
747 if (rnode) {
748 for (; slot >= 0; slot--, index--) {
749 val = vm_radix_match(rnode->rn_child[slot], color);
750 if (val)
751 return (val);
752 }
753 }
754 if (index != -1)
755 goto restart;
756 return (NULL);
757}
758
759/*
760 * Remove the specified index from the tree. If possible the height of the
761 * tree is adjusted after deletion. The value stored at index is returned
762 * panics if the key is not present.
763 */
764void *
765vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color)
766{
767 struct vm_radix_node *stack[VM_RADIX_LIMIT];
768 struct vm_radix_node *rnode, *root;
769 void *val;
770 int level;
771 int slot;
772
773 level = vm_radix_height(rtree, &root);
774 KASSERT(index <= VM_RADIX_MAX(level),
775 ("vm_radix_remove: %p index %ju out of range %jd.",
776 rtree, (uintmax_t)index, VM_RADIX_MAX(level)));
777 rnode = root;
778 val = NULL;
779 level--;
780 /*
781 * Find the node and record the path in stack.
782 */
783 while (level && rnode) {
784 stack[level] = rnode;
785 slot = vm_radix_slot(index, level);
786 CTR5(KTR_VM,
787 "remove: tree %p, index %ju, level %d, slot %d, rnode %p",
788 rtree, (uintmax_t)index, level, slot, rnode);
789 CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u",
790 rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
791 rnode = rnode->rn_child[slot];
792 level--;
793 }
794 KASSERT(rnode != NULL,
795 ("vm_radix_remove: index %ju not present in the tree.\n",
796 (uintmax_t)index));
797 slot = vm_radix_slot(index, 0);
798 val = vm_radix_match(rnode->rn_child[slot], color);
799 KASSERT(val != NULL,
800 ("vm_radix_remove: index %ju not present in the tree.\n",
801 (uintmax_t)index));
802
803 for (;;) {
804 CTR5(KTR_VM,
805 "remove: resetting tree %p, index %ju, level %d, slot %d, rnode %p",
806 rtree, (uintmax_t)index, level, slot, rnode);
807 CTR4(KTR_VM,
808 "remove: resetting tree %p, rnode %p, child %p, count %u",
809 rtree, rnode,
810 (rnode != NULL) ? rnode->rn_child[slot] : NULL,
811 (rnode != NULL) ? rnode->rn_count : 0);
812 rnode->rn_child[slot] = NULL;
813 /*
814 * Use atomics for the last level since red and black
815 * will both adjust it.
816 * Use a write memory barrier here in order to avoid
817 * rn_count reaching 0 before to fetch the actual pointer.
818 * Concurrent black removal, infact, may want to reclaim
819 * the radix node itself before to read it.
820 */
821 if (level == 0)
822 atomic_add_rel_32(&rnode->rn_count, -1);
823 else
824 rnode->rn_count--;
825 /*
826 * Only allow black removes to prune the tree.
827 */
828 if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0)
829 break;
830 vm_radix_node_put(rnode);
831 if (rnode == root) {
832 vm_radix_setroot(rtree, NULL, 0);
833 break;
834 }
835 rnode = stack[++level];
836 slot = vm_radix_slot(index, level);
837
838 }
839 return (val);
840}
841
842/*
843 * Remove and free all the nodes from the radix tree.
844 * This function is recrusive but there is a tight control on it as the
845 * maximum depth of the tree is fixed.
846 */
847void
848vm_radix_reclaim_allnodes(struct vm_radix *rtree)
849{
850 struct vm_radix_node *root;
851 int level;
852
853 if (rtree->rt_root == 0)
854 return;
855 level = vm_radix_height(rtree, &root);
856 vm_radix_reclaim_allnodes_internal(root, level - 1);
857 rtree->rt_root = 0;
858}
859
#ifdef notyet
/*
 * Attempts to reduce the height of the tree.
 * (Currently compiled out; see the "notyet" guard.)
 */
void
vm_radix_shrink(struct vm_radix *rtree)
{
	struct vm_radix_node *tmp, *root;
	int level;

	if (rtree->rt_root == 0)
		return;
	level = vm_radix_height(rtree, &root);

	/* Adjust the height of the tree. */
	while (root->rn_count == 1 && root->rn_child[0] != NULL) {
		/* A root with a single child can be replaced by the child. */
		tmp = root;
		root->rn_count--;
		root = root->rn_child[0];
		level--;
		vm_radix_node_put(tmp);
	}
	/* Finally see if we have an empty tree. */
	if (root->rn_count == 0) {
		vm_radix_node_put(root);
		root = NULL;
		level--;
	}
	vm_radix_setroot(rtree, root, level);
}
#endif