vm_radix.c revision 231027
1/*
2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29
30/*
31 * Radix tree implementation.
32 */
33
34#include <sys/cdefs.h>
35
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/ktr.h>
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/vm_extern.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_page.h>
52#include <vm/vm_radix.h>
53#include <vm/vm_object.h>
54
55#include <sys/kdb.h>
56
57CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
58CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
59
60static uma_zone_t vm_radix_node_zone;
61
62#ifndef UMA_MD_SMALL_ALLOC
63static void *
64vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
65{
66	vm_offset_t addr;
67	vm_page_t m;
68	int pflags;
69
70	/* Inform UMA that this allocator uses kernel_map. */
71	*flags = UMA_SLAB_KERNEL;
72
73	pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
74
75	/*
76	 * As kmem_alloc_nofault() can however fail, let just assume that
77	 * M_NOWAIT is on and act accordingly.
78	 */
79	pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
80	    VM_ALLOC_SYSTEM;
81	if ((wait & M_ZERO) != 0)
82		pflags |= VM_ALLOC_ZERO;
83	addr = kmem_alloc_nofault(kernel_map, size);
84	if (addr == 0)
85		return (NULL);
86
87	/* Just one page allocation is assumed here. */
88	m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
89	    pflags);
90	if (m == NULL) {
91		kmem_free(kernel_map, addr, size);
92		return (NULL);
93	}
94	if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
95		pmap_zero_page(m);
96	pmap_qenter(addr, &m, 1);
97	return ((void *)addr);
98}
99
100static void
101vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
102{
103	vm_page_t m;
104	vm_offset_t voitem;
105
106	MPASS((flags & UMA_SLAB_KERNEL) != 0);
107
108	/* Just one page allocation is assumed here. */
109	voitem = (vm_offset_t)item;
110	m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
111	pmap_qremove(voitem, 1);
112	vm_page_free(m);
113	kmem_free(kernel_map, voitem, size);
114}
115
116static void
117init_vm_radix_alloc(void *dummy __unused)
118{
119
120	uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
121	uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
122}
123SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
124#endif
125
126/*
127 * Radix node zone destructor.
128 */
129#ifdef INVARIANTS
130static void
131vm_radix_node_zone_dtor(void *mem, int size, void *arg)
132{
133	struct vm_radix_node *rnode;
134
135	rnode = mem;
136	KASSERT(rnode->rn_count == 0,
137	    ("vm_radix_node_put: Freeing a node with %d children\n",
138	    rnode->rn_count));
139}
140#endif
141
142/*
143 * Allocate a radix node.  Initializes all elements to 0.
144 */
145static __inline struct vm_radix_node *
146vm_radix_node_get(void)
147{
148
149	return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
150}
151
152/*
153 * Free radix node.
154 */
155static __inline void
156vm_radix_node_put(struct vm_radix_node *rnode)
157{
158
159	uma_zfree(vm_radix_node_zone, rnode);
160}
161
162/*
163 * Return the position in the array for a given level.
164 */
165static __inline int
166vm_radix_slot(vm_pindex_t index, int level)
167{
168
169	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
170}
171
172void
173vm_radix_init(void)
174{
175
176	vm_radix_node_zone = uma_zcreate("RADIX NODE",
177	    sizeof(struct vm_radix_node), NULL,
178#ifdef INVARIANTS
179	    vm_radix_node_zone_dtor,
180#else
181	    NULL,
182#endif
183	    NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM);
184}
185
186/*
187 * Extract the root node and height from a radix tree with a single load.
188 */
189static __inline int
190vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode)
191{
192	uintptr_t root;
193	int height;
194
195	root = rtree->rt_root;
196	height = root & VM_RADIX_HEIGHT;
197	*rnode = (struct vm_radix_node *)(root - height);
198	return (height);
199}
200
201
202/*
203 * Set the root node and height for a radix tree.
204 */
205static inline void
206vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
207    int height)
208{
209	uintptr_t root;
210
211	root = (uintptr_t)rnode | height;
212	rtree->rt_root = root;
213}
214
215static inline void
216vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
217    struct vm_radix_node *iroot, int ilevel)
218{
219	struct vm_radix_node *rnode;
220
221	CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
222	    rtree, root, iroot, ilevel);
223	while (iroot != root && root != NULL) {
224		rnode = root;
225		MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
226		rnode->rn_count = 0;
227		root = rnode->rn_child[0];
228		vm_radix_node_put(rnode);
229	}
230	vm_radix_setroot(rtree, iroot, ilevel);
231}
232
233static inline void *
234vm_radix_match(void *child, int color)
235{
236	uintptr_t c;
237
238	c = (uintptr_t)child;
239
240	if ((c & color) == 0)
241		return (NULL);
242	return ((void *)(c & ~VM_RADIX_FLAGS));
243}
244
245static void
246vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
247{
248	int slot;
249
250	MPASS(rnode != NULL && level >= 0);
251
252	/*
253	 * Level 0 just contains pages as children, thus make it a special
254	 * case, free the node and return.
255	 */
256	if (level == 0) {
257		CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
258		rnode->rn_count = 0;
259		vm_radix_node_put(rnode);
260		return;
261	}
262	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
263		if (rnode->rn_child[slot] == NULL)
264			continue;
265		CTR3(KTR_VM,
266		    "reclaiming: node %p, level %d recursing in slot %d",
267		    rnode, level, slot);
268		vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot],
269		    level - 1);
270		rnode->rn_count--;
271	}
272	MPASS(rnode->rn_count == 0);
273	CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
274	vm_radix_node_put(rnode);
275}
276
277/*
278 * Inserts the key-value pair in to the radix tree.  Returns errno.
279 * Panics if the key already exists.
280 */
281int
282vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
283{
284	struct vm_radix_node *iroot, *rnode, *root;
285	u_int allocmsk;
286	int clev, ilevel, level, slot;
287
288	CTR3(KTR_VM,
289	    "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
290	if (index == -1)
291		panic("vm_radix_insert: -1 is not a valid index.\n");
292	level = vm_radix_height(rtree, &root);
293	/*
294	 * Increase the height by adding nodes at the root until
295	 * there is sufficient space.
296	 */
297	ilevel = level;
298	iroot = root;
299	while (level == 0 || index > VM_RADIX_MAX(level)) {
300		CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
301		    (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
302		level++;
303		KASSERT(level <= VM_RADIX_LIMIT,
304		    ("vm_radix_insert: Tree %p height %d too tall",
305		    rtree, level));
306		/*
307		 * Only allocate tree nodes if they are needed.
308		 */
309		if (root == NULL || root->rn_count != 0) {
310			rnode = vm_radix_node_get();
311			if (rnode == NULL) {
312				CTR4(KTR_VM,
313		"insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
314				    rtree, root, (uintmax_t)index, level);
315				vm_radix_unwind_heightup(rtree, root, iroot,
316				    ilevel);
317				return (ENOMEM);
318			}
319			/*
320			 * Store the new pointer with a memory barrier so
321			 * that it is visible before the new root.
322			 */
323			if (root) {
324				atomic_store_rel_ptr((volatile uintptr_t *)
325				    &rnode->rn_child[0], (uintptr_t)root);
326				rnode->rn_count = 1;
327			}
328			root = rnode;
329		}
330		vm_radix_setroot(rtree, root, level);
331	}
332
333	/* Now that the tree is tall enough, fill in the path to the index. */
334	allocmsk = 0;
335	clev = level;
336	rnode = root;
337	for (level = level - 1; level > 0; level--) {
338		slot = vm_radix_slot(index, level);
339		/* Add the required intermidiate nodes. */
340		if (rnode->rn_child[slot] == NULL) {
341			rnode->rn_child[slot] = vm_radix_node_get();
342    			if (rnode->rn_child[slot] == NULL) {
343				CTR5(KTR_VM,
344	"insert: tree %p, index %ju, level %d, slot %d, rnode %p ENOMEM",
345				     rtree, (uintmax_t)index, level, slot,
346				     rnode);
347				CTR4(KTR_VM,
348			"insert: tree %p, rnode %p, child %p, count %u ENOMEM",
349		    		    rtree, rnode, rnode->rn_child[slot],
350				    rnode->rn_count);
351				MPASS(level != clev || allocmsk == 0);
352				while (allocmsk != 0) {
353					rnode = root;
354					level = clev;
355					level--;
356					CTR4(KTR_VM,
357		    "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
358					    root, level, slot, allocmsk);
359					slot = vm_radix_slot(index, level);
360					MPASS(level >= (ffs(allocmsk) - 1));
361					while (level > (ffs(allocmsk) - 1)) {
362						MPASS(level > 0);
363						slot = vm_radix_slot(index,
364						    level);
365						rnode = rnode->rn_child[slot];
366						level--;
367					}
368					MPASS((allocmsk & (1 << level)) != 0);
369					allocmsk &= ~(1 << level);
370					rnode->rn_count--;
371				vm_radix_node_put(rnode->rn_child[slot]);
372					rnode->rn_child[slot] = NULL;
373				}
374				vm_radix_unwind_heightup(rtree, root, iroot,
375				    ilevel);
376		    		return (ENOMEM);
377			}
378			rnode->rn_count++;
379			allocmsk |= (1 << level);
380	    	}
381		CTR5(KTR_VM,
382		    "insert: tree %p, index %ju, level %d, slot %d, rnode %p",
383		    rtree, (uintmax_t)index, level, slot, rnode);
384		CTR4(KTR_VM,
385		    "insert: tree %p, rnode %p, child %p, count %u",
386		    rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
387		rnode = rnode->rn_child[slot];
388	}
389
390	slot = vm_radix_slot(index, 0);
391	MPASS(rnode != NULL);
392	KASSERT(rnode->rn_child[slot] == NULL,
393	    ("vm_radix_insert: Duplicate value %p at index: %lu\n",
394	    rnode->rn_child[slot], (u_long)index));
395	val = (void *)((uintptr_t)val | VM_RADIX_BLACK);
396	rnode->rn_child[slot] = val;
397	atomic_add_32(&rnode->rn_count, 1);
398	CTR6(KTR_VM,
399	    "insert: tree %p, index %ju, level %d, slot %d, rnode %p, count %u",
400	    rtree, (uintmax_t)index, level, slot, rnode, rnode->rn_count);
401
402	return 0;
403}
404
405/*
406 * Returns the value stored at the index.  If the index is not present
407 * NULL is returned.
408 */
409void *
410vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color)
411{
412	struct vm_radix_node *rnode;
413	int slot;
414	int level;
415
416	level = vm_radix_height(rtree, &rnode);
417	if (index > VM_RADIX_MAX(level))
418		return NULL;
419	level--;
420	while (rnode) {
421		slot = vm_radix_slot(index, level);
422		CTR6(KTR_VM,
423	"lookup: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
424		    rtree, (uintmax_t)index, level, slot, rnode,
425		    rnode->rn_child[slot]);
426		if (level == 0)
427			return vm_radix_match(rnode->rn_child[slot], color);
428		rnode = rnode->rn_child[slot];
429		level--;
430	}
431	CTR2(KTR_VM, "lookup: tree %p, index %ju failed", rtree,
432	     (uintmax_t)index);
433
434	return NULL;
435}
436
437void *
438vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color)
439{
440	struct vm_radix_node *rnode;
441	uintptr_t child;
442	int slot;
443	int level;
444
445	level = vm_radix_height(rtree, &rnode);
446	if (index > VM_RADIX_MAX(level))
447		return NULL;
448	level--;
449	while (rnode) {
450		slot = vm_radix_slot(index, level);
451		CTR6(KTR_VM,
452	"color: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
453		    rtree, (uintmax_t)index, level, slot, rnode,
454		    rnode->rn_child[slot]);
455		if (level == 0)
456			break;
457		rnode = rnode->rn_child[slot];
458		level--;
459	}
460	if (rnode == NULL || rnode->rn_child[slot] == NULL)
461		return (NULL);
462	child = (uintptr_t)rnode->rn_child[slot];
463	child &= ~VM_RADIX_FLAGS;
464	rnode->rn_child[slot] = (void *)(child | color);
465
466	return (void *)child;
467}
468
469/*
470 * Find the first leaf with a valid node between *startp and end.  Return
471 * the index of the first valid item in the leaf in *startp.
472 */
473static struct vm_radix_node *
474vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end)
475{
476	struct vm_radix_node *rnode;
477	vm_pindex_t start;
478	vm_pindex_t inc;
479	int slot;
480	int level;
481
482	start = *startp;
483restart:
484	level = vm_radix_height(rtree, &rnode);
485	if (start > VM_RADIX_MAX(level) || (end && start >= end)) {
486		rnode = NULL;
487		goto out;
488	}
489	/*
490	 * Search the tree from the top for any leaf node holding an index
491	 * between start and end.
492	 */
493	for (level--; level; level--) {
494		slot = vm_radix_slot(start, level);
495		CTR6(KTR_VM,
496	"leaf: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
497		    rtree, (uintmax_t)start, level, slot, rnode,
498		    (rnode != NULL) ? rnode->rn_child[slot] : NULL);
499		if (rnode->rn_child[slot] != NULL) {
500			rnode = rnode->rn_child[slot];
501			continue;
502		}
503		/*
504		 * Calculate how much to increment our index by
505		 * based on the tree level.  We must truncate the
506		 * lower bits to start from the begnning of the
507		 * next leaf.
508	 	 */
509		inc = 1LL << (level * VM_RADIX_WIDTH);
510		start &= ~VM_RADIX_MAX(level);
511
512		/* Avoid start address wrapping up. */
513		if ((VM_RADIX_MAXVAL - start) < inc) {
514			rnode = NULL;
515			goto out;
516		}
517		start += inc;
518		slot++;
519		CTR5(KTR_VM,
520		    "leaf: start %ju end %ju inc %ju mask 0x%jX slot %d",
521		    (uintmax_t)start, (uintmax_t)end, (uintmax_t)inc,
522		    (uintmax_t)~VM_RADIX_MAX(level), slot);
523		for (; slot < VM_RADIX_COUNT; slot++, start += inc) {
524			if (end != 0 && start >= end) {
525				rnode = NULL;
526				goto out;
527			}
528			if (rnode->rn_child[slot]) {
529				rnode = rnode->rn_child[slot];
530				break;
531			}
532			if ((VM_RADIX_MAXVAL - start) < inc) {
533				rnode = NULL;
534				goto out;
535			}
536		}
537		if (slot == VM_RADIX_COUNT)
538			goto restart;
539	}
540
541out:
542	*startp = start;
543	return (rnode);
544}
545
546
547
548/*
549 * Looks up as many as cnt values between start and end, and stores
550 * them in the caller allocated array out.  The next index can be used
551 * to restart the scan.  This optimizes forward scans in the tree.
552 */
553int
554vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
555    vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next)
556{
557	struct vm_radix_node *rnode;
558	void *val;
559	int slot;
560	int outidx;
561
562	CTR3(KTR_VM, "lookupn: tree %p, start %ju, end %ju",
563	    rtree, (uintmax_t)start, (uintmax_t)end);
564	if (rtree->rt_root == 0)
565		return (0);
566	outidx = 0;
567	while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
568		slot = vm_radix_slot(start, 0);
569		for (; slot < VM_RADIX_COUNT; slot++, start++) {
570			if (end != 0 && start >= end)
571				goto out;
572			val = vm_radix_match(rnode->rn_child[slot], color);
573			if (val == NULL)
574				continue;
575			CTR4(KTR_VM,
576			    "lookupn: tree %p index %ju slot %d found child %p",
577			    rtree, (uintmax_t)start, slot, val);
578			out[outidx] = val;
579			if (++outidx == cnt)
580				goto out;
581		}
582		if (end != 0 && start >= end)
583			break;
584	}
585out:
586	*next = start;
587	return (outidx);
588}
589
590void
591vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end,
592    int color, void (*iter)(void *))
593{
594	struct vm_radix_node *rnode;
595	void *val;
596	int slot;
597
598	if (rtree->rt_root == 0)
599		return;
600	while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
601		slot = vm_radix_slot(start, 0);
602		for (; slot < VM_RADIX_COUNT; slot++, start++) {
603			if (end != 0 && start >= end)
604				return;
605			val = vm_radix_match(rnode->rn_child[slot], color);
606			if (val)
607				iter(val);
608		}
609		if (end != 0 && start >= end)
610			return;
611	}
612}
613
614
615/*
616 * Look up any entry at a position less than or equal to index.
617 */
618void *
619vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color)
620{
621	struct vm_radix_node *rnode;
622	struct vm_radix_node *child;
623	vm_pindex_t max;
624	vm_pindex_t inc;
625	void *val;
626	int slot;
627	int level;
628
629	CTR2(KTR_VM,
630	    "lookup_le: tree %p, index %ju", rtree, (uintmax_t)index);
631restart:
632	level = vm_radix_height(rtree, &rnode);
633	if (rnode == NULL)
634		return (NULL);
635	max = VM_RADIX_MAX(level);
636	if (index > max || index == 0)
637		index = max;
638	/*
639	 * Search the tree from the top for any leaf node holding an index
640	 * lower than 'index'.
641	 */
642	level--;
643	while (rnode) {
644		slot = vm_radix_slot(index, level);
645		CTR6(KTR_VM,
646	"lookup_le: tree %p, index %ju, level %d, slot %d, rnode %p, child %p",
647		    rtree, (uintmax_t)index, level, slot, rnode,
648		    rnode->rn_child[slot]);
649		if (level == 0)
650			break;
651		/*
652		 * If we don't have an exact match we must start our search
653		 * from the next leaf and adjust our index appropriately.
654		 */
655		if ((child = rnode->rn_child[slot]) == NULL) {
656			/*
657			 * Calculate how much to decrement our index by
658			 * based on the tree level.  We must set the
659			 * lower bits to start from the end of the next
660			 * leaf.
661		 	 */
662			inc = 1LL << (level * VM_RADIX_WIDTH);
663			index |= VM_RADIX_MAX(level);
664			index -= inc;
665			slot--;
666			CTR4(KTR_VM,
667			    "lookup_le: start %ju inc %ju mask 0x%jX slot %d",
668			    (uintmax_t)index, (uintmax_t)inc,
669			    (uintmax_t)VM_RADIX_MAX(level), slot);
670			for (; slot >= 0; slot--, index -= inc) {
671				child = rnode->rn_child[slot];
672				if (child)
673					break;
674			}
675		}
676		rnode = child;
677		level--;
678	}
679	if (rnode) {
680		for (; slot >= 0; slot--, index--) {
681			val = vm_radix_match(rnode->rn_child[slot], color);
682			if (val)
683				return (val);
684		}
685	}
686	if (index != -1)
687		goto restart;
688	return (NULL);
689}
690
691/*
692 * Remove the specified index from the tree.  If possible the height of the
693 * tree is adjusted after deletion.  The value stored at index is returned
694 * panics if the key is not present.
695 */
696void *
697vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color)
698{
699	struct vm_radix_node *stack[VM_RADIX_LIMIT];
700	struct vm_radix_node *rnode, *root;
701	void *val;
702	int level;
703	int slot;
704
705	level = vm_radix_height(rtree, &root);
706	KASSERT(index <= VM_RADIX_MAX(level),
707	    ("vm_radix_remove: %p index %ju out of range %jd.",
708	     rtree, (uintmax_t)index, VM_RADIX_MAX(level)));
709	rnode = root;
710	val = NULL;
711	level--;
712	/*
713	 * Find the node and record the path in stack.
714	 */
715	while (level && rnode) {
716		stack[level] = rnode;
717		slot = vm_radix_slot(index, level);
718		CTR5(KTR_VM,
719		    "remove: tree %p, index %ju, level %d, slot %d, rnode %p",
720		    rtree, (uintmax_t)index, level, slot, rnode);
721		CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u",
722		    rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
723		rnode = rnode->rn_child[slot];
724		level--;
725	}
726	KASSERT(rnode != NULL,
727		("vm_radix_remove: index %ju not present in the tree.\n",
728		 (uintmax_t)index));
729	slot = vm_radix_slot(index, 0);
730	val = vm_radix_match(rnode->rn_child[slot], color);
731	KASSERT(val != NULL,
732		("vm_radix_remove: index %ju not present in the tree.\n",
733		 (uintmax_t)index));
734
735	for (;;) {
736		CTR5(KTR_VM,
737	"remove: resetting tree %p, index %ju, level %d, slot %d, rnode %p",
738		    rtree, (uintmax_t)index, level, slot, rnode);
739		CTR4(KTR_VM,
740		    "remove: resetting tree %p, rnode %p, child %p, count %u",
741		    rtree, rnode,
742		    (rnode != NULL) ? rnode->rn_child[slot] : NULL,
743		    (rnode != NULL) ? rnode->rn_count : 0);
744		rnode->rn_child[slot] = NULL;
745		/*
746		 * Use atomics for the last level since red and black
747		 * will both adjust it.
748		 * Use a write memory barrier here in order to avoid
749		 * rn_count reaching 0 before to fetch the actual pointer.
750		 * Concurrent black removal, infact, may want to reclaim
751		 * the radix node itself before to read it.
752		 */
753		if (level == 0)
754			atomic_add_rel_32(&rnode->rn_count, -1);
755		else
756			rnode->rn_count--;
757		/*
758		 * Only allow black removes to prune the tree.
759		 */
760		if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0)
761			break;
762		vm_radix_node_put(rnode);
763		if (rnode == root) {
764			vm_radix_setroot(rtree, NULL, 0);
765			break;
766		}
767		rnode = stack[++level];
768		slot = vm_radix_slot(index, level);
769
770	}
771	return (val);
772}
773
774/*
775 * Remove and free all the nodes from the radix tree.
776 * This function is recrusive but there is a tight control on it as the
777 * maximum depth of the tree is fixed.
778 */
779void
780vm_radix_reclaim_allnodes(struct vm_radix *rtree)
781{
782	struct vm_radix_node *root;
783	int level;
784
785	if (rtree->rt_root == 0)
786		return;
787	level = vm_radix_height(rtree, &root);
788	vm_radix_reclaim_allnodes_internal(root, level - 1);
789	rtree->rt_root = 0;
790}
791
792/*
793 * Attempts to reduce the height of the tree.
794 */
795void
796vm_radix_shrink(struct vm_radix *rtree)
797{
798	struct vm_radix_node *tmp, *root;
799	int level;
800
801	if (rtree->rt_root == 0)
802		return;
803	level = vm_radix_height(rtree, &root);
804
805	/* Adjust the height of the tree. */
806	while (root->rn_count == 1 && root->rn_child[0] != NULL) {
807		tmp = root;
808		root->rn_count--;
809		root = root->rn_child[0];
810		level--;
811		vm_radix_node_put(tmp);
812	}
813	/* Finally see if we have an empty tree. */
814	if (root->rn_count == 0) {
815		vm_radix_node_put(root);
816		root = NULL;
817		level--;
818	}
819	vm_radix_setroot(rtree, root, level);
820}
821