vm_radix.c revision 247742
1174294Sobrien/*
2131702Smbr * Copyright (c) 2013 EMC Corp.
3174294Sobrien * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4174294Sobrien * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
5174294Sobrien * All rights reserved.
6131702Smbr *
7174294Sobrien * Redistribution and use in source and binary forms, with or without
8131702Smbr * modification, are permitted provided that the following conditions
9174294Sobrien * are met:
10174294Sobrien * 1. Redistributions of source code must retain the above copyright
11131702Smbr *    notice, this list of conditions and the following disclaimer.
12174294Sobrien * 2. Redistributions in binary form must reproduce the above copyright
13174294Sobrien *    notice, this list of conditions and the following disclaimer in the
14131702Smbr *    documentation and/or other materials provided with the distribution.
15174294Sobrien *
16174294Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17131702Smbr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18174294Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19131702Smbr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20174294Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21174294Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22174294Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23131702Smbr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24174294Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25131702Smbr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26174294Sobrien * SUCH DAMAGE.
27174294Sobrien *
28174294Sobrien */
29174294Sobrien
30174294Sobrien/*
31174294Sobrien * Path-compressed radix trie implementation.
32174294Sobrien * The following code is not generalized into a general purpose library
33174294Sobrien * because there are way too many parameters embedded that should really
34174294Sobrien * be decided by the library consumers.  At the same time, consumers
35174294Sobrien * of this code must achieve highest possible performance.
36174294Sobrien *
37174294Sobrien * The implementation takes into account the following rationale:
38174294Sobrien * - Size of the nodes might be as small as possible.
39174294Sobrien * - There is no bias toward lookup operations over inserts or removes,
40174294Sobrien *   and vice-versa.
41174294Sobrien * - In average there are not many complete levels, than level
42174294Sobrien *   compression may just complicate things.
43174294Sobrien */
44174294Sobrien
45174294Sobrien#include <sys/cdefs.h>
46174294Sobrien
47174294Sobrien#include "opt_ddb.h"
48174294Sobrien
49174294Sobrien#include <sys/param.h>
50174294Sobrien#include <sys/systm.h>
51174294Sobrien#include <sys/kernel.h>
52174294Sobrien#include <sys/vmmeter.h>
53174294Sobrien
54174294Sobrien#include <vm/uma.h>
55174294Sobrien#include <vm/vm.h>
56174294Sobrien#include <vm/vm_param.h>
57174294Sobrien#include <vm/vm_page.h>
58174294Sobrien#include <vm/vm_radix.h>
59174294Sobrien
60174294Sobrien#ifdef DDB
61174294Sobrien#include <ddb/ddb.h>
62174294Sobrien#endif
63174294Sobrien
64174294Sobrien/*
65174294Sobrien * Such sizes should permit to keep node children contained into a single
66174294Sobrien * cache-line, or to at least not span many of those.
67174294Sobrien * In particular, sparse tries should however be compressed properly and
68174294Sobrien * then make some extra-levels not a big deal.
69174294Sobrien */
70174294Sobrien#ifdef __LP64__
71174294Sobrien#define	VM_RADIX_WIDTH	4
72174294Sobrien#else
73174294Sobrien#define	VM_RADIX_WIDTH	3
74174294Sobrien#endif
75174294Sobrien
76174294Sobrien#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
77174294Sobrien#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
78174294Sobrien#define	VM_RADIX_LIMIT							\
79174294Sobrien	(howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
80174294Sobrien
81174294Sobrien/* Flag bits stored in node pointers. */
82174294Sobrien#define	VM_RADIX_ISLEAF	0x1
83174294Sobrien#define	VM_RADIX_FLAGS	0x1
84174294Sobrien#define	VM_RADIX_PAD	VM_RADIX_FLAGS
85174294Sobrien
86174294Sobrien/* Returns one unit associated with specified level. */
87174294Sobrien#define	VM_RADIX_UNITLEVEL(lev)						\
88174294Sobrien	((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
89174294Sobrien
90174294Sobrienstruct vm_radix_node {
91174294Sobrien	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
92174294Sobrien	vm_pindex_t	 rn_owner;			/* Owner of record. */
93174294Sobrien	uint16_t	 rn_count;			/* Valid children. */
94174294Sobrien	uint16_t	 rn_clev;			/* Current level. */
95174294Sobrien};
96174294Sobrien
97174294Sobrienstatic uma_zone_t vm_radix_node_zone;
98174294Sobrien
99174294Sobrien/*
100174294Sobrien * Allocate a radix node.  Pre-allocation ensures that the request will be
101174294Sobrien * always successfully satisfied.
102174294Sobrien */
103174294Sobrienstatic __inline struct vm_radix_node *
104174294Sobrienvm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
105174294Sobrien{
106174294Sobrien	struct vm_radix_node *rnode;
107174294Sobrien
108174294Sobrien	rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);
109174294Sobrien
110174294Sobrien	/*
111174294Sobrien	 * The required number of nodes might be already correctly
112174294Sobrien	 * pre-allocated in vm_radix_init().  However, UMA can reserve
113174294Sobrien	 * few nodes on per-cpu specific buckets, which will not be
114174294Sobrien	 * accessible from the curcpu.  The allocation could then
115174294Sobrien	 * return NULL when the pre-allocation pool is close to be
116174294Sobrien	 * exhausted.  Anyway, in practice this should never be a
117174294Sobrien	 * problem because a new node is not always required for
118174294Sobrien	 * insert, thus the pre-allocation pool should already have
119174294Sobrien	 * some extra-pages that indirectly deal with this situation.
120174294Sobrien	 */
121174294Sobrien	if (rnode == NULL)
122174294Sobrien		panic("%s: uma_zalloc() returned NULL for a new node",
123174294Sobrien		    __func__);
124174294Sobrien	rnode->rn_owner = owner;
125174294Sobrien	rnode->rn_count = count;
126174294Sobrien	rnode->rn_clev = clevel;
127174294Sobrien	return (rnode);
128174294Sobrien}
129174294Sobrien
130174294Sobrien/*
131174294Sobrien * Free radix node.
132174294Sobrien */
133174294Sobrienstatic __inline void
134174294Sobrienvm_radix_node_put(struct vm_radix_node *rnode)
135174294Sobrien{
136174294Sobrien
137174294Sobrien	uma_zfree(vm_radix_node_zone, rnode);
138174294Sobrien}
139174294Sobrien
140174294Sobrien/*
141174294Sobrien * Return the position in the array for a given level.
142174294Sobrien */
143174294Sobrienstatic __inline int
144174294Sobrienvm_radix_slot(vm_pindex_t index, uint16_t level)
145174294Sobrien{
146174294Sobrien
147174294Sobrien	return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
148174294Sobrien	    VM_RADIX_MASK);
149174294Sobrien}
150174294Sobrien
151174294Sobrien/* Trims the key after the specified level. */
152174294Sobrienstatic __inline vm_pindex_t
153174294Sobrienvm_radix_trimkey(vm_pindex_t index, uint16_t level)
154174294Sobrien{
155174294Sobrien	vm_pindex_t ret;
156174294Sobrien
157174294Sobrien	ret = index;
158174294Sobrien	if (level < VM_RADIX_LIMIT) {
159174294Sobrien		ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
160174294Sobrien		ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
161174294Sobrien	}
162174294Sobrien	return (ret);
163174294Sobrien}
164174294Sobrien
165174294Sobrien/*
166174294Sobrien * Get the root node for a radix tree.
167174294Sobrien */
168174294Sobrienstatic __inline struct vm_radix_node *
169174294Sobrienvm_radix_getroot(struct vm_radix *rtree)
170174294Sobrien{
171174294Sobrien
172174294Sobrien	return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
173174294Sobrien}
174174294Sobrien
175174294Sobrien/*
176174294Sobrien * Set the root node for a radix tree.
177174294Sobrien */
178174294Sobrienstatic __inline void
179174294Sobrienvm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
180174294Sobrien{
181174294Sobrien
182174294Sobrien	rtree->rt_root = (uintptr_t)rnode;
183174294Sobrien}
184174294Sobrien
185174294Sobrien/*
186174294Sobrien * Returns the associated page extracted from rnode if available,
187174294Sobrien * NULL otherwise.
188174294Sobrien */
189174294Sobrienstatic __inline vm_page_t
190174294Sobrienvm_radix_node_page(struct vm_radix_node *rnode)
191174294Sobrien{
192174294Sobrien
193174294Sobrien	return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
194174294Sobrien	    (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
195174294Sobrien}
196174294Sobrien
197174294Sobrien/*
198174294Sobrien * Adds the page as a child of provided node.
199174294Sobrien */
200174294Sobrienstatic __inline void
201174294Sobrienvm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
202174294Sobrien    vm_page_t page)
203174294Sobrien{
204174294Sobrien	int slot;
205174294Sobrien
206174294Sobrien	slot = vm_radix_slot(index, clev);
207174294Sobrien	rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
208174294Sobrien}
209174294Sobrien
210174294Sobrien/*
211174294Sobrien * Returns the slot where two keys differ.
212174294Sobrien * It cannot accept 2 equal keys.
213174294Sobrien */
214174294Sobrienstatic __inline uint16_t
215174294Sobrienvm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
216174294Sobrien{
217174294Sobrien	uint16_t clev;
218174294Sobrien
219174294Sobrien	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
220174294Sobrien	    __func__, (uintmax_t)index1));
221174294Sobrien
222174294Sobrien	index1 ^= index2;
223174294Sobrien	for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++)
224174294Sobrien		if (vm_radix_slot(index1, clev))
225174294Sobrien			return (clev);
226174294Sobrien	panic("%s: it might have not reached this point", __func__);
227174294Sobrien	return (0);
228174294Sobrien}
229174294Sobrien
230174294Sobrien/*
231174294Sobrien * Returns TRUE if it can be determined that key does not belong to the
232174294Sobrien * specified rnode. FALSE otherwise.
233174294Sobrien */
234174294Sobrienstatic __inline boolean_t
235174294Sobrienvm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
236174294Sobrien{
237174294Sobrien
238174294Sobrien	if (rnode->rn_clev > 0) {
239174294Sobrien		idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
240174294Sobrien		idx -= rnode->rn_owner;
241174294Sobrien		if (idx != 0)
242174294Sobrien			return (TRUE);
243174294Sobrien	}
244174294Sobrien	return (FALSE);
245174294Sobrien}
246174294Sobrien
247174294Sobrien/*
248174294Sobrien * Adjusts the idx key to the first upper level available, based on a valid
249174294Sobrien * initial level and map of available levels.
250174294Sobrien * Returns a value bigger than 0 to signal that there are not valid levels
251174294Sobrien * available.
252174294Sobrien */
253174294Sobrienstatic __inline int
254174294Sobrienvm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
255174294Sobrien{
256174294Sobrien	vm_pindex_t wrapidx;
257174294Sobrien
258174294Sobrien	for (; levels[ilev] == FALSE ||
259174294Sobrien	    vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
260174294Sobrien		if (ilev == 0)
261174294Sobrien			break;
262174294Sobrien	KASSERT(ilev > 0 || levels[0] == TRUE,
263174294Sobrien	    ("%s: levels back-scanning problem", __func__));
264174294Sobrien	if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
265174294Sobrien		return (1);
266174294Sobrien	wrapidx = *idx;
267174294Sobrien	*idx = vm_radix_trimkey(*idx, ilev);
268174294Sobrien	*idx += VM_RADIX_UNITLEVEL(ilev);
269174294Sobrien	return (*idx < wrapidx);
270174294Sobrien}
271174294Sobrien
272174294Sobrien/*
273174294Sobrien * Adjusts the idx key to the first lower level available, based on a valid
274174294Sobrien * initial level and map of available levels.
275174294Sobrien * Returns a value bigger than 0 to signal that there are not valid levels
276174294Sobrien * available.
277174294Sobrien */
278174294Sobrienstatic __inline int
279174294Sobrienvm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
280174294Sobrien{
281174294Sobrien	vm_pindex_t wrapidx;
282174294Sobrien
283174294Sobrien	for (; levels[ilev] == FALSE ||
284174294Sobrien	    vm_radix_slot(*idx, ilev) == 0; ilev--)
285174294Sobrien		if (ilev == 0)
286174294Sobrien			break;
287174294Sobrien	KASSERT(ilev > 0 || levels[0] == TRUE,
288174294Sobrien	    ("%s: levels back-scanning problem", __func__));
289174294Sobrien	if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
290174294Sobrien		return (1);
291174294Sobrien	wrapidx = *idx;
292174294Sobrien	*idx = vm_radix_trimkey(*idx, ilev);
293174294Sobrien	*idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
294174294Sobrien	*idx -= VM_RADIX_UNITLEVEL(ilev);
295174294Sobrien	return (*idx > wrapidx);
296174294Sobrien}
297174294Sobrien
298174294Sobrien/*
299174294Sobrien * Internal handwork for vm_radix_reclaim_allonodes() primitive.
300174294Sobrien * This function is recrusive.
301174294Sobrien */
302174294Sobrienstatic void
303174294Sobrienvm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
304174294Sobrien{
305174294Sobrien	int slot;
306174294Sobrien
307174294Sobrien	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
308174294Sobrien		if (rnode->rn_child[slot] == NULL)
309174294Sobrien			continue;
310174294Sobrien		if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
311174294Sobrien			vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
312174294Sobrien		rnode->rn_count--;
313174294Sobrien	}
314174294Sobrien	vm_radix_node_put(rnode);
315174294Sobrien}
316174294Sobrien
317174294Sobrien#ifdef INVARIANTS
318174294Sobrien/*
319174294Sobrien * Radix node zone destructor.
320174294Sobrien */
321174294Sobrienstatic void
322174294Sobrienvm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
323174294Sobrien{
324174294Sobrien	struct vm_radix_node *rnode;
325174294Sobrien
326174294Sobrien	rnode = mem;
327174294Sobrien	KASSERT(rnode->rn_count == 0,
328174294Sobrien	    ("vm_radix_node_put: Freeing node %p with %d children\n", mem,
329174294Sobrien	    rnode->rn_count));
330174294Sobrien}
331174294Sobrien#endif
332174294Sobrien
333174294Sobrien/*
334174294Sobrien * Pre-allocate intermediate nodes from the UMA slab zone.
335174294Sobrien */
336174294Sobrienstatic void
337174294Sobrienvm_radix_prealloc(void *arg __unused)
338174294Sobrien{
339174294Sobrien
340174294Sobrien	if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
341174294Sobrien		panic("%s: unable to create new zone", __func__);
342174294Sobrien	uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
343174294Sobrien}
344174294SobrienSYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
345174294Sobrien    NULL);
346174294Sobrien
347174294Sobrien/*
348174294Sobrien * Initialize the UMA slab zone.
349174294Sobrien * Until vm_radix_prealloc() is called, the zone will be served by the
350174294Sobrien * UMA boot-time pre-allocated pool of pages.
351174294Sobrien */
352174294Sobrienvoid
353174294Sobrienvm_radix_init(void)
354174294Sobrien{
355174294Sobrien
356174294Sobrien	vm_radix_node_zone = uma_zcreate("RADIX NODE",
357174294Sobrien	    sizeof(struct vm_radix_node), NULL,
358174294Sobrien#ifdef INVARIANTS
359174294Sobrien	    vm_radix_node_zone_dtor,
360174294Sobrien#else
361174294Sobrien	    NULL,
362174294Sobrien#endif
363174294Sobrien	    NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE);
364174294Sobrien}
365174294Sobrien
366174294Sobrien/*
367174294Sobrien * Inserts the key-value pair in to the trie.
368174294Sobrien * Panics if the key already exists.
369174294Sobrien */
370174294Sobrienvoid
371174294Sobrienvm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
372174294Sobrien{
373174294Sobrien	vm_pindex_t newind;
374174294Sobrien	struct vm_radix_node *rnode, *tmp, *tmp2;
375174294Sobrien	vm_page_t m;
376174294Sobrien	int slot;
377174294Sobrien	uint16_t clev;
378174294Sobrien
379174294Sobrien	/*
380174294Sobrien	 * The owner of record for root is not really important because it
381174294Sobrien	 * will never be used.
382174294Sobrien	 */
383174294Sobrien	rnode = vm_radix_getroot(rtree);
384174294Sobrien	if (rnode == NULL) {
385174294Sobrien		rnode = vm_radix_node_get(0, 1, 0);
386174294Sobrien		vm_radix_setroot(rtree, rnode);
387174294Sobrien		vm_radix_addpage(rnode, index, 0, page);
388174294Sobrien		return;
389174294Sobrien	}
390174294Sobrien	while (rnode != NULL) {
391174294Sobrien		if (vm_radix_keybarr(rnode, index) == TRUE)
392174294Sobrien			break;
393174294Sobrien		slot = vm_radix_slot(index, rnode->rn_clev);
394174294Sobrien		m = vm_radix_node_page(rnode->rn_child[slot]);
395174294Sobrien		if (m != NULL) {
396174294Sobrien			if (m->pindex == index)
397174294Sobrien				panic("%s: key %jx is already present",
398174294Sobrien				    __func__, (uintmax_t)index);
399174294Sobrien			clev = vm_radix_keydiff(m->pindex, index);
400174294Sobrien			tmp = vm_radix_node_get(vm_radix_trimkey(index,
401174294Sobrien			    clev - 1), 2, clev);
402174294Sobrien			rnode->rn_child[slot] = tmp;
403174294Sobrien			vm_radix_addpage(tmp, index, clev, page);
404174294Sobrien			vm_radix_addpage(tmp, m->pindex, clev, m);
405174294Sobrien			return;
406174294Sobrien		}
407174294Sobrien		if (rnode->rn_child[slot] == NULL) {
408174294Sobrien			rnode->rn_count++;
409174294Sobrien			vm_radix_addpage(rnode, index, rnode->rn_clev, page);
410174294Sobrien			return;
411174294Sobrien		}
412174294Sobrien		rnode = rnode->rn_child[slot];
413174294Sobrien	}
414174294Sobrien	if (rnode == NULL)
415174294Sobrien		panic("%s: path traversal ended unexpectedly", __func__);
416174294Sobrien
417174294Sobrien	/*
418174294Sobrien	 * Scan the trie from the top and find the parent to insert
419174294Sobrien	 * the new object.
420174294Sobrien	 */
421174294Sobrien	newind = rnode->rn_owner;
422174294Sobrien	clev = vm_radix_keydiff(newind, index);
423174294Sobrien	slot = VM_RADIX_COUNT;
424174294Sobrien	for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
425174294Sobrien		KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
426174294Sobrien		    __func__));
427174294Sobrien		KASSERT(clev >= rnode->rn_clev,
428174294Sobrien		    ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
429174294Sobrien		    __func__, clev, rnode->rn_clev));
430174294Sobrien		slot = vm_radix_slot(index, rnode->rn_clev);
431174294Sobrien		tmp = rnode->rn_child[slot];
432174294Sobrien		KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
433174294Sobrien		    ("%s: unexpected lookup interruption", __func__));
434174294Sobrien		if (tmp->rn_clev > clev)
435174294Sobrien			break;
436174294Sobrien	}
437174294Sobrien	KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
438174294Sobrien	    ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
439174294Sobrien	    __func__, (void *)rnode, (void *)tmp, slot));
440174294Sobrien
441174294Sobrien	/*
442174294Sobrien	 * A new node is needed because the right insertion level is reached.
443174294Sobrien	 * Setup the new intermediate node and add the 2 children: the
444174294Sobrien	 * new object and the older edge.
445174294Sobrien	 */
446174294Sobrien	tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2,
447174294Sobrien	    clev);
448174294Sobrien	rnode->rn_child[slot] = tmp2;
449174294Sobrien	vm_radix_addpage(tmp2, index, clev, page);
450174294Sobrien	slot = vm_radix_slot(newind, clev);
451174294Sobrien	tmp2->rn_child[slot] = tmp;
452174294Sobrien}
453174294Sobrien
454174294Sobrien/*
455174294Sobrien * Returns the value stored at the index.  If the index is not present
456174294Sobrien * NULL is returned.
457174294Sobrien */
458174294Sobrienvm_page_t
459174294Sobrienvm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
460174294Sobrien{
461174294Sobrien	struct vm_radix_node *rnode;
462174294Sobrien	vm_page_t m;
463174294Sobrien	int slot;
464174294Sobrien
465174294Sobrien	rnode = vm_radix_getroot(rtree);
466174294Sobrien	while (rnode != NULL) {
467174294Sobrien		if (vm_radix_keybarr(rnode, index) == TRUE)
468174294Sobrien			return (NULL);
469174294Sobrien		slot = vm_radix_slot(index, rnode->rn_clev);
470174294Sobrien		rnode = rnode->rn_child[slot];
471174294Sobrien		m = vm_radix_node_page(rnode);
472174294Sobrien		if (m != NULL) {
473174294Sobrien			if (m->pindex == index)
474174294Sobrien				return (m);
475174294Sobrien			else
476174294Sobrien				return (NULL);
477174294Sobrien		}
478174294Sobrien	}
479174294Sobrien	return (NULL);
480174294Sobrien}
481174294Sobrien
482174294Sobrien/*
483174294Sobrien * Look up any entry at a position bigger than or equal to index.
484174294Sobrien */
485174294Sobrienvm_page_t
486174294Sobrienvm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
487174294Sobrien{
488174294Sobrien	vm_pindex_t inc;
489174294Sobrien	vm_page_t m;
490174294Sobrien	struct vm_radix_node *rnode;
491174294Sobrien	int slot;
492174294Sobrien	uint16_t difflev;
493174294Sobrien	boolean_t maplevels[VM_RADIX_LIMIT + 1];
494174294Sobrien#ifdef INVARIANTS
495174294Sobrien	int loops = 0;
496174294Sobrien#endif
497174294Sobrien
498174294Sobrienrestart:
499174294Sobrien	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
500174294Sobrien	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
501174294Sobrien		maplevels[difflev] = FALSE;
502174294Sobrien	rnode = vm_radix_getroot(rtree);
503174294Sobrien	while (rnode != NULL) {
504174294Sobrien		maplevels[rnode->rn_clev] = TRUE;
505174294Sobrien
506174294Sobrien		/*
507174294Sobrien		 * If the keys differ before the current bisection node
508174294Sobrien		 * the search key might rollback to the earlierst
509174294Sobrien		 * available bisection node, or to the smaller value
510174294Sobrien		 * in the current domain (if the owner is bigger than the
511174294Sobrien		 * search key).
512174294Sobrien		 * The search for a valid bisection node is helped through
513174294Sobrien		 * the use of maplevels array which should bring immediately
514174294Sobrien		 * a lower useful level, skipping holes.
515174294Sobrien		 */
516174294Sobrien		if (vm_radix_keybarr(rnode, index) == TRUE) {
517174294Sobrien			difflev = vm_radix_keydiff(index, rnode->rn_owner);
518174294Sobrien			if (index > rnode->rn_owner) {
519174294Sobrien				if (vm_radix_addlev(&index, maplevels,
520174294Sobrien				    difflev) > 0)
521174294Sobrien					break;
522174294Sobrien			} else
523174294Sobrien				index = vm_radix_trimkey(rnode->rn_owner,
524174294Sobrien				    difflev);
525174294Sobrien			goto restart;
526174294Sobrien		}
527174294Sobrien		slot = vm_radix_slot(index, rnode->rn_clev);
528174294Sobrien		m = vm_radix_node_page(rnode->rn_child[slot]);
529174294Sobrien		if (m != NULL && m->pindex >= index)
530174294Sobrien			return (m);
531174294Sobrien		if (rnode->rn_child[slot] != NULL && m == NULL) {
532174294Sobrien			rnode = rnode->rn_child[slot];
533174294Sobrien			continue;
534174294Sobrien		}
535174294Sobrien
536174294Sobrien		/*
537174294Sobrien		 * Look for an available edge or page within the current
538174294Sobrien		 * bisection node.
539174294Sobrien		 */
540174294Sobrien                if (slot < (VM_RADIX_COUNT - 1)) {
541174294Sobrien			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
542174294Sobrien			index = vm_radix_trimkey(index, rnode->rn_clev);
543174294Sobrien			index += inc;
544174294Sobrien			slot++;
545174294Sobrien			for (;; index += inc, slot++) {
546174294Sobrien				m = vm_radix_node_page(rnode->rn_child[slot]);
547174294Sobrien				if (m != NULL && m->pindex >= index)
548174294Sobrien					return (m);
549174294Sobrien				if ((rnode->rn_child[slot] != NULL &&
550174294Sobrien				    m == NULL) || slot == (VM_RADIX_COUNT - 1))
551174294Sobrien					break;
552174294Sobrien			}
553174294Sobrien		}
554174294Sobrien
555174294Sobrien		/*
556174294Sobrien		 * If a valid page or edge, bigger than the search slot, is
557174294Sobrien		 * found in the traversal, skip to the next higher-level key.
558174294Sobrien		 */
559174294Sobrien		if (slot == (VM_RADIX_COUNT - 1) &&
560174294Sobrien		    (rnode->rn_child[slot] == NULL || m != NULL)) {
561174294Sobrien			if (rnode->rn_clev == 0  || vm_radix_addlev(&index,
562174294Sobrien			    maplevels, rnode->rn_clev - 1) > 0)
563174294Sobrien				break;
564174294Sobrien			goto restart;
565174294Sobrien		}
566174294Sobrien		rnode = rnode->rn_child[slot];
567174294Sobrien	}
568174294Sobrien	return (NULL);
569174294Sobrien}
570174294Sobrien
571174294Sobrien/*
572174294Sobrien * Look up any entry at a position less than or equal to index.
573174294Sobrien */
574174294Sobrienvm_page_t
575174294Sobrienvm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
576174294Sobrien{
577174294Sobrien	vm_pindex_t inc;
578174294Sobrien	vm_page_t m;
579174294Sobrien	struct vm_radix_node *rnode;
580174294Sobrien	int slot;
581174294Sobrien	uint16_t difflev;
582174294Sobrien	boolean_t maplevels[VM_RADIX_LIMIT + 1];
583174294Sobrien#ifdef INVARIANTS
584174294Sobrien	int loops = 0;
585174294Sobrien#endif
586174294Sobrien
587174294Sobrienrestart:
588174294Sobrien	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
589174294Sobrien	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
590174294Sobrien		maplevels[difflev] = FALSE;
591174294Sobrien	rnode = vm_radix_getroot(rtree);
592174294Sobrien	while (rnode != NULL) {
593174294Sobrien		maplevels[rnode->rn_clev] = TRUE;
594174294Sobrien
595174294Sobrien		/*
596174294Sobrien		 * If the keys differ before the current bisection node
597174294Sobrien		 * the search key might rollback to the earlierst
598174294Sobrien		 * available bisection node, or to the higher value
599174294Sobrien		 * in the current domain (if the owner is smaller than the
600174294Sobrien		 * search key).
601174294Sobrien		 * The search for a valid bisection node is helped through
602174294Sobrien		 * the use of maplevels array which should bring immediately
603174294Sobrien		 * a lower useful level, skipping holes.
604174294Sobrien		 */
605174294Sobrien		if (vm_radix_keybarr(rnode, index) == TRUE) {
606174294Sobrien			difflev = vm_radix_keydiff(index, rnode->rn_owner);
607174294Sobrien			if (index > rnode->rn_owner) {
608174294Sobrien				index = vm_radix_trimkey(rnode->rn_owner,
609174294Sobrien				    difflev);
610174294Sobrien				index |= VM_RADIX_UNITLEVEL(difflev) - 1;
611174294Sobrien			} else if (vm_radix_declev(&index, maplevels,
612174294Sobrien			    difflev) > 0)
613174294Sobrien				break;
614174294Sobrien			goto restart;
615174294Sobrien		}
616174294Sobrien		slot = vm_radix_slot(index, rnode->rn_clev);
617174294Sobrien		m = vm_radix_node_page(rnode->rn_child[slot]);
618174294Sobrien		if (m != NULL && m->pindex <= index)
619174294Sobrien			return (m);
620174294Sobrien		if (rnode->rn_child[slot] != NULL && m == NULL) {
621174294Sobrien			rnode = rnode->rn_child[slot];
622174294Sobrien			continue;
623174294Sobrien		}
624174294Sobrien
625174294Sobrien		/*
626174294Sobrien		 * Look for an available edge or page within the current
627174294Sobrien		 * bisection node.
628174294Sobrien		 */
629174294Sobrien		if (slot > 0) {
630174294Sobrien			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
631174294Sobrien			index = vm_radix_trimkey(index, rnode->rn_clev);
632174294Sobrien			index |= inc - 1;
633174294Sobrien			index -= inc;
634174294Sobrien			slot--;
635174294Sobrien			for (;; index -= inc, slot--) {
636174294Sobrien				m = vm_radix_node_page(rnode->rn_child[slot]);
637174294Sobrien				if (m != NULL && m->pindex <= index)
638174294Sobrien					return (m);
639174294Sobrien				if ((rnode->rn_child[slot] != NULL &&
640174294Sobrien				    m == NULL) || slot == 0)
641174294Sobrien					break;
642174294Sobrien			}
643174294Sobrien		}
644174294Sobrien
645174294Sobrien		/*
646174294Sobrien		 * If a valid page or edge, smaller than the search slot, is
647174294Sobrien		 * found in the traversal, skip to the next higher-level key.
648174294Sobrien		 */
649174294Sobrien		if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
650174294Sobrien			if (rnode->rn_clev == 0 || vm_radix_declev(&index,
651174294Sobrien			    maplevels, rnode->rn_clev - 1) > 0)
652174294Sobrien				break;
653174294Sobrien			goto restart;
654174294Sobrien		}
655174294Sobrien		rnode = rnode->rn_child[slot];
656174294Sobrien	}
657174294Sobrien	return (NULL);
658174294Sobrien}
659174294Sobrien
660174294Sobrien/*
661174294Sobrien * Remove the specified index from the tree.
662174294Sobrien * Panics if the key is not present.
663174294Sobrien */
664174294Sobrienvoid
665174294Sobrienvm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
666174294Sobrien{
667174294Sobrien	struct vm_radix_node *rnode, *parent;
668174294Sobrien	vm_page_t m;
669174294Sobrien	int i, slot;
670174294Sobrien
671174294Sobrien	parent = NULL;
672174294Sobrien	rnode = vm_radix_getroot(rtree);
673174294Sobrien	for (;;) {
674174294Sobrien		if (rnode == NULL)
675174294Sobrien			panic("vm_radix_remove: impossible to locate the key");
676174294Sobrien		slot = vm_radix_slot(index, rnode->rn_clev);
677174294Sobrien		m = vm_radix_node_page(rnode->rn_child[slot]);
678174294Sobrien		if (m != NULL && m->pindex == index) {
679174294Sobrien			rnode->rn_child[slot] = NULL;
680174294Sobrien			rnode->rn_count--;
681174294Sobrien			if (rnode->rn_count > 1)
682174294Sobrien				break;
683174294Sobrien			if (parent == NULL) {
684174294Sobrien				if (rnode->rn_count == 0) {
685174294Sobrien					vm_radix_node_put(rnode);
686174294Sobrien					vm_radix_setroot(rtree, NULL);
687174294Sobrien				}
688174294Sobrien				break;
689174294Sobrien			}
690174294Sobrien			for (i = 0; i < VM_RADIX_COUNT; i++)
691174294Sobrien				if (rnode->rn_child[i] != NULL)
692174294Sobrien					break;
693174294Sobrien			KASSERT(i != VM_RADIX_COUNT,
694174294Sobrien			    ("%s: invalid node configuration", __func__));
695174294Sobrien			slot = vm_radix_slot(index, parent->rn_clev);
696174294Sobrien			KASSERT(parent->rn_child[slot] == rnode,
697174294Sobrien			    ("%s: invalid child value", __func__));
698174294Sobrien			parent->rn_child[slot] = rnode->rn_child[i];
699174294Sobrien			rnode->rn_count--;
700174294Sobrien			rnode->rn_child[i] = NULL;
701174294Sobrien			vm_radix_node_put(rnode);
702174294Sobrien			break;
703174294Sobrien		}
704174294Sobrien		if (m != NULL && m->pindex != index)
705174294Sobrien			panic("%s: invalid key found", __func__);
706174294Sobrien		parent = rnode;
707174294Sobrien		rnode = rnode->rn_child[slot];
708174294Sobrien	}
709174294Sobrien}
710174294Sobrien
711174294Sobrien/*
712174294Sobrien * Remove and free all the nodes from the radix tree.
713174294Sobrien * This function is recrusive but there is a tight control on it as the
714174294Sobrien * maximum depth of the tree is fixed.
715174294Sobrien */
716174294Sobrienvoid
717174294Sobrienvm_radix_reclaim_allnodes(struct vm_radix *rtree)
718174294Sobrien{
719174294Sobrien	struct vm_radix_node *root;
720174294Sobrien
721174294Sobrien	root = vm_radix_getroot(rtree);
722174294Sobrien	if (root == NULL)
723174294Sobrien		return;
724174294Sobrien	vm_radix_reclaim_allnodes_int(root);
725174294Sobrien	vm_radix_setroot(rtree, NULL);
726174294Sobrien}
727174294Sobrien
728174294Sobrien#ifdef DDB
729174294Sobrien/*
730174294Sobrien * Show details about the given radix node.
731174294Sobrien */
732174294SobrienDB_SHOW_COMMAND(radixnode, db_show_radixnode)
733174294Sobrien{
734174294Sobrien	struct vm_radix_node *rnode;
735174294Sobrien	int i;
736174294Sobrien
737174294Sobrien        if (!have_addr)
738174294Sobrien                return;
739174294Sobrien	rnode = (struct vm_radix_node *)addr;
740174294Sobrien	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
741174294Sobrien	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
742174294Sobrien	    rnode->rn_clev);
743174294Sobrien	for (i = 0; i < VM_RADIX_COUNT; i++)
744174294Sobrien		if (rnode->rn_child[i] != NULL)
745174294Sobrien			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
746174294Sobrien			    i, (void *)rnode->rn_child[i],
747174294Sobrien			    (void *)vm_radix_node_page(rnode->rn_child[i]),
748174294Sobrien			    rnode->rn_clev);
749174294Sobrien}
750174294Sobrien#endif /* DDB */
751174294Sobrien