vm_radix.c revision 226873
1/*
2 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29
30/*
31 * Radix tree implementation.
32 */
33
34#include <sys/cdefs.h>
35
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/ktr.h>
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_extern.h>
49#include <vm/vm_kern.h>
50#include <vm/vm_page.h>
51#include <vm/vm_radix.h>
52#include <vm/vm_object.h>
53
54#include <sys/kdb.h>
55
56CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
57
58static uma_zone_t vm_radix_node_zone;
59
60#if 0
61static void *
62vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
63{
64	vm_offset_t addr;
65	vm_page_t m;
66	int pflags;
67
68	/* Inform UMA that this allocator uses kernel_map. */
69	*flags = UMA_SLAB_KERNEL;
70
71	pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
72
73	/*
74	 * As kmem_alloc_nofault() can however fail, let just assume that
75	 * M_NOWAIT is on and act accordingly.
76	 */
77	pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
78	    VM_ALLOC_SYSTEM;
79	if ((wait & M_ZERO) != 0)
80		pflags |= VM_ALLOC_ZERO;
81	addr = kmem_alloc_nofault(kernel_map, size);
82	if (addr == 0)
83		return (NULL);
84
85	/* Just one page allocation is assumed here. */
86	m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
87	    pflags);
88	if (m == NULL) {
89		kmem_free(kernel_map, addr, size);
90		return (NULL);
91	}
92	if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
93		pmap_zero_page(m);
94	pmap_qenter(addr, &m, 1);
95	return ((void *)addr);
96}
97
98static void
99vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
100{
101	vm_page_t m;
102	vm_offset_t voitem;
103
104	MPASS((flags & UMA_SLAB_KERNEL) != 0);
105
106	/* Just one page allocation is assumed here. */
107	voitem = (vm_offset_t)item;
108	m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
109	pmap_qremove(voitem, 1);
110	vm_page_free(m);
111	kmem_free(kernel_map, voitem, size);
112}
113
114static void
115init_vm_radix_alloc(void *dummy __unused)
116{
117
118	uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
119	uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
120}
121SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
122#endif
123
124/*
125 * Radix node zone destructor.
126 */
127#ifdef INVARIANTS
128static void
129vm_radix_node_zone_dtor(void *mem, int size, void *arg)
130{
131	struct vm_radix_node *rnode;
132
133	rnode = mem;
134	KASSERT(rnode->rn_count == 0,
135	    ("vm_radix_node_put: Freeing a node with %d children\n",
136	    rnode->rn_count));
137}
138#endif
139
140/*
141 * Allocate a radix node.  Initializes all elements to 0.
142 */
143static struct vm_radix_node *
144vm_radix_node_get(void)
145{
146
147	return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
148}
149
150/*
151 * Free radix node.
152 */
153static void
154vm_radix_node_put(struct vm_radix_node *rnode)
155{
156
157	uma_zfree(vm_radix_node_zone, rnode);
158}
159
160/*
161 * Return the position in the array for a given level.
162 */
163static inline int
164vm_radix_slot(vm_pindex_t index, int level)
165{
166
167	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
168}
169
170void
171vm_radix_init(void)
172{
173
174	vm_radix_node_zone = uma_zcreate("RADIX NODE",
175	    sizeof(struct vm_radix_node), NULL,
176#ifdef INVARIANTS
177	    vm_radix_node_zone_dtor,
178#else
179	    NULL,
180#endif
181	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
182}
183
184/*
185 * Inserts the key-value pair in to the radix tree.  Returns errno.
186 * Panics if the key already exists.
187 */
188int
189vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
190{
191	struct vm_radix_node *rnode;
192	int slot;
193	int level;
194
195	CTR3(KTR_VM,
196	    "insert: tree %p, index %p, val %p", rtree, (void *)index, val);
197	if (index == -1)
198		panic("vm_radix_insert: -1 is not a valid index.\n");
199	/*
200	 * Increase the height by adding nodes at the root until
201	 * there is sufficient space.
202	 */
203	while (rtree->rt_height == 0 ||
204	     index > VM_RADIX_MAX(rtree->rt_height)) {
205		CTR3(KTR_VM, "insert: expanding %jd > %jd height %d",
206		    index, VM_RADIX_MAX(rtree->rt_height), rtree->rt_height);
207		/*
208		 * Only allocate tree nodes if they are needed.
209		 */
210		if (rtree->rt_root == NULL || rtree->rt_root->rn_count != 0) {
211			rnode = vm_radix_node_get();
212			if (rnode == NULL)
213				return (ENOMEM);
214			if (rtree->rt_root) {
215				rnode->rn_child[0] = rtree->rt_root;
216				rnode->rn_count = 1;
217			}
218			rtree->rt_root = rnode;
219		}
220		rtree->rt_height++;
221		KASSERT(rtree->rt_height <= VM_RADIX_LIMIT,
222		    ("vm_radix_insert: Tree %p height %d too tall", rtree,
223		    rtree->rt_height));
224	}
225
226	/* Now that the tree is tall enough, fill in the path to the index. */
227	rnode = rtree->rt_root;
228	for (level = rtree->rt_height - 1; level > 0; level--) {
229		slot = vm_radix_slot(index, level);
230		/* Add the required intermidiate nodes. */
231		if (rnode->rn_child[slot] == NULL) {
232			rnode->rn_child[slot] = vm_radix_node_get();
233    			if (rnode->rn_child[slot] == NULL)
234		    		return (ENOMEM);
235			rnode->rn_count++;
236	    	}
237		CTR5(KTR_VM,
238		    "insert: tree %p, index %p, level %d, slot %d, child %p",
239		    rtree, (void *)index, level, slot, rnode->rn_child[slot]);
240		rnode = rnode->rn_child[slot];
241	}
242
243	slot = vm_radix_slot(index, level);
244	CTR5(KTR_VM, "insert: tree %p, index %p, level %d, slot %d, child %p",
245	    rtree, (void *)index, level, slot, rnode->rn_child[slot]);
246	KASSERT(rnode->rn_child[slot] == NULL,
247	    ("vm_radix_insert: Duplicate value %p at index: %lu\n",
248	    rnode->rn_child[slot], (u_long)index));
249	rnode->rn_child[slot] = val;
250	rnode->rn_count++;
251
252	return 0;
253}
254
255/*
256 * Returns the value stored at the index.  If the index is not present
257 * NULL is returned.
258 */
259void *
260vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
261{
262	struct vm_radix_node *rnode;
263	int slot;
264	int level;
265
266	if (index > VM_RADIX_MAX(rtree->rt_height))
267		return NULL;
268	level = rtree->rt_height - 1;
269	rnode = rtree->rt_root;
270	while (rnode) {
271		slot = vm_radix_slot(index, level);
272		CTR5(KTR_VM,
273		    "lookup: tree %p, index %p, level %d, slot %d, child %p",
274		    rtree, (void *)index, level, slot, rnode->rn_child[slot]);
275		if (level == 0)
276			return rnode->rn_child[slot];
277		rnode = rnode->rn_child[slot];
278		level--;
279	}
280	CTR2(KTR_VM, "lookup: tree %p, index %p failed", rtree, (void *)index);
281
282	return NULL;
283}
284
285/*
286 * Looks up as many as cnt values between start and end inclusive, and stores
287 * them in the caller allocated array out.  The next index can be used to
288 * restart the scan.  This optimizes forward scans in the tree.
289 */
290int
291vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
292    vm_pindex_t end, void **out, int cnt, vm_pindex_t *next)
293{
294	struct vm_radix_node *rnode;
295	struct vm_radix_node *child;
296	vm_pindex_t max;
297	vm_pindex_t inc;
298	int slot;
299	int level;
300	void *val;
301	int outidx;
302	int loops = 0;
303
304	CTR3(KTR_VM, "lookupn: tree %p, start %p, end %p",
305	    rtree, (void *)start, (void *)end);
306	outidx = 0;
307	max = VM_RADIX_MAX(rtree->rt_height);
308	if (start > max)
309		return 0;
310	if (end > max || end == 0)
311		end = max;
312restart:
313	loops++;
314	if (loops > 1000)
315		panic("vm_radix_lookupn: looping %d\n", loops);
316	/*
317	 * Search the tree from the top for any leaf node holding an index
318	 * between start and end.
319	 */
320	level = rtree->rt_height - 1;
321	rnode = rtree->rt_root;
322	while (rnode) {
323		slot = vm_radix_slot(start, level);
324		CTR5(KTR_VM,
325		    "lookupn: tree %p, index %p, level %d, slot %d, child %p",
326		    rtree, (void *)start, level, slot, rnode->rn_child[slot]);
327		if (level == 0)
328			break;
329		/*
330		 * If we don't have an exact match we must start our search
331		 * from the next leaf and adjust our index appropriately.
332		 */
333		if ((child = rnode->rn_child[slot]) == NULL) {
334			/*
335			 * Calculate how much to increment our index by
336			 * based on the tree level.  We must truncate the
337			 * lower bits to start from the begnning of the next
338			 * leaf.
339		 	 */
340			inc = 1LL << (level * VM_RADIX_WIDTH);
341			start &= ~VM_RADIX_MAX(level);
342			start += inc;
343			slot++;
344			CTR5(KTR_VM,
345			    "lookupn: start %p end %p inc %d mask 0x%lX slot %d",
346			    (void *)start, (void *)end, inc, ~VM_RADIX_MAX(level), slot);
347			for (; slot < VM_RADIX_COUNT && start <= end;
348			    slot++, start += inc) {
349				child = rnode->rn_child[slot];
350				if (child)
351					break;
352			}
353		}
354		rnode = child;
355		level--;
356	}
357	if (rnode == NULL) {
358		/*
359		 * If we still have another range to search, start looking
360		 * from the next node.  Otherwise, return what we've already
361		 * found.  The loop above has already adjusted start to the
362		 * beginning of the next node.
363		 *
364		 * Detect start wrapping back to 0 and return in this case.
365		 */
366		if (start <= end && start != 0)
367			goto restart;
368		goto out;
369	}
370	for (; outidx < cnt && slot < VM_RADIX_COUNT && start <= end;
371	    slot++, start++) {
372		val = rnode->rn_child[slot];
373		if (val == NULL)
374			continue;
375		out[outidx++] = val;
376	}
377	/*
378	 * Go fetch the next page to fill the requested number of pages
379	 * otherwise the caller will simply call us again to fulfill the
380	 * same request after the structures are pushed out of cache.
381	 */
382	if (outidx < cnt && start <= end)
383		goto restart;
384
385out:
386	*next = start;
387
388	return (outidx);
389}
390
391/*
392 * Look up any entry at a position less than or equal to index.
393 */
394void *
395vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
396{
397	struct vm_radix_node *rnode;
398	struct vm_radix_node *child;
399	vm_pindex_t max;
400	vm_pindex_t inc;
401	int slot;
402	int level;
403	int loops = 0;
404
405	CTR2(KTR_VM,
406	    "lookup_le: tree %p, index %p", rtree, (void *)index);
407	if (rtree->rt_root == NULL)
408		return (NULL);
409	max = VM_RADIX_MAX(rtree->rt_height);
410	if (index > max || index == 0)
411		index = max;
412restart:
413	loops++;
414	if (loops > 1000)
415		panic("vm_radix_looku_le: looping %d\n", loops);
416	/*
417	 * Search the tree from the top for any leaf node holding an index
418	 * lower than 'index'.
419	 */
420	level = rtree->rt_height - 1;
421	rnode = rtree->rt_root;
422	while (rnode) {
423		slot = vm_radix_slot(index, level);
424		CTR5(KTR_VM,
425		    "lookup_le: tree %p, index %p, level %d, slot %d, child %p",
426		    rtree, (void *)index, level, slot, rnode->rn_child[slot]);
427		if (level == 0)
428			break;
429		/*
430		 * If we don't have an exact match we must start our search
431		 * from the next leaf and adjust our index appropriately.
432		 */
433		if ((child = rnode->rn_child[slot]) == NULL) {
434			/*
435			 * Calculate how much to decrement our index by
436			 * based on the tree level.  We must set the
437			 * lower bits to start from the end of the next
438			 * leaf.
439		 	 */
440			inc = 1LL << (level * VM_RADIX_WIDTH);
441			index |= VM_RADIX_MAX(level);
442			index -= inc;
443			slot--;
444			CTR4(KTR_VM,
445			    "lookup_le: start %p inc %ld mask 0x%lX slot %d",
446			    (void *)index, inc, VM_RADIX_MAX(level), slot);
447			for (; slot >= 0; slot--, index -= inc) {
448				child = rnode->rn_child[slot];
449				if (child)
450					break;
451			}
452		}
453		rnode = child;
454		level--;
455	}
456	if (rnode) {
457		for (; slot >= 0; slot--, index--) {
458			if (rnode->rn_child[slot])
459				return (rnode->rn_child[slot]);
460		}
461	}
462	if (index != -1)
463		goto restart;
464	return (NULL);
465}
466
467/*
468 * Remove the specified index from the tree.  If possible the height of the
469 * tree is adjusted after deletion.  The value stored at index is returned
470 * panics if the key is not present.
471 */
472void *
473vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
474{
475	struct vm_radix_node *stack[VM_RADIX_LIMIT];
476	struct vm_radix_node *rnode;
477	void *val;
478	int level;
479	int slot;
480
481	KASSERT(index <= VM_RADIX_MAX(rtree->rt_height),
482	    ("vm_radix_remove: %p index %jd out of range %jd.",
483	    rtree, index, VM_RADIX_MAX(rtree->rt_height)));
484	val = NULL;
485	rnode = rtree->rt_root;
486	level = rtree->rt_height - 1;
487	/*
488	 * Find the node and record the path in stack.
489	 */
490	while (level && rnode) {
491		stack[level] = rnode;
492		slot = vm_radix_slot(index, level);
493		rnode = rnode->rn_child[slot];
494		CTR5(KTR_VM,
495		    "remove: tree %p, index %p, level %d, slot %d, child %p",
496		    rtree, (void *)index, level, slot, rnode->rn_child[slot]);
497		level--;
498	}
499	slot = vm_radix_slot(index, 0);
500	KASSERT(rnode != NULL && rnode->rn_child[slot] != NULL,
501	    ("vm_radix_remove: index %jd not present in the tree.\n", index));
502
503	val = rnode->rn_child[slot];
504	for (;;) {
505		rnode->rn_child[slot] = NULL;
506		rnode->rn_count--;
507		if (rnode->rn_count > 0)
508			break;
509		vm_radix_node_put(rnode);
510		if (rnode == rtree->rt_root) {
511			rtree->rt_root = NULL;
512			rtree->rt_height = 0;
513			break;
514		}
515		rnode = stack[++level];
516		slot = vm_radix_slot(index, level);
517
518	}
519	return (val);
520}
521
522/*
523 * Attempts to reduce the height of the tree.
524 */
525void
526vm_radix_shrink(struct vm_radix *rtree)
527{
528	struct vm_radix_node *tmp;
529
530	if (rtree->rt_root == NULL)
531		return;
532
533	/* Adjust the height of the tree. */
534	while (rtree->rt_root->rn_count == 1 &&
535	    rtree->rt_root->rn_child[0] != NULL) {
536		tmp = rtree->rt_root;
537		rtree->rt_root = tmp->rn_child[0];
538		rtree->rt_height--;
539		tmp->rn_count--;
540		vm_radix_node_put(tmp);
541	}
542	/* Finally see if we have an empty tree. */
543	if (rtree->rt_root->rn_count == 0) {
544		vm_radix_node_put(rtree->rt_root);
545		rtree->rt_root = NULL;
546		rtree->rt_height = 0;
547	}
548}
549