Deleted Added
sdiff udiff text old ( 226873 ) new ( 226876 )
full compact
1/*
2 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29
30/*
31 * Radix tree implementation.
32 */
33
34#include <sys/cdefs.h>
35
36#include <sys/param.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/queue.h>
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/ktr.h>
46#include <vm/uma.h>
47#include <vm/vm.h>
48#include <vm/vm_extern.h>
49#include <vm/vm_kern.h>
50#include <vm/vm_page.h>
51#include <vm/vm_radix.h>
52#include <vm/vm_object.h>
53
54#include <sys/kdb.h>
55
56CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
57
58static uma_zone_t vm_radix_node_zone;
59
60#if 0
61static void *
62vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
63{
64 vm_offset_t addr;
65 vm_page_t m;
66 int pflags;
67
68 /* Inform UMA that this allocator uses kernel_map. */
69 *flags = UMA_SLAB_KERNEL;
70
71 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
72
73 /*
74 * As kmem_alloc_nofault() can however fail, let just assume that
75 * M_NOWAIT is on and act accordingly.
76 */
77 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
78 VM_ALLOC_SYSTEM;
79 if ((wait & M_ZERO) != 0)
80 pflags |= VM_ALLOC_ZERO;
81 addr = kmem_alloc_nofault(kernel_map, size);
82 if (addr == 0)
83 return (NULL);
84
85 /* Just one page allocation is assumed here. */
86 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
87 pflags);
88 if (m == NULL) {
89 kmem_free(kernel_map, addr, size);
90 return (NULL);
91 }
92 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
93 pmap_zero_page(m);
94 pmap_qenter(addr, &m, 1);
95 return ((void *)addr);
96}
97
98static void
99vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
100{
101 vm_page_t m;
102 vm_offset_t voitem;
103
104 MPASS((flags & UMA_SLAB_KERNEL) != 0);
105
106 /* Just one page allocation is assumed here. */
107 voitem = (vm_offset_t)item;
108 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
109 pmap_qremove(voitem, 1);
110 vm_page_free(m);
111 kmem_free(kernel_map, voitem, size);
112}
113
114static void
115init_vm_radix_alloc(void *dummy __unused)
116{
117
118 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
119 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
120}
121SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
122#endif
123
124/*
125 * Radix node zone destructor.
126 */
127#ifdef INVARIANTS
128static void
129vm_radix_node_zone_dtor(void *mem, int size, void *arg)
130{
131 struct vm_radix_node *rnode;
132
133 rnode = mem;
134 KASSERT(rnode->rn_count == 0,
135 ("vm_radix_node_put: Freeing a node with %d children\n",
136 rnode->rn_count));
137}
138#endif
139
140/*
141 * Allocate a radix node. Initializes all elements to 0.
142 */
143static struct vm_radix_node *
144vm_radix_node_get(void)
145{
146
147 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
148}
149
150/*
151 * Free radix node.
152 */
153static void
154vm_radix_node_put(struct vm_radix_node *rnode)
155{
156
157 uma_zfree(vm_radix_node_zone, rnode);
158}
159
160/*
161 * Return the position in the array for a given level.
162 */
163static inline int
164vm_radix_slot(vm_pindex_t index, int level)
165{
166
167 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
168}
169
170void
171vm_radix_init(void)
172{
173
174 vm_radix_node_zone = uma_zcreate("RADIX NODE",
175 sizeof(struct vm_radix_node), NULL,
176#ifdef INVARIANTS
177 vm_radix_node_zone_dtor,
178#else
179 NULL,
180#endif
181 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
182}
183
184/*
185 * Inserts the key-value pair in to the radix tree. Returns errno.
186 * Panics if the key already exists.
187 */
188int
189vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
190{
191 struct vm_radix_node *rnode;
192 int slot;
193 int level;
194
195 CTR3(KTR_VM,
196 "insert: tree %p, index %p, val %p", rtree, (void *)index, val);
197 if (index == -1)
198 panic("vm_radix_insert: -1 is not a valid index.\n");
199 /*
200 * Increase the height by adding nodes at the root until
201 * there is sufficient space.
202 */
203 while (rtree->rt_height == 0 ||
204 index > VM_RADIX_MAX(rtree->rt_height)) {
205 CTR3(KTR_VM, "insert: expanding %jd > %jd height %d",
206 index, VM_RADIX_MAX(rtree->rt_height), rtree->rt_height);
207 /*
208 * Only allocate tree nodes if they are needed.
209 */
210 if (rtree->rt_root == NULL || rtree->rt_root->rn_count != 0) {
211 rnode = vm_radix_node_get();
212 if (rnode == NULL)
213 return (ENOMEM);
214 if (rtree->rt_root) {
215 rnode->rn_child[0] = rtree->rt_root;
216 rnode->rn_count = 1;
217 }
218 rtree->rt_root = rnode;
219 }
220 rtree->rt_height++;
221 KASSERT(rtree->rt_height <= VM_RADIX_LIMIT,
222 ("vm_radix_insert: Tree %p height %d too tall", rtree,
223 rtree->rt_height));
224 }
225
226 /* Now that the tree is tall enough, fill in the path to the index. */
227 rnode = rtree->rt_root;
228 for (level = rtree->rt_height - 1; level > 0; level--) {
229 slot = vm_radix_slot(index, level);
230 /* Add the required intermidiate nodes. */
231 if (rnode->rn_child[slot] == NULL) {
232 rnode->rn_child[slot] = vm_radix_node_get();
233 if (rnode->rn_child[slot] == NULL)
234 return (ENOMEM);
235 rnode->rn_count++;
236 }
237 CTR5(KTR_VM,
238 "insert: tree %p, index %p, level %d, slot %d, child %p",
239 rtree, (void *)index, level, slot, rnode->rn_child[slot]);
240 rnode = rnode->rn_child[slot];
241 }
242
243 slot = vm_radix_slot(index, level);
244 CTR5(KTR_VM, "insert: tree %p, index %p, level %d, slot %d, child %p",
245 rtree, (void *)index, level, slot, rnode->rn_child[slot]);
246 KASSERT(rnode->rn_child[slot] == NULL,
247 ("vm_radix_insert: Duplicate value %p at index: %lu\n",
248 rnode->rn_child[slot], (u_long)index));
249 rnode->rn_child[slot] = val;
250 rnode->rn_count++;
251
252 return 0;
253}
254
255/*
256 * Returns the value stored at the index. If the index is not present
257 * NULL is returned.
258 */
259void *
260vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
261{
262 struct vm_radix_node *rnode;
263 int slot;
264 int level;
265
266 if (index > VM_RADIX_MAX(rtree->rt_height))
267 return NULL;
268 level = rtree->rt_height - 1;
269 rnode = rtree->rt_root;
270 while (rnode) {
271 slot = vm_radix_slot(index, level);
272 CTR5(KTR_VM,
273 "lookup: tree %p, index %p, level %d, slot %d, child %p",
274 rtree, (void *)index, level, slot, rnode->rn_child[slot]);
275 if (level == 0)
276 return rnode->rn_child[slot];
277 rnode = rnode->rn_child[slot];
278 level--;
279 }
280 CTR2(KTR_VM, "lookup: tree %p, index %p failed", rtree, (void *)index);
281
282 return NULL;
283}
284
285/*
286 * Looks up as many as cnt values between start and end inclusive, and stores
287 * them in the caller allocated array out. The next index can be used to
288 * restart the scan. This optimizes forward scans in the tree.
289 */
290int
291vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
292 vm_pindex_t end, void **out, int cnt, vm_pindex_t *next)
293{
294 struct vm_radix_node *rnode;
295 struct vm_radix_node *child;
296 vm_pindex_t max;
297 vm_pindex_t inc;
298 int slot;
299 int level;
300 void *val;
301 int outidx;
302 int loops = 0;
303
304 CTR3(KTR_VM, "lookupn: tree %p, start %p, end %p",
305 rtree, (void *)start, (void *)end);
306 outidx = 0;
307 max = VM_RADIX_MAX(rtree->rt_height);
308 if (start > max)
309 return 0;
310 if (end > max || end == 0)
311 end = max;
312restart:
313 loops++;
314 if (loops > 1000)
315 panic("vm_radix_lookupn: looping %d\n", loops);
316 /*
317 * Search the tree from the top for any leaf node holding an index
318 * between start and end.
319 */
320 level = rtree->rt_height - 1;
321 rnode = rtree->rt_root;
322 while (rnode) {
323 slot = vm_radix_slot(start, level);
324 CTR5(KTR_VM,
325 "lookupn: tree %p, index %p, level %d, slot %d, child %p",
326 rtree, (void *)start, level, slot, rnode->rn_child[slot]);
327 if (level == 0)
328 break;
329 /*
330 * If we don't have an exact match we must start our search
331 * from the next leaf and adjust our index appropriately.
332 */
333 if ((child = rnode->rn_child[slot]) == NULL) {
334 /*
335 * Calculate how much to increment our index by
336 * based on the tree level. We must truncate the
337 * lower bits to start from the begnning of the next
338 * leaf.
339 */
340 inc = 1LL << (level * VM_RADIX_WIDTH);
341 start &= ~VM_RADIX_MAX(level);
342 start += inc;
343 slot++;
344 CTR5(KTR_VM,
345 "lookupn: start %p end %p inc %d mask 0x%lX slot %d",
346 (void *)start, (void *)end, inc, ~VM_RADIX_MAX(level), slot);
347 for (; slot < VM_RADIX_COUNT && start <= end;
348 slot++, start += inc) {
349 child = rnode->rn_child[slot];
350 if (child)
351 break;
352 }
353 }
354 rnode = child;
355 level--;
356 }
357 if (rnode == NULL) {
358 /*
359 * If we still have another range to search, start looking
360 * from the next node. Otherwise, return what we've already
361 * found. The loop above has already adjusted start to the
362 * beginning of the next node.
363 *
364 * Detect start wrapping back to 0 and return in this case.
365 */
366 if (start <= end && start != 0)
367 goto restart;
368 goto out;
369 }
370 for (; outidx < cnt && slot < VM_RADIX_COUNT && start <= end;
371 slot++, start++) {
372 val = rnode->rn_child[slot];
373 if (val == NULL)
374 continue;
375 out[outidx++] = val;
376 }
377 /*
378 * Go fetch the next page to fill the requested number of pages
379 * otherwise the caller will simply call us again to fulfill the
380 * same request after the structures are pushed out of cache.
381 */
382 if (outidx < cnt && start <= end)
383 goto restart;
384
385out:
386 *next = start;
387
388 return (outidx);
389}
390
391/*
392 * Look up any entry at a position less than or equal to index.
393 */
394void *
395vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
396{
397 struct vm_radix_node *rnode;
398 struct vm_radix_node *child;
399 vm_pindex_t max;
400 vm_pindex_t inc;
401 int slot;
402 int level;
403 int loops = 0;
404
405 CTR2(KTR_VM,
406 "lookup_le: tree %p, index %p", rtree, (void *)index);
407 if (rtree->rt_root == NULL)
408 return (NULL);
409 max = VM_RADIX_MAX(rtree->rt_height);
410 if (index > max || index == 0)
411 index = max;
412restart:
413 loops++;
414 if (loops > 1000)
415 panic("vm_radix_looku_le: looping %d\n", loops);
416 /*
417 * Search the tree from the top for any leaf node holding an index
418 * lower than 'index'.
419 */
420 level = rtree->rt_height - 1;
421 rnode = rtree->rt_root;
422 while (rnode) {
423 slot = vm_radix_slot(index, level);
424 CTR5(KTR_VM,
425 "lookup_le: tree %p, index %p, level %d, slot %d, child %p",
426 rtree, (void *)index, level, slot, rnode->rn_child[slot]);
427 if (level == 0)
428 break;
429 /*
430 * If we don't have an exact match we must start our search
431 * from the next leaf and adjust our index appropriately.
432 */
433 if ((child = rnode->rn_child[slot]) == NULL) {
434 /*
435 * Calculate how much to decrement our index by
436 * based on the tree level. We must set the
437 * lower bits to start from the end of the next
438 * leaf.
439 */
440 inc = 1LL << (level * VM_RADIX_WIDTH);
441 index |= VM_RADIX_MAX(level);
442 index -= inc;
443 slot--;
444 CTR4(KTR_VM,
445 "lookup_le: start %p inc %ld mask 0x%lX slot %d",
446 (void *)index, inc, VM_RADIX_MAX(level), slot);
447 for (; slot >= 0; slot--, index -= inc) {
448 child = rnode->rn_child[slot];
449 if (child)
450 break;
451 }
452 }
453 rnode = child;
454 level--;
455 }
456 if (rnode) {
457 for (; slot >= 0; slot--, index--) {
458 if (rnode->rn_child[slot])
459 return (rnode->rn_child[slot]);
460 }
461 }
462 if (index != -1)
463 goto restart;
464 return (NULL);
465}
466
467/*
468 * Remove the specified index from the tree. If possible the height of the
469 * tree is adjusted after deletion. The value stored at index is returned
470 * panics if the key is not present.
471 */
472void *
473vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
474{
475 struct vm_radix_node *stack[VM_RADIX_LIMIT];
476 struct vm_radix_node *rnode;
477 void *val;
478 int level;
479 int slot;
480
481 KASSERT(index <= VM_RADIX_MAX(rtree->rt_height),
482 ("vm_radix_remove: %p index %jd out of range %jd.",
483 rtree, index, VM_RADIX_MAX(rtree->rt_height)));
484 val = NULL;
485 rnode = rtree->rt_root;
486 level = rtree->rt_height - 1;
487 /*
488 * Find the node and record the path in stack.
489 */
490 while (level && rnode) {
491 stack[level] = rnode;
492 slot = vm_radix_slot(index, level);
493 rnode = rnode->rn_child[slot];
494 CTR5(KTR_VM,
495 "remove: tree %p, index %p, level %d, slot %d, child %p",
496 rtree, (void *)index, level, slot, rnode->rn_child[slot]);
497 level--;
498 }
499 slot = vm_radix_slot(index, 0);
500 KASSERT(rnode != NULL && rnode->rn_child[slot] != NULL,
501 ("vm_radix_remove: index %jd not present in the tree.\n", index));
502
503 val = rnode->rn_child[slot];
504 for (;;) {
505 rnode->rn_child[slot] = NULL;
506 rnode->rn_count--;
507 if (rnode->rn_count > 0)
508 break;
509 vm_radix_node_put(rnode);
510 if (rnode == rtree->rt_root) {
511 rtree->rt_root = NULL;
512 rtree->rt_height = 0;
513 break;
514 }
515 rnode = stack[++level];
516 slot = vm_radix_slot(index, level);
517
518 }
519 return (val);
520}
521
522/*
523 * Attempts to reduce the height of the tree.
524 */
525void
526vm_radix_shrink(struct vm_radix *rtree)
527{
528 struct vm_radix_node *tmp;
529
530 if (rtree->rt_root == NULL)
531 return;
532
533 /* Adjust the height of the tree. */
534 while (rtree->rt_root->rn_count == 1 &&
535 rtree->rt_root->rn_child[0] != NULL) {
536 tmp = rtree->rt_root;
537 rtree->rt_root = tmp->rn_child[0];
538 rtree->rt_height--;
539 tmp->rn_count--;
540 vm_radix_node_put(tmp);
541 }
542 /* Finally see if we have an empty tree. */
543 if (rtree->rt_root->rn_count == 0) {
544 vm_radix_node_put(rtree->rt_root);
545 rtree->rt_root = NULL;
546 rtree->rt_height = 0;
547 }
548}