/*
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Radix tree implementation.
32 */ 33 34#include <sys/cdefs.h> 35 36#include <sys/param.h> 37#include <sys/conf.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/malloc.h> 41#include <sys/queue.h> 42#include <sys/param.h> 43#include <sys/lock.h> 44#include <sys/mutex.h> 45#include <sys/ktr.h> 46#include <vm/uma.h> 47#include <vm/vm.h> 48#include <vm/vm_param.h> 49#include <vm/vm_extern.h> 50#include <vm/vm_kern.h> 51#include <vm/vm_page.h> 52#include <vm/vm_radix.h> 53#include <vm/vm_object.h> 54 55#include <sys/kdb.h> 56 57CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE); 58 59static uma_zone_t vm_radix_node_zone; 60 61#ifndef UMA_MD_SMALL_ALLOC 62static void * 63vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) 64{ 65 vm_offset_t addr; 66 vm_page_t m; 67 int pflags; 68 69 /* Inform UMA that this allocator uses kernel_map. */ 70 *flags = UMA_SLAB_KERNEL; 71 72 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; 73 74 /* 75 * As kmem_alloc_nofault() can however fail, let just assume that 76 * M_NOWAIT is on and act accordingly. 77 */ 78 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT : 79 VM_ALLOC_SYSTEM; 80 if ((wait & M_ZERO) != 0) 81 pflags |= VM_ALLOC_ZERO; 82 addr = kmem_alloc_nofault(kernel_map, size); 83 if (addr == 0) 84 return (NULL); 85 86 /* Just one page allocation is assumed here. */ 87 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), 88 pflags); 89 if (m == NULL) { 90 kmem_free(kernel_map, addr, size); 91 return (NULL); 92 } 93 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) 94 pmap_zero_page(m); 95 pmap_qenter(addr, &m, 1); 96 return ((void *)addr); 97} 98 99static void 100vm_radix_node_zone_freef(void *item, int size, uint8_t flags) 101{ 102 vm_page_t m; 103 vm_offset_t voitem; 104 105 MPASS((flags & UMA_SLAB_KERNEL) != 0); 106 107 /* Just one page allocation is assumed here. 
*/ 108 voitem = (vm_offset_t)item; 109 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); 110 pmap_qremove(voitem, 1); 111 vm_page_free(m); 112 kmem_free(kernel_map, voitem, size); 113} 114 115static void 116init_vm_radix_alloc(void *dummy __unused) 117{ 118 119 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); 120 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); 121} 122SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL); 123#endif 124 125/* 126 * Radix node zone destructor. 127 */ 128#ifdef INVARIANTS 129static void 130vm_radix_node_zone_dtor(void *mem, int size, void *arg) 131{ 132 struct vm_radix_node *rnode; 133 134 rnode = mem; 135 KASSERT(rnode->rn_count == 0, 136 ("vm_radix_node_put: Freeing a node with %d children\n", 137 rnode->rn_count)); 138} 139#endif 140 141/* 142 * Allocate a radix node. Initializes all elements to 0. 143 */ 144static __inline struct vm_radix_node * 145vm_radix_node_get(void) 146{ 147 148 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO)); 149} 150 151/* 152 * Free radix node. 153 */ 154static __inline void 155vm_radix_node_put(struct vm_radix_node *rnode) 156{ 157 158 uma_zfree(vm_radix_node_zone, rnode); 159} 160 161/* 162 * Return the position in the array for a given level. 163 */ 164static __inline int 165vm_radix_slot(vm_pindex_t index, int level) 166{ 167 168 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); 169} 170 171void 172vm_radix_init(void) 173{ 174 175 vm_radix_node_zone = uma_zcreate("RADIX NODE", 176 sizeof(struct vm_radix_node), NULL, 177#ifdef INVARIANTS 178 vm_radix_node_zone_dtor, 179#else 180 NULL, 181#endif 182 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM); 183} 184 185/* 186 * Extract the root node and height from a radix tree with a single load. 
187 */ 188static __inline int 189vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode) 190{ 191 uintptr_t root; 192 int height; 193 194 root = rtree->rt_root; 195 height = root & VM_RADIX_HEIGHT; 196 *rnode = (struct vm_radix_node *)(root - height); 197 return (height); 198} 199 200 201/* 202 * Set the root node and height for a radix tree. 203 */ 204static inline void 205vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode, 206 int height) 207{ 208 uintptr_t root; 209 210 root = (uintptr_t)rnode | height; 211 rtree->rt_root = root; 212} 213
|
214static inline void * 215vm_radix_match(void *child, int color) 216{ 217 uintptr_t c; 218 219 c = (uintptr_t)child; 220 221 if ((c & color) == 0) 222 return (NULL); 223 return ((void *)(c & ~VM_RADIX_FLAGS)); 224} 225 |
226/* 227 * Inserts the key-value pair in to the radix tree. Returns errno. 228 * Panics if the key already exists. 229 */ 230int 231vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val) 232{ 233 struct vm_radix_node *rnode; 234 struct vm_radix_node *root; 235 int level; 236 int slot; 237 238 CTR3(KTR_VM, 239 "insert: tree %p, index %p, val %p", rtree, (void *)index, val); 240 if (index == -1) 241 panic("vm_radix_insert: -1 is not a valid index.\n"); 242 level = vm_radix_height(rtree, &root); 243 /* 244 * Increase the height by adding nodes at the root until 245 * there is sufficient space. 246 */ 247 while (level == 0 || index > VM_RADIX_MAX(level)) { 248 CTR3(KTR_VM, "insert: expanding %jd > %jd height %d", 249 index, VM_RADIX_MAX(level), level); 250 level++; 251 KASSERT(level <= VM_RADIX_LIMIT, 252 ("vm_radix_insert: Tree %p height %d too tall", 253 rtree, level)); 254 /* 255 * Only allocate tree nodes if they are needed. 256 */ 257 if (root == NULL || root->rn_count != 0) { 258 rnode = vm_radix_node_get(); 259 if (rnode == NULL) 260 return (ENOMEM); 261 /* 262 * Store the new pointer with a memory barrier so 263 * that it is visible before the new root. 264 */ 265 if (root) { 266 atomic_store_rel_ptr((volatile uintptr_t *) 267 &rnode->rn_child[0], (uintptr_t)root); 268 rnode->rn_count = 1; 269 } 270 root = rnode; 271 } 272 vm_radix_setroot(rtree, root, level); 273 } 274 275 /* Now that the tree is tall enough, fill in the path to the index. */ 276 rnode = root; 277 for (level = level - 1; level > 0; level--) { 278 slot = vm_radix_slot(index, level); 279 /* Add the required intermidiate nodes. 
*/ 280 if (rnode->rn_child[slot] == NULL) { 281 rnode->rn_child[slot] = vm_radix_node_get(); 282 if (rnode->rn_child[slot] == NULL) 283 return (ENOMEM); 284 rnode->rn_count++; 285 } 286 CTR5(KTR_VM, 287 "insert: tree %p, index %p, level %d, slot %d, child %p", 288 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 289 rnode = rnode->rn_child[slot]; 290 } 291
|
280 slot = vm_radix_slot(index, level);
|
292 slot = vm_radix_slot(index, 0); |
293 CTR5(KTR_VM, "insert: tree %p, index %p, level %d, slot %d, child %p", 294 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 295 KASSERT(rnode->rn_child[slot] == NULL, 296 ("vm_radix_insert: Duplicate value %p at index: %lu\n", 297 rnode->rn_child[slot], (u_long)index));
|
298 val = (void *)((uintptr_t)val | VM_RADIX_BLACK); |
299 rnode->rn_child[slot] = val;
|
287 rnode->rn_count++;
|
300 atomic_add_int((volatile int *)&rnode->rn_count, 1); |
301 302 return 0; 303} 304 305/* 306 * Returns the value stored at the index. If the index is not present 307 * NULL is returned. 308 */ 309void *
|
297vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
|
310vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color) |
311{ 312 struct vm_radix_node *rnode; 313 int slot; 314 int level; 315 316 level = vm_radix_height(rtree, &rnode); 317 if (index > VM_RADIX_MAX(level)) 318 return NULL; 319 level--; 320 while (rnode) { 321 slot = vm_radix_slot(index, level); 322 CTR5(KTR_VM, 323 "lookup: tree %p, index %p, level %d, slot %d, child %p", 324 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 325 if (level == 0)
|
313 return rnode->rn_child[slot];
|
326 return vm_radix_match(rnode->rn_child[slot], color); |
327 rnode = rnode->rn_child[slot]; 328 level--; 329 } 330 CTR2(KTR_VM, "lookup: tree %p, index %p failed", rtree, (void *)index); 331 332 return NULL; 333} 334
|
335void * 336vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color) 337{ 338 struct vm_radix_node *rnode; 339 uintptr_t child; 340 int slot; 341 int level; 342 343 level = vm_radix_height(rtree, &rnode); 344 if (index > VM_RADIX_MAX(level)) 345 return NULL; 346 level--; 347 while (rnode) { 348 slot = vm_radix_slot(index, level); 349 CTR5(KTR_VM, 350 "color: tree %p, index %p, level %d, slot %d, child %p", 351 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 352 if (level == 0) 353 break; 354 rnode = rnode->rn_child[slot]; 355 level--; 356 } 357 if (rnode == NULL || rnode->rn_child[slot] == NULL) 358 return (NULL); 359 child = (uintptr_t)rnode->rn_child[slot]; 360 child &= ~VM_RADIX_FLAGS; 361 rnode->rn_child[slot] = (void *)(child | color); 362 363 return (void *)child; 364} 365 |
366/*
|
323 * Looks up as many as cnt values between start and end inclusive, and stores
324 * them in the caller allocated array out. The next index can be used to
325 * restart the scan. This optimizes forward scans in the tree.
|
367 * Looks up as many as cnt values between start and end, and stores 368 * them in the caller allocated array out. The next index can be used 369 * to restart the scan. This optimizes forward scans in the tree. |
370 */ 371int 372vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
|
329 vm_pindex_t end, void **out, int cnt, vm_pindex_t *next)
|
373 vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next) |
374{ 375 struct vm_radix_node *rnode;
|
332 struct vm_radix_node *child;
333 vm_pindex_t max;
|
376 vm_pindex_t inc; 377 int slot; 378 int level; 379 void *val; 380 int outidx;
|
339 int loops = 0;
|
381 382 CTR3(KTR_VM, "lookupn: tree %p, start %p, end %p", 383 rtree, (void *)start, (void *)end); 384 outidx = 0; 385restart: 386 level = vm_radix_height(rtree, &rnode);
|
346 max = VM_RADIX_MAX(level);
347 if (start > max)
|
387 if (rnode == NULL || start > VM_RADIX_MAX(level)) |
388 goto out;
|
349 if (end > max || end == 0)
350 end = max;
351 loops++;
352 if (loops > 1000)
353 panic("vm_radix_lookupn: looping %d\n", loops);
|
389 if (end && start >= end) 390 goto out; |
391 /* 392 * Search the tree from the top for any leaf node holding an index 393 * between start and end. 394 */
|
358 level--;
359 while (rnode) {
|
395 for (level--; level; level--) { |
396 slot = vm_radix_slot(start, level); 397 CTR5(KTR_VM, 398 "lookupn: tree %p, index %p, level %d, slot %d, child %p", 399 rtree, (void *)start, level, slot, rnode->rn_child[slot]);
|
364 if (level == 0)
365 break;
|
400 if (rnode->rn_child[slot] != NULL) { 401 rnode = rnode->rn_child[slot]; 402 continue; 403 } |
404 /*
|
367 * If we don't have an exact match we must start our search
368 * from the next leaf and adjust our index appropriately.
369 */
370 if ((child = rnode->rn_child[slot]) == NULL) {
371 /*
372 * Calculate how much to increment our index by
373 * based on the tree level. We must truncate the
374 * lower bits to start from the begnning of the next
375 * leaf.
376 */
377 inc = 1LL << (level * VM_RADIX_WIDTH);
378 start &= ~VM_RADIX_MAX(level);
379 start += inc;
380 slot++;
381 CTR5(KTR_VM,
382 "lookupn: start %p end %p inc %d mask 0x%lX slot %d",
383 (void *)start, (void *)end, inc, ~VM_RADIX_MAX(level), slot);
384 for (; slot < VM_RADIX_COUNT && start <= end;
385 slot++, start += inc) {
386 child = rnode->rn_child[slot];
387 if (child)
388 break;
|
405 * Calculate how much to increment our index by 406 * based on the tree level. We must truncate the 407 * lower bits to start from the begnning of the 408 * next leaf. 409 */ 410 inc = 1LL << (level * VM_RADIX_WIDTH); 411 start &= ~VM_RADIX_MAX(level); 412 start += inc; 413 slot++; 414 CTR5(KTR_VM, 415 "lookupn: start %p end %p inc %d mask 0x%lX slot %d", 416 (void *)start, (void *)end, inc, 417 ~VM_RADIX_MAX(level), slot); 418 for (; slot < VM_RADIX_COUNT; slot++, start += inc) { 419 if (end != 0 && start >= end) 420 goto out; 421 if (rnode->rn_child[slot]) { 422 rnode = rnode->rn_child[slot]; 423 break; |
424 } 425 }
|
391 rnode = child;
392 level--;
393 }
394 if (rnode == NULL) {
395 /*
396 * If we still have another range to search, start looking
397 * from the next node. Otherwise, return what we've already
398 * found. The loop above has already adjusted start to the
399 * beginning of the next node.
400 *
401 * Detect start wrapping back to 0 and return in this case.
402 */
403 if (start <= end && start != 0)
|
426 if (slot == VM_RADIX_COUNT) |
427 goto restart;
|
405 goto out;
|
428 }
|
407 for (; outidx < cnt && slot < VM_RADIX_COUNT && start <= end;
408 slot++, start++) {
409 val = rnode->rn_child[slot];
|
429 slot = vm_radix_slot(start, 0); 430 for (; slot < VM_RADIX_COUNT; slot++, start++) { 431 if (end != 0 && start >= end) 432 goto out; 433 val = vm_radix_match(rnode->rn_child[slot], color); |
434 if (val == NULL) 435 continue;
|
436 CTR4(KTR_VM, 437 "lookupn: tree %p index %p slot %d found child %p", 438 rtree, (void *)start, slot, val); |
439 out[outidx++] = val;
|
440 if (outidx == cnt) 441 goto out; |
442 } 443 /* 444 * Go fetch the next page to fill the requested number of pages 445 * otherwise the caller will simply call us again to fulfill the 446 * same request after the structures are pushed out of cache. 447 */
|
419 if (outidx < cnt && start <= end)
|
448 if ((end == 0 || start < end)) |
449 goto restart;
|
421
|
450out: 451 *next = start; 452 453 return (outidx); 454} 455 456/* 457 * Look up any entry at a position less than or equal to index. 458 */ 459void *
|
432vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
|
460vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color) |
461{ 462 struct vm_radix_node *rnode; 463 struct vm_radix_node *child; 464 vm_pindex_t max; 465 vm_pindex_t inc;
|
466 void *val; |
467 int slot; 468 int level;
|
440 int loops = 0;
|
469 470 CTR2(KTR_VM, 471 "lookup_le: tree %p, index %p", rtree, (void *)index); 472restart: 473 level = vm_radix_height(rtree, &rnode); 474 if (rnode == NULL) 475 return (NULL); 476 max = VM_RADIX_MAX(level); 477 if (index > max || index == 0) 478 index = max;
|
451 loops++;
452 if (loops > 1000)
453 panic("vm_radix_looku_le: looping %d\n", loops);
|
479 /* 480 * Search the tree from the top for any leaf node holding an index 481 * lower than 'index'. 482 */ 483 level--; 484 while (rnode) { 485 slot = vm_radix_slot(index, level); 486 CTR5(KTR_VM, 487 "lookup_le: tree %p, index %p, level %d, slot %d, child %p", 488 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 489 if (level == 0) 490 break; 491 /* 492 * If we don't have an exact match we must start our search 493 * from the next leaf and adjust our index appropriately. 494 */ 495 if ((child = rnode->rn_child[slot]) == NULL) { 496 /* 497 * Calculate how much to decrement our index by 498 * based on the tree level. We must set the 499 * lower bits to start from the end of the next 500 * leaf. 501 */ 502 inc = 1LL << (level * VM_RADIX_WIDTH); 503 index |= VM_RADIX_MAX(level); 504 index -= inc; 505 slot--; 506 CTR4(KTR_VM, 507 "lookup_le: start %p inc %ld mask 0x%lX slot %d", 508 (void *)index, inc, VM_RADIX_MAX(level), slot); 509 for (; slot >= 0; slot--, index -= inc) { 510 child = rnode->rn_child[slot]; 511 if (child) 512 break; 513 } 514 } 515 rnode = child; 516 level--; 517 } 518 if (rnode) { 519 for (; slot >= 0; slot--, index--) {
|
495 if (rnode->rn_child[slot])
496 return (rnode->rn_child[slot]);
|
520 val = vm_radix_match(rnode->rn_child[slot], color); 521 if (val) 522 return (val); |
523 } 524 } 525 if (index != -1) 526 goto restart; 527 return (NULL); 528} 529 530/* 531 * Remove the specified index from the tree. If possible the height of the 532 * tree is adjusted after deletion. The value stored at index is returned 533 * panics if the key is not present. 534 */ 535void *
|
510vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
|
536vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color) |
537{ 538 struct vm_radix_node *stack[VM_RADIX_LIMIT]; 539 struct vm_radix_node *rnode, *root; 540 void *val; 541 int level; 542 int slot; 543 544 level = vm_radix_height(rtree, &root); 545 KASSERT(index <= VM_RADIX_MAX(level), 546 ("vm_radix_remove: %p index %jd out of range %jd.", 547 rtree, index, VM_RADIX_MAX(level))); 548 rnode = root; 549 val = NULL; 550 level--; 551 /* 552 * Find the node and record the path in stack. 553 */ 554 while (level && rnode) { 555 stack[level] = rnode; 556 slot = vm_radix_slot(index, level); 557 rnode = rnode->rn_child[slot]; 558 CTR5(KTR_VM, 559 "remove: tree %p, index %p, level %d, slot %d, child %p", 560 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 561 level--; 562 }
|
563 KASSERT(rnode != NULL, 564 ("vm_radix_remove: index %jd not present in the tree.\n", index)); |
565 slot = vm_radix_slot(index, 0);
|
538 KASSERT(rnode != NULL && rnode->rn_child[slot] != NULL,
|
566 val = vm_radix_match(rnode->rn_child[slot], color); 567 KASSERT(val != NULL, |
568 ("vm_radix_remove: index %jd not present in the tree.\n", index)); 569
|
541 val = rnode->rn_child[slot];
|
570 for (;;) { 571 rnode->rn_child[slot] = NULL;
|
544 rnode->rn_count--;
545 if (rnode->rn_count > 0)
|
572 /* 573 * Use atomics for the last level since red and black 574 * will both adjust it. 575 */ 576 if (level == 0) 577 atomic_add_int((volatile int *)&rnode->rn_count, -1); 578 else 579 rnode->rn_count--; 580 /* 581 * Only allow black removes to prune the tree. 582 */ 583 if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0) |
584 break; 585 vm_radix_node_put(rnode); 586 if (rnode == root) { 587 vm_radix_setroot(rtree, NULL, 0); 588 break; 589 } 590 rnode = stack[++level]; 591 slot = vm_radix_slot(index, level); 592 593 } 594 return (val); 595} 596 597/* 598 * Attempts to reduce the height of the tree. 599 */ 600void 601vm_radix_shrink(struct vm_radix *rtree) 602{ 603 struct vm_radix_node *tmp, *root; 604 int level; 605 606 if (rtree->rt_root == 0) 607 return; 608 level = vm_radix_height(rtree, &root); 609 610 /* Adjust the height of the tree. */ 611 while (root->rn_count == 1 && root->rn_child[0] != NULL) { 612 tmp = root; 613 root->rn_count--; 614 root = root->rn_child[0]; 615 level--; 616 vm_radix_node_put(tmp); 617 } 618 /* Finally see if we have an empty tree. */ 619 if (root->rn_count == 0) { 620 vm_radix_node_put(root); 621 root = NULL; 622 level--; 623 } 624 vm_radix_setroot(rtree, root, level); 625}
|