vm_radix.c revision 227754
1/* 2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 30/* 31 * Radix tree implementation. 32 */ 33 34#include <sys/cdefs.h> 35 36#include <sys/param.h> 37#include <sys/conf.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/malloc.h> 41#include <sys/queue.h> 42#include <sys/param.h> 43#include <sys/lock.h> 44#include <sys/mutex.h> 45#include <sys/ktr.h> 46#include <vm/uma.h> 47#include <vm/vm.h> 48#include <vm/vm_param.h> 49#include <vm/vm_extern.h> 50#include <vm/vm_kern.h> 51#include <vm/vm_page.h> 52#include <vm/vm_radix.h> 53#include <vm/vm_object.h> 54 55#include <sys/kdb.h> 56 57CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE); 58 59static uma_zone_t vm_radix_node_zone; 60 61#ifndef UMA_MD_SMALL_ALLOC 62static void * 63vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) 64{ 65 vm_offset_t addr; 66 vm_page_t m; 67 int pflags; 68 69 /* Inform UMA that this allocator uses kernel_map. */ 70 *flags = UMA_SLAB_KERNEL; 71 72 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; 73 74 /* 75 * As kmem_alloc_nofault() can however fail, let just assume that 76 * M_NOWAIT is on and act accordingly. 77 */ 78 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT : 79 VM_ALLOC_SYSTEM; 80 if ((wait & M_ZERO) != 0) 81 pflags |= VM_ALLOC_ZERO; 82 addr = kmem_alloc_nofault(kernel_map, size); 83 if (addr == 0) 84 return (NULL); 85 86 /* Just one page allocation is assumed here. */ 87 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), 88 pflags); 89 if (m == NULL) { 90 kmem_free(kernel_map, addr, size); 91 return (NULL); 92 } 93 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) 94 pmap_zero_page(m); 95 pmap_qenter(addr, &m, 1); 96 return ((void *)addr); 97} 98 99static void 100vm_radix_node_zone_freef(void *item, int size, uint8_t flags) 101{ 102 vm_page_t m; 103 vm_offset_t voitem; 104 105 MPASS((flags & UMA_SLAB_KERNEL) != 0); 106 107 /* Just one page allocation is assumed here. */ 108 voitem = (vm_offset_t)item; 109 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); 110 pmap_qremove(voitem, 1); 111 vm_page_free(m); 112 kmem_free(kernel_map, voitem, size); 113} 114 115static void 116init_vm_radix_alloc(void *dummy __unused) 117{ 118 119 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); 120 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); 121} 122SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL); 123#endif 124 125/* 126 * Radix node zone destructor. 127 */ 128#ifdef INVARIANTS 129static void 130vm_radix_node_zone_dtor(void *mem, int size, void *arg) 131{ 132 struct vm_radix_node *rnode; 133 134 rnode = mem; 135 KASSERT(rnode->rn_count == 0, 136 ("vm_radix_node_put: Freeing a node with %d children\n", 137 rnode->rn_count)); 138} 139#endif 140 141/* 142 * Allocate a radix node. Initializes all elements to 0. 143 */ 144static __inline struct vm_radix_node * 145vm_radix_node_get(void) 146{ 147 148 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO)); 149} 150 151/* 152 * Free radix node. 153 */ 154static __inline void 155vm_radix_node_put(struct vm_radix_node *rnode) 156{ 157 158 uma_zfree(vm_radix_node_zone, rnode); 159} 160 161/* 162 * Return the position in the array for a given level. 163 */ 164static __inline int 165vm_radix_slot(vm_pindex_t index, int level) 166{ 167 168 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); 169} 170 171void 172vm_radix_init(void) 173{ 174 175 vm_radix_node_zone = uma_zcreate("RADIX NODE", 176 sizeof(struct vm_radix_node), NULL, 177#ifdef INVARIANTS 178 vm_radix_node_zone_dtor, 179#else 180 NULL, 181#endif 182 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM); 183} 184 185/* 186 * Extract the root node and height from a radix tree with a single load. 187 */ 188static __inline int 189vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode) 190{ 191 uintptr_t root; 192 int height; 193 194 root = rtree->rt_root; 195 height = root & VM_RADIX_HEIGHT; 196 *rnode = (struct vm_radix_node *)(root - height); 197 return (height); 198} 199 200 201/* 202 * Set the root node and height for a radix tree. 203 */ 204static inline void 205vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode, 206 int height) 207{ 208 uintptr_t root; 209 210 root = (uintptr_t)rnode | height; 211 rtree->rt_root = root; 212} 213 214static inline void * 215vm_radix_match(void *child, int color) 216{ 217 uintptr_t c; 218 219 c = (uintptr_t)child; 220 221 if ((c & color) == 0) 222 return (NULL); 223 return ((void *)(c & ~VM_RADIX_FLAGS)); 224} 225 226static void 227vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level) 228{ 229 int slot; 230 231 MPASS(rnode != NULL && level >= 0); 232 233 /* 234 * Level 0 just contains pages as children, thus make it a special 235 * case, free the node and return. 236 */ 237 if (level == 0) { 238 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); 239 rnode->rn_count = 0; 240 vm_radix_node_put(rnode); 241 return; 242 } 243 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { 244 if (rnode->rn_child[slot] == NULL) 245 continue; 246 CTR3(KTR_VM, 247 "reclaiming: node %p, level %d recursing in slot %d", 248 rnode, level, slot); 249 vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot], 250 level - 1); 251 rnode->rn_count--; 252 } 253 MPASS(rnode->rn_count == 0); 254 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); 255 vm_radix_node_put(rnode); 256} 257 258/* 259 * Inserts the key-value pair in to the radix tree. Returns errno. 260 * Panics if the key already exists. 261 */ 262int 263vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val) 264{ 265 struct vm_radix_node *rnode; 266 struct vm_radix_node *root; 267 int level; 268 int slot; 269 270 CTR3(KTR_VM, 271 "insert: tree %p, index %p, val %p", rtree, (void *)index, val); 272 if (index == -1) 273 panic("vm_radix_insert: -1 is not a valid index.\n"); 274 level = vm_radix_height(rtree, &root); 275 /* 276 * Increase the height by adding nodes at the root until 277 * there is sufficient space. 278 */ 279 while (level == 0 || index > VM_RADIX_MAX(level)) { 280 CTR3(KTR_VM, "insert: expanding %jd > %jd height %d", 281 index, VM_RADIX_MAX(level), level); 282 level++; 283 KASSERT(level <= VM_RADIX_LIMIT, 284 ("vm_radix_insert: Tree %p height %d too tall", 285 rtree, level)); 286 /* 287 * Only allocate tree nodes if they are needed. 288 */ 289 if (root == NULL || root->rn_count != 0) { 290 rnode = vm_radix_node_get(); 291 if (rnode == NULL) { 292 CTR4(KTR_VM, 293"insert: tree %p, root %p, index: %d, level: %d failed to allocate a new node", 294 rtree, root, index, level); 295 return (ENOMEM); 296 } 297 /* 298 * Store the new pointer with a memory barrier so 299 * that it is visible before the new root. 300 */ 301 if (root) { 302 atomic_store_rel_ptr((volatile uintptr_t *) 303 &rnode->rn_child[0], (uintptr_t)root); 304 rnode->rn_count = 1; 305 } 306 root = rnode; 307 } 308 vm_radix_setroot(rtree, root, level); 309 } 310 311 /* Now that the tree is tall enough, fill in the path to the index. */ 312 rnode = root; 313 for (level = level - 1; level > 0; level--) { 314 slot = vm_radix_slot(index, level); 315 /* Add the required intermidiate nodes. */ 316 if (rnode->rn_child[slot] == NULL) { 317 rnode->rn_child[slot] = vm_radix_node_get(); 318 if (rnode->rn_child[slot] == NULL) { 319 CTR5(KTR_VM, 320"insert: tree %p, index %jd, level %d, slot %d, child %p failed to populate", 321 rtree, index, level, slot, 322 rnode->rn_child[slot]); 323 return (ENOMEM); 324 } 325 rnode->rn_count++; 326 } 327 CTR5(KTR_VM, 328 "insert: tree %p, index %p, level %d, slot %d, child %p", 329 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 330 rnode = rnode->rn_child[slot]; 331 } 332 333 slot = vm_radix_slot(index, 0); 334 CTR5(KTR_VM, "insert: tree %p, index %p, level %d, slot %d, child %p", 335 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 336 KASSERT(rnode->rn_child[slot] == NULL, 337 ("vm_radix_insert: Duplicate value %p at index: %lu\n", 338 rnode->rn_child[slot], (u_long)index)); 339 val = (void *)((uintptr_t)val | VM_RADIX_BLACK); 340 rnode->rn_child[slot] = val; 341 atomic_add_int((volatile int *)&rnode->rn_count, 1); 342 343 return 0; 344} 345 346/* 347 * Returns the value stored at the index. If the index is not present 348 * NULL is returned. 349 */ 350void * 351vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color) 352{ 353 struct vm_radix_node *rnode; 354 int slot; 355 int level; 356 357 level = vm_radix_height(rtree, &rnode); 358 if (index > VM_RADIX_MAX(level)) 359 return NULL; 360 level--; 361 while (rnode) { 362 slot = vm_radix_slot(index, level); 363 CTR5(KTR_VM, 364 "lookup: tree %p, index %p, level %d, slot %d, child %p", 365 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 366 if (level == 0) 367 return vm_radix_match(rnode->rn_child[slot], color); 368 rnode = rnode->rn_child[slot]; 369 level--; 370 } 371 CTR2(KTR_VM, "lookup: tree %p, index %p failed", rtree, (void *)index); 372 373 return NULL; 374} 375 376void * 377vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color) 378{ 379 struct vm_radix_node *rnode; 380 uintptr_t child; 381 int slot; 382 int level; 383 384 level = vm_radix_height(rtree, &rnode); 385 if (index > VM_RADIX_MAX(level)) 386 return NULL; 387 level--; 388 while (rnode) { 389 slot = vm_radix_slot(index, level); 390 CTR5(KTR_VM, 391 "color: tree %p, index %p, level %d, slot %d, child %p", 392 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 393 if (level == 0) 394 break; 395 rnode = rnode->rn_child[slot]; 396 level--; 397 } 398 if (rnode == NULL || rnode->rn_child[slot] == NULL) 399 return (NULL); 400 child = (uintptr_t)rnode->rn_child[slot]; 401 child &= ~VM_RADIX_FLAGS; 402 rnode->rn_child[slot] = (void *)(child | color); 403 404 return (void *)child; 405} 406 407/* 408 * Find the first leaf with a valid node between *startp and end. Return 409 * the index of the first valid item in the leaf in *startp. 410 */ 411static struct vm_radix_node * 412vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end) 413{ 414 struct vm_radix_node *rnode; 415 vm_pindex_t start; 416 vm_pindex_t inc; 417 int slot; 418 int level; 419 420 start = *startp; 421restart: 422 level = vm_radix_height(rtree, &rnode); 423 if (start > VM_RADIX_MAX(level) || (end && start >= end)) { 424 rnode = NULL; 425 goto out; 426 } 427 /* 428 * Search the tree from the top for any leaf node holding an index 429 * between start and end. 430 */ 431 for (level--; level; level--) { 432 slot = vm_radix_slot(start, level); 433 CTR5(KTR_VM, 434 "leaf: tree %p, index %p, level %d, slot %d, child %p", 435 rtree, (void *)start, level, slot, rnode->rn_child[slot]); 436 if (rnode->rn_child[slot] != NULL) { 437 rnode = rnode->rn_child[slot]; 438 continue; 439 } 440 /* 441 * Calculate how much to increment our index by 442 * based on the tree level. We must truncate the 443 * lower bits to start from the begnning of the 444 * next leaf. 445 */ 446 inc = 1LL << (level * VM_RADIX_WIDTH); 447 start &= ~VM_RADIX_MAX(level); 448 start += inc; 449 slot++; 450 CTR5(KTR_VM, 451 "leaf: start %p end %p inc %d mask 0x%lX slot %d", 452 (void *)start, (void *)end, inc, 453 ~VM_RADIX_MAX(level), slot); 454 for (; slot < VM_RADIX_COUNT; slot++, start += inc) { 455 if (end != 0 && start >= end) { 456 rnode = NULL; 457 goto out; 458 } 459 if (rnode->rn_child[slot]) { 460 rnode = rnode->rn_child[slot]; 461 break; 462 } 463 } 464 if (slot == VM_RADIX_COUNT) 465 goto restart; 466 } 467 468out: 469 *startp = start; 470 return (rnode); 471} 472 473 474 475/* 476 * Looks up as many as cnt values between start and end, and stores 477 * them in the caller allocated array out. The next index can be used 478 * to restart the scan. This optimizes forward scans in the tree. 479 */ 480int 481vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start, 482 vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next) 483{ 484 struct vm_radix_node *rnode; 485 void *val; 486 int slot; 487 int outidx; 488 489 CTR3(KTR_VM, "lookupn: tree %p, start %p, end %p", 490 rtree, (void *)start, (void *)end); 491 if (rtree->rt_root == 0) 492 return (0); 493 outidx = 0; 494 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { 495 slot = vm_radix_slot(start, 0); 496 for (; slot < VM_RADIX_COUNT; slot++, start++) { 497 if (end != 0 && start >= end) 498 goto out; 499 val = vm_radix_match(rnode->rn_child[slot], color); 500 if (val == NULL) 501 continue; 502 CTR4(KTR_VM, 503 "lookupn: tree %p index %p slot %d found child %p", 504 rtree, (void *)start, slot, val); 505 out[outidx] = val; 506 if (++outidx == cnt) 507 goto out; 508 } 509 if (end != 0 && start >= end) 510 break; 511 } 512out: 513 *next = start; 514 return (outidx); 515} 516 517void 518vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end, 519 int color, void (*iter)(void *)) 520{ 521 struct vm_radix_node *rnode; 522 void *val; 523 int slot; 524 525 if (rtree->rt_root == 0) 526 return; 527 while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { 528 slot = vm_radix_slot(start, 0); 529 for (; slot < VM_RADIX_COUNT; slot++, start++) { 530 if (end != 0 && start >= end) 531 return; 532 val = vm_radix_match(rnode->rn_child[slot], color); 533 if (val) 534 iter(val); 535 } 536 if (end != 0 && start >= end) 537 return; 538 } 539} 540 541 542/* 543 * Look up any entry at a position less than or equal to index. 544 */ 545void * 546vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color) 547{ 548 struct vm_radix_node *rnode; 549 struct vm_radix_node *child; 550 vm_pindex_t max; 551 vm_pindex_t inc; 552 void *val; 553 int slot; 554 int level; 555 556 CTR2(KTR_VM, 557 "lookup_le: tree %p, index %p", rtree, (void *)index); 558restart: 559 level = vm_radix_height(rtree, &rnode); 560 if (rnode == NULL) 561 return (NULL); 562 max = VM_RADIX_MAX(level); 563 if (index > max || index == 0) 564 index = max; 565 /* 566 * Search the tree from the top for any leaf node holding an index 567 * lower than 'index'. 568 */ 569 level--; 570 while (rnode) { 571 slot = vm_radix_slot(index, level); 572 CTR5(KTR_VM, 573 "lookup_le: tree %p, index %p, level %d, slot %d, child %p", 574 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 575 if (level == 0) 576 break; 577 /* 578 * If we don't have an exact match we must start our search 579 * from the next leaf and adjust our index appropriately. 580 */ 581 if ((child = rnode->rn_child[slot]) == NULL) { 582 /* 583 * Calculate how much to decrement our index by 584 * based on the tree level. We must set the 585 * lower bits to start from the end of the next 586 * leaf. 587 */ 588 inc = 1LL << (level * VM_RADIX_WIDTH); 589 index |= VM_RADIX_MAX(level); 590 index -= inc; 591 slot--; 592 CTR4(KTR_VM, 593 "lookup_le: start %p inc %ld mask 0x%lX slot %d", 594 (void *)index, inc, VM_RADIX_MAX(level), slot); 595 for (; slot >= 0; slot--, index -= inc) { 596 child = rnode->rn_child[slot]; 597 if (child) 598 break; 599 } 600 } 601 rnode = child; 602 level--; 603 } 604 if (rnode) { 605 for (; slot >= 0; slot--, index--) { 606 val = vm_radix_match(rnode->rn_child[slot], color); 607 if (val) 608 return (val); 609 } 610 } 611 if (index != -1) 612 goto restart; 613 return (NULL); 614} 615 616/* 617 * Remove the specified index from the tree. If possible the height of the 618 * tree is adjusted after deletion. The value stored at index is returned 619 * panics if the key is not present. 620 */ 621void * 622vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color) 623{ 624 struct vm_radix_node *stack[VM_RADIX_LIMIT]; 625 struct vm_radix_node *rnode, *root; 626 void *val; 627 int level; 628 int slot; 629 630 level = vm_radix_height(rtree, &root); 631 KASSERT(index <= VM_RADIX_MAX(level), 632 ("vm_radix_remove: %p index %jd out of range %jd.", 633 rtree, index, VM_RADIX_MAX(level))); 634 rnode = root; 635 val = NULL; 636 level--; 637 /* 638 * Find the node and record the path in stack. 639 */ 640 while (level && rnode) { 641 stack[level] = rnode; 642 slot = vm_radix_slot(index, level); 643 rnode = rnode->rn_child[slot]; 644 CTR5(KTR_VM, 645 "remove: tree %p, index %p, level %d, slot %d, child %p", 646 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 647 level--; 648 } 649 KASSERT(rnode != NULL, 650 ("vm_radix_remove: index %jd not present in the tree.\n", index)); 651 slot = vm_radix_slot(index, 0); 652 val = vm_radix_match(rnode->rn_child[slot], color); 653 KASSERT(val != NULL, 654 ("vm_radix_remove: index %jd not present in the tree.\n", index)); 655 656 for (;;) { 657 rnode->rn_child[slot] = NULL; 658 /* 659 * Use atomics for the last level since red and black 660 * will both adjust it. 661 */ 662 if (level == 0) 663 atomic_add_int((volatile int *)&rnode->rn_count, -1); 664 else 665 rnode->rn_count--; 666 /* 667 * Only allow black removes to prune the tree. 668 */ 669 if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0) 670 break; 671 vm_radix_node_put(rnode); 672 if (rnode == root) { 673 vm_radix_setroot(rtree, NULL, 0); 674 break; 675 } 676 rnode = stack[++level]; 677 slot = vm_radix_slot(index, level); 678 679 } 680 return (val); 681} 682 683/* 684 * Remove and free all the nodes from the radix tree. 685 * This function is recrusive but there is a tight control on it as the 686 * maximum depth of the tree is fixed. 687 */ 688void 689vm_radix_reclaim_allnodes(struct vm_radix *rtree) 690{ 691 struct vm_radix_node *root; 692 int level; 693 694 if (rtree->rt_root == 0) 695 return; 696 level = vm_radix_height(rtree, &root); 697 vm_radix_reclaim_allnodes_internal(root, level - 1); 698 rtree->rt_root = 0; 699} 700 701/* 702 * Attempts to reduce the height of the tree. 703 */ 704void 705vm_radix_shrink(struct vm_radix *rtree) 706{ 707 struct vm_radix_node *tmp, *root; 708 int level; 709 710 if (rtree->rt_root == 0) 711 return; 712 level = vm_radix_height(rtree, &root); 713 714 /* Adjust the height of the tree. */ 715 while (root->rn_count == 1 && root->rn_child[0] != NULL) { 716 tmp = root; 717 root->rn_count--; 718 root = root->rn_child[0]; 719 level--; 720 vm_radix_node_put(tmp); 721 } 722 /* Finally see if we have an empty tree. */ 723 if (root->rn_count == 0) { 724 vm_radix_node_put(root); 725 root = NULL; 726 level--; 727 } 728 vm_radix_setroot(rtree, root, level); 729} 730